Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new format "diff-file" as a delta between the base and the delta file #54

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Csvdiff is a difftool to compute changes between two csv files.
- Supports specifying a group of columns as the primary key, i.e. the columns that uniquely identify a row.
- Supports ignoring columns, e.g. ignoring columns like `created_at` timestamps.
- Compares CSVs of a million records in under 2 seconds.
- Supports lot of output formats e.g colored git style output or JSON for post-processing.
- Supports lots of output formats, e.g. colored git-style output or JSON, as well as a diff CSV file for post-processing.

## Why?

Expand Down Expand Up @@ -51,7 +51,7 @@ Usage:

Flags:
--columns ints Selectively compare positions in CSV Eg: 1,2. Default is entire row
-o, --format string Available (rowmark|json|legacy-json|diff|word-diff|color-words) (default "diff")
-o, --format string Available (rowmark|json|legacy-json|diff|word-diff|color-words|diff-file) (default "diff")
-h, --help help for csvdiff
--ignore-columns ints Inverse of --columns flag. This cannot be used if --columns are specified
--include ints Include positions in CSV to display Eg: 1,2. Default is entire row
Expand Down Expand Up @@ -116,6 +116,7 @@ There are a number of formats supported
- `json`: JSON serialization of result
- `legacy-json`: JSON serialization of result in old format
- `rowmark`: Marks each row with ADDED or MODIFIED status.
- `diff-file`: Creates a delta/diff CSV file between the base and delta files. The CSV file contains the headers of the delta file, as well as all additions and current modifications.

## Miscellaneous features

Expand Down
19 changes: 19 additions & 0 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"encoding/csv"
"fmt"
"io"
"io/ioutil"
"os"
"strings"

"github.com/spf13/afero"
Expand All @@ -22,6 +24,7 @@ type Context struct {
deltaFilename string
baseFile afero.File
deltaFile afero.File
tmpFile afero.File
recordCount int
separator rune
lazyQuotes bool
Expand Down Expand Up @@ -67,10 +70,20 @@ func NewContext(
if err != nil {
return nil, err
}

deltaFile, err := fs.Open(deltaFilename)
if err != nil {
return nil, err
}

var tmpFile *os.File

if format == diffFile {
if tmpFile, err = ioutil.TempFile(os.TempDir(), "csvdiff-"); err != nil {
return nil, err
}
}

ctx := &Context{
fs: fs,
primaryKeyPositions: primaryKeyPositions,
Expand All @@ -81,6 +94,7 @@ func NewContext(
deltaFilename: deltaFilename,
baseFile: baseFile,
deltaFile: deltaFile,
tmpFile: tmpFile,
recordCount: baseRecordCount,
separator: separator,
lazyQuotes: lazyQuotes,
Expand Down Expand Up @@ -232,7 +246,12 @@ func (c *Context) Close() {
if c.baseFile != nil {
_ = c.baseFile.Close()
}

if c.deltaFile != nil {
_ = c.deltaFile.Close()
}

if c.tmpFile != nil {
_ = c.tmpFile.Close()
}
}
78 changes: 76 additions & 2 deletions cmd/formatter.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package cmd

import (
"encoding/csv"
"encoding/json"
"fmt"
"io"

"github.com/aswinkarthik/csvdiff/pkg/digest"
"github.com/fatih/color"
"io"
"github.com/pkg/errors"
)

const (
Expand All @@ -15,9 +18,18 @@ const (
lineDiff = "diff"
wordDiff = "word-diff"
colorWords = "color-words"
diffFile = "diff-file"
)

var allFormats = []string{rowmark, jsonFormat, legacyJSONFormat, lineDiff, wordDiff, colorWords}
// allFormats collects the output format identifiers declared in the const
// block above, now including diffFile ("diff-file").
// NOTE(review): presumably used to validate/list the --format flag values;
// confirm against the callers, which are not visible here.
var allFormats = []string{
rowmark,
jsonFormat,
legacyJSONFormat,
lineDiff,
wordDiff,
colorWords,
diffFile,
}

// Formatter can print the differences to stdout
// and accompanying metadata to stderr
Expand Down Expand Up @@ -51,6 +63,8 @@ func (f *Formatter) Format(diff digest.Differences) error {
return f.wordDiff(diff)
case colorWords:
return f.colorWords(diff)
case diffFile:
return f.diffFile(diff)
default:
return fmt.Errorf("formatter not found")
}
Expand Down Expand Up @@ -221,6 +235,66 @@ func (f *Formatter) colorWords(diff digest.Differences) error {
return f.wordLevelDiffs(diff, "%s", "%s")
}

// diffFile writes a delta CSV to the context's temporary file and, once the
// file has been written successfully, reports the file's name on stdout.
//
// The output consists of the header row read from the delta file, followed by
// every added row and then the current version of every modified row.
// Deletions are not written.
//
// It returns an error when no temporary file is available, when the delta
// file cannot be opened or its header row cannot be read, or when writing or
// flushing the CSV output fails.
func (f *Formatter) diffFile(diff digest.Differences) (err error) {
	// Guard against a Context that was built without a temporary file;
	// calling Name() or writing through a nil interface would panic.
	if f.ctx.tmpFile == nil {
		return errors.New("no temporary file available for delta csv output")
	}

	// Deletions are never part of the output. diff is a by-value copy, so
	// clearing fields here only drops this function's references.
	diff.Deletions = nil

	df, err := f.ctx.fs.Open(f.ctx.deltaFilename)
	if err != nil {
		return errors.Wrapf(err, "unable to open delta file %s", f.ctx.deltaFilename)
	}

	// The delta file is only read here, so a close error is not actionable.
	defer func() { _ = df.Close() }()

	r := csv.NewReader(df)
	r.Comma = f.ctx.separator
	r.LazyQuotes = f.ctx.lazyQuotes

	headers, err := r.Read()
	if err != nil {
		if err == io.EOF {
			return errors.Wrap(err, "unable to process headers from csv file for delta file. EOF reached. invalid CSV file")
		}

		return errors.Wrap(err, "unable to process headers from csv file")
	}

	w := csv.NewWriter(f.ctx.tmpFile)
	w.Comma = f.ctx.separator

	if err = writeToCSVFile(w, headers); err != nil {
		return errors.Wrap(err, "unable to write headers to delta csv file")
	}

	for _, addition := range diff.Additions {
		if err = writeToCSVFile(w, addition); err != nil {
			return errors.Wrap(err, "unable to write additions to delta csv file")
		}
	}

	diff.Additions = nil

	for _, modification := range diff.Modifications {
		if err = writeToCSVFile(w, modification.Current); err != nil {
			return errors.Wrap(err, "unable to write current modifications to delta csv file")
		}
	}

	diff.Modifications = nil

	// csv.Writer buffers its output: errors from the underlying file may
	// only surface on Flush, and Flush itself returns nothing, so the
	// writer's sticky error has to be checked explicitly.
	w.Flush()
	if err = w.Error(); err != nil {
		return errors.Wrap(err, "unable to flush delta csv file")
	}

	// Announce the file only after it has been written completely. The
	// notice is best-effort, so a failure to print it is ignored.
	_, _ = fmt.Fprintf(f.stdout, "created delta file: %s\n", f.ctx.tmpFile.Name())

	return nil
}

// writeToCSVFile emits a single record through the given CSV writer.
// A failed write is returned wrapped with a note that the row was destined
// for the temporary file; a successful write yields nil.
func writeToCSVFile(w *csv.Writer, fields []string) error {
	writeErr := w.Write(fields)
	if writeErr == nil {
		return nil
	}

	return errors.Wrap(writeErr, "Failed to write csv row to temporary file")
}

func (f *Formatter) wordLevelDiffs(diff digest.Differences, deletionFormat, additionFormat string) error {
includes := f.ctx.GetIncludeColumnPositions()
if len(includes) <= 0 {
Expand Down
43 changes: 43 additions & 0 deletions cmd/formatter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ package cmd

import (
"bytes"
"os"
"testing"

"github.com/aswinkarthik/csvdiff/pkg/digest"
"github.com/spf13/afero"

"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -253,6 +255,47 @@ deletions
assert.Equal(t, expectedStderr, stderr.String())
}

// TestDiffFile verifies the "diff-file" format: the temporary file must
// contain the delta file's header row, the additions and the current side of
// each modification (deletions are omitted), and stdout must announce the
// temporary file's name.
func TestDiffFile(t *testing.T) {
	const (
		deltaPath = "/delta.csv"
		tmpPath   = "/tmp/csvdiff-tmp"
	)

	diff := digest.Differences{
		Additions:     []digest.Addition{[]string{"addition"}},
		Modifications: []digest.Modification{{Original: []string{"original"}, Current: []string{"modification"}}},
		Deletions:     []digest.Deletion{{"deletions"}},
	}

	expectedDiff := "col\naddition\nmodification\n"
	expectedStdout := "created delta file: " + tmpPath + "\n"

	var stdout, stderr bytes.Buffer

	// Everything runs against an in-memory filesystem so the test never
	// touches the real disk.
	fs := afero.NewMemMapFs()

	if err := afero.WriteFile(fs, deltaPath, []byte("col"), os.ModePerm); err != nil {
		t.Fatalf("error while preparing delta file within in-memory filesystem: %v", err)
	}

	tmpFile, err := fs.Create(tmpPath)
	if err != nil {
		t.Fatalf("error while creating tmp file: %v", err)
	}

	formatter := NewFormatter(&stdout, &stderr, Context{
		fs:            fs,
		format:        "diff-file",
		deltaFilename: deltaPath,
		tmpFile:       tmpFile,
	})

	assert.NoError(t, formatter.Format(diff))
	assert.Equal(t, expectedStdout, stdout.String())

	// A read failure must fail the test on its own rather than surface as a
	// misleading content mismatch.
	givenDiff, err := afero.ReadFile(fs, tmpPath)
	if err != nil {
		t.Fatalf("error while reading generated diff file: %v", err)
	}

	assert.Equal(t, expectedDiff, string(givenDiff))
}

func TestWrongFormatter(t *testing.T) {
diff := digest.Differences{}
formatter := NewFormatter(nil, nil, Context{format: "random-str"})
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ require (
github.com/cespare/xxhash v1.1.0
github.com/fatih/color v1.7.0
github.com/mattn/go-colorable v0.1.2 // indirect
github.com/pkg/errors v0.9.1
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/afero v1.1.2
github.com/spf13/cobra v0.0.5
Expand Down
9 changes: 3 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI=
github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
Expand All @@ -10,7 +9,6 @@ github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc
github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
Expand All @@ -28,10 +26,11 @@ github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
Expand All @@ -46,20 +45,18 @@ github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnIn
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=