Skip to content

Commit

Permalink
files: WIP of supporting arbitrary merging of ACH files
Browse files Browse the repository at this point in the history
  • Loading branch information
adamdecaf committed May 23, 2019
1 parent 8704605 commit ba4f34f
Show file tree
Hide file tree
Showing 4 changed files with 322 additions and 0 deletions.
25 changes: 25 additions & 0 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,31 @@ func (f *File) AddBatch(batch Batcher) []Batcher {
return f.Batches
}

// RemoveBatch deletes the first entry equal to batch from an ach.File.
//
// The slice searched first depends on batch.Category(): CategoryNOC batches are
// looked up in f.NotificationOfChange and CategoryReturn batches in f.ReturnEntries.
// If nothing matches there (or the batch has any other category) f.Batches is searched.
// At most one entry is removed; if no entry is equal to batch the File is left unchanged.
func (f *File) RemoveBatch(batch Batcher) {
	if batch.Category() == CategoryNOC {
		for i := range f.NotificationOfChange {
			if f.NotificationOfChange[i].Equal(batch) {
				f.NotificationOfChange = append(f.NotificationOfChange[:i], f.NotificationOfChange[i+1:]...)
				return
			}
		}
	}
	if batch.Category() == CategoryReturn {
		for i := range f.ReturnEntries {
			if f.ReturnEntries[i].Equal(batch) {
				// Keep everything before index i (not a fixed prefix) and everything after it.
				f.ReturnEntries = append(f.ReturnEntries[:i], f.ReturnEntries[i+1:]...)
				return
			}
		}
	}
	for i := range f.Batches {
		if f.Batches[i].Equal(batch) {
			// Splice out element i: prefix [:i] followed by the tail [i+1:].
			f.Batches = append(f.Batches[:i], f.Batches[i+1:]...)
			// Stop immediately: the range bounds were computed against the slice's
			// previous length, so continuing would index past the shortened slice.
			return
		}
	}
}

// AddIATBatch appends a IATBatch to the ach.File
func (f *File) AddIATBatch(iatBatch IATBatch) []IATBatch {
f.IATBatches = append(f.IATBatches, iatBatch)
Expand Down
113 changes: 113 additions & 0 deletions merge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright 2019 The Moov Authors
// Use of this source code is governed by an Apache License
// license that can be found in the LICENSE file.

package ach

import (
"bufio"
"bytes"
"fmt"
"time"
)

// MergeFiles is a helper function for consolidating an array of ACH Files into as few files
// as possible. This is useful for optimizing cost and network efficiency.
//
// A merged file is treated as full once it renders to more than 10,000 lines (as reported
// by lineCount). When adding a batch would push a file over that limit the batch is taken
// back out, the full file is set aside and a new file with the same header details takes
// its place for the remaining batches.
//
// Files are grouped by their header's ImmediateDestination and ImmediateOrigin (see
// lookupByHeader). A batch is only added to a merged file if no equal batch is already in it.
// The first input file seen for a given header is reused as the merged file, so input files
// may be modified. Nil entries in files are skipped.
//
// The returned slice holds the files that reached the line limit followed by the remaining
// merged files. An error is returned if a file cannot be rebuilt with Create or its line
// count cannot be determined.
func MergeFiles(files []*File) ([]*File, error) {
	// maxLines is the largest rendered line count a merged file may have.
	const maxLines = 10000

	fs := &mergableFiles{infiles: files}
	for i := range fs.infiles {
		if fs.infiles[i] == nil {
			continue
		}
		outf := fs.lookupByHeader(fs.infiles[i])
		for j := range fs.infiles[i].Batches {
			batch := fs.infiles[i].Batches[j]

			batchExistsInMerged := false
			for k := range outf.Batches {
				if batch.Equal(outf.Batches[k]) {
					batchExistsInMerged = true
					break
				}
			}
			if batchExistsInMerged {
				continue
			}

			outf.AddBatch(batch)
			// Rebuild the file we just changed (the output file, not the input file)
			// so the line count below reflects the added batch.
			if err := outf.Create(); err != nil {
				return nil, err
			}
			n := lineCount(outf)
			if n == 0 {
				return nil, fmt.Errorf("problem getting line count of File=%s", outf.ID)
			}
			if n <= maxLines {
				continue
			}

			// The batch pushed outf over the limit: take it back out and retire outf.
			outf.RemoveBatch(batch)
			if err := outf.Create(); err != nil { // rebalance ACH file after removing the Batch
				return nil, err
			}
			fs.locMaxed = append(fs.locMaxed, outf)

			// Swap the retired file for its replacement in outfiles. Without this the
			// replacement (and every batch added to it) would never be returned, and
			// the retired file would be returned twice.
			next := fs.create(outf)
			for k := range fs.outfiles {
				if fs.outfiles[k] == outf {
					fs.outfiles[k] = next
					break
				}
			}
			outf = next
			outf.AddBatch(batch)
			if err := outf.Create(); err != nil { // build the replacement now that it has a batch
				return nil, err
			}
		}
	}

	// TODO(adam): We should also look at consolidating EntryDetail records inside Batches

	return append(fs.locMaxed, fs.outfiles...), nil // return LOC-maxed files and merged files
}

// mergableFiles holds the working state of a MergeFiles call.
type mergableFiles struct {
// infiles are the files passed to MergeFiles.
infiles []*File
// outfiles are the merged files still being filled; lookupByHeader searches and appends to this slice.
outfiles []*File
// locMaxed are merged files that MergeFiles set aside after they exceeded its line limit.
locMaxed []*File
}

// create returns a new File whose Header is a copy of f.Header with the creation
// date and time replaced by the current time. The new file is not added to
// fs.outfiles; the caller decides where to keep it.
func (fs *mergableFiles) create(f *File) *File {
	stamp := time.Now()

	fresh := NewFile()
	fresh.Header = f.Header
	fresh.Header.FileCreationDate = stamp.Format("060102") // YYMMDD
	fresh.Header.FileCreationTime = stamp.Format("1504")   // HHMM

	// NOTE(review): the error from Create is discarded here. Whether Create can
	// succeed on a file that has no batches yet is not visible from this code — confirm.
	_ = fresh.Create()

	return fresh
}

// lookupByHeader returns the first File in fs.outfiles whose Header has the same
// ImmediateDestination and ImmediateOrigin as f.Header.
// This is done because we append batches into files to minimize the count of output files.
//
// If no file in fs.outfiles matches, f itself is appended to fs.outfiles and returned.
func (fs *mergableFiles) lookupByHeader(f *File) *File {
	for _, candidate := range fs.outfiles {
		sameDestination := candidate.Header.ImmediateDestination == f.Header.ImmediateDestination
		if sameDestination && candidate.Header.ImmediateOrigin == f.Header.ImmediateOrigin {
			return candidate
		}
	}

	// Nothing to merge into yet, so f becomes the output file for its header.
	fs.outfiles = append(fs.outfiles, f)
	return f
}

// lineCount returns the number of non-empty lines produced when f is written with NewWriter.
//
// Files with fewer than 100 batches are not written out at all; 1 is returned for them
// as a cheap stand-in for "some valid, small count".
//
// 0 is returned when the file cannot be written or the written output cannot be scanned,
// so callers should treat 0 as a failure.
func lineCount(f *File) int {
	if len(f.Batches) < 100 {
		// Ignore Files with low batch counts by returning a valid count.
		// Calling Writer.Write() is costly and so we're going to ignore it in easy cases.
		return 1
	}

	var buf bytes.Buffer
	if err := NewWriter(&buf).Write(f); err != nil {
		return 0
	}

	lines := 0
	s := bufio.NewScanner(&buf)
	for s.Scan() {
		if len(s.Bytes()) > 0 {
			lines++
		}
	}
	// Scan stops early on an error (e.g. an over-long token); a partial count
	// must not be reported as if it were the real one.
	if err := s.Err(); err != nil {
		return 0
	}
	return lines
}
170 changes: 170 additions & 0 deletions merge_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// Copyright 2019 The Moov Authors
// Use of this source code is governed by an Apache License
// license that can be found in the LICENSE file.

package ach

import (
"fmt"
"path/filepath"
"testing"
)

// filesAreEqual compares selected parts of f1 and f2 and returns an error describing
// the first difference found, or nil if none of the checks fail.
//
// Checked, in order: header origin and destination, batch count, that every batch in
// f1 has an equal batch somewhere in f2, IAT batch count, and the file control's
// entry/addenda count and debit/credit totals.
func filesAreEqual(f1, f2 *File) error {
	// File Header
	if a, b := f1.Header.ImmediateOrigin, f2.Header.ImmediateOrigin; a != b {
		return fmt.Errorf("f1.Header.ImmediateOrigin=%s vs f2.Header.ImmediateOrigin=%s", a, b)
	}
	if a, b := f1.Header.ImmediateDestination, f2.Header.ImmediateDestination; a != b {
		return fmt.Errorf("f1.Header.ImmediateDestination=%s vs f2.Header.ImmediateDestination=%s", a, b)
	}

	// Batches
	if a, b := len(f1.Batches), len(f2.Batches); a != b {
		return fmt.Errorf("len(f1.Batches)=%d vs len(f2.Batches)=%d", a, b)
	}
	for i := range f1.Batches {
		matched := false
		for j := range f2.Batches {
			if f1.Batches[i].Equal(f2.Batches[j]) {
				matched = true
				break
			}
		}
		if !matched {
			return fmt.Errorf("unable to find batch in f2: %v", f1.Batches[i])
		}
	}

	// IATBatches
	if a, b := len(f1.IATBatches), len(f2.IATBatches); a != b {
		return fmt.Errorf("len(f1.IATBatches)=%d vs len(f2.IATBatches)=%d", a, b)
	}

	// File Control
	if a, b := f1.Control.EntryAddendaCount, f2.Control.EntryAddendaCount; a != b {
		return fmt.Errorf("f1.Control.EntryAddendaCount=%d vs f2.Control.EntryAddendaCount=%d", a, b)
	}
	if a, b := f1.Control.TotalDebitEntryDollarAmountInFile, f2.Control.TotalDebitEntryDollarAmountInFile; a != b {
		return fmt.Errorf("f1.Control.TotalDebitEntryDollarAmountInFile=%d vs f2.Control.TotalDebitEntryDollarAmountInFile=%d", a, b)
	}
	if a, b := f1.Control.TotalCreditEntryDollarAmountInFile, f2.Control.TotalCreditEntryDollarAmountInFile; a != b {
		return fmt.Errorf("f1.Control.TotalCreditEntryDollarAmountInFile=%d vs f2.Control.TotalCreditEntryDollarAmountInFile=%d", a, b)
	}

	return nil
}

// TestMergeFiles__filesAreEqual checks the filesAreEqual helper itself: a file must
// compare equal to itself and unequal to a copy whose header origin was changed.
func TestMergeFiles__filesAreEqual(t *testing.T) {
	original, err := readACHFilepath(filepath.Join("test", "testdata", "ppd-debit.ach"))
	if err != nil {
		t.Fatal(err)
	}

	// compare a file against itself
	if err := filesAreEqual(original, original); err != nil {
		t.Fatalf("same file: %v", err)
	}

	// break the equality on a copy so the original stays untouched
	altered := *original
	altered.Header.ImmediateOrigin = "12"
	err = filesAreEqual(original, &altered)
	if err == nil {
		t.Fatal("expected error")
	}
}

// TestMergeFiles__identity merges a single file and expects exactly one output file
// that filesAreEqual considers the same as the input.
func TestMergeFiles__identity(t *testing.T) {
	file, err := readACHFilepath(filepath.Join("test", "testdata", "ppd-debit.ach"))
	if err != nil {
		t.Fatal(err)
	}

	out, err := MergeFiles([]*File{file})
	if err != nil {
		t.Fatal(err)
	}
	// Stop here on a wrong count: indexing out[0] below would panic on an empty result.
	if len(out) != 1 {
		t.Fatalf("got %d merged ACH files", len(out))
	}
	if err := filesAreEqual(file, out[0]); err != nil {
		t.Errorf("unequal files:%v", err)
	}
}

// TestMergeFiles__together gives two files the same header and expects MergeFiles to
// combine their batches (1 + 3) into a single output file.
func TestMergeFiles__together(t *testing.T) {
	f1, err := readACHFilepath(filepath.Join("test", "testdata", "ppd-debit.ach"))
	if err != nil {
		t.Fatal(err)
	}
	f2, err := readACHFilepath(filepath.Join("test", "testdata", "web-debit.ach"))
	if err != nil {
		t.Fatal(err)
	}
	f2.Header = f1.Header // replace Header so they're merged into one file

	if len(f1.Batches) != 1 || len(f2.Batches) != 3 {
		t.Errorf("did batch counts change? f1:%d f2:%d", len(f1.Batches), len(f2.Batches))
	}

	out, err := MergeFiles([]*File{f1, f2})
	if err != nil {
		t.Fatal(err)
	}
	// Stop here on a wrong count: indexing out[0] below would panic on an empty result.
	if len(out) != 1 {
		t.Fatalf("got %d merged ACH files", len(out))
	}
	if len(out[0].Batches) != 4 {
		t.Errorf("got %d batches", len(out[0].Batches))
	}
}

// TestMergeFiles__apart merges two files that keep their own headers and expects
// them to stay separate, with their batch counts (1 and 3) unchanged.
func TestMergeFiles__apart(t *testing.T) {
	f1, err := readACHFilepath(filepath.Join("test", "testdata", "ppd-debit.ach"))
	if err != nil {
		t.Fatal(err)
	}
	f2, err := readACHFilepath(filepath.Join("test", "testdata", "web-debit.ach"))
	if err != nil {
		t.Fatal(err)
	}

	out, err := MergeFiles([]*File{f1, f2})
	if err != nil {
		t.Fatal(err)
	}
	// Stop here on a wrong count: indexing out[0] / out[1] below would panic
	// if fewer than two files came back.
	if len(out) != 2 {
		t.Fatalf("got %d merged ACH files", len(out))
	}
	if len(out[0].Batches) != 1 {
		t.Errorf("got %d batches", len(out[0].Batches))
	}
	if len(out[1].Batches) != 3 {
		t.Errorf("got %d batches", len(out[1].Batches))
	}
}

// TestMergeFiles__lineCount covers both paths of lineCount: the shortcut value for a
// file with few batches, and a real count once the file holds enough batches to be
// written out.
func TestMergeFiles__lineCount(t *testing.T) {
	achFile, err := readACHFilepath(filepath.Join("test", "testdata", "ppd-debit.ach"))
	if err != nil {
		t.Fatal(err)
	}
	if err = achFile.Create(); err != nil {
		t.Fatal(err)
	}

	// We've optimized small file line counts to bypass writing out the file
	// into plain text as it's costly.
	small := lineCount(achFile)
	if small != 1 {
		t.Errorf("did we change optimizations? n=%d", small)
	}

	// Add 100 batches to file and get a real line count
	const extraBatches = 100
	for added := 0; added < extraBatches; added++ {
		achFile.AddBatch(achFile.Batches[0])
	}
	if err = achFile.Create(); err != nil {
		t.Fatal(err)
	}
	large := lineCount(achFile)
	if large != 310 {
		t.Errorf("unexpected line count of %d", large)
	}
}
14 changes: 14 additions & 0 deletions reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@ import (
"github.com/moov-io/base"
)

// readACHFilepath opens the file at path, parses it with NewReader and returns a
// pointer to the resulting File. The file handle is closed before returning.
// Errors from opening or reading are returned unchanged, with a nil File.
func readACHFilepath(path string) (*File, error) {
	handle, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	defer handle.Close()

	parsed, readErr := NewReader(handle).Read()
	if readErr != nil {
		return nil, readErr
	}
	return &parsed, nil
}

func TestReader__crashers(t *testing.T) {
dir := filepath.Join("test", "testdata", "crashes")
fds, err := ioutil.ReadDir(dir)
Expand Down

0 comments on commit ba4f34f

Please sign in to comment.