Skip to content

Commit

Permalink
Implement --csv-trim-leading-space flag
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Apr 20, 2023
1 parent b31c2ed commit eb34c98
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 8 deletions.
9 changes: 9 additions & 0 deletions internal/pkg/cli/option_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -2173,6 +2173,15 @@ var CSVTSVOnlyFlagSection = FlagSection{
},
},

{
	// Boolean flag: takes no argument. Sets the reader option which the CSV
	// record reader forwards to the TrimLeadingSpace field of Go's
	// encoding/csv reader.
	name: "--csv-trim-leading-space",
	help: `Trims leading spaces in CSV data. Use this for data like '"foo", "bar"' which is non-RFC-4180 compliant, but common.`,
	parser: func(args []string, argc int, pargi *int, options *TOptions) {
		options.ReaderOptions.CSVTrimLeadingSpace = true
		// Consume only the flag itself; there is no value to skip over.
		*pargi += 1
	},
},

{
name: "--quote-all",
help: "Force double-quoting of CSV fields.",
Expand Down
1 change: 1 addition & 0 deletions internal/pkg/cli/option_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ type TReaderOptions struct {
UseImplicitCSVHeader bool
AllowRaggedCSVInput bool
CSVLazyQuotes bool
CSVTrimLeadingSpace bool

CommentHandling TCommentHandling
CommentString string
Expand Down
19 changes: 11 additions & 8 deletions internal/pkg/input/record_reader_csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ import (

// ----------------------------------------------------------------
type RecordReaderCSV struct {
readerOptions *cli.TReaderOptions
recordsPerBatch int64 // distinct from readerOptions.RecordsPerBatch for join/repl
ifs0 byte // Go's CSV library only lets its 'Comma' be a single character
csvLazyQuotes bool // Maps directly to Go's CSV library's LazyQuotes
readerOptions *cli.TReaderOptions
recordsPerBatch int64 // distinct from readerOptions.RecordsPerBatch for join/repl
ifs0 byte // Go's CSV library only lets its 'Comma' be a single character
csvLazyQuotes bool // Maps directly to Go's CSV library's LazyQuotes
csvTrimLeadingSpace bool // Maps directly to Go's CSV library's TrimLeadingSpace

filename string
rowNumber int64
Expand All @@ -40,10 +41,11 @@ func NewRecordReaderCSV(
return nil, fmt.Errorf("for CSV, IFS can only be a single character")
}
return &RecordReaderCSV{
readerOptions: readerOptions,
ifs0: readerOptions.IFS[0],
recordsPerBatch: recordsPerBatch,
csvLazyQuotes: readerOptions.CSVLazyQuotes,
readerOptions: readerOptions,
ifs0: readerOptions.IFS[0],
recordsPerBatch: recordsPerBatch,
csvLazyQuotes: readerOptions.CSVLazyQuotes,
csvTrimLeadingSpace: readerOptions.CSVTrimLeadingSpace,
}, nil
}

Expand Down Expand Up @@ -105,6 +107,7 @@ func (reader *RecordReaderCSV) processHandle(
csvReader := csv.NewReader(NewBOMStrippingReader(handle))
csvReader.Comma = rune(reader.ifs0)
csvReader.LazyQuotes = reader.csvLazyQuotes
csvReader.TrimLeadingSpace = reader.csvTrimLeadingSpace
csvRecordsChannel := make(chan *list.List, recordsPerBatch)
go channelizedCSVRecordScanner(csvReader, csvRecordsChannel, downstreamDoneChannel, errorChannel,
recordsPerBatch)
Expand Down

0 comments on commit eb34c98

Please sign in to comment.