From aae2f1f04413572c13cd11eb8964f75c39fa1bfc Mon Sep 17 00:00:00 2001 From: Rudy Date: Tue, 2 Oct 2018 01:26:56 +0700 Subject: [PATCH 1/5] Add new config for csv column explict type conversion --- plugins/parsers/csv/README.md | 5 +++++ plugins/parsers/csv/parser.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/plugins/parsers/csv/README.md b/plugins/parsers/csv/README.md index 532980991ee5b..e4cfbfc372842 100644 --- a/plugins/parsers/csv/README.md +++ b/plugins/parsers/csv/README.md @@ -27,6 +27,11 @@ values. ## If `csv_header_row_count` is set to 0, this config must be used csv_column_names = [] + ## For assigning explicit data types to columns. + ## Supported types: "int", "float", "bool", "string". + ## If this is not specified, type conversion will be done on the types above. + csv_column_types = [] + ## Indicates the number of rows to skip before looking for header information. csv_skip_rows = 0 diff --git a/plugins/parsers/csv/parser.go b/plugins/parsers/csv/parser.go index 8e0b8b47e9132..8807d7529032c 100644 --- a/plugins/parsers/csv/parser.go +++ b/plugins/parsers/csv/parser.go @@ -21,6 +21,7 @@ type Parser struct { Comment string TrimSpace bool ColumnNames []string + ColumnTypes []string TagColumns []string MeasurementColumn string TimestampColumn string @@ -148,6 +149,35 @@ outer: } } + // In case if defined column names & types count don't match. + if i < len(p.ColumnTypes) { + var val interface{} + var err error + + switch p.ColumnTypes[i] { + case "int": + val, err = strconv.ParseInt(value, 10, 64) + if err != nil { + return nil, fmt.Errorf("column type: parse int error %s", err) + } + case "float": + val, err = strconv.ParseFloat(value, 64) + if err != nil { + return nil, fmt.Errorf("column type: parse float error %s", err) + } + case "bool": + val, err = strconv.ParseBool(value) + if err != nil { + return nil, fmt.Errorf("column type: parse bool error %s", err) + } + default: + val = value + } + + recordFields[fieldName] = val + continue + } + // attempt type conversions if iValue, err := strconv.ParseInt(value, 10, 64); err == nil { recordFields[fieldName] = iValue From c7998e611a6ea9080ee84c225de283b3d491536b Mon Sep 17 00:00:00 2001 From: Rudy Date: Tue, 2 Oct 2018 07:26:24 +0700 Subject: [PATCH 2/5] Add missing declarations for csv_column_types everywhere --- internal/config/config.go | 13 +++++++++++++ plugins/parsers/registry.go | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/internal/config/config.go b/internal/config/config.go index 3d05109788f24..bd919d44a2ce9 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1460,6 +1460,18 @@ func getParserConfig(name string, tbl *ast.Table) (*parsers.Config, error) { } } + if node, ok := tbl.Fields["csv_column_types"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if ary, ok := kv.Value.(*ast.Array); ok { + for _, elem := range ary.Value { + if str, ok := elem.(*ast.String); ok { + c.CSVColumnTypes = append(c.CSVColumnTypes, str.Value) + } + } + } + } + } + if node, ok := tbl.Fields["csv_tag_columns"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if ary, ok := kv.Value.(*ast.Array); ok { @@ -1588,6 +1600,7 @@ func getParserConfig(name string, tbl *ast.Table) (*parsers.Config, error) { delete(tbl.Fields, "grok_custom_pattern_files") delete(tbl.Fields, "grok_timezone") delete(tbl.Fields, "csv_column_names") + delete(tbl.Fields, "csv_column_types") delete(tbl.Fields, "csv_comment") delete(tbl.Fields, "csv_delimiter") delete(tbl.Fields, "csv_field_columns") diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index c662cf30034a6..3ec823084a2db 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -127,6 +127,7 @@ type Config struct { //csv configuration CSVColumnNames []string `toml:"csv_column_names"` + CSVColumnTypes []string `toml:"csv_column_types"` CSVComment string `toml:"csv_comment"` CSVDelimiter string `toml:"csv_delimiter"` CSVHeaderRowCount int `toml:"csv_header_row_count"` @@ -195,6 +196,7 @@ func NewParser(config *Config) (Parser, error) { config.CSVComment, config.CSVTrimSpace, config.CSVColumnNames, + config.CSVColumnTypes, config.CSVTagColumns, config.CSVMeasurementColumn, config.CSVTimestampColumn, @@ -216,6 +218,7 @@ func newCSVParser(metricName string, comment string, trimSpace bool, columnNames []string, + columnTypes []string, tagColumns []string, nameColumn string, timestampColumn string, @@ -249,6 +252,7 @@ func newCSVParser(metricName string, Comment: comment, TrimSpace: trimSpace, ColumnNames: columnNames, + ColumnTypes: columnTypes, TagColumns: tagColumns, MeasurementColumn: nameColumn, TimestampColumn: timestampColumn, From d11d470edb26f817be6b78d1d56ba4fc12392984 Mon Sep 17 00:00:00 2001 From: Rudy Date: Tue, 2 Oct 2018 07:30:07 +0700 Subject: [PATCH 3/5] Throw error on first init on invalid csv column counts --- plugins/parsers/csv/parser.go | 59 ++++++++++++++--------------------- plugins/parsers/registry.go | 4 +++ 2 files changed, 27 insertions(+), 36 deletions(-) diff --git a/plugins/parsers/csv/parser.go b/plugins/parsers/csv/parser.go index 8807d7529032c..438db3094b2d6 100644 --- a/plugins/parsers/csv/parser.go +++ b/plugins/parsers/csv/parser.go @@ -149,45 +149,32 @@ outer: } } - // In case if defined column names & types count don't match. - if i < len(p.ColumnTypes) { - var val interface{} - var err error - - switch p.ColumnTypes[i] { - case "int": - val, err = strconv.ParseInt(value, 10, 64) - if err != nil { - return nil, fmt.Errorf("column type: parse int error %s", err) - } - case "float": - val, err = strconv.ParseFloat(value, 64) - if err != nil { - return nil, fmt.Errorf("column type: parse float error %s", err) - } - case "bool": - val, err = strconv.ParseBool(value) - if err != nil { - return nil, fmt.Errorf("column type: parse bool error %s", err) - } - default: - val = value + var val interface{} + var err error + + // Access array directly as we've thrown error on first parser init if + // column counts doesn't match. + switch p.ColumnTypes[i] { + case "int": + val, err = strconv.ParseInt(value, 10, 64) + if err != nil { + return nil, fmt.Errorf("column type: parse int error %s", err) } - - recordFields[fieldName] = val - continue + case "float": + val, err = strconv.ParseFloat(value, 64) + if err != nil { + return nil, fmt.Errorf("column type: parse float error %s", err) + } + case "bool": + val, err = strconv.ParseBool(value) + if err != nil { + return nil, fmt.Errorf("column type: parse bool error %s", err) + } + default: + val = value } - // attempt type conversions - if iValue, err := strconv.ParseInt(value, 10, 64); err == nil { - recordFields[fieldName] = iValue - } else if fValue, err := strconv.ParseFloat(value, 64); err == nil { - recordFields[fieldName] = fValue - } else if bValue, err := strconv.ParseBool(value); err == nil { - recordFields[fieldName] = bValue - } else { - recordFields[fieldName] = value - } + recordFields[fieldName] = val } } diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index 3ec823084a2db..d65d61fab91fa 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -243,6 +243,10 @@ func newCSVParser(metricName string, } } + if len(columnNames) != len(columnTypes) { + return nil, fmt.Errorf("csv_column_names field count doesn't match with csv_column_types") + } + parser := &csv.Parser{ MetricName: metricName, HeaderRowCount: headerRowCount, From 36ce0e97629fe2faf4ee4920c2c0d9013c38e6f4 Mon Sep 17 00:00:00 2001 From: Rudy Date: Tue, 2 Oct 2018 07:47:18 +0700 Subject: [PATCH 4/5] Re-add old logic & improve column check before err throw Column types field should be optional with this commit, only if defined it'll do explicit type conversion. --- plugins/parsers/csv/parser.go | 62 ++++++++++++++++++++++------------- plugins/parsers/registry.go | 2 +- 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/plugins/parsers/csv/parser.go b/plugins/parsers/csv/parser.go index 438db3094b2d6..f18068eb70075 100644 --- a/plugins/parsers/csv/parser.go +++ b/plugins/parsers/csv/parser.go @@ -149,32 +149,50 @@ outer: } } - var val interface{} - var err error - - // Access array directly as we've thrown error on first parser init if - // column counts doesn't match. - switch p.ColumnTypes[i] { - case "int": - val, err = strconv.ParseInt(value, 10, 64) - if err != nil { - return nil, fmt.Errorf("column type: parse int error %s", err) + // Try explicit conversion only when column types is defined. + if len(p.ColumnTypes) > 0 { + // Throw error if current column count exceeds defined types. + if i >= len(p.ColumnTypes) { + return nil, fmt.Errorf("column type: column count exceeded") } - case "float": - val, err = strconv.ParseFloat(value, 64) - if err != nil { - return nil, fmt.Errorf("column type: parse float error %s", err) - } - case "bool": - val, err = strconv.ParseBool(value) - if err != nil { - return nil, fmt.Errorf("column type: parse bool error %s", err) + + var val interface{} + var err error + + switch p.ColumnTypes[i] { + case "int": + val, err = strconv.ParseInt(value, 10, 64) + if err != nil { + return nil, fmt.Errorf("column type: parse int error %s", err) + } + case "float": + val, err = strconv.ParseFloat(value, 64) + if err != nil { + return nil, fmt.Errorf("column type: parse float error %s", err) + } + case "bool": + val, err = strconv.ParseBool(value) + if err != nil { + return nil, fmt.Errorf("column type: parse bool error %s", err) + } + default: + val = value } - default: - val = value + + recordFields[fieldName] = val + continue } - recordFields[fieldName] = val + // attempt type conversions + if iValue, err := strconv.ParseInt(value, 10, 64); err == nil { + recordFields[fieldName] = iValue + } else if fValue, err := strconv.ParseFloat(value, 64); err == nil { + recordFields[fieldName] = fValue + } else if bValue, err := strconv.ParseBool(value); err == nil { + recordFields[fieldName] = bValue + } else { + recordFields[fieldName] = value + } } } diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index d65d61fab91fa..83f871487707f 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -243,7 +243,7 @@ func newCSVParser(metricName string, } } - if len(columnNames) != len(columnTypes) { + if len(columnNames) > 0 && len(columnTypes) > 0 && len(columnNames) != len(columnTypes) { return nil, fmt.Errorf("csv_column_names field count doesn't match with csv_column_types") } From 3759ebafe1f06d226346f69d8d2709d9466cde00 Mon Sep 17 00:00:00 2001 From: Rudy Date: Tue, 2 Oct 2018 08:23:21 +0700 Subject: [PATCH 5/5] Add test case for csvexplicit type conversion --- plugins/parsers/csv/parser_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/plugins/parsers/csv/parser_test.go b/plugins/parsers/csv/parser_test.go index e3668d3ac8c65..eff6f953f5651 100644 --- a/plugins/parsers/csv/parser_test.go +++ b/plugins/parsers/csv/parser_test.go @@ -147,6 +147,18 @@ func TestValueConversion(t *testing.T) { //deep equal fields require.Equal(t, expectedMetric.Fields(), returnedMetric.Fields()) + + // Test explicit type conversion. + p.ColumnTypes = []string{"float", "int", "bool", "string"} + + metrics, err = p.Parse([]byte(testCSV)) + require.NoError(t, err) + + returnedMetric, err2 = metric.New(metrics[0].Name(), metrics[0].Tags(), metrics[0].Fields(), time.Unix(0, 0)) + require.NoError(t, err2) + + //deep equal fields + require.Equal(t, expectedMetric.Fields(), returnedMetric.Fields()) } func TestSkipComment(t *testing.T) {