From e12eced211aa1d185c89e959a874b6fb3aff0fb9 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 21 Jun 2018 11:26:14 -0700 Subject: [PATCH 01/51] input plugin that reads files each interval --- plugins/inputs/reader/reader.go | 106 +++++++++++++++++++++ plugins/inputs/reader/reader_test.go | 41 ++++++++ plugins/inputs/reader/testfiles/json_a.log | 14 +++ 3 files changed, 161 insertions(+) create mode 100644 plugins/inputs/reader/reader.go create mode 100644 plugins/inputs/reader/reader_test.go create mode 100644 plugins/inputs/reader/testfiles/json_a.log diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go new file mode 100644 index 0000000000000..74b180e250e6b --- /dev/null +++ b/plugins/inputs/reader/reader.go @@ -0,0 +1,106 @@ +package reader + +import ( + "io/ioutil" + "log" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal/globpath" + "github.com/influxdata/telegraf/plugins/parsers" +) + +type Reader struct { + Filepaths []string `toml:"files"` + FromBeginning bool + DataFormat string `toml:"data_format"` + ParserConfig parsers.Config + Parser parsers.Parser + Tags []string + + Filenames []string +} + +const sampleConfig = `## Log files to parse. +## These accept standard unix glob matching rules, but with the addition of +## ** as a "super asterisk". ie: +## /var/log/**.log -> recursively find all .log files in /var/log +## /var/log/*/*.log -> find all .log files with a parent dir in /var/log +## /var/log/apache.log -> only tail the apache log file +files = ["/var/log/apache/access.log"] + +## The dataformat to be read from files +## Each data format has its own unique set of configuration options, read +## more about them here: +## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +data_format = "" +'''` + +// SampleConfig returns the default configuration of the Input +func (r *Reader) SampleConfig() string { + return sampleConfig +} + +func (r *Reader) Description() string { + return "reload and gather from file[s] on telegraf's interval" +} + +func (r *Reader) Gather(acc telegraf.Accumulator) error { + r.refreshFilePaths() + for _, k := range r.Filenames { + metrics, err := r.readMetric(k) + if err != nil { + return err + } + + for _, m := range metrics { + acc.AddFields(m.Name(), m.Fields(), m.Tags()) + } + } + return nil +} + +func (r *Reader) compileParser() { + if r.DataFormat == "grok" { + log.Printf("Grok isn't supported yet") + return + } + r.ParserConfig = parsers.Config{ + DataFormat: r.DataFormat, + TagKeys: r.Tags, + } + nParser, err := parsers.NewParser(&r.ParserConfig) + if err != nil { + log.Printf("E! Error building parser: %v", err) + } + + r.Parser = nParser +} + +func (r *Reader) refreshFilePaths() { + var allFiles []string + for _, filepath := range r.Filepaths { + g, err := globpath.Compile(filepath) + if err != nil { + log.Printf("E! Error Glob %s failed to compile, %s", filepath, err) + continue + } + files := g.Match() + + for k := range files { + allFiles = append(allFiles, k) + } + } + + r.Filenames = allFiles +} + +//requires that Parser has been compiled +func (r *Reader) readMetric(filename string) ([]telegraf.Metric, error) { + fileContents, err := ioutil.ReadFile(filename) + if err != nil { + log.Printf("E! 
File could not be opened: %v", filename) + } + + return r.Parser.Parse(fileContents) + +} diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go new file mode 100644 index 0000000000000..e073a6f549c18 --- /dev/null +++ b/plugins/inputs/reader/reader_test.go @@ -0,0 +1,41 @@ +package reader + +import ( + "log" + "runtime" + "strings" + "testing" + + "github.com/influxdata/telegraf/testutil" + "github.com/stretchr/testify/assert" +) + +func TestRefreshFilePaths(t *testing.T) { + testDir := getPluginDir() + r := Reader{ + Filepaths: []string{testDir + "/logparser/grok/testdata/**.log"}, + } + + r.refreshFilePaths() + //log.Printf("filenames: %v", filenames) + assert.Equal(t, len(r.Filenames), 2) +} +func TestJSONParserCompile(t *testing.T) { + testDir := getPluginDir() + var acc testutil.Accumulator + r := Reader{ + Filepaths: []string{testDir + "/reader/testfiles/**.log"}, + DataFormat: "json", + Tags: []string{"parent_ignored_child"}, + } + r.compileParser() + r.Gather(&acc) + log.Printf("acc: %v", acc.Metrics[0].Tags) + assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags) + assert.Equal(t, 5, len(acc.Metrics[0].Fields)) +} + +func getPluginDir() string { + _, filename, _, _ := runtime.Caller(1) + return strings.Replace(filename, "/reader/reader_test.go", "", 1) +} diff --git a/plugins/inputs/reader/testfiles/json_a.log b/plugins/inputs/reader/testfiles/json_a.log new file mode 100644 index 0000000000000..739fd65d89ca1 --- /dev/null +++ b/plugins/inputs/reader/testfiles/json_a.log @@ -0,0 +1,14 @@ +{ + "parent": { + "child": 3.0, + "ignored_child": "hi" + }, + "ignored_null": null, + "integer": 4, + "list": [3, 4], + "ignored_parent": { + "another_ignored_null": null, + "ignored_string": "hello, world!" + }, + "another_list": [4] + } \ No newline at end of file From 08a11d7bfd3a9ca2414e777a5fee3b33856a17e1 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 21 Jun 2018 11:44:02 -0700 Subject: [PATCH 02/51] change config file --- plugins/inputs/reader/reader.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index 74b180e250e6b..a8bf5c468a808 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -20,7 +20,7 @@ type Reader struct { Filenames []string } -const sampleConfig = `## Log files to parse. +const sampleConfig = `## Files to parse. ## These accept standard unix glob matching rules, but with the addition of ## ** as a "super asterisk". 
ie: ## /var/log/**.log -> recursively find all .log files in /var/log @@ -32,8 +32,7 @@ files = ["/var/log/apache/access.log"] ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -data_format = "" -'''` +data_format = ""` // SampleConfig returns the default configuration of the Input func (r *Reader) SampleConfig() string { From 9c4b52256ddab66f696745d5e0a4d652e0cfe026 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 21 Jun 2018 13:13:46 -0700 Subject: [PATCH 03/51] tweak metric output --- plugins/inputs/reader/reader.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index a8bf5c468a808..274f22a668dba 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -59,6 +59,10 @@ func (r *Reader) Gather(acc telegraf.Accumulator) error { } func (r *Reader) compileParser() { + if r.DataFormat == "" { + log.Printf("E! No data_format specified") + return + } if r.DataFormat == "grok" { log.Printf("Grok isn't supported yet") return From 4e24a1bbe35abf9b850b4c29bcd5d97b6b63c053 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 21 Jun 2018 15:56:20 -0700 Subject: [PATCH 04/51] add grok as a top level parser --- plugins/inputs/reader/reader.go | 16 ++++++++++---- plugins/inputs/reader/reader_test.go | 17 +++++++++++++++ plugins/parsers/registry.go | 31 ++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 4 deletions(-) diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index 274f22a668dba..36c2a742ebff8 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -18,6 +18,12 @@ type Reader struct { Tags []string Filenames []string + + //for grok parser + Patterns []string + namedPatterns []string + CustomPatterns string + CustomPatternFiles []string } const sampleConfig = `## Files to parse. @@ -63,13 +69,15 @@ func (r *Reader) compileParser() { log.Printf("E! 
No data_format specified") return } - if r.DataFormat == "grok" { - log.Printf("Grok isn't supported yet") - return - } r.ParserConfig = parsers.Config{ DataFormat: r.DataFormat, TagKeys: r.Tags, + + //grok settings + Patterns: r.Patterns, + NamedPatterns: r.namedPatterns, + CustomPatterns: r.CustomPatterns, + CustomPatternFiles: r.CustomPatternFiles, } nParser, err := parsers.NewParser(&r.ParserConfig) if err != nil { diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index e073a6f549c18..a30ec2f929b0d 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -35,6 +35,23 @@ func TestJSONParserCompile(t *testing.T) { assert.Equal(t, 5, len(acc.Metrics[0].Fields)) } +func TestGrokParser(t *testing.T) { + testDir := getPluginDir() + var acc testutil.Accumulator + r := Reader{ + Filepaths: []string{testDir + "/reader/testfiles/grok_a.log"}, + DataFormat: "grok", + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + + r.compileParser() + err := r.Gather(&acc) + log.Printf("err: %v", err) + log.Printf("metric[0]_tags: %v, metric[0]_fields: %v", acc.Metrics[0].Tags, acc.Metrics[0].Fields) + log.Printf("metric[1]_tags: %v, metric[1]_fields: %v", acc.Metrics[1].Tags, acc.Metrics[1].Fields) + t.Error() +} + func getPluginDir() string { _, filename, _, _ := runtime.Caller(1) return strings.Replace(filename, "/reader/reader_test.go", "", 1) diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index 58fce1722b66d..e15632b8e32ee 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -8,6 +8,7 @@ import ( "github.com/influxdata/telegraf/plugins/parsers/collectd" "github.com/influxdata/telegraf/plugins/parsers/dropwizard" "github.com/influxdata/telegraf/plugins/parsers/graphite" + "github.com/influxdata/telegraf/plugins/parsers/grok" "github.com/influxdata/telegraf/plugins/parsers/influx" "github.com/influxdata/telegraf/plugins/parsers/json" "github.com/influxdata/telegraf/plugins/parsers/nagios" @@ -87,6 +88,12 @@ type Config struct { // an optional map containing tag names as keys and json paths to retrieve the tag values from as values // used if TagsPath is empty or doesn't return any tags DropwizardTagPathsMap map[string]string + + //grok patterns + Patterns []string + NamedPatterns []string + CustomPatterns string + CustomPatternFiles []string } // NewParser returns a Parser interface based on the given config. 
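[A minimal usage sketch, not part of the diff: with the grok fields added to parsers.Config above, the new "grok" case in NewParser can be driven roughly as follows. The measurement name, pattern choice, and sample log line are illustrative assumptions; error handling is kept deliberately simple.]

package main

import (
	"log"

	"github.com/influxdata/telegraf/plugins/parsers"
)

func main() {
	// Illustrative values only; DataFormat "grok" selects the new case in NewParser.
	cfg := &parsers.Config{
		DataFormat: "grok",
		MetricName: "apache_access",
		Patterns:   []string{"%{COMMON_LOG_FORMAT}"},
	}
	parser, err := parsers.NewParser(cfg)
	if err != nil {
		log.Fatalf("building grok parser: %v", err)
	}
	// One line in the common log format, as in testfiles/grok_a.log.
	metrics, err := parser.Parse([]byte(`127.0.0.1 ident frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`))
	if err != nil {
		log.Fatalf("parse: %v", err)
	}
	for _, m := range metrics {
		log.Printf("tags=%v fields=%v", m.Tags(), m.Fields())
	}
}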
@@ -120,12 +127,36 @@ func NewParser(config *Config) (Parser, error) { config.DefaultTags, config.Separator, config.Templates) + case "grok": + parser, err = NewGrokParser( + config.MetricName, + config.Patterns, + config.NamedPatterns, + config.CustomPatterns, + config.CustomPatternFiles) default: err = fmt.Errorf("Invalid data format: %s", config.DataFormat) } return parser, err } +func NewGrokParser(metricName string, + patterns []string, + nPatterns []string, + cPatterns string, + cPatternFiles []string) (Parser, error) { + parser := grok.Parser{ + Measurement: metricName, + Patterns: patterns, + NamedPatterns: nPatterns, + CustomPatterns: cPatterns, + CustomPatternFiles: cPatternFiles, + } + + parser.Compile() + return &parser, nil +} + func NewJSONParser( metricName string, tagKeys []string, From ec7f13111f7a12e0b0b7668882379af145b3cefa Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 21 Jun 2018 16:06:36 -0700 Subject: [PATCH 05/51] add more test files --- plugins/inputs/reader/testfiles/grok_a.log | 2 + plugins/parsers/grok/influx-patterns | 73 +++ plugins/parsers/grok/influx_patterns.go | 78 +++ plugins/parsers/grok/parser.go | 527 +++++++++++++++++++++ plugins/parsers/grok/parser_test.go | 21 + 5 files changed, 701 insertions(+) create mode 100644 plugins/inputs/reader/testfiles/grok_a.log create mode 100644 plugins/parsers/grok/influx-patterns create mode 100644 plugins/parsers/grok/influx_patterns.go create mode 100644 plugins/parsers/grok/parser.go create mode 100644 plugins/parsers/grok/parser_test.go diff --git a/plugins/inputs/reader/testfiles/grok_a.log b/plugins/inputs/reader/testfiles/grok_a.log new file mode 100644 index 0000000000000..5295fcb75152a --- /dev/null +++ b/plugins/inputs/reader/testfiles/grok_a.log @@ -0,0 +1,2 @@ +127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 +128.0.0.1 user-identifier tony [10/Oct/2000:13:55:36 -0800] "GET /apache_pb.gif HTTP/1.0" 300 45 \ No newline at end of file diff --git a/plugins/parsers/grok/influx-patterns b/plugins/parsers/grok/influx-patterns new file mode 100644 index 0000000000000..931b61bc8985f --- /dev/null +++ b/plugins/parsers/grok/influx-patterns @@ -0,0 +1,73 @@ +# Captures are a slightly modified version of logstash "grok" patterns, with +# the format %{[:][:]} +# By default all named captures are converted into string fields. +# Modifiers can be used to convert captures to other types or tags. +# Timestamp modifiers can be used to convert captures to the timestamp of the +# parsed metric. 
+ +# View logstash grok pattern docs here: +# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html +# All default logstash patterns are supported, these can be viewed here: +# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns + +# Available modifiers: +# string (default if nothing is specified) +# int +# float +# duration (ie, 5.23ms gets converted to int nanoseconds) +# tag (converts the field into a tag) +# drop (drops the field completely) +# Timestamp modifiers: +# ts-ansic ("Mon Jan _2 15:04:05 2006") +# ts-unix ("Mon Jan _2 15:04:05 MST 2006") +# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") +# ts-rfc822 ("02 Jan 06 15:04 MST") +# ts-rfc822z ("02 Jan 06 15:04 -0700") +# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") +# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") +# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") +# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") +# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") +# ts-httpd ("02/Jan/2006:15:04:05 -0700") +# ts-epoch (seconds since unix epoch) +# ts-epochnano (nanoseconds since unix epoch) +# ts-"CUSTOM" +# CUSTOM time layouts must be within quotes and be the representation of the +# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 +# See https://golang.org/pkg/time/#Parse for more details. + +# Example log file pattern, example log looks like this: +# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs +# Breakdown of the DURATION pattern below: +# NUMBER is a builtin logstash grok pattern matching float & int numbers. +# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets. +# s is also regex, this pattern must end in "s". +# so DURATION will match something like '5.324ms' or '6.1µs' or '10s' +DURATION %{NUMBER}[nuµm]?s +RESPONSE_CODE %{NUMBER:response_code:tag} +RESPONSE_TIME %{DURATION:response_time_ns:duration} +EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + +# Wider-ranging username matching vs. logstash built-in %{USER} +NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ +NGUSER %{NGUSERNAME} +# Wider-ranging client IP matching +CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) + +## +## COMMON LOG PATTERNS +## + +# apache & nginx logs, this is also known as the "common log format" +# see https://en.wikipedia.org/wiki/Common_Log_Format +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) + +# Combined log format is the same as the common log format but with the addition +# of two quoted strings at the end for "referrer" and "agent" +# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html +COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} + +# HTTPD log formats +HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} +HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? 
%{DATA:errorcode}: %{GREEDYDATA:message} +HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} diff --git a/plugins/parsers/grok/influx_patterns.go b/plugins/parsers/grok/influx_patterns.go new file mode 100644 index 0000000000000..6dc990622a305 --- /dev/null +++ b/plugins/parsers/grok/influx_patterns.go @@ -0,0 +1,78 @@ +package grok + +// DEFAULT_PATTERNS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns +const DEFAULT_PATTERNS = ` +# Captures are a slightly modified version of logstash "grok" patterns, with +# the format %{[:][:]} +# By default all named captures are converted into string fields. +# Modifiers can be used to convert captures to other types or tags. +# Timestamp modifiers can be used to convert captures to the timestamp of the +# parsed metric. + +# View logstash grok pattern docs here: +# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html +# All default logstash patterns are supported, these can be viewed here: +# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns + +# Available modifiers: +# string (default if nothing is specified) +# int +# float +# duration (ie, 5.23ms gets converted to int nanoseconds) +# tag (converts the field into a tag) +# drop (drops the field completely) +# Timestamp modifiers: +# ts-ansic ("Mon Jan _2 15:04:05 2006") +# ts-unix ("Mon Jan _2 15:04:05 MST 2006") +# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") +# ts-rfc822 ("02 Jan 06 15:04 MST") +# ts-rfc822z ("02 Jan 06 15:04 -0700") +# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") +# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") +# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") +# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") +# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") +# ts-httpd ("02/Jan/2006:15:04:05 -0700") +# ts-epoch (seconds since unix epoch) +# ts-epochnano (nanoseconds since unix epoch) +# ts-"CUSTOM" +# CUSTOM time layouts must be within quotes and be the representation of the +# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 +# See https://golang.org/pkg/time/#Parse for more details. + +# Example log file pattern, example log looks like this: +# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs +# Breakdown of the DURATION pattern below: +# NUMBER is a builtin logstash grok pattern matching float & int numbers. +# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets. +# s is also regex, this pattern must end in "s". +# so DURATION will match something like '5.324ms' or '6.1µs' or '10s' +DURATION %{NUMBER}[nuµm]?s +RESPONSE_CODE %{NUMBER:response_code:tag} +RESPONSE_TIME %{DURATION:response_time_ns:duration} +EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + +# Wider-ranging username matching vs. 
logstash built-in %{USER} +NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ +NGUSER %{NGUSERNAME} +# Wider-ranging client IP matching +CLIENT (?:%{IPV6}|%{IPV4}|%{HOSTNAME}|%{HOSTPORT}) + +## +## COMMON LOG PATTERNS +## + +# apache & nginx logs, this is also known as the "common log format" +# see https://en.wikipedia.org/wiki/Common_Log_Format +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) + +# Combined log format is the same as the common log format but with the addition +# of two quoted strings at the end for "referrer" and "agent" +# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html +COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} + +# HTTPD log formats +HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} +HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message} +HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} +` diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go new file mode 100644 index 0000000000000..e3d2acf3bb3ac --- /dev/null +++ b/plugins/parsers/grok/parser.go @@ -0,0 +1,527 @@ +package grok + +import ( + "bufio" + "fmt" + "log" + "os" + "regexp" + "strconv" + "strings" + "time" + + "github.com/vjeantet/grok" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" +) + +var timeLayouts = map[string]string{ + "ts-ansic": "Mon Jan _2 15:04:05 2006", + "ts-unix": "Mon Jan _2 15:04:05 MST 2006", + "ts-ruby": "Mon Jan 02 15:04:05 -0700 2006", + "ts-rfc822": "02 Jan 06 15:04 MST", + "ts-rfc822z": "02 Jan 06 15:04 -0700", // RFC822 with numeric zone + "ts-rfc850": "Monday, 02-Jan-06 15:04:05 MST", + "ts-rfc1123": "Mon, 02 Jan 2006 15:04:05 MST", + "ts-rfc1123z": "Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone + "ts-rfc3339": "2006-01-02T15:04:05Z07:00", + "ts-rfc3339nano": "2006-01-02T15:04:05.999999999Z07:00", + "ts-httpd": "02/Jan/2006:15:04:05 -0700", + // These three are not exactly "layouts", but they are special cases that + // will get handled in the ParseLine function. + "ts-epoch": "EPOCH", + "ts-epochnano": "EPOCH_NANO", + "ts-syslog": "SYSLOG_TIMESTAMP", + "ts": "GENERIC_TIMESTAMP", // try parsing all known timestamp layouts. +} + +const ( + INT = "int" + TAG = "tag" + FLOAT = "float" + STRING = "string" + DURATION = "duration" + DROP = "drop" + EPOCH = "EPOCH" + EPOCH_NANO = "EPOCH_NANO" + SYSLOG_TIMESTAMP = "SYSLOG_TIMESTAMP" + GENERIC_TIMESTAMP = "GENERIC_TIMESTAMP" +) + +var ( + // matches named captures that contain a modifier. + // ie, + // %{NUMBER:bytes:int} + // %{IPORHOST:clientip:tag} + // %{HTTPDATE:ts1:ts-http} + // %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"} + modifierRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`) + // matches a plain pattern name. 
ie, %{NUMBER} + patternOnlyRe = regexp.MustCompile(`%{(\w+)}`) +) + +// Parser is the primary struct to handle and grok-patterns defined in the config toml +type Parser struct { + Patterns []string + // namedPatterns is a list of internally-assigned names to the patterns + // specified by the user in Patterns. + // They will look like: + // GROK_INTERNAL_PATTERN_0, GROK_INTERNAL_PATTERN_1, etc. + NamedPatterns []string + CustomPatterns string + CustomPatternFiles []string + Measurement string + + // Timezone is an optional component to help render log dates to + // your chosen zone. + // Default: "" which renders UTC + // Options are as follows: + // 1. Local -- interpret based on machine localtime + // 2. "America/Chicago" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + // 3. UTC -- or blank/unspecified, will return timestamp in UTC + Timezone string + loc *time.Location + + // typeMap is a map of patterns -> capture name -> modifier, + // ie, { + // "%{TESTLOG}": + // { + // "bytes": "int", + // "clientip": "tag" + // } + // } + typeMap map[string]map[string]string + // tsMap is a map of patterns -> capture name -> timestamp layout. + // ie, { + // "%{TESTLOG}": + // { + // "httptime": "02/Jan/2006:15:04:05 -0700" + // } + // } + tsMap map[string]map[string]string + // patterns is a map of all of the parsed patterns from CustomPatterns + // and CustomPatternFiles. + // ie, { + // "DURATION": "%{NUMBER}[nuµm]?s" + // "RESPONSE_CODE": "%{NUMBER:rc:tag}" + // } + patterns map[string]string + // foundTsLayouts is a slice of timestamp patterns that have been found + // in the log lines. This slice gets updated if the user uses the generic + // 'ts' modifier for timestamps. This slice is checked first for matches, + // so that previously-matched layouts get priority over all other timestamp + // layouts. + foundTsLayouts []string + + timeFunc func() time.Time + g *grok.Grok + tsModder *tsModder +} + +// Compile is a bound method to Parser which will process the options for our parser +func (p *Parser) Compile() error { + p.typeMap = make(map[string]map[string]string) + p.tsMap = make(map[string]map[string]string) + p.patterns = make(map[string]string) + p.tsModder = &tsModder{} + var err error + p.g, err = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) + if err != nil { + return err + } + + // Give Patterns fake names so that they can be treated as named + // "custom patterns" + p.NamedPatterns = make([]string, 0, len(p.Patterns)) + for i, pattern := range p.Patterns { + pattern = strings.TrimSpace(pattern) + if pattern == "" { + continue + } + name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) + p.CustomPatterns += "\n" + name + " " + pattern + "\n" + p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") + } + + if len(p.NamedPatterns) == 0 { + return fmt.Errorf("pattern required") + } + + // Combine user-supplied CustomPatterns with DEFAULT_PATTERNS and parse + // them together as the same type of pattern. + p.CustomPatterns = DEFAULT_PATTERNS + p.CustomPatterns + if len(p.CustomPatterns) != 0 { + scanner := bufio.NewScanner(strings.NewReader(p.CustomPatterns)) + p.addCustomPatterns(scanner) + } + + // Parse any custom pattern files supplied. 
+ for _, filename := range p.CustomPatternFiles { + file, fileErr := os.Open(filename) + if fileErr != nil { + return fileErr + } + + scanner := bufio.NewScanner(bufio.NewReader(file)) + p.addCustomPatterns(scanner) + } + + if p.Measurement == "" { + p.Measurement = "logparser_grok" + } + + p.loc, err = time.LoadLocation(p.Timezone) + if err != nil { + log.Printf("W! improper timezone supplied (%s), setting loc to UTC", p.Timezone) + p.loc, _ = time.LoadLocation("UTC") + } + + if p.timeFunc == nil { + p.timeFunc = time.Now + } + + return p.compileCustomPatterns() +} + +// ParseLine is the primary function to process individual lines, returning the metrics +func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { + var err error + // values are the parsed fields from the log line + var values map[string]string + // the matching pattern string + var patternName string + for _, pattern := range p.NamedPatterns { + if values, err = p.g.Parse(pattern, line); err != nil { + return nil, err + } + if len(values) != 0 { + patternName = pattern + break + } + } + + if len(values) == 0 { + log.Printf("D! Grok no match found for: %q", line) + return nil, nil + } + + fields := make(map[string]interface{}) + tags := make(map[string]string) + timestamp := time.Now() + for k, v := range values { + if k == "" || v == "" { + continue + } + + // t is the modifier of the field + var t string + // check if pattern has some modifiers + if types, ok := p.typeMap[patternName]; ok { + t = types[k] + } + // if we didn't find a modifier, check if we have a timestamp layout + if t == "" { + if ts, ok := p.tsMap[patternName]; ok { + // check if the modifier is a timestamp layout + if layout, ok := ts[k]; ok { + t = layout + } + } + } + // if we didn't find a type OR timestamp modifier, assume string + if t == "" { + t = STRING + } + + switch t { + case INT: + iv, err := strconv.ParseInt(v, 10, 64) + if err != nil { + log.Printf("E! Error parsing %s to int: %s", v, err) + } else { + fields[k] = iv + } + case FLOAT: + fv, err := strconv.ParseFloat(v, 64) + if err != nil { + log.Printf("E! Error parsing %s to float: %s", v, err) + } else { + fields[k] = fv + } + case DURATION: + d, err := time.ParseDuration(v) + if err != nil { + log.Printf("E! Error parsing %s to duration: %s", v, err) + } else { + fields[k] = int64(d) + } + case TAG: + tags[k] = v + case STRING: + fields[k] = strings.Trim(v, `"`) + case EPOCH: + parts := strings.SplitN(v, ".", 2) + if len(parts) == 0 { + log.Printf("E! Error parsing %s to timestamp: %s", v, err) + break + } + + sec, err := strconv.ParseInt(parts[0], 10, 64) + if err != nil { + log.Printf("E! Error parsing %s to timestamp: %s", v, err) + break + } + ts := time.Unix(sec, 0) + + if len(parts) == 2 { + padded := fmt.Sprintf("%-9s", parts[1]) + nsString := strings.Replace(padded[:9], " ", "0", -1) + nanosec, err := strconv.ParseInt(nsString, 10, 64) + if err != nil { + log.Printf("E! Error parsing %s to timestamp: %s", v, err) + break + } + ts = ts.Add(time.Duration(nanosec) * time.Nanosecond) + } + timestamp = ts + case EPOCH_NANO: + iv, err := strconv.ParseInt(v, 10, 64) + if err != nil { + log.Printf("E! Error parsing %s to int: %s", v, err) + } else { + timestamp = time.Unix(0, iv) + } + case SYSLOG_TIMESTAMP: + ts, err := time.ParseInLocation("Jan 02 15:04:05", v, p.loc) + if err == nil { + if ts.Year() == 0 { + ts = ts.AddDate(timestamp.Year(), 0, 0) + } + timestamp = ts + } else { + log.Printf("E! 
Error parsing %s to time layout [%s]: %s", v, t, err) + } + case GENERIC_TIMESTAMP: + var foundTs bool + // first try timestamp layouts that we've already found + for _, layout := range p.foundTsLayouts { + ts, err := time.ParseInLocation(layout, v, p.loc) + if err == nil { + timestamp = ts + foundTs = true + break + } + } + // if we haven't found a timestamp layout yet, try all timestamp + // layouts. + if !foundTs { + for _, layout := range timeLayouts { + ts, err := time.ParseInLocation(layout, v, p.loc) + if err == nil { + timestamp = ts + foundTs = true + p.foundTsLayouts = append(p.foundTsLayouts, layout) + break + } + } + } + // if we still haven't found a timestamp layout, log it and we will + // just use time.Now() + if !foundTs { + log.Printf("E! Error parsing timestamp [%s], could not find any "+ + "suitable time layouts.", v) + } + case DROP: + // goodbye! + default: + ts, err := time.ParseInLocation(t, v, p.loc) + if err == nil { + timestamp = ts + } else { + log.Printf("E! Error parsing %s to time layout [%s]: %s", v, t, err) + } + } + } + + if len(fields) == 0 { + return nil, fmt.Errorf("logparser_grok: must have one or more fields") + } + + return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) +} + +func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { + lines := strings.Split(string(buf), "\n") + var metrics []telegraf.Metric + + for _, line := range lines { + m, err := p.ParseLine(line) + if err != nil { + return nil, err + } + metrics = append(metrics, m) + } + + return metrics, nil +} + +func (p *Parser) SetDefaultTags(tags map[string]string) { + //needs implementation +} + +func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) { + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if len(line) > 0 && line[0] != '#' { + names := strings.SplitN(line, " ", 2) + p.patterns[names[0]] = names[1] + } + } +} + +func (p *Parser) compileCustomPatterns() error { + var err error + // check if the pattern contains a subpattern that is already defined + // replace it with the subpattern for modifier inheritance. + for i := 0; i < 2; i++ { + for name, pattern := range p.patterns { + subNames := patternOnlyRe.FindAllStringSubmatch(pattern, -1) + for _, subName := range subNames { + if subPattern, ok := p.patterns[subName[1]]; ok { + pattern = strings.Replace(pattern, subName[0], subPattern, 1) + } + } + p.patterns[name] = pattern + } + } + + // check if pattern contains modifiers. Parse them out if it does. + for name, pattern := range p.patterns { + if modifierRe.MatchString(pattern) { + // this pattern has modifiers, so parse out the modifiers + pattern, err = p.parseTypedCaptures(name, pattern) + if err != nil { + return err + } + p.patterns[name] = pattern + } + } + + return p.g.AddPatternsFromMap(p.patterns) +} + +// parseTypedCaptures parses the capture modifiers, and then deletes the +// modifier from the line so that it is a valid "grok" pattern again. 
+// ie, +// %{NUMBER:bytes:int} => %{NUMBER:bytes} (stores %{NUMBER}->bytes->int) +// %{IPORHOST:clientip:tag} => %{IPORHOST:clientip} (stores %{IPORHOST}->clientip->tag) +func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) { + matches := modifierRe.FindAllStringSubmatch(pattern, -1) + + // grab the name of the capture pattern + patternName := "%{" + name + "}" + // create type map for this pattern + p.typeMap[patternName] = make(map[string]string) + p.tsMap[patternName] = make(map[string]string) + + // boolean to verify that each pattern only has a single ts- data type. + hasTimestamp := false + for _, match := range matches { + // regex capture 1 is the name of the capture + // regex capture 2 is the modifier of the capture + if strings.HasPrefix(match[2], "ts") { + if hasTimestamp { + return pattern, fmt.Errorf("logparser pattern compile error: "+ + "Each pattern is allowed only one named "+ + "timestamp data type. pattern: %s", pattern) + } + if layout, ok := timeLayouts[match[2]]; ok { + // built-in time format + p.tsMap[patternName][match[1]] = layout + } else { + // custom time format + p.tsMap[patternName][match[1]] = strings.TrimSuffix(strings.TrimPrefix(match[2], `ts-"`), `"`) + } + hasTimestamp = true + } else { + p.typeMap[patternName][match[1]] = match[2] + } + + // the modifier is not a valid part of a "grok" pattern, so remove it + // from the pattern. + pattern = strings.Replace(pattern, ":"+match[2]+"}", "}", 1) + } + + return pattern, nil +} + +// tsModder is a struct for incrementing identical timestamps of log lines +// so that we don't push identical metrics that will get overwritten. +type tsModder struct { + dupe time.Time + last time.Time + incr time.Duration + incrn time.Duration + rollover time.Duration +} + +// tsMod increments the given timestamp one unit more from the previous +// duplicate timestamp. +// the increment unit is determined as the next smallest time unit below the +// most significant time unit of ts. +// ie, if the input is at ms precision, it will increment it 1µs. 
+func (t *tsModder) tsMod(ts time.Time) time.Time { + defer func() { t.last = ts }() + // don't mod the time if we don't need to + if t.last.IsZero() || ts.IsZero() { + t.incrn = 0 + t.rollover = 0 + return ts + } + if !ts.Equal(t.last) && !ts.Equal(t.dupe) { + t.incr = 0 + t.incrn = 0 + t.rollover = 0 + return ts + } + + if ts.Equal(t.last) { + t.dupe = ts + } + + if ts.Equal(t.dupe) && t.incr == time.Duration(0) { + tsNano := ts.UnixNano() + + d := int64(10) + counter := 1 + for { + a := tsNano % d + if a > 0 { + break + } + d = d * 10 + counter++ + } + + switch { + case counter <= 6: + t.incr = time.Nanosecond + case counter <= 9: + t.incr = time.Microsecond + case counter > 9: + t.incr = time.Millisecond + } + } + + t.incrn++ + if t.incrn == 999 && t.incr > time.Nanosecond { + t.rollover = t.incr * t.incrn + t.incrn = 1 + t.incr = t.incr / 1000 + if t.incr < time.Nanosecond { + t.incr = time.Nanosecond + } + } + return ts.Add(t.incr*t.incrn + t.rollover) +} diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go new file mode 100644 index 0000000000000..5dc01911c5c44 --- /dev/null +++ b/plugins/parsers/grok/parser_test.go @@ -0,0 +1,21 @@ +package grok + +import ( + "log" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestGrokParse(t *testing.T) { + parser := Parser{ + Measurement: "t_met", + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + //var acc testutil.Accumulator + parser.Compile() + metrics, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)) + log.Printf("metric_tags: %v, metric_fields: %v", metrics[0].Tags(), metrics[0].Fields()) + assert.NoError(t, err) + //t.Error() +} From 504d9784465a6250b232e82211e51efe1fd854aa Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 21 Jun 2018 16:12:26 -0700 Subject: [PATCH 06/51] clean up some test cases --- plugins/inputs/reader/reader_test.go | 2 +- plugins/parsers/grok/parser_test.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index a30ec2f929b0d..339eba16d9245 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -49,7 +49,7 @@ func TestGrokParser(t *testing.T) { log.Printf("err: %v", err) log.Printf("metric[0]_tags: %v, metric[0]_fields: %v", acc.Metrics[0].Tags, acc.Metrics[0].Fields) log.Printf("metric[1]_tags: %v, metric[1]_fields: %v", acc.Metrics[1].Tags, acc.Metrics[1].Fields) - t.Error() + assert.Equal(t, 2, len(acc.Metrics)) } func getPluginDir() string { diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 5dc01911c5c44..b87c112d2f5e3 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -17,5 +17,4 @@ func TestGrokParse(t *testing.T) { metrics, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)) log.Printf("metric_tags: %v, metric_fields: %v", metrics[0].Tags(), metrics[0].Fields()) assert.NoError(t, err) - //t.Error() } From 542c030dc834abebc83f36637cbaf04c4a63c481 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 21 Jun 2018 16:23:06 -0700 Subject: [PATCH 07/51] knock more errors from test files --- plugins/inputs/reader/reader_test.go | 2 +- plugins/parsers/grok/parser_test.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/inputs/reader/reader_test.go 
b/plugins/inputs/reader/reader_test.go index 339eba16d9245..cb6fb44a8b136 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -24,7 +24,7 @@ func TestJSONParserCompile(t *testing.T) { testDir := getPluginDir() var acc testutil.Accumulator r := Reader{ - Filepaths: []string{testDir + "/reader/testfiles/**.log"}, + Filepaths: []string{testDir + "/reader/testfiles/json_a.log"}, DataFormat: "json", Tags: []string{"parent_ignored_child"}, } diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index b87c112d2f5e3..77818199af47c 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -12,7 +12,6 @@ func TestGrokParse(t *testing.T) { Measurement: "t_met", Patterns: []string{"%{COMMON_LOG_FORMAT}"}, } - //var acc testutil.Accumulator parser.Compile() metrics, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)) log.Printf("metric_tags: %v, metric_fields: %v", metrics[0].Tags(), metrics[0].Fields()) From 554b960339faf67ce633bdde46ddc36980ee8b38 Mon Sep 17 00:00:00 2001 From: Max U Date: Mon, 25 Jun 2018 09:53:35 -0700 Subject: [PATCH 08/51] add setparser to reader --- plugins/inputs/reader/reader.go | 41 ++++++++++++++++++++++++++++++++- plugins/parsers/registry.go | 7 ++++-- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index 36c2a742ebff8..504a40b3a0bcf 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -24,6 +24,7 @@ type Reader struct { namedPatterns []string CustomPatterns string CustomPatternFiles []string + TZone string } const sampleConfig = `## Files to parse. @@ -38,7 +39,40 @@ files = ["/var/log/apache/access.log"] ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -data_format = ""` +data_format = "" + +## Parse logstash-style "grok" patterns: +## Telegraf built-in parsing patterns: https://goo.gl/dkay10 +[inputs.logparser.grok] + ## This is a list of patterns to check the given log file(s) for. + ## Note that adding patterns here increases processing time. The most + ## efficient configuration is to have one pattern per logparser. + ## Other common built-in patterns are: + ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) + ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) + patterns = ["%{COMBINED_LOG_FORMAT}"] + + ## Name of the outputted measurement name. + measurement = "apache_access_log" + + ## Full path(s) to custom pattern files. + custom_pattern_files = [] + + ## Custom patterns can also be defined here. Put one pattern per line. + custom_patterns = ''' + ''' + + ## Timezone allows you to provide an override for timestamps that + ## don't already include an offset + ## e.g. 04/06/2016 12:41:45 data one two 5.43µs + ## + ## Default: "" which renders UTC + ## Options are as follows: + ## 1. Local -- interpret based on machine localtime + ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + ## 3. 
UTC -- or blank/unspecified, will return timestamp in UTC + timezone = "Canada/Eastern" +` // SampleConfig returns the default configuration of the Input func (r *Reader) SampleConfig() string { @@ -64,6 +98,10 @@ func (r *Reader) Gather(acc telegraf.Accumulator) error { return nil } +func (r *Reader) SetParser(p parsers.Parser) { + r.Parser = p +} + func (r *Reader) compileParser() { if r.DataFormat == "" { log.Printf("E! No data_format specified") @@ -78,6 +116,7 @@ func (r *Reader) compileParser() { NamedPatterns: r.namedPatterns, CustomPatterns: r.CustomPatterns, CustomPatternFiles: r.CustomPatternFiles, + TimeZone: r.TZone, } nParser, err := parsers.NewParser(&r.ParserConfig) if err != nil { diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index e15632b8e32ee..b657cf83f1004 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -94,6 +94,7 @@ type Config struct { NamedPatterns []string CustomPatterns string CustomPatternFiles []string + TimeZone string } // NewParser returns a Parser interface based on the given config. @@ -133,7 +134,8 @@ func NewParser(config *Config) (Parser, error) { config.Patterns, config.NamedPatterns, config.CustomPatterns, - config.CustomPatternFiles) + config.CustomPatternFiles, + config.TimeZone) default: err = fmt.Errorf("Invalid data format: %s", config.DataFormat) } @@ -144,13 +146,14 @@ func NewGrokParser(metricName string, patterns []string, nPatterns []string, cPatterns string, - cPatternFiles []string) (Parser, error) { + cPatternFiles []string, tZone string) (Parser, error) { parser := grok.Parser{ Measurement: metricName, Patterns: patterns, NamedPatterns: nPatterns, CustomPatterns: cPatterns, CustomPatternFiles: cPatternFiles, + Timezone: tZone, } parser.Compile() From f40371e3615dd40b210b08f6fd90ff31968f5729 Mon Sep 17 00:00:00 2001 From: Max U Date: Mon, 25 Jun 2018 10:15:32 -0700 Subject: [PATCH 09/51] add init function to reader --- plugins/inputs/reader/reader.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index 504a40b3a0bcf..853405745edab 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -6,6 +6,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal/globpath" + "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/parsers" ) @@ -154,3 +155,9 @@ func (r *Reader) readMetric(filename string) ([]telegraf.Metric, error) { return r.Parser.Parse(fileContents) } + +func init() { + inputs.Add("reader", func() telegraf.Input { + return &Reader{} + }) +} From 9c845950a7973d1e92e12dba8a3ae84fe8f55927 Mon Sep 17 00:00:00 2001 From: Max U Date: Mon, 25 Jun 2018 15:32:27 -0700 Subject: [PATCH 10/51] add grok as a top level parser, still need README --- internal/config/config.go | 58 ++++++++++++++++++++++ plugins/inputs/all/all.go | 2 +- plugins/inputs/reader/reader.go | 73 ++-------------------------- plugins/inputs/reader/reader_test.go | 26 ++++++++-- 4 files changed, 84 insertions(+), 75 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 8a31c271e9028..1a98c61c5c2fd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1338,6 +1338,59 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } + //for grok data_format + if node, ok := tbl.Fields["named_patterns"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if ary, ok 
:= kv.Value.(*ast.Array); ok { + for _, elem := range ary.Value { + if str, ok := elem.(*ast.String); ok { + c.NamedPatterns = append(c.NamedPatterns, str.Value) + } + } + } + } + } + + if node, ok := tbl.Fields["patterns"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if ary, ok := kv.Value.(*ast.Array); ok { + for _, elem := range ary.Value { + if str, ok := elem.(*ast.String); ok { + c.Patterns = append(c.Patterns, str.Value) + } + } + } + } + } + + if node, ok := tbl.Fields["custom_patterns"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.CustomPatterns = str.Value + } + } + } + + if node, ok := tbl.Fields["custom_pattern_files"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if ary, ok := kv.Value.(*ast.Array); ok { + for _, elem := range ary.Value { + if str, ok := elem.(*ast.String); ok { + c.CustomPatternFiles = append(c.CustomPatternFiles, str.Value) + } + } + } + } + } + + if node, ok := tbl.Fields["timezone"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.TimeZone = str.Value + } + } + } + c.MetricName = name delete(tbl.Fields, "data_format") @@ -1353,6 +1406,11 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { delete(tbl.Fields, "dropwizard_time_format") delete(tbl.Fields, "dropwizard_tags_path") delete(tbl.Fields, "dropwizard_tag_paths") + delete(tbl.Fields, "named_patterns") + delete(tbl.Fields, "patterns") + delete(tbl.Fields, "custom_patterns") + delete(tbl.Fields, "custom_pattern_files") + delete(tbl.Fields, "timezone") return parsers.NewParser(c) } diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index b2be2be5a55ca..de34847d64f7f 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -85,7 +85,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/puppetagent" _ "github.com/influxdata/telegraf/plugins/inputs/rabbitmq" _ "github.com/influxdata/telegraf/plugins/inputs/raindrops" - _ "github.com/influxdata/telegraf/plugins/inputs/redis" + _ "github.com/influxdata/telegraf/plugins/inputs/reader" _ "github.com/influxdata/telegraf/plugins/inputs/rethinkdb" _ "github.com/influxdata/telegraf/plugins/inputs/riak" _ "github.com/influxdata/telegraf/plugins/inputs/salesforce" diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index 853405745edab..bfccb87d3cd33 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -13,19 +13,9 @@ import ( type Reader struct { Filepaths []string `toml:"files"` FromBeginning bool - DataFormat string `toml:"data_format"` - ParserConfig parsers.Config - Parser parsers.Parser - Tags []string + parser parsers.Parser Filenames []string - - //for grok parser - Patterns []string - namedPatterns []string - CustomPatterns string - CustomPatternFiles []string - TZone string } const sampleConfig = `## Files to parse. @@ -41,38 +31,6 @@ files = ["/var/log/apache/access.log"] ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "" - -## Parse logstash-style "grok" patterns: -## Telegraf built-in parsing patterns: https://goo.gl/dkay10 -[inputs.logparser.grok] - ## This is a list of patterns to check the given log file(s) for. - ## Note that adding patterns here increases processing time. The most - ## efficient configuration is to have one pattern per logparser. 
- ## Other common built-in patterns are: - ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) - ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) - patterns = ["%{COMBINED_LOG_FORMAT}"] - - ## Name of the outputted measurement name. - measurement = "apache_access_log" - - ## Full path(s) to custom pattern files. - custom_pattern_files = [] - - ## Custom patterns can also be defined here. Put one pattern per line. - custom_patterns = ''' - ''' - - ## Timezone allows you to provide an override for timestamps that - ## don't already include an offset - ## e.g. 04/06/2016 12:41:45 data one two 5.43µs - ## - ## Default: "" which renders UTC - ## Options are as follows: - ## 1. Local -- interpret based on machine localtime - ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones - ## 3. UTC -- or blank/unspecified, will return timestamp in UTC - timezone = "Canada/Eastern" ` // SampleConfig returns the default configuration of the Input @@ -100,31 +58,7 @@ func (r *Reader) Gather(acc telegraf.Accumulator) error { } func (r *Reader) SetParser(p parsers.Parser) { - r.Parser = p -} - -func (r *Reader) compileParser() { - if r.DataFormat == "" { - log.Printf("E! No data_format specified") - return - } - r.ParserConfig = parsers.Config{ - DataFormat: r.DataFormat, - TagKeys: r.Tags, - - //grok settings - Patterns: r.Patterns, - NamedPatterns: r.namedPatterns, - CustomPatterns: r.CustomPatterns, - CustomPatternFiles: r.CustomPatternFiles, - TimeZone: r.TZone, - } - nParser, err := parsers.NewParser(&r.ParserConfig) - if err != nil { - log.Printf("E! Error building parser: %v", err) - } - - r.Parser = nParser + r.parser = p } func (r *Reader) refreshFilePaths() { @@ -151,8 +85,7 @@ func (r *Reader) readMetric(filename string) ([]telegraf.Metric, error) { if err != nil { log.Printf("E! 
File could not be opened: %v", filename) } - - return r.Parser.Parse(fileContents) + return r.parser.Parse(fileContents) } diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index cb6fb44a8b136..dcfa9ffc33cfc 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -6,6 +6,7 @@ import ( "strings" "testing" + "github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/testutil" "github.com/stretchr/testify/assert" ) @@ -28,7 +29,14 @@ func TestJSONParserCompile(t *testing.T) { DataFormat: "json", Tags: []string{"parent_ignored_child"}, } - r.compileParser() + parserConfig := parsers.Config{ + DataFormat: r.DataFormat, + TagKeys: r.Tags, + } + nParser, err := parsers.NewParser(&parserConfig) + r.parser = nParser + assert.NoError(t, err) + r.Gather(&acc) log.Printf("acc: %v", acc.Metrics[0].Tags) assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags) @@ -41,15 +49,25 @@ func TestGrokParser(t *testing.T) { r := Reader{ Filepaths: []string{testDir + "/reader/testfiles/grok_a.log"}, DataFormat: "grok", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, } - r.compileParser() - err := r.Gather(&acc) + parserConfig := parsers.Config{ + DataFormat: r.DataFormat, + TagKeys: r.Tags, + Patterns: []string{"{%COMMON-LOG-FORMAT}"}, + } + + nParser, err := parsers.NewParser(&parserConfig) + r.parser = nParser + assert.NoError(t, err) + + log.Printf("path: %v", r.Filepaths[0]) + err = r.Gather(&acc) log.Printf("err: %v", err) log.Printf("metric[0]_tags: %v, metric[0]_fields: %v", acc.Metrics[0].Tags, acc.Metrics[0].Fields) log.Printf("metric[1]_tags: %v, metric[1]_fields: %v", acc.Metrics[1].Tags, acc.Metrics[1].Fields) assert.Equal(t, 2, len(acc.Metrics)) + t.Error() } func getPluginDir() string { From cc406299bac7735056961feb28196caf0ce89165 Mon Sep 17 00:00:00 2001 From: Max U Date: Mon, 25 Jun 2018 15:52:43 -0700 Subject: [PATCH 11/51] allow for import from plugins/all --- plugins/inputs/all/all.go | 1 + plugins/inputs/reader/reader_test.go | 17 ++++++----------- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index de34847d64f7f..fc38b70f59ca9 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -86,6 +86,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/rabbitmq" _ "github.com/influxdata/telegraf/plugins/inputs/raindrops" _ "github.com/influxdata/telegraf/plugins/inputs/reader" + _ "github.com/influxdata/telegraf/plugins/inputs/redis" _ "github.com/influxdata/telegraf/plugins/inputs/rethinkdb" _ "github.com/influxdata/telegraf/plugins/inputs/riak" _ "github.com/influxdata/telegraf/plugins/inputs/salesforce" diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index dcfa9ffc33cfc..ca076350b9f35 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -25,13 +25,11 @@ func TestJSONParserCompile(t *testing.T) { testDir := getPluginDir() var acc testutil.Accumulator r := Reader{ - Filepaths: []string{testDir + "/reader/testfiles/json_a.log"}, - DataFormat: "json", - Tags: []string{"parent_ignored_child"}, + Filepaths: []string{testDir + "/reader/testfiles/json_a.log"}, } parserConfig := parsers.Config{ - DataFormat: r.DataFormat, - TagKeys: r.Tags, + DataFormat: "json", + TagKeys: []string{"parent_ignored_child"}, } nParser, err 
:= parsers.NewParser(&parserConfig) r.parser = nParser @@ -47,14 +45,12 @@ func TestGrokParser(t *testing.T) { testDir := getPluginDir() var acc testutil.Accumulator r := Reader{ - Filepaths: []string{testDir + "/reader/testfiles/grok_a.log"}, - DataFormat: "grok", + Filepaths: []string{testDir + "/reader/testfiles/grok_a.log"}, } parserConfig := parsers.Config{ - DataFormat: r.DataFormat, - TagKeys: r.Tags, - Patterns: []string{"{%COMMON-LOG-FORMAT}"}, + DataFormat: "grok", + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, } nParser, err := parsers.NewParser(&parserConfig) @@ -67,7 +63,6 @@ func TestGrokParser(t *testing.T) { log.Printf("metric[0]_tags: %v, metric[0]_fields: %v", acc.Metrics[0].Tags, acc.Metrics[0].Fields) log.Printf("metric[1]_tags: %v, metric[1]_fields: %v", acc.Metrics[1].Tags, acc.Metrics[1].Fields) assert.Equal(t, 2, len(acc.Metrics)) - t.Error() } func getPluginDir() string { From 79d9ea476187878b91dcc742c6ecfd75f93806a6 Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 26 Jun 2018 10:26:48 -0700 Subject: [PATCH 12/51] add docker-image spin up for reader --- Makefile | 14 ++- internal/config/config.go | 9 ++ plugins/inputs/reader/dev/docker-compse.yml | 13 +++ plugins/inputs/reader/dev/telegraf.conf | 106 ++++++++++++++++++++ plugins/inputs/reader/reader.go | 8 +- 5 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 plugins/inputs/reader/dev/docker-compse.yml create mode 100644 plugins/inputs/reader/dev/telegraf.conf diff --git a/Makefile b/Makefile index 8650df2677ecf..99d7a3a396ce0 100644 --- a/Makefile +++ b/Makefile @@ -92,4 +92,16 @@ docker-image: plugins/parsers/influx/machine.go: plugins/parsers/influx/machine.go.rl ragel -Z -G2 $^ -o $@ -.PHONY: deps telegraf install test test-windows lint vet test-all package clean docker-image fmtcheck uint64 +static: + @echo "Building static linux binary..." + @CGO_ENABLED=0 \ + GOOS=linux \ + GOARCH=amd64 \ + go build -ldflags "$(LDFLAGS)" ./cmd/telegraf + +plugin-%: + @echo "Starting dev environment for $${$(@)} input plugin..." 
+ @docker-compose -f plugins/inputs/$${$(@)}/dev/docker-compose.yml up + +.PHONY: deps telegraf install test test-windows lint vet test-all package clean docker-image fmtcheck uint64 static + diff --git a/internal/config/config.go b/internal/config/config.go index 1a98c61c5c2fd..845b0b92c6f9b 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1393,6 +1393,14 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { c.MetricName = name + if node, ok := tbl.Fields["name_override"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.MetricName = str.Value + } + } + } + delete(tbl.Fields, "data_format") delete(tbl.Fields, "separator") delete(tbl.Fields, "templates") @@ -1411,6 +1419,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { delete(tbl.Fields, "custom_patterns") delete(tbl.Fields, "custom_pattern_files") delete(tbl.Fields, "timezone") + delete(tbl.Fields, "name_override") return parsers.NewParser(c) } diff --git a/plugins/inputs/reader/dev/docker-compse.yml b/plugins/inputs/reader/dev/docker-compse.yml new file mode 100644 index 0000000000000..a5abde8ac63d3 --- /dev/null +++ b/plugins/inputs/reader/dev/docker-compse.yml @@ -0,0 +1,13 @@ +version: '3' + +# services: + telegraf: + image: glinton/scratch + volumes: + - ./telegraf.conf:/telegraf.conf + - ../../../../telegraf:/telegraf + - ./test.log:/var/log/test.log + entrypoint: + - /telegraf + - --config + - /telegraf.conf \ No newline at end of file diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf new file mode 100644 index 0000000000000..9718456829ffc --- /dev/null +++ b/plugins/inputs/reader/dev/telegraf.conf @@ -0,0 +1,106 @@ + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. + collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. 
+ ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = false + ## Run telegraf in quiet mode (error log messages only). + quiet = false + ## Specify the log file name. The empty string means to log to stderr. + logfile = "" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + +# # reload and gather from file[s] on telegraf's interval +[[inputs.reader]] +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## /var/log/**.log -> recursively find all .log files in /var/log +# ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log +# ## /var/log/apache.log -> only tail the apache log file +files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/reader/testfiles/grok_a.log"] +# +# ## The dataformat to be read from files +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +data_format = "grok" +# + +patterns = ["%{COMMON_LOG_FORMAT}"] +# +# ## Name of the outputted measurement name. +measurement = "grok_reader" +# +# ## Full path(s) to custom pattern files. +# custom_pattern_files = [] +# +# ## Custom patterns can also be defined here. Put one pattern per line. +# custom_patterns = ''' +# ''' +# +# ## Timezone allows you to provide an override for timestamps that +# ## don't already include an offset +# ## e.g. 04/06/2016 12:41:45 data one two 5.43µs +# ## +# ## Default: "" which renders UTC +# ## Options are as follows: +# ## 1. Local -- interpret based on machine localtime +# ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones +# ## 3. UTC -- or blank/unspecified, will return timestamp in UTC +# timezone = "Canada/Eastern" + + +[[outputs.file]] + files = ["stdout"] \ No newline at end of file diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index bfccb87d3cd33..eb2ce48049c7b 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -50,7 +50,13 @@ func (r *Reader) Gather(acc telegraf.Accumulator) error { return err } - for _, m := range metrics { + for i, m := range metrics { + + //error if m is nil + if m == nil { + log.Printf("E! 
Metric could not be parsed from: %v, on line %v", k, i) + continue + } acc.AddFields(m.Name(), m.Fields(), m.Tags()) } } From bbd68b382091f8f581c02d5db75ad9fb43e0bbce Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 26 Jun 2018 11:26:43 -0700 Subject: [PATCH 13/51] docker will spin up --- internal/config/config.go | 9 --------- plugins/inputs/reader/dev/docker-compse.yml | 13 ------------- plugins/inputs/reader/dev/telegraf.conf | 10 +++++----- plugins/inputs/reader/reader.go | 2 +- 4 files changed, 6 insertions(+), 28 deletions(-) delete mode 100644 plugins/inputs/reader/dev/docker-compse.yml diff --git a/internal/config/config.go b/internal/config/config.go index 845b0b92c6f9b..1a98c61c5c2fd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1393,14 +1393,6 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { c.MetricName = name - if node, ok := tbl.Fields["name_override"]; ok { - if kv, ok := node.(*ast.KeyValue); ok { - if str, ok := kv.Value.(*ast.String); ok { - c.MetricName = str.Value - } - } - } - delete(tbl.Fields, "data_format") delete(tbl.Fields, "separator") delete(tbl.Fields, "templates") @@ -1419,7 +1411,6 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { delete(tbl.Fields, "custom_patterns") delete(tbl.Fields, "custom_pattern_files") delete(tbl.Fields, "timezone") - delete(tbl.Fields, "name_override") return parsers.NewParser(c) } diff --git a/plugins/inputs/reader/dev/docker-compse.yml b/plugins/inputs/reader/dev/docker-compse.yml deleted file mode 100644 index a5abde8ac63d3..0000000000000 --- a/plugins/inputs/reader/dev/docker-compse.yml +++ /dev/null @@ -1,13 +0,0 @@ -version: '3' - -# services: - telegraf: - image: glinton/scratch - volumes: - - ./telegraf.conf:/telegraf.conf - - ../../../../telegraf:/telegraf - - ./test.log:/var/log/test.log - entrypoint: - - /telegraf - - --config - - /telegraf.conf \ No newline at end of file diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf index 9718456829ffc..76f57f0ad8e82 100644 --- a/plugins/inputs/reader/dev/telegraf.conf +++ b/plugins/inputs/reader/dev/telegraf.conf @@ -69,22 +69,22 @@ # ## /var/log/**.log -> recursively find all .log files in /var/log # ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log # ## /var/log/apache.log -> only tail the apache log file -files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/reader/testfiles/grok_a.log"] +files = ["/var/log/test.log"] # # ## The dataformat to be read from files # ## Each data format has its own unique set of configuration options, read # ## more about them here: # ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -data_format = "grok" +data_format = "json" # -patterns = ["%{COMMON_LOG_FORMAT}"] +#patterns = ["%{TEST_LOG_B}","%{TEST_LOG_A}"] # # ## Name of the outputted measurement name. -measurement = "grok_reader" +#name_override = "grok_reader" # # ## Full path(s) to custom pattern files. -# custom_pattern_files = [] +#custom_pattern_files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/logparser/grok/testdata/test-patterns"] # # ## Custom patterns can also be defined here. Put one pattern per line. 
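# ## (Illustrative example, not part of this commit: a custom pattern is a
# ## name followed by a grok expression, one per line, e.g.
# ## MY_APP_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:value:float}
# ## which could then be referenced above as patterns = ["%{MY_APP_LOG}"].)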
# custom_patterns = ''' diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index eb2ce48049c7b..e21295611d1f8 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -18,7 +18,7 @@ type Reader struct { Filenames []string } -const sampleConfig = `## Files to parse. +const sampleConfig = `## Files to parse each interval. ## These accept standard unix glob matching rules, but with the addition of ## ** as a "super asterisk". ie: ## /var/log/**.log -> recursively find all .log files in /var/log From bf7220d2ceac69f2a5b66f273d30497ae9781dfb Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 26 Jun 2018 11:53:40 -0700 Subject: [PATCH 14/51] add test file to docker spin up --- plugins/inputs/reader/dev/docker-compose.yml | 13 +++ plugins/inputs/reader/dev/json_a.log | 14 +++ telegraf.conf | 104 +++++++++++++++++++ 3 files changed, 131 insertions(+) create mode 100644 plugins/inputs/reader/dev/docker-compose.yml create mode 100644 plugins/inputs/reader/dev/json_a.log create mode 100644 telegraf.conf diff --git a/plugins/inputs/reader/dev/docker-compose.yml b/plugins/inputs/reader/dev/docker-compose.yml new file mode 100644 index 0000000000000..abeaf931542ba --- /dev/null +++ b/plugins/inputs/reader/dev/docker-compose.yml @@ -0,0 +1,13 @@ +version: '3' + +services: + telegraf: + image: glinton/scratch + volumes: + - ./telegraf.conf:/telegraf.conf + - ../../../../telegraf:/telegraf + - ./json_a.log:/var/log/test.log + entrypoint: + - /telegraf + - --config + - /telegraf.conf \ No newline at end of file diff --git a/plugins/inputs/reader/dev/json_a.log b/plugins/inputs/reader/dev/json_a.log new file mode 100644 index 0000000000000..739fd65d89ca1 --- /dev/null +++ b/plugins/inputs/reader/dev/json_a.log @@ -0,0 +1,14 @@ +{ + "parent": { + "child": 3.0, + "ignored_child": "hi" + }, + "ignored_null": null, + "integer": 4, + "list": [3, 4], + "ignored_parent": { + "another_ignored_null": null, + "ignored_string": "hello, world!" + }, + "another_list": [4] + } \ No newline at end of file diff --git a/telegraf.conf b/telegraf.conf new file mode 100644 index 0000000000000..1a43b5a187515 --- /dev/null +++ b/telegraf.conf @@ -0,0 +1,104 @@ + +# Global tags can be specified here in key="value" format. +[global_tags] + # dc = "us-east-1" # will tag all metrics with dc=us-east-1 + # rack = "1a" + ## Environment variables can be used as tags, and throughout the config file + # user = "$USER" + + +# Configuration for telegraf agent +[agent] + ## Default data collection interval for all inputs + interval = "10s" + ## Rounds collection interval to 'interval' + ## ie, if interval="10s" then always collect on :00, :10, :20, etc. + round_interval = true + + ## Telegraf will send metrics to outputs in batches of at most + ## metric_batch_size metrics. + ## This controls the size of writes that Telegraf sends to output plugins. + metric_batch_size = 1000 + + ## For failed writes, telegraf will cache metric_buffer_limit metrics for each + ## output, and will flush this buffer on a successful write. Oldest metrics + ## are dropped first when this buffer fills. + ## This buffer only fills when writes fail to output plugin(s). + metric_buffer_limit = 10000 + + ## Collection jitter is used to jitter the collection by a random amount. + ## Each plugin will sleep for a random time within jitter before collecting. + ## This can be used to avoid many plugins querying things like sysfs at the + ## same time, which can have a measurable effect on the system. 
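+  ## (Illustrative note, not part of the original file: setting
+  ## collection_jitter = "2s" would make each input sleep a random 0-2s
+  ## before collecting, spreading out simultaneous reads.)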
+ collection_jitter = "0s" + + ## Default flushing interval for all outputs. You shouldn't set this below + ## interval. Maximum flush_interval will be flush_interval + flush_jitter + flush_interval = "10s" + ## Jitter the flush interval by a random amount. This is primarily to avoid + ## large write spikes for users running a large number of telegraf instances. + ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s + flush_jitter = "0s" + + ## By default or when set to "0s", precision will be set to the same + ## timestamp order as the collection interval, with the maximum being 1s. + ## ie, when interval = "10s", precision will be "1s" + ## when interval = "250ms", precision will be "1ms" + ## Precision will NOT be used for service inputs. It is up to each individual + ## service input to set the timestamp at the appropriate precision. + ## Valid time units are "ns", "us" (or "µs"), "ms", "s". + precision = "" + + ## Logging configuration: + ## Run telegraf with debug log messages. + debug = false + ## Run telegraf in quiet mode (error log messages only). + quiet = false + ## Specify the log file name. The empty string means to log to stderr. + logfile = "" + + ## Override default hostname, if empty use os.Hostname() + hostname = "" + ## If set to true, do no set the "host" tag in the telegraf agent. + omit_hostname = false + +# # reload and gather from file[s] on telegraf's interval +[[inputs.reader]] +# ## These accept standard unix glob matching rules, but with the addition of +# ## ** as a "super asterisk". ie: +# ## /var/log/**.log -> recursively find all .log files in /var/log +# ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log +# ## /var/log/apache.log -> only tail the apache log file +files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/logparser/grok/testdata/**.log"] +# +# ## The dataformat to be read from files +# ## Each data format has its own unique set of configuration options, read +# ## more about them here: +# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +data_format = "grok" +# + +patterns = ["%{TEST_LOG_B}","%{TEST_LOG_A}"] +# +# ## Name of the outputted measurement name. +name_override = "grok_reader" +# +# ## Full path(s) to custom pattern files. +custom_pattern_files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/logparser/grok/testdata/test-patterns"] +# +# ## Custom patterns can also be defined here. Put one pattern per line. +# custom_patterns = ''' +# ''' +# +# ## Timezone allows you to provide an override for timestamps that +# ## don't already include an offset +# ## e.g. 04/06/2016 12:41:45 data one two 5.43µs +# ## +# ## Default: "" which renders UTC +# ## Options are as follows: +# ## 1. Local -- interpret based on machine localtime +# ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones +# ## 3. 
UTC -- or blank/unspecified, will return timestamp in UTC +# timezone = "Canada/Eastern" + + From a931eb1c900d71fdff44b7c440d3a8e248c4e7b4 Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 26 Jun 2018 12:06:35 -0700 Subject: [PATCH 15/51] update DATA_FORMATS_INPUT.MD to include grok --- docs/DATA_FORMATS_INPUT.md | 35 ++++++++++++++++++++++++- plugins/inputs/reader/dev/telegraf.conf | 2 +- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index c1192e72b371b..fa30b54c5bffd 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -9,6 +9,7 @@ Telegraf is able to parse the following input data formats into metrics: 1. [Nagios](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#nagios) (exec input only) 1. [Collectd](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#collectd) 1. [Dropwizard](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#dropwizard) +1. [Grok](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#grok) Telegraf metrics, like InfluxDB [points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/), @@ -651,5 +652,37 @@ For more information about the dropwizard json format see # [inputs.exec.dropwizard_tag_paths] # tag1 = "tags.tag1" # tag2 = "tags.tag2" +``` -``` \ No newline at end of file +#### Grok +Parse logstash-style "grok" patterns: +```toml + [inputs.reader] + ## This is a list of patterns to check the given log file(s) for. + ## Note that adding patterns here increases processing time. The most + ## efficient configuration is to have one pattern per logparser. + ## Other common built-in patterns are: + ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) + ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) + patterns = ["%{COMBINED_LOG_FORMAT}"] + + ## Name of the outputted measurement name. + name_override = "apache_access_log" + + ## Full path(s) to custom pattern files. + custom_pattern_files = [] + + ## Custom patterns can also be defined here. Put one pattern per line. + custom_patterns = ''' + + ## Timezone allows you to provide an override for timestamps that + ## don't already include an offset + ## e.g. 04/06/2016 12:41:45 data one two 5.43µs + ## + ## Default: "" which renders UTC + ## Options are as follows: + ## 1. Local -- interpret based on machine localtime + ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + ## 3. UTC -- or blank/unspecified, will return timestamp in UTC + timezone = "Canada/Eastern" + ``` \ No newline at end of file diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf index 76f57f0ad8e82..e8da72c1325cd 100644 --- a/plugins/inputs/reader/dev/telegraf.conf +++ b/plugins/inputs/reader/dev/telegraf.conf @@ -10,7 +10,7 @@ # Configuration for telegraf agent [agent] ## Default data collection interval for all inputs - interval = "10s" + interval = "15s" ## Rounds collection interval to 'interval' ## ie, if interval="10s" then always collect on :00, :10, :20, etc. 
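  ## (Illustrative, not part of this commit: with the 15s interval set above
  ## and round_interval enabled, collections land on :00, :15, :30, :45.)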
round_interval = true From e450b266ec6d6b9a99026caa62d4aea6a15baac6 Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 26 Jun 2018 13:10:46 -0700 Subject: [PATCH 16/51] remove comments --- plugins/inputs/reader/dev/telegraf.conf | 100 +----------------------- plugins/inputs/reader/reader_test.go | 6 -- 2 files changed, 3 insertions(+), 103 deletions(-) diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf index e8da72c1325cd..fd89ae4f3632a 100644 --- a/plugins/inputs/reader/dev/telegraf.conf +++ b/plugins/inputs/reader/dev/telegraf.conf @@ -1,105 +1,11 @@ -# Global tags can be specified here in key="value" format. -[global_tags] - # dc = "us-east-1" # will tag all metrics with dc=us-east-1 - # rack = "1a" - ## Environment variables can be used as tags, and throughout the config file - # user = "$USER" - - -# Configuration for telegraf agent -[agent] - ## Default data collection interval for all inputs - interval = "15s" - ## Rounds collection interval to 'interval' - ## ie, if interval="10s" then always collect on :00, :10, :20, etc. - round_interval = true - - ## Telegraf will send metrics to outputs in batches of at most - ## metric_batch_size metrics. - ## This controls the size of writes that Telegraf sends to output plugins. - metric_batch_size = 1000 - - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). - metric_buffer_limit = 10000 - - ## Collection jitter is used to jitter the collection by a random amount. - ## Each plugin will sleep for a random time within jitter before collecting. - ## This can be used to avoid many plugins querying things like sysfs at the - ## same time, which can have a measurable effect on the system. - collection_jitter = "0s" - - ## Default flushing interval for all outputs. You shouldn't set this below - ## interval. Maximum flush_interval will be flush_interval + flush_jitter - flush_interval = "10s" - ## Jitter the flush interval by a random amount. This is primarily to avoid - ## large write spikes for users running a large number of telegraf instances. - ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s - flush_jitter = "0s" - - ## By default or when set to "0s", precision will be set to the same - ## timestamp order as the collection interval, with the maximum being 1s. - ## ie, when interval = "10s", precision will be "1s" - ## when interval = "250ms", precision will be "1ms" - ## Precision will NOT be used for service inputs. It is up to each individual - ## service input to set the timestamp at the appropriate precision. - ## Valid time units are "ns", "us" (or "µs"), "ms", "s". - precision = "" - - ## Logging configuration: - ## Run telegraf with debug log messages. - debug = false - ## Run telegraf in quiet mode (error log messages only). - quiet = false - ## Specify the log file name. The empty string means to log to stderr. - logfile = "" - - ## Override default hostname, if empty use os.Hostname() - hostname = "" - ## If set to true, do no set the "host" tag in the telegraf agent. - omit_hostname = false - -# # reload and gather from file[s] on telegraf's interval [[inputs.reader]] -# ## These accept standard unix glob matching rules, but with the addition of -# ## ** as a "super asterisk". 
ie: -# ## /var/log/**.log -> recursively find all .log files in /var/log -# ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log -# ## /var/log/apache.log -> only tail the apache log file + files = ["/var/log/test.log"] -# -# ## The dataformat to be read from files -# ## Each data format has its own unique set of configuration options, read -# ## more about them here: -# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md + data_format = "json" -# -#patterns = ["%{TEST_LOG_B}","%{TEST_LOG_A}"] -# -# ## Name of the outputted measurement name. -#name_override = "grok_reader" -# -# ## Full path(s) to custom pattern files. -#custom_pattern_files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/logparser/grok/testdata/test-patterns"] -# -# ## Custom patterns can also be defined here. Put one pattern per line. -# custom_patterns = ''' -# ''' -# -# ## Timezone allows you to provide an override for timestamps that -# ## don't already include an offset -# ## e.g. 04/06/2016 12:41:45 data one two 5.43µs -# ## -# ## Default: "" which renders UTC -# ## Options are as follows: -# ## 1. Local -- interpret based on machine localtime -# ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones -# ## 3. UTC -- or blank/unspecified, will return timestamp in UTC -# timezone = "Canada/Eastern" +name_override = "json_reader" [[outputs.file]] diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index ca076350b9f35..8295b294bd414 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -1,7 +1,6 @@ package reader import ( - "log" "runtime" "strings" "testing" @@ -36,7 +35,6 @@ func TestJSONParserCompile(t *testing.T) { assert.NoError(t, err) r.Gather(&acc) - log.Printf("acc: %v", acc.Metrics[0].Tags) assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags) assert.Equal(t, 5, len(acc.Metrics[0].Fields)) } @@ -57,11 +55,7 @@ func TestGrokParser(t *testing.T) { r.parser = nParser assert.NoError(t, err) - log.Printf("path: %v", r.Filepaths[0]) err = r.Gather(&acc) - log.Printf("err: %v", err) - log.Printf("metric[0]_tags: %v, metric[0]_fields: %v", acc.Metrics[0].Tags, acc.Metrics[0].Fields) - log.Printf("metric[1]_tags: %v, metric[1]_fields: %v", acc.Metrics[1].Tags, acc.Metrics[1].Fields) assert.Equal(t, 2, len(acc.Metrics)) } From 001658af30b420120488f406fd9b215e99bc38ce Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 26 Jun 2018 13:18:43 -0700 Subject: [PATCH 17/51] condense telegraf.conf --- plugins/inputs/reader/dev/telegraf.conf | 10 +++------- plugins/inputs/reader/reader_test.go | 1 - 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf index fd89ae4f3632a..caeaae4fd3a25 100644 --- a/plugins/inputs/reader/dev/telegraf.conf +++ b/plugins/inputs/reader/dev/telegraf.conf @@ -1,12 +1,8 @@ [[inputs.reader]] - -files = ["/var/log/test.log"] - -data_format = "json" - -name_override = "json_reader" - + files = ["/var/log/test.log"] + data_format = "json" + name_override = "json_reader" [[outputs.file]] files = ["stdout"] \ No newline at end of file diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index 8295b294bd414..c46eb027279f4 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -17,7 +17,6 
@@ func TestRefreshFilePaths(t *testing.T) { } r.refreshFilePaths() - //log.Printf("filenames: %v", filenames) assert.Equal(t, len(r.Filenames), 2) } func TestJSONParserCompile(t *testing.T) { From 7fa27f400d63f04b90961307d52dd7e7ca39b89c Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 26 Jun 2018 13:19:53 -0700 Subject: [PATCH 18/51] more condensing --- plugins/inputs/reader/dev/telegraf.conf | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf index caeaae4fd3a25..abaf90b830b75 100644 --- a/plugins/inputs/reader/dev/telegraf.conf +++ b/plugins/inputs/reader/dev/telegraf.conf @@ -1,4 +1,3 @@ - [[inputs.reader]] files = ["/var/log/test.log"] data_format = "json" From 1be2a8eeaf9801fd7674ce1bf11347adb1cd727b Mon Sep 17 00:00:00 2001 From: Greg Linton Date: Tue, 26 Jun 2018 16:20:57 -0600 Subject: [PATCH 19/51] Formatting and revert Makefile --- Makefile | 14 +-- plugins/inputs/reader/dev/docker-compose.yml | 2 +- plugins/inputs/reader/dev/json_a.log | 26 ++--- plugins/inputs/reader/dev/telegraf.conf | 2 +- telegraf.conf | 104 ------------------- 5 files changed, 16 insertions(+), 132 deletions(-) delete mode 100644 telegraf.conf diff --git a/Makefile b/Makefile index 99d7a3a396ce0..8650df2677ecf 100644 --- a/Makefile +++ b/Makefile @@ -92,16 +92,4 @@ docker-image: plugins/parsers/influx/machine.go: plugins/parsers/influx/machine.go.rl ragel -Z -G2 $^ -o $@ -static: - @echo "Building static linux binary..." - @CGO_ENABLED=0 \ - GOOS=linux \ - GOARCH=amd64 \ - go build -ldflags "$(LDFLAGS)" ./cmd/telegraf - -plugin-%: - @echo "Starting dev environment for $${$(@)} input plugin..." - @docker-compose -f plugins/inputs/$${$(@)}/dev/docker-compose.yml up - -.PHONY: deps telegraf install test test-windows lint vet test-all package clean docker-image fmtcheck uint64 static - +.PHONY: deps telegraf install test test-windows lint vet test-all package clean docker-image fmtcheck uint64 diff --git a/plugins/inputs/reader/dev/docker-compose.yml b/plugins/inputs/reader/dev/docker-compose.yml index abeaf931542ba..3c16fca909ebd 100644 --- a/plugins/inputs/reader/dev/docker-compose.yml +++ b/plugins/inputs/reader/dev/docker-compose.yml @@ -10,4 +10,4 @@ services: entrypoint: - /telegraf - --config - - /telegraf.conf \ No newline at end of file + - /telegraf.conf diff --git a/plugins/inputs/reader/dev/json_a.log b/plugins/inputs/reader/dev/json_a.log index 739fd65d89ca1..609c40a09d600 100644 --- a/plugins/inputs/reader/dev/json_a.log +++ b/plugins/inputs/reader/dev/json_a.log @@ -1,14 +1,14 @@ { - "parent": { - "child": 3.0, - "ignored_child": "hi" - }, - "ignored_null": null, - "integer": 4, - "list": [3, 4], - "ignored_parent": { - "another_ignored_null": null, - "ignored_string": "hello, world!" - }, - "another_list": [4] - } \ No newline at end of file + "parent": { + "child": 3.0, + "ignored_child": "hi" + }, + "ignored_null": null, + "integer": 4, + "list": [3, 4], + "ignored_parent": { + "another_ignored_null": null, + "ignored_string": "hello, world!" 
+ }, + "another_list": [4] +} diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf index abaf90b830b75..4d50bd5e036a3 100644 --- a/plugins/inputs/reader/dev/telegraf.conf +++ b/plugins/inputs/reader/dev/telegraf.conf @@ -4,4 +4,4 @@ name_override = "json_reader" [[outputs.file]] - files = ["stdout"] \ No newline at end of file + files = ["stdout"] diff --git a/telegraf.conf b/telegraf.conf deleted file mode 100644 index 1a43b5a187515..0000000000000 --- a/telegraf.conf +++ /dev/null @@ -1,104 +0,0 @@ - -# Global tags can be specified here in key="value" format. -[global_tags] - # dc = "us-east-1" # will tag all metrics with dc=us-east-1 - # rack = "1a" - ## Environment variables can be used as tags, and throughout the config file - # user = "$USER" - - -# Configuration for telegraf agent -[agent] - ## Default data collection interval for all inputs - interval = "10s" - ## Rounds collection interval to 'interval' - ## ie, if interval="10s" then always collect on :00, :10, :20, etc. - round_interval = true - - ## Telegraf will send metrics to outputs in batches of at most - ## metric_batch_size metrics. - ## This controls the size of writes that Telegraf sends to output plugins. - metric_batch_size = 1000 - - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). - metric_buffer_limit = 10000 - - ## Collection jitter is used to jitter the collection by a random amount. - ## Each plugin will sleep for a random time within jitter before collecting. - ## This can be used to avoid many plugins querying things like sysfs at the - ## same time, which can have a measurable effect on the system. - collection_jitter = "0s" - - ## Default flushing interval for all outputs. You shouldn't set this below - ## interval. Maximum flush_interval will be flush_interval + flush_jitter - flush_interval = "10s" - ## Jitter the flush interval by a random amount. This is primarily to avoid - ## large write spikes for users running a large number of telegraf instances. - ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s - flush_jitter = "0s" - - ## By default or when set to "0s", precision will be set to the same - ## timestamp order as the collection interval, with the maximum being 1s. - ## ie, when interval = "10s", precision will be "1s" - ## when interval = "250ms", precision will be "1ms" - ## Precision will NOT be used for service inputs. It is up to each individual - ## service input to set the timestamp at the appropriate precision. - ## Valid time units are "ns", "us" (or "µs"), "ms", "s". - precision = "" - - ## Logging configuration: - ## Run telegraf with debug log messages. - debug = false - ## Run telegraf in quiet mode (error log messages only). - quiet = false - ## Specify the log file name. The empty string means to log to stderr. - logfile = "" - - ## Override default hostname, if empty use os.Hostname() - hostname = "" - ## If set to true, do no set the "host" tag in the telegraf agent. - omit_hostname = false - -# # reload and gather from file[s] on telegraf's interval -[[inputs.reader]] -# ## These accept standard unix glob matching rules, but with the addition of -# ## ** as a "super asterisk". 
ie: -# ## /var/log/**.log -> recursively find all .log files in /var/log -# ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log -# ## /var/log/apache.log -> only tail the apache log file -files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/logparser/grok/testdata/**.log"] -# -# ## The dataformat to be read from files -# ## Each data format has its own unique set of configuration options, read -# ## more about them here: -# ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -data_format = "grok" -# - -patterns = ["%{TEST_LOG_B}","%{TEST_LOG_A}"] -# -# ## Name of the outputted measurement name. -name_override = "grok_reader" -# -# ## Full path(s) to custom pattern files. -custom_pattern_files = ["/Users/maxu/go/src/github.com/influxdata/telegraf/plugins/inputs/logparser/grok/testdata/test-patterns"] -# -# ## Custom patterns can also be defined here. Put one pattern per line. -# custom_patterns = ''' -# ''' -# -# ## Timezone allows you to provide an override for timestamps that -# ## don't already include an offset -# ## e.g. 04/06/2016 12:41:45 data one two 5.43µs -# ## -# ## Default: "" which renders UTC -# ## Options are as follows: -# ## 1. Local -- interpret based on machine localtime -# ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones -# ## 3. UTC -- or blank/unspecified, will return timestamp in UTC -# timezone = "Canada/Eastern" - - From aa750ec2b88e31de729ab5383f509f24bc93435a Mon Sep 17 00:00:00 2001 From: Max U Date: Wed, 27 Jun 2018 10:13:52 -0700 Subject: [PATCH 20/51] add reader README.md --- docs/DATA_FORMATS_INPUT.md | 4 +++- plugins/inputs/reader/README.md | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 plugins/inputs/reader/README.md diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index fa30b54c5bffd..96dd111b7e277 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -655,7 +655,9 @@ For more information about the dropwizard json format see ``` #### Grok -Parse logstash-style "grok" patterns: +Parse logstash-style "grok" patterns. Patterns can be added to patterns, or custom patterns read from custom_pattern_files. + +#### Grok Configuration: ```toml [inputs.reader] ## This is a list of patterns to check the given log file(s) for. diff --git a/plugins/inputs/reader/README.md b/plugins/inputs/reader/README.md new file mode 100644 index 0000000000000..190bdb1f51798 --- /dev/null +++ b/plugins/inputs/reader/README.md @@ -0,0 +1,21 @@ +# Reader Input Plugin + +The Reader Plugin updates a list of files every interval and parses the data inside. +This plugin can parse any "data_format" formats. + +### Configuration: +```toml +## Files to parse each interval. +## These accept standard unix glob matching rules, but with the addition of +## ** as a "super asterisk". 
ie: +## /var/log/**.log -> recursively find all .log files in /var/log +## /var/log/*/*.log -> find all .log files with a parent dir in /var/log +## /var/log/apache.log -> only tail the apache log file +files = ["/var/log/apache/access.log"] + +## The dataformat to be read from files +## Each data format has its own unique set of configuration options, read +## more about them here: +## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md +data_format = "" +``` \ No newline at end of file From 892c95aa6ecd9bcf85b4a6da5c45181f12fbda4c Mon Sep 17 00:00:00 2001 From: Max U Date: Wed, 27 Jun 2018 13:30:59 -0700 Subject: [PATCH 21/51] update readmes --- docs/DATA_FORMATS_INPUT.md | 67 +++++++++++++++++++++++++++++++++ plugins/inputs/reader/README.md | 4 +- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 96dd111b7e277..b79a182beac82 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -657,6 +657,73 @@ For more information about the dropwizard json format see #### Grok Parse logstash-style "grok" patterns. Patterns can be added to patterns, or custom patterns read from custom_pattern_files. +# View logstash grok pattern docs here: +# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html +# All default logstash patterns are supported, these can be viewed here: +# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns + +# Available modifiers: +# string (default if nothing is specified) +# int +# float +# duration (ie, 5.23ms gets converted to int nanoseconds) +# tag (converts the field into a tag) +# drop (drops the field completely) +# Timestamp modifiers: +# ts-ansic ("Mon Jan _2 15:04:05 2006") +# ts-unix ("Mon Jan _2 15:04:05 MST 2006") +# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") +# ts-rfc822 ("02 Jan 06 15:04 MST") +# ts-rfc822z ("02 Jan 06 15:04 -0700") +# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") +# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") +# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") +# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") +# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") +# ts-httpd ("02/Jan/2006:15:04:05 -0700") +# ts-epoch (seconds since unix epoch) +# ts-epochnano (nanoseconds since unix epoch) +# ts-"CUSTOM" +# CUSTOM time layouts must be within quotes and be the representation of the +# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 +# See https://golang.org/pkg/time/#Parse for more details. + +# Example log file pattern, example log looks like this: +# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs +# Breakdown of the DURATION pattern below: +# NUMBER is a builtin logstash grok pattern matching float & int numbers. +# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets. +# s is also regex, this pattern must end in "s". +# so DURATION will match something like '5.324ms' or '6.1µs' or '10s' +DURATION %{NUMBER}[nuµm]?s +RESPONSE_CODE %{NUMBER:response_code:tag} +RESPONSE_TIME %{DURATION:response_time_ns:duration} +EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + +# Wider-ranging username matching vs. 
logstash built-in %{USER} +NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ +NGUSER %{NGUSERNAME} +# Wider-ranging client IP matching +CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) + +## +## COMMON LOG PATTERNS +## + +# apache & nginx logs, this is also known as the "common log format" +# see https://en.wikipedia.org/wiki/Common_Log_Format +COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) + +# Combined log format is the same as the common log format but with the addition +# of two quoted strings at the end for "referrer" and "agent" +# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html +COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} + +# HTTPD log formats +HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} +HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message} +HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} + #### Grok Configuration: ```toml [inputs.reader] diff --git a/plugins/inputs/reader/README.md b/plugins/inputs/reader/README.md index 190bdb1f51798..39adb253d1782 100644 --- a/plugins/inputs/reader/README.md +++ b/plugins/inputs/reader/README.md @@ -1,10 +1,12 @@ # Reader Input Plugin -The Reader Plugin updates a list of files every interval and parses the data inside. +The Reader Plugin updates a list of files every interval and parses the data inside. +Files will always be read from the beginning. This plugin can parse any "data_format" formats. ### Configuration: ```toml +[[inputs.reader]] ## Files to parse each interval. ## These accept standard unix glob matching rules, but with the addition of ## ** as a "super asterisk". 
ie: From 04f09d65bf7564c44b2fd4500a3314e3d69c564e Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 28 Jun 2018 14:45:14 -0700 Subject: [PATCH 22/51] grok parser func unexported --- plugins/parsers/registry.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index b657cf83f1004..95b07a52c755b 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -129,7 +129,7 @@ func NewParser(config *Config) (Parser, error) { config.Separator, config.Templates) case "grok": - parser, err = NewGrokParser( + parser, err = newGrokParser( config.MetricName, config.Patterns, config.NamedPatterns, @@ -142,7 +142,7 @@ func NewParser(config *Config) (Parser, error) { return parser, err } -func NewGrokParser(metricName string, +func newGrokParser(metricName string, patterns []string, nPatterns []string, cPatterns string, From 8063b38b2d3f2f1e572e81428c7183480b2ac3a5 Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 3 Jul 2018 11:29:11 -0700 Subject: [PATCH 23/51] address some of Daniel's comments --- docs/DATA_FORMATS_INPUT.md | 58 ++++++++++++++--------------- internal/config/config.go | 20 +++++----- plugins/inputs/reader/reader.go | 21 ++++------- plugins/parsers/grok/parser.go | 8 +--- plugins/parsers/grok/parser_test.go | 4 +- plugins/parsers/registry.go | 2 +- 6 files changed, 51 insertions(+), 62 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index b79a182beac82..66b784305200b 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -726,32 +726,32 @@ HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} #### Grok Configuration: ```toml - [inputs.reader] - ## This is a list of patterns to check the given log file(s) for. - ## Note that adding patterns here increases processing time. The most - ## efficient configuration is to have one pattern per logparser. - ## Other common built-in patterns are: - ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) - ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) - patterns = ["%{COMBINED_LOG_FORMAT}"] - - ## Name of the outputted measurement name. - name_override = "apache_access_log" - - ## Full path(s) to custom pattern files. - custom_pattern_files = [] - - ## Custom patterns can also be defined here. Put one pattern per line. - custom_patterns = ''' - - ## Timezone allows you to provide an override for timestamps that - ## don't already include an offset - ## e.g. 04/06/2016 12:41:45 data one two 5.43µs - ## - ## Default: "" which renders UTC - ## Options are as follows: - ## 1. Local -- interpret based on machine localtime - ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones - ## 3. UTC -- or blank/unspecified, will return timestamp in UTC - timezone = "Canada/Eastern" - ``` \ No newline at end of file +[[inputs.reader]] + ## This is a list of patterns to check the given log file(s) for. + ## Note that adding patterns here increases processing time. The most + ## efficient configuration is to have one pattern per logparser. + ## Other common built-in patterns are: + ## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs) + ## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent) + grok_patterns = ["%{COMBINED_LOG_FORMAT}"] + + ## Name of the outputted measurement name. + grok_name_override = "apache_access_log" + + ## Full path(s) to custom pattern files. 
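+  ## e.g. grok_custom_pattern_files = ["/etc/telegraf/my-patterns"]
+  ## (hypothetical path, for illustration only; not part of this commit)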
+ grok_custom_pattern_files = [] + + ## Custom patterns can also be defined here. Put one pattern per line. + grok_custom_patterns = ''' + + ## Timezone allows you to provide an override for timestamps that + ## don't already include an offset + ## e.g. 04/06/2016 12:41:45 data one two 5.43µs + ## + ## Default: "" which renders UTC + ## Options are as follows: + ## 1. Local -- interpret based on machine localtime + ## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones + ## 3. UTC -- or blank/unspecified, will return timestamp in UTC + grok_timezone = "Canada/Eastern" +``` \ No newline at end of file diff --git a/internal/config/config.go b/internal/config/config.go index 1a98c61c5c2fd..c7f5177ac883d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1339,7 +1339,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } //for grok data_format - if node, ok := tbl.Fields["named_patterns"]; ok { + if node, ok := tbl.Fields["grok_named_patterns"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { @@ -1351,7 +1351,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } - if node, ok := tbl.Fields["patterns"]; ok { + if node, ok := tbl.Fields["grok_patterns"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { @@ -1363,7 +1363,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } - if node, ok := tbl.Fields["custom_patterns"]; ok { + if node, ok := tbl.Fields["grok_custom_patterns"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if str, ok := kv.Value.(*ast.String); ok { c.CustomPatterns = str.Value @@ -1371,7 +1371,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } - if node, ok := tbl.Fields["custom_pattern_files"]; ok { + if node, ok := tbl.Fields["grok_custom_pattern_files"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { @@ -1383,7 +1383,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } - if node, ok := tbl.Fields["timezone"]; ok { + if node, ok := tbl.Fields["grok_timezone"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if str, ok := kv.Value.(*ast.String); ok { c.TimeZone = str.Value @@ -1406,11 +1406,11 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { delete(tbl.Fields, "dropwizard_time_format") delete(tbl.Fields, "dropwizard_tags_path") delete(tbl.Fields, "dropwizard_tag_paths") - delete(tbl.Fields, "named_patterns") - delete(tbl.Fields, "patterns") - delete(tbl.Fields, "custom_patterns") - delete(tbl.Fields, "custom_pattern_files") - delete(tbl.Fields, "timezone") + delete(tbl.Fields, "grok_named_patterns") + delete(tbl.Fields, "grok_patterns") + delete(tbl.Fields, "grok_custom_patterns") + delete(tbl.Fields, "grok_custom_pattern_files") + delete(tbl.Fields, "grok_timezone") return parsers.NewParser(c) } diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index e21295611d1f8..d0f7035405cf8 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -1,8 +1,8 @@ package reader import ( + "fmt" "io/ioutil" - "log" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal/globpath" @@ -50,14 +50,8 @@ func (r *Reader) Gather(acc telegraf.Accumulator) 
error { return err } - for i, m := range metrics { - - //error if m is nil - if m == nil { - log.Printf("E! Metric could not be parsed from: %v, on line %v", k, i) - continue - } - acc.AddFields(m.Name(), m.Fields(), m.Tags()) + for _, m := range metrics { + acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) } } return nil @@ -67,13 +61,12 @@ func (r *Reader) SetParser(p parsers.Parser) { r.parser = p } -func (r *Reader) refreshFilePaths() { +func (r *Reader) refreshFilePaths() error { var allFiles []string for _, filepath := range r.Filepaths { g, err := globpath.Compile(filepath) if err != nil { - log.Printf("E! Error Glob %s failed to compile, %s", filepath, err) - continue + return fmt.Errorf("E! Error Glob: %v could not be compiled, %s", filepath, err) } files := g.Match() @@ -83,13 +76,13 @@ func (r *Reader) refreshFilePaths() { } r.Filenames = allFiles + return nil } -//requires that Parser has been compiled func (r *Reader) readMetric(filename string) ([]telegraf.Metric, error) { fileContents, err := ioutil.ReadFile(filename) if err != nil { - log.Printf("E! File could not be opened: %v", filename) + return nil, fmt.Errorf("E! Error file: %v could not be read, %s", filename, err) } return r.parser.Parse(fileContents) diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index e3d2acf3bb3ac..aae1fe82f2357 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -71,7 +71,7 @@ type Parser struct { NamedPatterns []string CustomPatterns string CustomPatternFiles []string - Measurement string + MetricName string // Timezone is an optional component to help render log dates to // your chosen zone. @@ -167,10 +167,6 @@ func (p *Parser) Compile() error { p.addCustomPatterns(scanner) } - if p.Measurement == "" { - p.Measurement = "logparser_grok" - } - p.loc, err = time.LoadLocation(p.Timezone) if err != nil { log.Printf("W! 
improper timezone supplied (%s), setting loc to UTC", p.Timezone) @@ -348,7 +344,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { return nil, fmt.Errorf("logparser_grok: must have one or more fields") } - return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) + return metric.New(p.MetricName, tags, fields, p.tsModder.tsMod(timestamp)) } func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 77818199af47c..524e562bbcc26 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -9,8 +9,8 @@ import ( func TestGrokParse(t *testing.T) { parser := Parser{ - Measurement: "t_met", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + MetricName: "t_met", + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, } parser.Compile() metrics, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)) diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index 95b07a52c755b..f45067ea757d4 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -148,7 +148,7 @@ func newGrokParser(metricName string, cPatterns string, cPatternFiles []string, tZone string) (Parser, error) { parser := grok.Parser{ - Measurement: metricName, + MetricName: metricName, Patterns: patterns, NamedPatterns: nPatterns, CustomPatterns: cPatterns, From bfc13a744bfb467f442ebd1617338c58bcfff5e2 Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 3 Jul 2018 15:45:15 -0700 Subject: [PATCH 24/51] incomplete changes to logparser plugin --- plugins/inputs/logparser/logparser.go | 49 +++++++++++++++------- plugins/inputs/logparser/logparser_test.go | 31 ++++++++------ 2 files changed, 53 insertions(+), 27 deletions(-) diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index cb9e1e8b61ccd..b144283878c02 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -14,9 +14,8 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal/globpath" "github.com/influxdata/telegraf/plugins/inputs" - + "github.com/influxdata/telegraf/plugins/parsers" // Parsers - "github.com/influxdata/telegraf/plugins/inputs/logparser/grok" ) const ( @@ -45,11 +44,17 @@ type LogParserPlugin struct { done chan struct{} wg sync.WaitGroup acc telegraf.Accumulator - parsers []LogParser + parsers []parsers.Parser sync.Mutex - GrokParser *grok.Parser `toml:"grok"` + GrokParser *parsers.Parser `toml:"grok"` + + Patterns []string + NamedPatterns []string + CustomPatterns string + CustomPatternFiles []string + TimeZone string } const sampleConfig = ` @@ -131,16 +136,30 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { l.tailers = make(map[string]*tail.Tail) // Looks for fields which implement LogParser interface - l.parsers = []LogParser{} + l.parsers = []parsers.Parser{} + config := &parsers.Config{ + Patterns: l.Patterns, + NamedPatterns: l.NamedPatterns, + CustomPatterns: l.CustomPatterns, + CustomPatternFiles: l.CustomPatternFiles, + TimeZone: l.TimeZone, + DataFormat: "grok", + } + var err error + *l.GrokParser, err = parsers.NewParser(config) + if err != nil { + return err + } + s := reflect.ValueOf(l).Elem() for i := 0; i < s.NumField(); i++ { f := s.Field(i) - + log.Printf("got field %v: %v", i, f) if !f.CanInterface() { continue } - if 
lpPlugin, ok := f.Interface().(LogParser); ok { + if lpPlugin, ok := f.Interface().(parsers.Parser); ok { if reflect.ValueOf(lpPlugin).IsNil() { continue } @@ -152,12 +171,12 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { return fmt.Errorf("logparser input plugin: no parser defined") } - // compile log parser patterns: - for _, parser := range l.parsers { - if err := parser.Compile(); err != nil { - return err - } - } + // //compile log parser patterns: + // for _, parser := range l.parsers { + // if err := parser.Compile(); err != nil { + // return err + // } + // } l.wg.Add(1) go l.parser() @@ -247,8 +266,8 @@ func (l *LogParserPlugin) receiver(tailer *tail.Tail) { } } -// parser is launched as a goroutine to watch the l.lines channel. -// when a line is available, parser parses it and adds the metric(s) to the +// parse is launched as a goroutine to watch the l.lines channel. +// when a line is available, parse parses it and adds the metric(s) to the // accumulator. func (l *LogParserPlugin) parser() { defer l.wg.Done() diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go index 98567b4c24248..939a95732f002 100644 --- a/plugins/inputs/logparser/logparser_test.go +++ b/plugins/inputs/logparser/logparser_test.go @@ -9,7 +9,7 @@ import ( "github.com/influxdata/telegraf/testutil" - "github.com/influxdata/telegraf/plugins/inputs/logparser/grok" + "github.com/influxdata/telegraf/plugins/parsers" "github.com/stretchr/testify/assert" ) @@ -26,39 +26,43 @@ func TestStartNoParsers(t *testing.T) { func TestGrokParseLogFilesNonExistPattern(t *testing.T) { thisdir := getCurrentDir() - p := &grok.Parser{ + c := &parsers.Config{ Patterns: []string{"%{FOOBAR}"}, CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + DataFormat: "grok", } + p, err := parsers.NewParser(c) logparser := &LogParserPlugin{ FromBeginning: true, Files: []string{thisdir + "grok/testdata/*.log"}, - GrokParser: p, + GrokParser: &p, } acc := testutil.Accumulator{} - err := logparser.Start(&acc) + err = logparser.Start(&acc) assert.Error(t, err) } func TestGrokParseLogFiles(t *testing.T) { thisdir := getCurrentDir() - p := &grok.Parser{ + c := parsers.Config{ Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + DataFormat: "grok", } + p, _ := parsers.NewParser(&c) logparser := &LogParserPlugin{ FromBeginning: true, Files: []string{thisdir + "grok/testdata/*.log"}, - GrokParser: p, + GrokParser: &p, } acc := testutil.Accumulator{} assert.NoError(t, logparser.Start(&acc)) - acc.Wait(2) + //acc.Wait(2) logparser.Stop() @@ -91,15 +95,17 @@ func TestGrokParseLogFilesAppearLater(t *testing.T) { assert.NoError(t, err) thisdir := getCurrentDir() - p := &grok.Parser{ + c := &parsers.Config{ Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + DataFormat: "grok", } + p, err := parsers.NewParser(c) logparser := &LogParserPlugin{ FromBeginning: true, Files: []string{emptydir + "/*.log"}, - GrokParser: p, + GrokParser: &p, } acc := testutil.Accumulator{} @@ -130,16 +136,17 @@ func TestGrokParseLogFilesAppearLater(t *testing.T) { // pattern available for test_b.log func TestGrokParseLogFilesOneBad(t *testing.T) { thisdir := getCurrentDir() - p := &grok.Parser{ + c := &parsers.Config{ Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"}, CustomPatternFiles: []string{thisdir + 
"grok/testdata/test-patterns"}, + DataFormat: "grok", } - assert.NoError(t, p.Compile()) + p, _ := parsers.NewParser(c) logparser := &LogParserPlugin{ FromBeginning: true, Files: []string{thisdir + "grok/testdata/test_a.log"}, - GrokParser: p, + GrokParser: &p, } acc := testutil.Accumulator{} From 67db14332f998fa71741d9b8209c893dcb0f64b0 Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 3 Jul 2018 16:27:11 -0700 Subject: [PATCH 25/51] still unfinished logparser changes --- plugins/inputs/logparser/logparser.go | 59 ++++++---------------- plugins/inputs/logparser/logparser_test.go | 48 +++++++----------- 2 files changed, 34 insertions(+), 73 deletions(-) diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index b144283878c02..aabc8b9804673 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -3,9 +3,7 @@ package logparser import ( - "fmt" "log" - "reflect" "strings" "sync" @@ -35,9 +33,10 @@ type logEntry struct { // LogParserPlugin is the primary struct to implement the interface for logparser plugin type LogParserPlugin struct { - Files []string - FromBeginning bool - WatchMethod string + Files []string + FromBeginning bool + WatchMethod string + MeasurementName string `toml:"measurement"` tailers map[string]*tail.Tail lines chan logEntry @@ -48,7 +47,7 @@ type LogParserPlugin struct { sync.Mutex - GrokParser *parsers.Parser `toml:"grok"` + GrokParser parsers.Parser `toml:"grok"` Patterns []string NamedPatterns []string @@ -146,38 +145,11 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { DataFormat: "grok", } var err error - *l.GrokParser, err = parsers.NewParser(config) + l.GrokParser, err = parsers.NewParser(config) if err != nil { return err } - s := reflect.ValueOf(l).Elem() - for i := 0; i < s.NumField(); i++ { - f := s.Field(i) - log.Printf("got field %v: %v", i, f) - if !f.CanInterface() { - continue - } - - if lpPlugin, ok := f.Interface().(parsers.Parser); ok { - if reflect.ValueOf(lpPlugin).IsNil() { - continue - } - l.parsers = append(l.parsers, lpPlugin) - } - } - - if len(l.parsers) == 0 { - return fmt.Errorf("logparser input plugin: no parser defined") - } - - // //compile log parser patterns: - // for _, parser := range l.parsers { - // if err := parser.Compile(); err != nil { - // return err - // } - // } - l.wg.Add(1) go l.parser() @@ -284,18 +256,17 @@ func (l *LogParserPlugin) parser() { continue } } - for _, parser := range l.parsers { - m, err = parser.ParseLine(entry.line) - if err == nil { - if m != nil { - tags := m.Tags() - tags["path"] = entry.path - l.acc.AddFields(m.Name(), m.Fields(), tags, m.Time()) - } - } else { - log.Println("E! Error parsing log line: " + err.Error()) + m, err = l.GrokParser.ParseLine(entry.line) + if err == nil { + if m != nil { + tags := m.Tags() + tags["path"] = entry.path + l.acc.AddFields(l.MeasurementName, m.Fields(), tags, m.Time()) } + } else { + log.Println("E! 
Error parsing log line: " + err.Error()) } + } } diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go index 939a95732f002..6b527ef85351c 100644 --- a/plugins/inputs/logparser/logparser_test.go +++ b/plugins/inputs/logparser/logparser_test.go @@ -2,6 +2,7 @@ package logparser import ( "io/ioutil" + "log" "os" "runtime" "strings" @@ -36,7 +37,7 @@ func TestGrokParseLogFilesNonExistPattern(t *testing.T) { logparser := &LogParserPlugin{ FromBeginning: true, Files: []string{thisdir + "grok/testdata/*.log"}, - GrokParser: &p, + GrokParser: p, } acc := testutil.Accumulator{} @@ -46,26 +47,23 @@ func TestGrokParseLogFilesNonExistPattern(t *testing.T) { func TestGrokParseLogFiles(t *testing.T) { thisdir := getCurrentDir() - c := parsers.Config{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, - DataFormat: "grok", - } - p, _ := parsers.NewParser(&c) logparser := &LogParserPlugin{ - FromBeginning: true, - Files: []string{thisdir + "grok/testdata/*.log"}, - GrokParser: &p, + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/*.log"}, + MeasurementName: "logparser_grok", } acc := testutil.Accumulator{} assert.NoError(t, logparser.Start(&acc)) - //acc.Wait(2) + acc.Wait(2) logparser.Stop() + log.Printf("metric[0] %v, tags: %v, fields: %v", acc.Metrics[0].Measurement, acc.Metrics[0].Tags, acc.Metrics[0].Fields) acc.AssertContainsTaggedFields(t, "logparser_grok", map[string]interface{}{ "clientip": "192.168.1.1", @@ -95,17 +93,13 @@ func TestGrokParseLogFilesAppearLater(t *testing.T) { assert.NoError(t, err) thisdir := getCurrentDir() - c := &parsers.Config{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, - DataFormat: "grok", - } - p, err := parsers.NewParser(c) logparser := &LogParserPlugin{ - FromBeginning: true, - Files: []string{emptydir + "/*.log"}, - GrokParser: &p, + FromBeginning: true, + Files: []string{emptydir + "/*.log"}, + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + MeasurementName: "logparser_grok", } acc := testutil.Accumulator{} @@ -136,17 +130,13 @@ func TestGrokParseLogFilesAppearLater(t *testing.T) { // pattern available for test_b.log func TestGrokParseLogFilesOneBad(t *testing.T) { thisdir := getCurrentDir() - c := &parsers.Config{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, - DataFormat: "grok", - } - p, _ := parsers.NewParser(c) logparser := &LogParserPlugin{ - FromBeginning: true, - Files: []string{thisdir + "grok/testdata/test_a.log"}, - GrokParser: &p, + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/test_a.log"}, + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + MeasurementName: "logparser_grok", } acc := testutil.Accumulator{} From 8a9da28734da30a80936ec315b227dfa0bff83d0 Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 6 Jul 2018 11:22:14 -0700 Subject: [PATCH 26/51] logparser is linked to grok parser --- plugins/inputs/logparser/logparser.go | 27 ++++++++++++++++++++-- plugins/inputs/logparser/logparser_test.go | 17 ++++---------- 2 files changed, 30 
insertions(+), 14 deletions(-) diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index aabc8b9804673..912a5bc51abad 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -3,7 +3,9 @@ package logparser import ( + "fmt" "log" + "reflect" "strings" "sync" @@ -43,7 +45,7 @@ type LogParserPlugin struct { done chan struct{} wg sync.WaitGroup acc telegraf.Accumulator - parsers []parsers.Parser + parsers []LogParser sync.Mutex @@ -135,7 +137,7 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { l.tailers = make(map[string]*tail.Tail) // Looks for fields which implement LogParser interface - l.parsers = []parsers.Parser{} + l.parsers = []LogParser{} config := &parsers.Config{ Patterns: l.Patterns, NamedPatterns: l.NamedPatterns, @@ -144,12 +146,33 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { TimeZone: l.TimeZone, DataFormat: "grok", } + var err error l.GrokParser, err = parsers.NewParser(config) if err != nil { return err } + s := reflect.ValueOf(l).Elem() + for i := 0; i < s.NumField(); i++ { + f := s.Field(i) + + if !f.CanInterface() { + continue + } + + if lpPlugin, ok := f.Interface().(LogParser); ok { + if reflect.ValueOf(lpPlugin).IsNil() { + continue + } + l.parsers = append(l.parsers, lpPlugin) + } + } + + if len(l.parsers) == 0 { + return fmt.Errorf("logparser input plugin: no parser defined") + } + l.wg.Add(1) go l.parser() diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go index 6b527ef85351c..ac7b2c82e2977 100644 --- a/plugins/inputs/logparser/logparser_test.go +++ b/plugins/inputs/logparser/logparser_test.go @@ -10,8 +10,6 @@ import ( "github.com/influxdata/telegraf/testutil" - "github.com/influxdata/telegraf/plugins/parsers" - "github.com/stretchr/testify/assert" ) @@ -27,21 +25,16 @@ func TestStartNoParsers(t *testing.T) { func TestGrokParseLogFilesNonExistPattern(t *testing.T) { thisdir := getCurrentDir() - c := &parsers.Config{ - Patterns: []string{"%{FOOBAR}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, - DataFormat: "grok", - } - p, err := parsers.NewParser(c) logparser := &LogParserPlugin{ - FromBeginning: true, - Files: []string{thisdir + "grok/testdata/*.log"}, - GrokParser: p, + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/*.log"}, + Patterns: []string{"%{FOOBAR}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, } acc := testutil.Accumulator{} - err = logparser.Start(&acc) + err := logparser.Start(&acc) assert.Error(t, err) } From cafa95e536eedd62c9182ea34c271d250e59a51e Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 6 Jul 2018 13:16:51 -0700 Subject: [PATCH 27/51] logparser no longer uses seperate grok --- plugins/inputs/logparser/grok/grok.go | 511 --------- plugins/inputs/logparser/grok/grok_test.go | 1002 ----------------- .../inputs/logparser/grok/influx_patterns.go | 78 -- plugins/parsers/registry.go | 4 +- 4 files changed, 2 insertions(+), 1593 deletions(-) delete mode 100644 plugins/inputs/logparser/grok/grok.go delete mode 100644 plugins/inputs/logparser/grok/grok_test.go delete mode 100644 plugins/inputs/logparser/grok/influx_patterns.go diff --git a/plugins/inputs/logparser/grok/grok.go b/plugins/inputs/logparser/grok/grok.go deleted file mode 100644 index 766d149fe4fe1..0000000000000 --- a/plugins/inputs/logparser/grok/grok.go +++ /dev/null @@ -1,511 +0,0 @@ -package grok - -import ( - "bufio" 
- "fmt" - "log" - "os" - "regexp" - "strconv" - "strings" - "time" - - "github.com/vjeantet/grok" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/metric" -) - -var timeLayouts = map[string]string{ - "ts-ansic": "Mon Jan _2 15:04:05 2006", - "ts-unix": "Mon Jan _2 15:04:05 MST 2006", - "ts-ruby": "Mon Jan 02 15:04:05 -0700 2006", - "ts-rfc822": "02 Jan 06 15:04 MST", - "ts-rfc822z": "02 Jan 06 15:04 -0700", // RFC822 with numeric zone - "ts-rfc850": "Monday, 02-Jan-06 15:04:05 MST", - "ts-rfc1123": "Mon, 02 Jan 2006 15:04:05 MST", - "ts-rfc1123z": "Mon, 02 Jan 2006 15:04:05 -0700", // RFC1123 with numeric zone - "ts-rfc3339": "2006-01-02T15:04:05Z07:00", - "ts-rfc3339nano": "2006-01-02T15:04:05.999999999Z07:00", - "ts-httpd": "02/Jan/2006:15:04:05 -0700", - // These three are not exactly "layouts", but they are special cases that - // will get handled in the ParseLine function. - "ts-epoch": "EPOCH", - "ts-epochnano": "EPOCH_NANO", - "ts-syslog": "SYSLOG_TIMESTAMP", - "ts": "GENERIC_TIMESTAMP", // try parsing all known timestamp layouts. -} - -const ( - INT = "int" - TAG = "tag" - FLOAT = "float" - STRING = "string" - DURATION = "duration" - DROP = "drop" - EPOCH = "EPOCH" - EPOCH_NANO = "EPOCH_NANO" - SYSLOG_TIMESTAMP = "SYSLOG_TIMESTAMP" - GENERIC_TIMESTAMP = "GENERIC_TIMESTAMP" -) - -var ( - // matches named captures that contain a modifier. - // ie, - // %{NUMBER:bytes:int} - // %{IPORHOST:clientip:tag} - // %{HTTPDATE:ts1:ts-http} - // %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"} - modifierRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`) - // matches a plain pattern name. ie, %{NUMBER} - patternOnlyRe = regexp.MustCompile(`%{(\w+)}`) -) - -// Parser is the primary struct to handle and grok-patterns defined in the config toml -type Parser struct { - Patterns []string - // namedPatterns is a list of internally-assigned names to the patterns - // specified by the user in Patterns. - // They will look like: - // GROK_INTERNAL_PATTERN_0, GROK_INTERNAL_PATTERN_1, etc. - namedPatterns []string - CustomPatterns string - CustomPatternFiles []string - Measurement string - - // Timezone is an optional component to help render log dates to - // your chosen zone. - // Default: "" which renders UTC - // Options are as follows: - // 1. Local -- interpret based on machine localtime - // 2. "America/Chicago" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones - // 3. UTC -- or blank/unspecified, will return timestamp in UTC - Timezone string - loc *time.Location - - // typeMap is a map of patterns -> capture name -> modifier, - // ie, { - // "%{TESTLOG}": - // { - // "bytes": "int", - // "clientip": "tag" - // } - // } - typeMap map[string]map[string]string - // tsMap is a map of patterns -> capture name -> timestamp layout. - // ie, { - // "%{TESTLOG}": - // { - // "httptime": "02/Jan/2006:15:04:05 -0700" - // } - // } - tsMap map[string]map[string]string - // patterns is a map of all of the parsed patterns from CustomPatterns - // and CustomPatternFiles. - // ie, { - // "DURATION": "%{NUMBER}[nuµm]?s" - // "RESPONSE_CODE": "%{NUMBER:rc:tag}" - // } - patterns map[string]string - // foundTsLayouts is a slice of timestamp patterns that have been found - // in the log lines. This slice gets updated if the user uses the generic - // 'ts' modifier for timestamps. This slice is checked first for matches, - // so that previously-matched layouts get priority over all other timestamp - // layouts. 
- foundTsLayouts []string - - timeFunc func() time.Time - g *grok.Grok - tsModder *tsModder -} - -// Compile is a bound method to Parser which will process the options for our parser -func (p *Parser) Compile() error { - p.typeMap = make(map[string]map[string]string) - p.tsMap = make(map[string]map[string]string) - p.patterns = make(map[string]string) - p.tsModder = &tsModder{} - var err error - p.g, err = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) - if err != nil { - return err - } - - // Give Patterns fake names so that they can be treated as named - // "custom patterns" - p.namedPatterns = make([]string, 0, len(p.Patterns)) - for i, pattern := range p.Patterns { - pattern = strings.TrimSpace(pattern) - if pattern == "" { - continue - } - name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) - p.CustomPatterns += "\n" + name + " " + pattern + "\n" - p.namedPatterns = append(p.namedPatterns, "%{"+name+"}") - } - - if len(p.namedPatterns) == 0 { - return fmt.Errorf("pattern required") - } - - // Combine user-supplied CustomPatterns with DEFAULT_PATTERNS and parse - // them together as the same type of pattern. - p.CustomPatterns = DEFAULT_PATTERNS + p.CustomPatterns - if len(p.CustomPatterns) != 0 { - scanner := bufio.NewScanner(strings.NewReader(p.CustomPatterns)) - p.addCustomPatterns(scanner) - } - - // Parse any custom pattern files supplied. - for _, filename := range p.CustomPatternFiles { - file, fileErr := os.Open(filename) - if fileErr != nil { - return fileErr - } - - scanner := bufio.NewScanner(bufio.NewReader(file)) - p.addCustomPatterns(scanner) - } - - if p.Measurement == "" { - p.Measurement = "logparser_grok" - } - - p.loc, err = time.LoadLocation(p.Timezone) - if err != nil { - log.Printf("W! improper timezone supplied (%s), setting loc to UTC", p.Timezone) - p.loc, _ = time.LoadLocation("UTC") - } - - if p.timeFunc == nil { - p.timeFunc = time.Now - } - - return p.compileCustomPatterns() -} - -// ParseLine is the primary function to process individual lines, returning the metrics -func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { - var err error - // values are the parsed fields from the log line - var values map[string]string - // the matching pattern string - var patternName string - for _, pattern := range p.namedPatterns { - if values, err = p.g.Parse(pattern, line); err != nil { - return nil, err - } - if len(values) != 0 { - patternName = pattern - break - } - } - - if len(values) == 0 { - log.Printf("D! Grok no match found for: %q", line) - return nil, nil - } - - fields := make(map[string]interface{}) - tags := make(map[string]string) - timestamp := time.Now() - for k, v := range values { - if k == "" || v == "" { - continue - } - - // t is the modifier of the field - var t string - // check if pattern has some modifiers - if types, ok := p.typeMap[patternName]; ok { - t = types[k] - } - // if we didn't find a modifier, check if we have a timestamp layout - if t == "" { - if ts, ok := p.tsMap[patternName]; ok { - // check if the modifier is a timestamp layout - if layout, ok := ts[k]; ok { - t = layout - } - } - } - // if we didn't find a type OR timestamp modifier, assume string - if t == "" { - t = STRING - } - - switch t { - case INT: - iv, err := strconv.ParseInt(v, 10, 64) - if err != nil { - log.Printf("E! Error parsing %s to int: %s", v, err) - } else { - fields[k] = iv - } - case FLOAT: - fv, err := strconv.ParseFloat(v, 64) - if err != nil { - log.Printf("E! 
Error parsing %s to float: %s", v, err) - } else { - fields[k] = fv - } - case DURATION: - d, err := time.ParseDuration(v) - if err != nil { - log.Printf("E! Error parsing %s to duration: %s", v, err) - } else { - fields[k] = int64(d) - } - case TAG: - tags[k] = v - case STRING: - fields[k] = strings.Trim(v, `"`) - case EPOCH: - parts := strings.SplitN(v, ".", 2) - if len(parts) == 0 { - log.Printf("E! Error parsing %s to timestamp: %s", v, err) - break - } - - sec, err := strconv.ParseInt(parts[0], 10, 64) - if err != nil { - log.Printf("E! Error parsing %s to timestamp: %s", v, err) - break - } - ts := time.Unix(sec, 0) - - if len(parts) == 2 { - padded := fmt.Sprintf("%-9s", parts[1]) - nsString := strings.Replace(padded[:9], " ", "0", -1) - nanosec, err := strconv.ParseInt(nsString, 10, 64) - if err != nil { - log.Printf("E! Error parsing %s to timestamp: %s", v, err) - break - } - ts = ts.Add(time.Duration(nanosec) * time.Nanosecond) - } - timestamp = ts - case EPOCH_NANO: - iv, err := strconv.ParseInt(v, 10, 64) - if err != nil { - log.Printf("E! Error parsing %s to int: %s", v, err) - } else { - timestamp = time.Unix(0, iv) - } - case SYSLOG_TIMESTAMP: - ts, err := time.ParseInLocation("Jan 02 15:04:05", v, p.loc) - if err == nil { - if ts.Year() == 0 { - ts = ts.AddDate(timestamp.Year(), 0, 0) - } - timestamp = ts - } else { - log.Printf("E! Error parsing %s to time layout [%s]: %s", v, t, err) - } - case GENERIC_TIMESTAMP: - var foundTs bool - // first try timestamp layouts that we've already found - for _, layout := range p.foundTsLayouts { - ts, err := time.ParseInLocation(layout, v, p.loc) - if err == nil { - timestamp = ts - foundTs = true - break - } - } - // if we haven't found a timestamp layout yet, try all timestamp - // layouts. - if !foundTs { - for _, layout := range timeLayouts { - ts, err := time.ParseInLocation(layout, v, p.loc) - if err == nil { - timestamp = ts - foundTs = true - p.foundTsLayouts = append(p.foundTsLayouts, layout) - break - } - } - } - // if we still haven't found a timestamp layout, log it and we will - // just use time.Now() - if !foundTs { - log.Printf("E! Error parsing timestamp [%s], could not find any "+ - "suitable time layouts.", v) - } - case DROP: - // goodbye! - default: - // Replace commas with dot character - v = strings.Replace(v, ",", ".", -1) - - ts, err := time.ParseInLocation(t, v, p.loc) - if err == nil { - timestamp = ts - } else { - log.Printf("E! Error parsing %s to time layout [%s]: %s", v, t, err) - } - } - } - - if len(fields) == 0 { - return nil, fmt.Errorf("logparser_grok: must have one or more fields") - } - - return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) -} - -func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) { - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if len(line) > 0 && line[0] != '#' { - names := strings.SplitN(line, " ", 2) - p.patterns[names[0]] = names[1] - } - } -} - -func (p *Parser) compileCustomPatterns() error { - var err error - // check if the pattern contains a subpattern that is already defined - // replace it with the subpattern for modifier inheritance. - for i := 0; i < 2; i++ { - for name, pattern := range p.patterns { - subNames := patternOnlyRe.FindAllStringSubmatch(pattern, -1) - for _, subName := range subNames { - if subPattern, ok := p.patterns[subName[1]]; ok { - pattern = strings.Replace(pattern, subName[0], subPattern, 1) - } - } - p.patterns[name] = pattern - } - } - - // check if pattern contains modifiers. 
Parse them out if it does. - for name, pattern := range p.patterns { - if modifierRe.MatchString(pattern) { - // this pattern has modifiers, so parse out the modifiers - pattern, err = p.parseTypedCaptures(name, pattern) - if err != nil { - return err - } - p.patterns[name] = pattern - } - } - - return p.g.AddPatternsFromMap(p.patterns) -} - -// parseTypedCaptures parses the capture modifiers, and then deletes the -// modifier from the line so that it is a valid "grok" pattern again. -// ie, -// %{NUMBER:bytes:int} => %{NUMBER:bytes} (stores %{NUMBER}->bytes->int) -// %{IPORHOST:clientip:tag} => %{IPORHOST:clientip} (stores %{IPORHOST}->clientip->tag) -func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) { - matches := modifierRe.FindAllStringSubmatch(pattern, -1) - - // grab the name of the capture pattern - patternName := "%{" + name + "}" - // create type map for this pattern - p.typeMap[patternName] = make(map[string]string) - p.tsMap[patternName] = make(map[string]string) - - // boolean to verify that each pattern only has a single ts- data type. - hasTimestamp := false - for _, match := range matches { - // regex capture 1 is the name of the capture - // regex capture 2 is the modifier of the capture - if strings.HasPrefix(match[2], "ts") { - if hasTimestamp { - return pattern, fmt.Errorf("logparser pattern compile error: "+ - "Each pattern is allowed only one named "+ - "timestamp data type. pattern: %s", pattern) - } - if layout, ok := timeLayouts[match[2]]; ok { - // built-in time format - p.tsMap[patternName][match[1]] = layout - } else { - // custom time format - p.tsMap[patternName][match[1]] = strings.TrimSuffix(strings.TrimPrefix(match[2], `ts-"`), `"`) - } - hasTimestamp = true - } else { - p.typeMap[patternName][match[1]] = match[2] - } - - // the modifier is not a valid part of a "grok" pattern, so remove it - // from the pattern. - pattern = strings.Replace(pattern, ":"+match[2]+"}", "}", 1) - } - - return pattern, nil -} - -// tsModder is a struct for incrementing identical timestamps of log lines -// so that we don't push identical metrics that will get overwritten. -type tsModder struct { - dupe time.Time - last time.Time - incr time.Duration - incrn time.Duration - rollover time.Duration -} - -// tsMod increments the given timestamp one unit more from the previous -// duplicate timestamp. -// the increment unit is determined as the next smallest time unit below the -// most significant time unit of ts. -// ie, if the input is at ms precision, it will increment it 1µs. 
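A minimal sketch of that increment behavior, assuming the tsModder type above (the TestTsModder cases further down pin these exact values):

	tsm := &tsModder{}
	ref := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC)
	fmt.Println(tsm.tsMod(ref)) // first occurrence: ref, unchanged
	fmt.Println(tsm.tsMod(ref)) // duplicate at ms precision: ref + 1µs
	fmt.Println(tsm.tsMod(ref)) // second duplicate: ref + 2µs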
-func (t *tsModder) tsMod(ts time.Time) time.Time { - defer func() { t.last = ts }() - // don't mod the time if we don't need to - if t.last.IsZero() || ts.IsZero() { - t.incrn = 0 - t.rollover = 0 - return ts - } - if !ts.Equal(t.last) && !ts.Equal(t.dupe) { - t.incr = 0 - t.incrn = 0 - t.rollover = 0 - return ts - } - - if ts.Equal(t.last) { - t.dupe = ts - } - - if ts.Equal(t.dupe) && t.incr == time.Duration(0) { - tsNano := ts.UnixNano() - - d := int64(10) - counter := 1 - for { - a := tsNano % d - if a > 0 { - break - } - d = d * 10 - counter++ - } - - switch { - case counter <= 6: - t.incr = time.Nanosecond - case counter <= 9: - t.incr = time.Microsecond - case counter > 9: - t.incr = time.Millisecond - } - } - - t.incrn++ - if t.incrn == 999 && t.incr > time.Nanosecond { - t.rollover = t.incr * t.incrn - t.incrn = 1 - t.incr = t.incr / 1000 - if t.incr < time.Nanosecond { - t.incr = time.Nanosecond - } - } - return ts.Add(t.incr*t.incrn + t.rollover) -} diff --git a/plugins/inputs/logparser/grok/grok_test.go b/plugins/inputs/logparser/grok/grok_test.go deleted file mode 100644 index 6a143bb7dcfc4..0000000000000 --- a/plugins/inputs/logparser/grok/grok_test.go +++ /dev/null @@ -1,1002 +0,0 @@ -package grok - -import ( - "testing" - "time" - - "github.com/influxdata/telegraf" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -var benchM telegraf.Metric - -func Benchmark_ParseLine_CommonLogFormat(b *testing.B) { - p := &Parser{ - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - _ = p.Compile() - - var m telegraf.Metric - for n := 0; n < b.N; n++ { - m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - } - benchM = m -} - -func Benchmark_ParseLine_CombinedLogFormat(b *testing.B) { - p := &Parser{ - Patterns: []string{"%{COMBINED_LOG_FORMAT}"}, - } - _ = p.Compile() - - var m telegraf.Metric - for n := 0; n < b.N; n++ { - m, _ = p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`) - } - benchM = m -} - -func Benchmark_ParseLine_CustomPattern(b *testing.B) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - RESPONSE_CODE %{NUMBER:response_code:tag} - RESPONSE_TIME %{DURATION:response_time:duration} - TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - `, - } - _ = p.Compile() - - var m telegraf.Metric - for n := 0; n < b.N; n++ { - m, _ = p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - } - benchM = m -} - -// Test a very simple parse pattern. -func TestSimpleParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TESTLOG}"}, - CustomPatterns: ` - TESTLOG %{NUMBER:num:int} %{WORD:client} - `, - } - assert.NoError(t, p.Compile()) - - m, err := p.ParseLine(`142 bot`) - assert.NoError(t, err) - require.NotNil(t, m) - - assert.Equal(t, - map[string]interface{}{ - "num": int64(142), - "client": "bot", - }, - m.Fields()) -} - -// Verify that patterns with a regex lookahead fail at compile time. 
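Note that the failure actually surfaces in ParseLine rather than Compile: Go's RE2-based regexp package has no lookahead support, and the underlying regex is only built once a line is parsed, which is what the test below asserts. A quick standalone check of the engine behavior:

	// Standard library regexp (RE2) rejects the lookahead used in NOBOT.
	_, err := regexp.Compile(`((?!bot|crawl).)*`)
	// err: error parsing regexp: invalid or unsupported Perl syntax: `(?!`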
-func TestParsePatternsWithLookahead(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYLOG}"}, - CustomPatterns: ` - NOBOT ((?!bot|crawl).)* - MYLOG %{NUMBER:num:int} %{NOBOT:client} - `, - } - assert.NoError(t, p.Compile()) - - _, err := p.ParseLine(`1466004605359052000 bot`) - assert.Error(t, err) -} - -func TestMeasurementName(t *testing.T) { - p := &Parser{ - Measurement: "my_web_log", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) - assert.Equal(t, "my_web_log", m.Name()) -} - -func TestCLF_IPv6(t *testing.T) { - p := &Parser{ - Measurement: "my_web_log", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - m, err := p.ParseLine(`2001:0db8:85a3:0000:0000:8a2e:0370:7334 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "2001:0db8:85a3:0000:0000:8a2e:0370:7334", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) - assert.Equal(t, "my_web_log", m.Name()) - - m, err = p.ParseLine(`::1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "::1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) - assert.Equal(t, "my_web_log", m.Name()) -} - -func TestCustomInfluxdbHttpd(t *testing.T) { - p := &Parser{ - Patterns: []string{`\[httpd\] %{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:response_time_us:int}`}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:11:33:29 +0100] "POST /write?consistency=any&db=telegraf&precision=ns&rp= HTTP/1.1" 204 0 "-" "InfluxDBClient" 6f61bc44-321b-11e6-8050-000000000000 2513`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(0), - "auth": "-", - "client_ip": "::1", - "http_version": float64(1.1), - "ident": "-", - "referrer": "-", - "request": "/write?consistency=any&db=telegraf&precision=ns&rp=", - "response_time_us": int64(2513), - "agent": "InfluxDBClient", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "POST", "resp_code": "204"}, m.Tags()) - - // Parse an influxdb GET request - m, err = p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:12:10:02 +0100] "GET /query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h HTTP/1.1" 200 578 
"http://localhost:8083/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36" 8a3806f1-3220-11e6-8006-000000000000 988`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(578), - "auth": "-", - "client_ip": "::1", - "http_version": float64(1.1), - "ident": "-", - "referrer": "http://localhost:8083/", - "request": "/query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h", - "response_time_us": int64(988), - "agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -// common log format -// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 -func TestBuiltinCommonLogFormat(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -// common log format -// 127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 -func TestBuiltinCommonLogFormatWithNumbers(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank1234", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user1234", - "request": "/apache_pb.gif", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -// combined log format -// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla" -func TestBuiltinCombinedLogFormat(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{COMBINED_LOG_FORMAT}"}, - } - assert.NoError(t, p.Compile()) - - // Parse an influxdb POST request - m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`) - require.NotNil(t, m) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "resp_bytes": int64(2326), - "auth": "frank", - "client_ip": "127.0.0.1", - "http_version": float64(1.0), - "ident": "user-identifier", - "request": "/apache_pb.gif", - "referrer": "-", - "agent": "Mozilla", - }, - m.Fields()) - assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) -} - -func TestCompileStringAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - RESPONSE_CODE 
%{NUMBER:response_code:tag} - RESPONSE_TIME %{DURATION:response_time:duration} - TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) -} - -func TestCompileErrorsOnInvalidPattern(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - RESPONSE_CODE %{NUMBER:response_code:tag} - RESPONSE_TIME %{DURATION:response_time:duration} - TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - `, - } - assert.Error(t, p.Compile()) - - metricA, _ := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.Nil(t, metricA) -} - -func TestParsePatternsWithoutCustom(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}"}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) -} - -func TestParseEpochNano(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) -} - -func TestParseEpoch(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{POSINT:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1466004605 response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(1466004605, 0), metricA.Time()) -} - -func TestParseEpochDecimal(t *testing.T) { - var tests = []struct { - name string - line string - noMatch bool - err error - tags map[string]string - fields map[string]interface{} - time time.Time - }{ - { - name: "ns precision", - line: "1466004605.359052000 value=42", - tags: map[string]string{}, - fields: map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605359052000), - }, - { - name: "ms precision", - line: "1466004605.359 value=42", - tags: map[string]string{}, - fields: 
map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605359000000), - }, - { - name: "second precision", - line: "1466004605 value=42", - tags: map[string]string{}, - fields: map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605000000000), - }, - { - name: "sub ns precision", - line: "1466004605.123456789123 value=42", - tags: map[string]string{}, - fields: map[string]interface{}{ - "value": int64(42), - }, - time: time.Unix(0, 1466004605123456789), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - parser := &Parser{ - Patterns: []string{"%{NUMBER:ts:ts-epoch} value=%{NUMBER:value:int}"}, - } - assert.NoError(t, parser.Compile()) - m, err := parser.ParseLine(tt.line) - - if tt.noMatch { - require.Nil(t, m) - require.Nil(t, err) - return - } - - require.Equal(t, tt.err, err) - - require.NotNil(t, m) - require.Equal(t, tt.tags, m.Tags()) - require.Equal(t, tt.fields, m.Fields()) - require.Equal(t, tt.time, m.Time()) - }) - } -} - -func TestParseEpochErrors(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{WORD:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - _, err := p.ParseLine(`foobar response_time=20821 mymetric=10890.645`) - assert.NoError(t, err) - - p = &Parser{ - Patterns: []string{"%{MYAPP}"}, - CustomPatterns: ` - MYAPP %{WORD:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} - `, - } - assert.NoError(t, p.Compile()) - - _, err = p.ParseLine(`foobar response_time=20821 mymetric=10890.645`) - assert.NoError(t, err) -} - -func TestParseGenericTimestamp(t *testing.T) { - p := &Parser{ - Patterns: []string{`\[%{HTTPDATE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[09/Jun/2016:03:37:03 +0000] response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) - assert.Equal(t, time.Unix(1465443423, 0).UTC(), metricA.Time().UTC()) - - metricB, err := p.ParseLine(`[09/Jun/2016:03:37:04 +0000] response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, time.Unix(1465443424, 0).UTC(), metricB.Time().UTC()) -} - -func TestParseGenericTimestampNotFound(t *testing.T) { - p := &Parser{ - Patterns: []string{`\[%{NOTSPACE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[foobar] response_time=20821 mymetric=10890.645`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "response_time": int64(20821), - "metric": float64(10890.645), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) -} - -func TestCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - } - assert.NoError(t, p.Compile()) - - 
metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, - time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), - metricA.Time().Nanosecond()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, - time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), - metricB.Time().Nanosecond()) -} - -func TestCompileNoModifiersAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_C}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - TEST_LOG_C %{NUMBER:myfloat} %{NUMBER} %{IPORHOST:clientip} %{DURATION:rt} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": "1.25", - "rt": "5.432µs", - }, - metricA.Fields()) - assert.Equal(t, map[string]string{}, metricA.Tags()) -} - -func TestCompileNoNamesAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_C}"}, - CustomPatterns: ` - DURATION %{NUMBER}[nuµm]?s - TEST_LOG_C %{NUMBER} %{NUMBER} %{IPORHOST} %{DURATION} - `, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) - require.Nil(t, metricA) - assert.NoError(t, err) -} - -func TestParseNoMatch(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) - assert.NoError(t, err) - assert.Nil(t, metricA) -} - -func TestCompileErrors(t *testing.T) { - // Compile fails because there are multiple timestamps: - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts1:ts-httpd} %{HTTPDATE:ts2:ts-httpd} %{NUMBER:mynum:int} - `, - } - assert.Error(t, p.Compile()) - - // Compile fails because file doesn't exist: - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"/tmp/foo/bar/baz"}, - } - assert.Error(t, p.Compile()) -} - -func TestParseErrors(t *testing.T) { - // Parse fails because the pattern doesn't exist - p := &Parser{ - Patterns: []string{"%{TEST_LOG_B}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} %{} - `, - } - assert.Error(t, p.Compile()) - _, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) - assert.Error(t, err) - - // Parse fails because myword is not an int - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} - `, - } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - 
assert.Error(t, err) - - // Parse fails because myword is not a float - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:float} - `, - } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - assert.Error(t, err) - - // Parse fails because myword is not a duration - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:duration} - `, - } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - assert.Error(t, err) - - // Parse fails because the time layout is wrong. - p = &Parser{ - Patterns: []string{"%{TEST_LOG_A}"}, - CustomPatterns: ` - TEST_LOG_A %{HTTPDATE:ts:ts-unix} %{WORD:myword:duration} - `, - } - assert.NoError(t, p.Compile()) - _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) - assert.Error(t, err) -} - -func TestTsModder(t *testing.T) { - tsm := &tsModder{} - - reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC) - modt := tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Microsecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Microsecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Microsecond*3), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*3), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond)*999, time.UTC) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Nanosecond*3), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, 0, time.UTC) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Millisecond*1), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Millisecond*2), modt) - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime.Add(time.Millisecond*3), modt) - - reftime = time.Time{} - modt = tsm.tsMod(reftime) - assert.Equal(t, reftime, modt) -} - -func TestTsModder_Rollover(t *testing.T) { - tsm := &tsModder{} - - reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC) - modt := tsm.tsMod(reftime) - for i := 1; i < 1000; i++ { - modt = tsm.tsMod(reftime) - } - assert.Equal(t, reftime.Add(time.Microsecond*999+time.Nanosecond), modt) - - reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC) - modt = tsm.tsMod(reftime) - for i := 1; i < 1001; i++ { - modt = tsm.tsMod(reftime) - } - assert.Equal(t, reftime.Add(time.Nanosecond*1000), modt) -} - -func TestShortPatternRegression(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TS_UNIX:timestamp:ts-unix} %{NUMBER:value:int}"}, - CustomPatterns: ` - TS_UNIX %{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{TZ} %{YEAR} - `, - } - require.NoError(t, 
p.Compile()) - - metric, err := p.ParseLine(`Wed Apr 12 13:10:34 PST 2017 42`) - require.NoError(t, err) - require.NotNil(t, metric) - - require.Equal(t, - map[string]interface{}{ - "value": int64(42), - }, - metric.Fields()) -} - -func TestTimezoneEmptyCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465044105000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneMalformedCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Something/Weird", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465044105000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneEuropeCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Europe/Berlin", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - 
metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465036905000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneAmericasCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Canada/Eastern", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, int64(1465058505000000000), metricB.Time().UnixNano()) -} - -func TestTimezoneLocalCompileFileAndParse(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{"./testdata/test-patterns"}, - Timezone: "Local", - } - assert.NoError(t, p.Compile()) - - metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) - require.NotNil(t, metricA) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "clientip": "192.168.1.1", - "myfloat": float64(1.25), - "response_time": int64(5432), - "myint": int64(101), - }, - metricA.Fields()) - assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) - assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) - - metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) - require.NotNil(t, metricB) - assert.NoError(t, err) - assert.Equal(t, - map[string]interface{}{ - "myfloat": 1.25, - "mystring": "mystring", - "nomodifier": "nomodifier", - }, - metricB.Fields()) - assert.Equal(t, map[string]string{}, metricB.Tags()) - assert.Equal(t, time.Date(2016, time.June, 4, 12, 41, 45, 0, time.Local).UnixNano(), metricB.Time().UnixNano()) -} - -func TestNewlineInPatterns(t *testing.T) { - p := &Parser{ - Patterns: []string{` - %{SYSLOGTIMESTAMP:timestamp} - `}, - } - require.NoError(t, p.Compile()) - m, err := p.ParseLine("Apr 10 05:11:57") - require.NoError(t, err) - require.NotNil(t, m) -} - -func TestSyslogTimestampParser(t *testing.T) { - p := &Parser{ - Patterns: []string{`%{SYSLOGTIMESTAMP:timestamp:ts-syslog} value=%{NUMBER:value:int}`}, - timeFunc: func() time.Time { return time.Date(2018, time.April, 1, 0, 0, 0, 0, nil) }, - } - require.NoError(t, p.Compile()) - m, err := p.ParseLine("Sep 25 09:01:55 value=42") - require.NoError(t, err) - require.NotNil(t, m) - require.Equal(t, 2018, m.Time().Year()) -} - -func TestReplaceTimestampComma(t *testing.T) { - - p := &Parser{ - Patterns: []string{`%{TIMESTAMP_ISO8601:timestamp:ts-"2006-01-02 15:04:05.000"} successfulMatches=%{NUMBER:value:int}`}, - } - - require.NoError(t, p.Compile()) - m, err := p.ParseLine("2018-02-21 13:10:34,555 successfulMatches=1") - require.NoError(t, err) - require.NotNil(t, m) - - 
require.Equal(t, 2018, m.Time().Year()) - require.Equal(t, 13, m.Time().Hour()) - require.Equal(t, 34, m.Time().Second()) - //Convert Nanosecond to milisecond for compare - require.Equal(t, 555, m.Time().Nanosecond()/1000000) -} diff --git a/plugins/inputs/logparser/grok/influx_patterns.go b/plugins/inputs/logparser/grok/influx_patterns.go deleted file mode 100644 index 6dc990622a305..0000000000000 --- a/plugins/inputs/logparser/grok/influx_patterns.go +++ /dev/null @@ -1,78 +0,0 @@ -package grok - -// DEFAULT_PATTERNS SHOULD BE KEPT IN-SYNC WITH patterns/influx-patterns -const DEFAULT_PATTERNS = ` -# Captures are a slightly modified version of logstash "grok" patterns, with -# the format %{[:][:]} -# By default all named captures are converted into string fields. -# Modifiers can be used to convert captures to other types or tags. -# Timestamp modifiers can be used to convert captures to the timestamp of the -# parsed metric. - -# View logstash grok pattern docs here: -# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html -# All default logstash patterns are supported, these can be viewed here: -# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns - -# Available modifiers: -# string (default if nothing is specified) -# int -# float -# duration (ie, 5.23ms gets converted to int nanoseconds) -# tag (converts the field into a tag) -# drop (drops the field completely) -# Timestamp modifiers: -# ts-ansic ("Mon Jan _2 15:04:05 2006") -# ts-unix ("Mon Jan _2 15:04:05 MST 2006") -# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") -# ts-rfc822 ("02 Jan 06 15:04 MST") -# ts-rfc822z ("02 Jan 06 15:04 -0700") -# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") -# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") -# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") -# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") -# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") -# ts-httpd ("02/Jan/2006:15:04:05 -0700") -# ts-epoch (seconds since unix epoch) -# ts-epochnano (nanoseconds since unix epoch) -# ts-"CUSTOM" -# CUSTOM time layouts must be within quotes and be the representation of the -# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 -# See https://golang.org/pkg/time/#Parse for more details. - -# Example log file pattern, example log looks like this: -# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs -# Breakdown of the DURATION pattern below: -# NUMBER is a builtin logstash grok pattern matching float & int numbers. -# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets. -# s is also regex, this pattern must end in "s". -# so DURATION will match something like '5.324ms' or '6.1µs' or '10s' -DURATION %{NUMBER}[nuµm]?s -RESPONSE_CODE %{NUMBER:response_code:tag} -RESPONSE_TIME %{DURATION:response_time_ns:duration} -EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - -# Wider-ranging username matching vs. 
logstash built-in %{USER} -NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ -NGUSER %{NGUSERNAME} -# Wider-ranging client IP matching -CLIENT (?:%{IPV6}|%{IPV4}|%{HOSTNAME}|%{HOSTPORT}) - -## -## COMMON LOG PATTERNS -## - -# apache & nginx logs, this is also known as the "common log format" -# see https://en.wikipedia.org/wiki/Common_Log_Format -COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) - -# Combined log format is the same as the common log format but with the addition -# of two quoted strings at the end for "referrer" and "agent" -# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html -COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} - -# HTTPD log formats -HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} -HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message} -HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} -` diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index f45067ea757d4..5748fc7d358e6 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -156,8 +156,8 @@ func newGrokParser(metricName string, Timezone: tZone, } - parser.Compile() - return &parser, nil + err := parser.Compile() + return &parser, err } func NewJSONParser( From c6087abf095d2d134b47ff817a0c2985ae0ab1b8 Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 6 Jul 2018 14:52:37 -0700 Subject: [PATCH 28/51] add more unit tests to grok parser --- plugins/parsers/grok/parser_test.go | 948 +++++++++++++++++++- plugins/parsers/grok/testdata/.DS_Store | Bin 0 -> 6148 bytes plugins/parsers/grok/testdata/test-patterns | 14 + plugins/parsers/grok/testdata/test_a.log | 1 + plugins/parsers/grok/testdata/test_b.log | 1 + 5 files changed, 961 insertions(+), 3 deletions(-) create mode 100644 plugins/parsers/grok/testdata/.DS_Store create mode 100644 plugins/parsers/grok/testdata/test-patterns create mode 100644 plugins/parsers/grok/testdata/test_a.log create mode 100644 plugins/parsers/grok/testdata/test_b.log diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 524e562bbcc26..b5810dc648a08 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1,10 +1,11 @@ package grok import ( - "log" "testing" + "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestGrokParse(t *testing.T) { @@ -13,7 +14,948 @@ func TestGrokParse(t *testing.T) { Patterns: []string{"%{COMMON_LOG_FORMAT}"}, } parser.Compile() - metrics, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)) - log.Printf("metric_tags: %v, metric_fields: %v", metrics[0].Tags(), metrics[0].Fields()) + _, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)) assert.NoError(t, err) } + +// Verify that patterns with a regex lookahead fail at compile time. 
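With the registry change above propagating Compile errors, a grok parser can also be built through the generic parsers registry rather than constructed directly; a minimal sketch, assuming the same Config fields that logparser's Start uses:

	c := &parsers.Config{
		DataFormat: "grok",
		Patterns:   []string{"%{COMMON_LOG_FORMAT}"},
	}
	p, err := parsers.NewParser(c)
	if err != nil {
		log.Fatalf("E! Error building grok parser: %v", err)
	}
	m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)
	if err == nil && m != nil {
		fmt.Println(m.Fields()) // resp_bytes, client_ip, http_version, ...
	}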
+func TestParsePatternsWithLookahead(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{MYLOG}"}, + CustomPatterns: ` + NOBOT ((?!bot|crawl).)* + MYLOG %{NUMBER:num:int} %{NOBOT:client} + `, + } + assert.NoError(t, p.Compile()) + + _, err := p.ParseLine(`1466004605359052000 bot`) + assert.Error(t, err) +} + +func TestMeasurementName(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "127.0.0.1", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +func TestCLF_IPv6(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + m, err := p.ParseLine(`2001:0db8:85a3:0000:0000:8a2e:0370:7334 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "2001:0db8:85a3:0000:0000:8a2e:0370:7334", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) + + m, err = p.ParseLine(`::1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "::1", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +func TestCustomInfluxdbHttpd(t *testing.T) { + p := &Parser{ + Patterns: []string{`\[httpd\] %{COMBINED_LOG_FORMAT} %{UUID:uuid:drop} %{NUMBER:response_time_us:int}`}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:11:33:29 +0100] "POST /write?consistency=any&db=telegraf&precision=ns&rp= HTTP/1.1" 204 0 "-" "InfluxDBClient" 6f61bc44-321b-11e6-8050-000000000000 2513`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(0), + "auth": "-", + "client_ip": "::1", + "http_version": float64(1.1), + "ident": "-", + "referrer": "-", + "request": "/write?consistency=any&db=telegraf&precision=ns&rp=", + "response_time_us": int64(2513), + "agent": "InfluxDBClient", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "POST", "resp_code": "204"}, m.Tags()) + + // Parse an influxdb GET request + m, err = p.ParseLine(`[httpd] ::1 - - [14/Jun/2016:12:10:02 +0100] "GET /query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h HTTP/1.1" 200 578 "http://localhost:8083/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36" 8a3806f1-3220-11e6-8006-000000000000 988`) + 
require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(578), + "auth": "-", + "client_ip": "::1", + "http_version": float64(1.1), + "ident": "-", + "referrer": "http://localhost:8083/", + "request": "/query?db=telegraf&q=SELECT+bytes%2Cresponse_time_us+FROM+logparser_grok+WHERE+http_method+%3D+%27GET%27+AND+response_time_us+%3E+0+AND+time+%3E+now%28%29+-+1h", + "response_time_us": int64(988), + "agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +// common log format +// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 +func TestBuiltinCommonLogFormat(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "127.0.0.1", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +// common log format +// 127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 +func TestBuiltinCommonLogFormatWithNumbers(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user1234 frank1234 [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank1234", + "client_ip": "127.0.0.1", + "http_version": float64(1.0), + "ident": "user1234", + "request": "/apache_pb.gif", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +// combined log format +// 127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla" +func TestBuiltinCombinedLogFormat(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{COMBINED_LOG_FORMAT}"}, + } + assert.NoError(t, p.Compile()) + + // Parse an influxdb POST request + m, err := p.ParseLine(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 "-" "Mozilla"`) + require.NotNil(t, m) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "resp_bytes": int64(2326), + "auth": "frank", + "client_ip": "127.0.0.1", + "http_version": float64(1.0), + "ident": "user-identifier", + "request": "/apache_pb.gif", + "referrer": "-", + "agent": "Mozilla", + }, + m.Fields()) + assert.Equal(t, map[string]string{"verb": "GET", "resp_code": "200"}, m.Tags()) +} + +func TestCompileStringAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + RESPONSE_CODE %{NUMBER:response_code:tag} + RESPONSE_TIME %{DURATION:response_time:duration} + TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + `, + } + 
assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) +} + +func TestCompileErrorsOnInvalidPattern(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + RESPONSE_CODE %{NUMBER:response_code:tag} + RESPONSE_TIME %{DURATION:response_time:duration} + TEST_LOG_A %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} + `, + } + assert.Error(t, p.Compile()) + + metricA, _ := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.Nil(t, metricA) +} + +func TestParsePatternsWithoutCustom(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}"}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) +} + +func TestParseEpochNano(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{MYAPP}"}, + CustomPatterns: ` + MYAPP %{POSINT:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1466004605359052000 response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(0, 1466004605359052000), metricA.Time()) +} + +func TestParseEpoch(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{MYAPP}"}, + CustomPatterns: ` + MYAPP %{POSINT:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1466004605 response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(1466004605, 0), metricA.Time()) +} + +func TestParseEpochDecimal(t *testing.T) { + var tests = []struct { + name string + line string + noMatch bool + err error + tags map[string]string + fields map[string]interface{} + time time.Time + }{ + { + name: "ns precision", + line: "1466004605.359052000 value=42", + tags: map[string]string{}, + fields: map[string]interface{}{ + "value": int64(42), + }, + time: time.Unix(0, 1466004605359052000), + }, + { + name: "ms precision", + line: "1466004605.359 value=42", + tags: map[string]string{}, + fields: map[string]interface{}{ + "value": int64(42), + }, + time: time.Unix(0, 1466004605359000000), + }, + { + name: "second precision", + line: "1466004605 value=42", + tags: map[string]string{}, + 
fields: map[string]interface{}{ + "value": int64(42), + }, + time: time.Unix(0, 1466004605000000000), + }, + { + name: "sub ns precision", + line: "1466004605.123456789123 value=42", + tags: map[string]string{}, + fields: map[string]interface{}{ + "value": int64(42), + }, + time: time.Unix(0, 1466004605123456789), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + parser := &Parser{ + Patterns: []string{"%{NUMBER:ts:ts-epoch} value=%{NUMBER:value:int}"}, + } + assert.NoError(t, parser.Compile()) + m, err := parser.ParseLine(tt.line) + + if tt.noMatch { + require.Nil(t, m) + require.Nil(t, err) + return + } + + require.Equal(t, tt.err, err) + + require.NotNil(t, m) + require.Equal(t, tt.tags, m.Tags()) + require.Equal(t, tt.fields, m.Fields()) + require.Equal(t, tt.time, m.Time()) + }) + } +} + +func TestParseEpochErrors(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{MYAPP}"}, + CustomPatterns: ` + MYAPP %{WORD:ts:ts-epoch} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + _, err := p.ParseLine(`foobar response_time=20821 mymetric=10890.645`) + assert.NoError(t, err) + + p = &Parser{ + Patterns: []string{"%{MYAPP}"}, + CustomPatterns: ` + MYAPP %{WORD:ts:ts-epochnano} response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float} + `, + } + assert.NoError(t, p.Compile()) + + _, err = p.ParseLine(`foobar response_time=20821 mymetric=10890.645`) + assert.NoError(t, err) +} + +func TestParseGenericTimestamp(t *testing.T) { + p := &Parser{ + Patterns: []string{`\[%{HTTPDATE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[09/Jun/2016:03:37:03 +0000] response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) + assert.Equal(t, time.Unix(1465443423, 0).UTC(), metricA.Time().UTC()) + + metricB, err := p.ParseLine(`[09/Jun/2016:03:37:04 +0000] response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, time.Unix(1465443424, 0).UTC(), metricB.Time().UTC()) +} + +func TestParseGenericTimestampNotFound(t *testing.T) { + p := &Parser{ + Patterns: []string{`\[%{NOTSPACE:ts:ts}\] response_time=%{POSINT:response_time:int} mymetric=%{NUMBER:metric:float}`}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[foobar] response_time=20821 mymetric=10890.645`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "response_time": int64(20821), + "metric": float64(10890.645), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) +} + +func TestCompileFileAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + 
"clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) + assert.Equal(t, + time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), + metricA.Time().Nanosecond()) + + metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, + time.Date(2016, time.June, 4, 12, 41, 45, 0, time.FixedZone("foo", 60*60)).Nanosecond(), + metricB.Time().Nanosecond()) +} + +func TestCompileNoModifiersAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_C}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + TEST_LOG_C %{NUMBER:myfloat} %{NUMBER} %{IPORHOST:clientip} %{DURATION:rt} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": "1.25", + "rt": "5.432µs", + }, + metricA.Fields()) + assert.Equal(t, map[string]string{}, metricA.Tags()) +} + +func TestCompileNoNamesAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_C}"}, + CustomPatterns: ` + DURATION %{NUMBER}[nuµm]?s + TEST_LOG_C %{NUMBER} %{NUMBER} %{IPORHOST} %{DURATION} + `, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`1.25 200 192.168.1.1 5.432µs`) + require.Nil(t, metricA) + assert.NoError(t, err) +} + +func TestParseNoMatch(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) + assert.NoError(t, err) + assert.Nil(t, metricA) +} + +func TestCompileErrors(t *testing.T) { + // Compile fails because there are multiple timestamps: + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts1:ts-httpd} %{HTTPDATE:ts2:ts-httpd} %{NUMBER:mynum:int} + `, + } + assert.Error(t, p.Compile()) + + // Compile fails because file doesn't exist: + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"/tmp/foo/bar/baz"}, + } + assert.Error(t, p.Compile()) +} + +func TestParseErrors(t *testing.T) { + // Parse fails because the pattern doesn't exist + p := &Parser{ + Patterns: []string{"%{TEST_LOG_B}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} %{} + `, + } + assert.Error(t, p.Compile()) + _, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] notnumber 200 192.168.1.1 5.432µs 101`) + assert.Error(t, err) + + // Parse fails because myword is not an int + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:int} + `, + } + assert.NoError(t, p.Compile()) + _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) + assert.Error(t, err) + + // Parse fails because myword is not a float + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} 
%{WORD:myword:float} + `, + } + assert.NoError(t, p.Compile()) + _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) + assert.Error(t, err) + + // Parse fails because myword is not a duration + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-httpd} %{WORD:myword:duration} + `, + } + assert.NoError(t, p.Compile()) + _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) + assert.Error(t, err) + + // Parse fails because the time layout is wrong. + p = &Parser{ + Patterns: []string{"%{TEST_LOG_A}"}, + CustomPatterns: ` + TEST_LOG_A %{HTTPDATE:ts:ts-unix} %{WORD:myword:duration} + `, + } + assert.NoError(t, p.Compile()) + _, err = p.ParseLine(`04/Jun/2016:12:41:45 +0100 notnumber`) + assert.Error(t, err) +} + +func TestTsModder(t *testing.T) { + tsm := &tsModder{} + + reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC) + modt := tsm.tsMod(reftime) + assert.Equal(t, reftime, modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Microsecond*1), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Microsecond*2), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Microsecond*3), modt) + + reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime, modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Nanosecond*1), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Nanosecond*2), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Nanosecond*3), modt) + + reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond)*999, time.UTC) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime, modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Nanosecond*1), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Nanosecond*2), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Nanosecond*3), modt) + + reftime = time.Date(2006, time.December, 1, 1, 1, 1, 0, time.UTC) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime, modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Millisecond*1), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Millisecond*2), modt) + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime.Add(time.Millisecond*3), modt) + + reftime = time.Time{} + modt = tsm.tsMod(reftime) + assert.Equal(t, reftime, modt) +} + +func TestTsModder_Rollover(t *testing.T) { + tsm := &tsModder{} + + reftime := time.Date(2006, time.December, 1, 1, 1, 1, int(time.Millisecond), time.UTC) + modt := tsm.tsMod(reftime) + for i := 1; i < 1000; i++ { + modt = tsm.tsMod(reftime) + } + assert.Equal(t, reftime.Add(time.Microsecond*999+time.Nanosecond), modt) + + reftime = time.Date(2006, time.December, 1, 1, 1, 1, int(time.Microsecond), time.UTC) + modt = tsm.tsMod(reftime) + for i := 1; i < 1001; i++ { + modt = tsm.tsMod(reftime) + } + assert.Equal(t, reftime.Add(time.Nanosecond*1000), modt) +} + +func TestShortPatternRegression(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TS_UNIX:timestamp:ts-unix} %{NUMBER:value:int}"}, + CustomPatterns: ` + TS_UNIX %{DAY} %{MONTH} %{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{TZ} %{YEAR} + `, + } + require.NoError(t, p.Compile()) + + metric, err := p.ParseLine(`Wed Apr 12 13:10:34 PST 2017 42`) + require.NoError(t, err) + require.NotNil(t, metric) + + require.Equal(t, + map[string]interface{}{ + 
"value": int64(42), + }, + metric.Fields()) +} + +func TestTimezoneEmptyCompileFileAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + Timezone: "", + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) + assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) + + metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, int64(1465044105000000000), metricB.Time().UnixNano()) +} + +func TestTimezoneMalformedCompileFileAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + Timezone: "Something/Weird", + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) + assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) + + metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, int64(1465044105000000000), metricB.Time().UnixNano()) +} + +func TestTimezoneEuropeCompileFileAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + Timezone: "Europe/Berlin", + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) + assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) + + metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, int64(1465036905000000000), metricB.Time().UnixNano()) +} + +func 
TestTimezoneAmericasCompileFileAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + Timezone: "Canada/Eastern", + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) + assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) + + metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, int64(1465058505000000000), metricB.Time().UnixNano()) +} + +func TestTimezoneLocalCompileFileAndParse(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{"./testdata/test-patterns"}, + Timezone: "Local", + } + assert.NoError(t, p.Compile()) + + metricA, err := p.ParseLine(`[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101`) + require.NotNil(t, metricA) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "clientip": "192.168.1.1", + "myfloat": float64(1.25), + "response_time": int64(5432), + "myint": int64(101), + }, + metricA.Fields()) + assert.Equal(t, map[string]string{"response_code": "200"}, metricA.Tags()) + assert.Equal(t, int64(1465040505000000000), metricA.Time().UnixNano()) + + metricB, err := p.ParseLine(`[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier`) + require.NotNil(t, metricB) + assert.NoError(t, err) + assert.Equal(t, + map[string]interface{}{ + "myfloat": 1.25, + "mystring": "mystring", + "nomodifier": "nomodifier", + }, + metricB.Fields()) + assert.Equal(t, map[string]string{}, metricB.Tags()) + assert.Equal(t, time.Date(2016, time.June, 4, 12, 41, 45, 0, time.Local).UnixNano(), metricB.Time().UnixNano()) +} + +func TestNewlineInPatterns(t *testing.T) { + p := &Parser{ + Patterns: []string{` + %{SYSLOGTIMESTAMP:timestamp} + `}, + } + require.NoError(t, p.Compile()) + m, err := p.ParseLine("Apr 10 05:11:57") + require.NoError(t, err) + require.NotNil(t, m) +} + +func TestSyslogTimestamp(t *testing.T) { + tests := []struct { + name string + line string + expected time.Time + }{ + { + name: "two digit day of month", + line: "Sep 25 09:01:55 value=42", + expected: time.Date(2018, time.September, 25, 9, 1, 55, 0, time.UTC), + }, + { + name: "one digit day of month single space", + line: "Sep 2 09:01:55 value=42", + expected: time.Date(2018, time.September, 2, 9, 1, 55, 0, time.UTC), + }, + { + name: "one digit day of month double space", + line: "Sep 2 09:01:55 value=42", + expected: time.Date(2018, time.September, 2, 9, 1, 55, 0, time.UTC), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p := &Parser{ + Patterns: []string{`%{SYSLOGTIMESTAMP:timestamp:ts-syslog} value=%{NUMBER:value:int}`}, + timeFunc: func() time.Time { return time.Date(2017, time.April, 1, 0, 0, 0, 0, time.UTC) }, + } + require.NoError(t, p.Compile()) + m, err := p.ParseLine(tt.line) + 
require.NoError(t, err)
+			require.NotNil(t, m)
+			require.Equal(t, tt.expected, m.Time())
+		})
+	}
+}
+
+func TestReplaceTimestampComma(t *testing.T) {
+
+	p := &Parser{
+		Patterns: []string{`%{TIMESTAMP_ISO8601:timestamp:ts-"2006-01-02 15:04:05.000"} successfulMatches=%{NUMBER:value:int}`},
+	}
+
+	require.NoError(t, p.Compile())
+	m, err := p.ParseLine("2018-02-21 13:10:34,555 successfulMatches=1")
+	require.NoError(t, err)
+	require.NotNil(t, m)
+
+	require.Equal(t, 2018, m.Time().Year())
+	require.Equal(t, 13, m.Time().Hour())
+	require.Equal(t, 34, m.Time().Second())
+	// Convert nanoseconds to milliseconds for comparison
+	require.Equal(t, 555, m.Time().Nanosecond()/1000000)
+}
diff --git a/plugins/parsers/grok/testdata/.DS_Store b/plugins/parsers/grok/testdata/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..15d123ef0efe733881b859864ca23e55a26587a3
GIT binary patch
literal 6148
zcmeHKyH3PF47A~j6DOqWpxlxMBx;)IbQIKl01!Yl5jkbEMnQdlg!@R)0kED}c?uHyW`G(Ve!Fm3`vB}b<0;xbM
zkP4&%KcWEcY}$0@m_8Lq1yX^H0z4lI3b6^Sj&^jwSqcE0pxF#{<`U$Tz$UOdq6el<
z1v*tD#ZafCy+m9SSRI`%8p(&o%x}qy+Uyv=Sh#58m_8Lq1%?VR_l3=Q|DWQM8BOvn
zBwZ?y3j9|Ebh12OF0i|}TR-iScWptrM4>URvI0YW@DqR+o+JC#>HURu#5IA{k)yEP
Q=)n9DFhSC#0>7ZZD@y@41poj5

literal 0
HcmV?d00001

diff --git a/plugins/parsers/grok/testdata/test-patterns b/plugins/parsers/grok/testdata/test-patterns
new file mode 100644
index 0000000000000..ba995fbd1770f
--- /dev/null
+++ b/plugins/parsers/grok/testdata/test-patterns
@@ -0,0 +1,14 @@
+# Test A log line:
+# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101
+DURATION %{NUMBER}[nuµm]?s
+RESPONSE_CODE %{NUMBER:response_code:tag}
+RESPONSE_TIME %{DURATION:response_time:duration}
+TEST_LOG_A \[%{HTTPDATE:timestamp:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} %{NUMBER:myint:int}
+
+# Test B log line:
+# [04/06/2016--12:41:45] 1.25 mystring dropme nomodifier
+TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME}
+TEST_LOG_B \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:string} %{WORD:dropme:drop} %{WORD:nomodifier}
+
+TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME}
+TEST_LOG_BAD \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:int} %{WORD:dropme:drop} %{WORD:nomodifier}
diff --git a/plugins/parsers/grok/testdata/test_a.log b/plugins/parsers/grok/testdata/test_a.log
new file mode 100644
index 0000000000000..a44d72fdf4e19
--- /dev/null
+++ b/plugins/parsers/grok/testdata/test_a.log
@@ -0,0 +1 @@
+[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101
diff --git a/plugins/parsers/grok/testdata/test_b.log b/plugins/parsers/grok/testdata/test_b.log
new file mode 100644
index 0000000000000..49e2983e872a8
--- /dev/null
+++ b/plugins/parsers/grok/testdata/test_b.log
@@ -0,0 +1 @@
+[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier

From e4b6f236b66b70b03cce6daadac9cae9a015c6e3 Mon Sep 17 00:00:00 2001
From: Max U
Date: Fri, 6 Jul 2018 15:47:03 -0700
Subject: [PATCH 29/51] fix unit tests for grok parser

---
 .../logparser/grok/testdata/test-patterns     | 14 -----
 .../inputs/logparser/grok/testdata/test_a.log |  1 -
 .../inputs/logparser/grok/testdata/test_b.log |  1 -
 plugins/parsers/grok/parser_test.go           | 55 -------------------
 4 files changed, 71 deletions(-)
 delete mode 100644 plugins/inputs/logparser/grok/testdata/test-patterns
 delete mode 100644 plugins/inputs/logparser/grok/testdata/test_a.log
 delete mode 100644 
plugins/inputs/logparser/grok/testdata/test_b.log diff --git a/plugins/inputs/logparser/grok/testdata/test-patterns b/plugins/inputs/logparser/grok/testdata/test-patterns deleted file mode 100644 index ba995fbd1770f..0000000000000 --- a/plugins/inputs/logparser/grok/testdata/test-patterns +++ /dev/null @@ -1,14 +0,0 @@ -# Test A log line: -# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101 -DURATION %{NUMBER}[nuµm]?s -RESPONSE_CODE %{NUMBER:response_code:tag} -RESPONSE_TIME %{DURATION:response_time:duration} -TEST_LOG_A \[%{HTTPDATE:timestamp:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} %{NUMBER:myint:int} - -# Test B log line: -# [04/06/2016--12:41:45] 1.25 mystring dropme nomodifier -TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME} -TEST_LOG_B \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:string} %{WORD:dropme:drop} %{WORD:nomodifier} - -TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME} -TEST_LOG_BAD \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:int} %{WORD:dropme:drop} %{WORD:nomodifier} diff --git a/plugins/inputs/logparser/grok/testdata/test_a.log b/plugins/inputs/logparser/grok/testdata/test_a.log deleted file mode 100644 index a44d72fdf4e19..0000000000000 --- a/plugins/inputs/logparser/grok/testdata/test_a.log +++ /dev/null @@ -1 +0,0 @@ -[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101 diff --git a/plugins/inputs/logparser/grok/testdata/test_b.log b/plugins/inputs/logparser/grok/testdata/test_b.log deleted file mode 100644 index 49e2983e872a8..0000000000000 --- a/plugins/inputs/logparser/grok/testdata/test_b.log +++ /dev/null @@ -1 +0,0 @@ -[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index b5810dc648a08..fb9ad442f9b3c 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -904,58 +904,3 @@ func TestNewlineInPatterns(t *testing.T) { require.NoError(t, err) require.NotNil(t, m) } - -func TestSyslogTimestamp(t *testing.T) { - tests := []struct { - name string - line string - expected time.Time - }{ - { - name: "two digit day of month", - line: "Sep 25 09:01:55 value=42", - expected: time.Date(2018, time.September, 25, 9, 1, 55, 0, time.UTC), - }, - { - name: "one digit day of month single space", - line: "Sep 2 09:01:55 value=42", - expected: time.Date(2018, time.September, 2, 9, 1, 55, 0, time.UTC), - }, - { - name: "one digit day of month double space", - line: "Sep 2 09:01:55 value=42", - expected: time.Date(2018, time.September, 2, 9, 1, 55, 0, time.UTC), - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - p := &Parser{ - Patterns: []string{`%{SYSLOGTIMESTAMP:timestamp:ts-syslog} value=%{NUMBER:value:int}`}, - timeFunc: func() time.Time { return time.Date(2017, time.April, 1, 0, 0, 0, 0, time.UTC) }, - } - require.NoError(t, p.Compile()) - m, err := p.ParseLine(tt.line) - require.NoError(t, err) - require.NotNil(t, m) - require.Equal(t, tt.expected, m.Time()) - }) - } -} - -func TestReplaceTimestampComma(t *testing.T) { - - p := &Parser{ - Patterns: []string{`%{TIMESTAMP_ISO8601:timestamp:ts-"2006-01-02 15:04:05.000"} successfulMatches=%{NUMBER:value:int}`}, - } - - require.NoError(t, p.Compile()) - m, err := p.ParseLine("2018-02-21 13:10:34,555 successfulMatches=1") - require.NoError(t, err) - require.NotNil(t, m) - - require.Equal(t, 
2018, m.Time().Year())
-	require.Equal(t, 13, m.Time().Hour())
-	require.Equal(t, 34, m.Time().Second())
-	// Convert nanoseconds to milliseconds for comparison
-	require.Equal(t, 555, m.Time().Nanosecond()/1000000)
-}

From d224673617f8bac3cb00004fed7444f0db9e3be4 Mon Sep 17 00:00:00 2001
From: Max U
Date: Mon, 9 Jul 2018 09:48:11 -0700
Subject: [PATCH 30/51] change logparser unit tests

---
 plugins/inputs/logparser/logparser_test.go | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go
index ac7b2c82e2977..391d1262c8794 100644
--- a/plugins/inputs/logparser/logparser_test.go
+++ b/plugins/inputs/logparser/logparser_test.go
@@ -2,7 +2,6 @@ package logparser
 
 import (
 	"io/ioutil"
-	"log"
 	"os"
 	"runtime"
 	"strings"
@@ -51,12 +50,10 @@ func TestGrokParseLogFiles(t *testing.T) {
 
 	acc := testutil.Accumulator{}
 	assert.NoError(t, logparser.Start(&acc))
-
 	acc.Wait(2)
 
 	logparser.Stop()
-
-	log.Printf("metric[0] %v, tags: %v, fields: %v", acc.Metrics[0].Measurement, acc.Metrics[0].Tags, acc.Metrics[0].Fields)
 	acc.AssertContainsTaggedFields(t, "logparser_grok",
 		map[string]interface{}{
 			"clientip":      "192.168.1.1",

From f52ceeb9faaf0ff653b7c5776119aea8ad8cab00 Mon Sep 17 00:00:00 2001
From: Max U
Date: Mon, 9 Jul 2018 09:56:21 -0700
Subject: [PATCH 31/51] test files added for logparser

---
 plugins/inputs/logparser/grok/testdata/.DS_Store   | Bin 0 -> 6148 bytes
 .../inputs/logparser/grok/testdata/test-patterns   |  14 ++++++++++++++
 .../inputs/logparser/grok/testdata/test_a.log      |   1 +
 .../inputs/logparser/grok/testdata/test_b.log      |   1 +
 4 files changed, 16 insertions(+)
 create mode 100644 plugins/inputs/logparser/grok/testdata/.DS_Store
 create mode 100644 plugins/inputs/logparser/grok/testdata/test-patterns
 create mode 100644 plugins/inputs/logparser/grok/testdata/test_a.log
 create mode 100644 plugins/inputs/logparser/grok/testdata/test_b.log

diff --git a/plugins/inputs/logparser/grok/testdata/.DS_Store b/plugins/inputs/logparser/grok/testdata/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..15d123ef0efe733881b859864ca23e55a26587a3
GIT binary patch
literal 6148
zcmeHKyH3PF47A~j6DOqWpxlxMBx;)IbQIKl01!Yl5jkbEMnQdlg!@R)0kED}c?uHyW`G(Ve!Fm3`vB}b<0;xbM
zkP4&%KcWEcY}$0@m_8Lq1yX^H0z4lI3b6^Sj&^jwSqcE0pxF#{<`U$Tz$UOdq6el<
z1v*tD#ZafCy+m9SSRI`%8p(&o%x}qy+Uyv=Sh#58m_8Lq1%?VR_l3=Q|DWQM8BOvn
zBwZ?y3j9|Ebh12OF0i|}TR-iScWptrM4>URvI0YW@DqR+o+JC#>HURu#5IA{k)yEP
Q=)n9DFhSC#0>7ZZD@y@41poj5

literal 0
HcmV?d00001

diff --git a/plugins/inputs/logparser/grok/testdata/test-patterns b/plugins/inputs/logparser/grok/testdata/test-patterns
new file mode 100644
index 0000000000000..ba995fbd1770f
--- /dev/null
+++ b/plugins/inputs/logparser/grok/testdata/test-patterns
@@ -0,0 +1,14 @@
+# Test A log line:
+# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101
+DURATION %{NUMBER}[nuµm]?s
+RESPONSE_CODE %{NUMBER:response_code:tag}
+RESPONSE_TIME %{DURATION:response_time:duration}
+TEST_LOG_A \[%{HTTPDATE:timestamp:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} %{NUMBER:myint:int}
+
+# Test B log line:
+# [04/06/2016--12:41:45] 1.25 mystring dropme nomodifier
+TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME}
+TEST_LOG_B \[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:string} %{WORD:dropme:drop} %{WORD:nomodifier}
+
+TEST_TIMESTAMP %{MONTHDAY}/%{MONTHNUM}/%{YEAR}--%{TIME}
+TEST_LOG_BAD 
\[%{TEST_TIMESTAMP:timestamp:ts-"02/01/2006--15:04:05"}\] %{NUMBER:myfloat:float} %{WORD:mystring:int} %{WORD:dropme:drop} %{WORD:nomodifier} diff --git a/plugins/inputs/logparser/grok/testdata/test_a.log b/plugins/inputs/logparser/grok/testdata/test_a.log new file mode 100644 index 0000000000000..a44d72fdf4e19 --- /dev/null +++ b/plugins/inputs/logparser/grok/testdata/test_a.log @@ -0,0 +1 @@ +[04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs 101 diff --git a/plugins/inputs/logparser/grok/testdata/test_b.log b/plugins/inputs/logparser/grok/testdata/test_b.log new file mode 100644 index 0000000000000..49e2983e872a8 --- /dev/null +++ b/plugins/inputs/logparser/grok/testdata/test_b.log @@ -0,0 +1 @@ +[04/06/2016--12:41:45] 1.25 mystring dropme nomodifier From 0c3ac29eff7da9cd58d454501cc3ff1f4e51f1bc Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 12 Jul 2018 13:27:54 -0700 Subject: [PATCH 32/51] addresses daniel's comments --- docs/DATA_FORMATS_INPUT.md | 1 + plugins/inputs/logparser/logparser.go | 24 --------- plugins/inputs/reader/dev/json_a.log | 24 ++++----- plugins/inputs/reader/reader.go | 12 ++--- plugins/inputs/reader/reader_test.go | 12 ++--- plugins/parsers/grok/influx-patterns | 73 --------------------------- plugins/parsers/grok/parser.go | 19 +++++-- plugins/parsers/grok/parser_test.go | 4 +- plugins/parsers/registry.go | 22 ++++---- 9 files changed, 53 insertions(+), 138 deletions(-) delete mode 100644 plugins/parsers/grok/influx-patterns diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 67e5970c15a5c..24335a4531ad0 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -749,6 +749,7 @@ HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} ## Custom patterns can also be defined here. Put one pattern per line. 
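+## A custom pattern is a name and a regex separated by a space; for example,
+## the built-in DURATION pattern is defined as: DURATION %{NUMBER}[nuµm]?s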
grok_custom_patterns = ''' + ''' ## Timezone allows you to provide an override for timestamps that ## don't already include an offset diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index b98e6d71a805b..4e63d7f5d787a 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -3,9 +3,7 @@ package logparser import ( - "fmt" "log" - "reflect" "strings" "sync" @@ -45,7 +43,6 @@ type LogParserPlugin struct { done chan struct{} wg sync.WaitGroup acc telegraf.Accumulator - parsers []LogParser sync.Mutex @@ -137,7 +134,6 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { l.tailers = make(map[string]*tail.Tail) // Looks for fields which implement LogParser interface - l.parsers = []LogParser{} config := &parsers.Config{ Patterns: l.Patterns, NamedPatterns: l.NamedPatterns, @@ -153,26 +149,6 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { return err } - s := reflect.ValueOf(l).Elem() - for i := 0; i < s.NumField(); i++ { - f := s.Field(i) - - if !f.CanInterface() { - continue - } - - if lpPlugin, ok := f.Interface().(LogParser); ok { - if reflect.ValueOf(lpPlugin).IsNil() { - continue - } - l.parsers = append(l.parsers, lpPlugin) - } - } - - if len(l.parsers) == 0 { - return fmt.Errorf("logparser input plugin: no parser defined") - } - l.wg.Add(1) go l.parser() diff --git a/plugins/inputs/reader/dev/json_a.log b/plugins/inputs/reader/dev/json_a.log index 609c40a09d600..0f52e9d1e3b57 100644 --- a/plugins/inputs/reader/dev/json_a.log +++ b/plugins/inputs/reader/dev/json_a.log @@ -1,14 +1,14 @@ { - "parent": { - "child": 3.0, - "ignored_child": "hi" - }, - "ignored_null": null, - "integer": 4, - "list": [3, 4], - "ignored_parent": { - "another_ignored_null": null, - "ignored_string": "hello, world!" - }, - "another_list": [4] +"parent": { + "child": 3.0, + "ignored_child": "hi" +}, +"ignored_null": null, +"integer": 4, +"list": [3, 4], +"ignored_parent": { + "another_ignored_null": null, + "ignored_string": "hello, world!" +}, +"another_list": [4] } diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index d0f7035405cf8..745592df24bbb 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -11,11 +11,11 @@ import ( ) type Reader struct { - Filepaths []string `toml:"files"` + Files []string `toml:"files"` FromBeginning bool parser parsers.Parser - Filenames []string + filenames []string } const sampleConfig = `## Files to parse each interval. @@ -44,7 +44,7 @@ func (r *Reader) Description() string { func (r *Reader) Gather(acc telegraf.Accumulator) error { r.refreshFilePaths() - for _, k := range r.Filenames { + for _, k := range r.filenames { metrics, err := r.readMetric(k) if err != nil { return err @@ -63,10 +63,10 @@ func (r *Reader) SetParser(p parsers.Parser) { func (r *Reader) refreshFilePaths() error { var allFiles []string - for _, filepath := range r.Filepaths { + for _, filepath := range r.Files { g, err := globpath.Compile(filepath) if err != nil { - return fmt.Errorf("E! 
Error Glob: %v could not be compiled, %s", filepath, err) + return fmt.Errorf("could not compile glob %v: %v", filepath, err) } files := g.Match() @@ -75,7 +75,7 @@ func (r *Reader) refreshFilePaths() error { } } - r.Filenames = allFiles + r.filenames = allFiles return nil } diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index c46eb027279f4..b2441eee538d2 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -13,17 +13,17 @@ import ( func TestRefreshFilePaths(t *testing.T) { testDir := getPluginDir() r := Reader{ - Filepaths: []string{testDir + "/logparser/grok/testdata/**.log"}, + Files: []string{testDir + "/reader/testfiles/**.log"}, } r.refreshFilePaths() - assert.Equal(t, len(r.Filenames), 2) + assert.Equal(t, len(r.filenames), 2) } func TestJSONParserCompile(t *testing.T) { testDir := getPluginDir() var acc testutil.Accumulator r := Reader{ - Filepaths: []string{testDir + "/reader/testfiles/json_a.log"}, + Files: []string{testDir + "/reader/testfiles/json_a.log"}, } parserConfig := parsers.Config{ DataFormat: "json", @@ -42,12 +42,12 @@ func TestGrokParser(t *testing.T) { testDir := getPluginDir() var acc testutil.Accumulator r := Reader{ - Filepaths: []string{testDir + "/reader/testfiles/grok_a.log"}, + Files: []string{testDir + "/reader/testfiles/grok_a.log"}, } parserConfig := parsers.Config{ - DataFormat: "grok", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + DataFormat: "grok", + GrokPatterns: []string{"%{COMMON_LOG_FORMAT}"}, } nParser, err := parsers.NewParser(&parserConfig) diff --git a/plugins/parsers/grok/influx-patterns b/plugins/parsers/grok/influx-patterns deleted file mode 100644 index 931b61bc8985f..0000000000000 --- a/plugins/parsers/grok/influx-patterns +++ /dev/null @@ -1,73 +0,0 @@ -# Captures are a slightly modified version of logstash "grok" patterns, with -# the format %{[:][:]} -# By default all named captures are converted into string fields. -# Modifiers can be used to convert captures to other types or tags. -# Timestamp modifiers can be used to convert captures to the timestamp of the -# parsed metric. - -# View logstash grok pattern docs here: -# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html -# All default logstash patterns are supported, these can be viewed here: -# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns - -# Available modifiers: -# string (default if nothing is specified) -# int -# float -# duration (ie, 5.23ms gets converted to int nanoseconds) -# tag (converts the field into a tag) -# drop (drops the field completely) -# Timestamp modifiers: -# ts-ansic ("Mon Jan _2 15:04:05 2006") -# ts-unix ("Mon Jan _2 15:04:05 MST 2006") -# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") -# ts-rfc822 ("02 Jan 06 15:04 MST") -# ts-rfc822z ("02 Jan 06 15:04 -0700") -# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") -# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") -# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") -# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") -# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") -# ts-httpd ("02/Jan/2006:15:04:05 -0700") -# ts-epoch (seconds since unix epoch) -# ts-epochnano (nanoseconds since unix epoch) -# ts-"CUSTOM" -# CUSTOM time layouts must be within quotes and be the representation of the -# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 -# See https://golang.org/pkg/time/#Parse for more details. 
- -# Example log file pattern, example log looks like this: -# [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs -# Breakdown of the DURATION pattern below: -# NUMBER is a builtin logstash grok pattern matching float & int numbers. -# [nuµm]? is a regex specifying 0 or 1 of the characters within brackets. -# s is also regex, this pattern must end in "s". -# so DURATION will match something like '5.324ms' or '6.1µs' or '10s' -DURATION %{NUMBER}[nuµm]?s -RESPONSE_CODE %{NUMBER:response_code:tag} -RESPONSE_TIME %{DURATION:response_time_ns:duration} -EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME} - -# Wider-ranging username matching vs. logstash built-in %{USER} -NGUSERNAME [a-zA-Z0-9\.\@\-\+_%]+ -NGUSER %{NGUSERNAME} -# Wider-ranging client IP matching -CLIENT (?:%{IPORHOST}|%{HOSTPORT}|::1) - -## -## COMMON LOG PATTERNS -## - -# apache & nginx logs, this is also known as the "common log format" -# see https://en.wikipedia.org/wiki/Common_Log_Format -COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTTPDATE:ts:ts-httpd}\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) - -# Combined log format is the same as the common log format but with the addition -# of two quoted strings at the end for "referrer" and "agent" -# See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html -COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} - -# HTTPD log formats -HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} -HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message} -HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index f43a203d1915c..e17f127fcfee6 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -71,7 +71,8 @@ type Parser struct { NamedPatterns []string CustomPatterns string CustomPatternFiles []string - MetricName string + Measurement string + DefaultTags map[string]string // Timezone is an optional component to help render log dates to // your chosen zone. 
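+	// An IANA location name such as "Europe/Berlin" or "Canada/Eastern" (or
+	// the keyword "Local") is expected; the timezone tests show an
+	// unrecognized zone falling back to UTC.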
@@ -204,6 +205,12 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { fields := make(map[string]interface{}) tags := make(map[string]string) + + //add default tags + for k, v := range p.DefaultTags { + tags[k] = v + } + timestamp := time.Now() for k, v := range values { if k == "" || v == "" { @@ -345,11 +352,15 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { return nil, fmt.Errorf("logparser_grok: must have one or more fields") } - return metric.New(p.MetricName, tags, fields, p.tsModder.tsMod(timestamp)) + return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) } func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { - lines := strings.Split(string(buf), "\n") + scanner := bufio.NewScanner(strings.NewReader(string(buf))) + var lines []string + for scanner.Scan() { + lines = append(lines, scanner.Text()) + } var metrics []telegraf.Metric for _, line := range lines { @@ -364,7 +375,7 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { } func (p *Parser) SetDefaultTags(tags map[string]string) { - //needs implementation + p.DefaultTags = tags } func (p *Parser) addCustomPatterns(scanner *bufio.Scanner) { diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index b5810dc648a08..09f8fa16d89b5 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -10,8 +10,8 @@ import ( func TestGrokParse(t *testing.T) { parser := Parser{ - MetricName: "t_met", - Patterns: []string{"%{COMMON_LOG_FORMAT}"}, + Measurement: "t_met", + Patterns: []string{"%{COMMON_LOG_FORMAT}"}, } parser.Compile() _, err := parser.Parse([]byte(`127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`)) diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index ac8a8c253794a..24e73d4b63ca6 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -93,11 +93,11 @@ type Config struct { DropwizardTagPathsMap map[string]string //grok patterns - Patterns []string - NamedPatterns []string - CustomPatterns string - CustomPatternFiles []string - TimeZone string + GrokPatterns []string + GrokNamedPatterns []string + GrokCustomPatterns string + GrokCustomPatternFiles []string + GrokTimeZone string } // NewParser returns a Parser interface based on the given config. 
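+// For the grok data format this now means the Grok*-prefixed fields above;
+// e.g. (mirroring the reader tests) a minimal grok config sets
+// DataFormat: "grok" and GrokPatterns: []string{"%{COMMON_LOG_FORMAT}"}.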
@@ -134,11 +134,11 @@ func NewParser(config *Config) (Parser, error) { case "grok": parser, err = newGrokParser( config.MetricName, - config.Patterns, - config.NamedPatterns, - config.CustomPatterns, - config.CustomPatternFiles, - config.TimeZone) + config.GrokPatterns, + config.GrokNamedPatterns, + config.GrokCustomPatterns, + config.GrokCustomPatternFiles, + config.GrokTimeZone) default: err = fmt.Errorf("Invalid data format: %s", config.DataFormat) } @@ -151,7 +151,7 @@ func newGrokParser(metricName string, cPatterns string, cPatternFiles []string, tZone string) (Parser, error) { parser := grok.Parser{ - MetricName: metricName, + Measurement: metricName, Patterns: patterns, NamedPatterns: nPatterns, CustomPatterns: cPatterns, From 74900edd26a02099afd3f80bedbfc74f052ed130 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 12 Jul 2018 14:20:08 -0700 Subject: [PATCH 33/51] change parser config names --- internal/config/config.go | 10 +++++----- plugins/inputs/logparser/logparser.go | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 43a2e9c20fb28..21c71d94695c2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1352,7 +1352,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { if str, ok := elem.(*ast.String); ok { - c.NamedPatterns = append(c.NamedPatterns, str.Value) + c.GrokNamedPatterns = append(c.GrokNamedPatterns, str.Value) } } } @@ -1364,7 +1364,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { if str, ok := elem.(*ast.String); ok { - c.Patterns = append(c.Patterns, str.Value) + c.GrokPatterns = append(c.GrokPatterns, str.Value) } } } @@ -1374,7 +1374,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { if node, ok := tbl.Fields["grok_custom_patterns"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if str, ok := kv.Value.(*ast.String); ok { - c.CustomPatterns = str.Value + c.GrokCustomPatterns = str.Value } } } @@ -1384,7 +1384,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { if ary, ok := kv.Value.(*ast.Array); ok { for _, elem := range ary.Value { if str, ok := elem.(*ast.String); ok { - c.CustomPatternFiles = append(c.CustomPatternFiles, str.Value) + c.GrokCustomPatternFiles = append(c.GrokCustomPatternFiles, str.Value) } } } @@ -1394,7 +1394,7 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { if node, ok := tbl.Fields["grok_timezone"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if str, ok := kv.Value.(*ast.String); ok { - c.TimeZone = str.Value + c.GrokTimeZone = str.Value } } } diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index 4e63d7f5d787a..4f40eea9a3766 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -135,12 +135,12 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { // Looks for fields which implement LogParser interface config := &parsers.Config{ - Patterns: l.Patterns, - NamedPatterns: l.NamedPatterns, - CustomPatterns: l.CustomPatterns, - CustomPatternFiles: l.CustomPatternFiles, - TimeZone: l.TimeZone, - DataFormat: "grok", + GrokPatterns: l.Patterns, + GrokNamedPatterns: l.NamedPatterns, + GrokCustomPatterns: l.CustomPatterns, + GrokCustomPatternFiles: l.CustomPatternFiles, + 
GrokTimeZone: l.TimeZone, + DataFormat: "grok", } var err error From d0f538942ea30d80673a46ad4bb397e6b45b0dee Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 12 Jul 2018 16:20:01 -0700 Subject: [PATCH 34/51] allow for original config and functionality of logparser --- plugins/inputs/logparser/logparser.go | 28 ++++++------- plugins/inputs/logparser/logparser_test.go | 46 +++++++++++++--------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go index 4f40eea9a3766..ed6b66908f47f 100644 --- a/plugins/inputs/logparser/logparser.go +++ b/plugins/inputs/logparser/logparser.go @@ -21,9 +21,12 @@ const ( ) // LogParser in the primary interface for the plugin -type LogParser interface { - ParseLine(line string) (telegraf.Metric, error) - Compile() error +type GrokConfig struct { + Patterns []string + NamedPatterns []string + CustomPatterns string + CustomPatternFiles []string + TimeZone string } type logEntry struct { @@ -46,13 +49,8 @@ type LogParserPlugin struct { sync.Mutex - GrokParser parsers.Parser `toml:"grok"` - - Patterns []string - NamedPatterns []string - CustomPatterns string - CustomPatternFiles []string - TimeZone string + GrokParser parsers.Parser + GrokConfig GrokConfig `toml:"grok"` } const sampleConfig = ` @@ -135,11 +133,11 @@ func (l *LogParserPlugin) Start(acc telegraf.Accumulator) error { // Looks for fields which implement LogParser interface config := &parsers.Config{ - GrokPatterns: l.Patterns, - GrokNamedPatterns: l.NamedPatterns, - GrokCustomPatterns: l.CustomPatterns, - GrokCustomPatternFiles: l.CustomPatternFiles, - GrokTimeZone: l.TimeZone, + GrokPatterns: l.GrokConfig.Patterns, + GrokNamedPatterns: l.GrokConfig.NamedPatterns, + GrokCustomPatterns: l.GrokConfig.CustomPatterns, + GrokCustomPatternFiles: l.GrokConfig.CustomPatternFiles, + GrokTimeZone: l.GrokConfig.TimeZone, DataFormat: "grok", } diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go index 391d1262c8794..d99f0577739f4 100644 --- a/plugins/inputs/logparser/logparser_test.go +++ b/plugins/inputs/logparser/logparser_test.go @@ -26,10 +26,12 @@ func TestGrokParseLogFilesNonExistPattern(t *testing.T) { thisdir := getCurrentDir() logparser := &LogParserPlugin{ - FromBeginning: true, - Files: []string{thisdir + "grok/testdata/*.log"}, - Patterns: []string{"%{FOOBAR}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/*.log"}, + GrokConfig: GrokConfig{ + Patterns: []string{"%{FOOBAR}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + }, } acc := testutil.Accumulator{} @@ -41,11 +43,13 @@ func TestGrokParseLogFiles(t *testing.T) { thisdir := getCurrentDir() logparser := &LogParserPlugin{ - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, - FromBeginning: true, - Files: []string{thisdir + "grok/testdata/*.log"}, - MeasurementName: "logparser_grok", + GrokConfig: GrokConfig{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + }, + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/*.log"}, + MeasurementName: "logparser_grok", } acc := testutil.Accumulator{} @@ -85,11 +89,13 @@ func TestGrokParseLogFilesAppearLater(t *testing.T) { thisdir := getCurrentDir() logparser := &LogParserPlugin{ - 
FromBeginning: true, - Files: []string{emptydir + "/*.log"}, - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, - MeasurementName: "logparser_grok", + FromBeginning: true, + Files: []string{emptydir + "/*.log"}, + GrokConfig: GrokConfig{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + }, + MeasurementName: "logparser_grok", } acc := testutil.Accumulator{} @@ -122,11 +128,13 @@ func TestGrokParseLogFilesOneBad(t *testing.T) { thisdir := getCurrentDir() logparser := &LogParserPlugin{ - FromBeginning: true, - Files: []string{thisdir + "grok/testdata/test_a.log"}, - Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"}, - CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, - MeasurementName: "logparser_grok", + FromBeginning: true, + Files: []string{thisdir + "grok/testdata/test_a.log"}, + GrokConfig: GrokConfig{ + Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_BAD}"}, + CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, + }, + MeasurementName: "logparser_grok", } acc := testutil.Accumulator{} From b10f5927284979229bcc6f36e469bb88c26e8ead Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 13 Jul 2018 14:17:46 -0700 Subject: [PATCH 35/51] unfinished playing w grok parser --- plugins/parsers/grok/parser.go | 4 ++++ plugins/parsers/grok/parser_test.go | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index e17f127fcfee6..4841e67240970 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -37,6 +37,7 @@ var timeLayouts = map[string]string{ } const ( + MEASUREMENT = "measurement" INT = "int" TAG = "tag" FLOAT = "float" @@ -222,6 +223,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { // check if pattern has some modifiers if types, ok := p.typeMap[patternName]; ok { t = types[k] + log.Printf("key: %v, val: %v, tag: %v", k, v, t) } // if we didn't find a modifier, check if we have a timestamp layout if t == "" { @@ -238,6 +240,8 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { } switch t { + case MEASUREMENT: + p.Measurement = v case INT: iv, err := strconv.ParseInt(v, 10, 64) if err != nil { diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 09f8fa16d89b5..02f246159cac8 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1,6 +1,7 @@ package grok import ( + "log" "testing" "time" @@ -959,3 +960,16 @@ func TestReplaceTimestampComma(t *testing.T) { //Convert Nanosecond to milisecond for compare require.Equal(t, 555, m.Time().Nanosecond()/1000000) } + +func TestMeasurementModifier(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST::measurement}"}, + CustomPatterns: "TEST %{NUMBER:var1:float} %{NUMBER:var2:float} %{WORD:var3:string}", + } + + require.NoError(t, p.Compile()) + m, err := p.ParseLine("4 5 hello") + require.NoError(t, err) + log.Printf("m: %v", m) + t.Error() +} From 441bc417e183bcfe15a3724e394e8d0249fb85bf Mon Sep 17 00:00:00 2001 From: Max U Date: Mon, 16 Jul 2018 15:12:34 -0700 Subject: [PATCH 36/51] add modifier for setting metric name for grok parser --- plugins/parsers/grok/parser.go | 23 +++++++++++++++++++++-- plugins/parsers/grok/parser_test.go | 10 ++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git 
a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index 4841e67240970..ba9e4a8f998da 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -75,6 +75,9 @@ type Parser struct { Measurement string DefaultTags map[string]string + //holds any modifiers set on named user patterns + patternModifiers map[string][]string + // Timezone is an optional component to help render log dates to // your chosen zone. // Default: "" which renders UTC @@ -127,6 +130,7 @@ func (p *Parser) Compile() error { p.tsMap = make(map[string]map[string]string) p.patterns = make(map[string]string) p.tsModder = &tsModder{} + p.patternModifiers = make(map[string][]string) var err error p.g, err = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) if err != nil { @@ -138,11 +142,18 @@ func (p *Parser) Compile() error { p.NamedPatterns = make([]string, 0, len(p.Patterns)) for i, pattern := range p.Patterns { pattern = strings.TrimSpace(pattern) + + //extract any modifiers off pattern + pattern = strings.Trim(pattern, "%{}") + splitPattern := strings.SplitN(pattern, ":", 3) if pattern == "" { continue } name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) - p.CustomPatterns += "\n" + name + " " + pattern + "\n" + + //map pattern modifiers by name + p.patternModifiers["%{"+name+"}"] = splitPattern[1:3] + p.CustomPatterns += "\n" + name + " " + "%{" + splitPattern[0] + "}" + "\n" p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") } @@ -223,7 +234,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { // check if pattern has some modifiers if types, ok := p.typeMap[patternName]; ok { t = types[k] - log.Printf("key: %v, val: %v, tag: %v", k, v, t) } // if we didn't find a modifier, check if we have a timestamp layout if t == "" { @@ -352,6 +362,15 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { } } + //check the modifiers on the pattern + modifiers, ok := p.patternModifiers[patternName] + if ok && modifiers[1] == "measurement" { + if p.patternModifiers[patternName][0] == "" { + return nil, fmt.Errorf("pattern: %v must be named to use 'measurement' modifier", patternName) + } + p.Measurement = p.patternModifiers[patternName][0] + } + if len(fields) == 0 { return nil, fmt.Errorf("logparser_grok: must have one or more fields") } diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 02f246159cac8..d8624cf7f2b95 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1,7 +1,6 @@ package grok import ( - "log" "testing" "time" @@ -961,15 +960,14 @@ func TestReplaceTimestampComma(t *testing.T) { require.Equal(t, 555, m.Time().Nanosecond()/1000000) } -func TestMeasurementModifier(t *testing.T) { +func TestDynamicMeasurementModifier(t *testing.T) { p := &Parser{ - Patterns: []string{"%{TEST::measurement}"}, - CustomPatterns: "TEST %{NUMBER:var1:float} %{NUMBER:var2:float} %{WORD:var3:string}", + Patterns: []string{"%{TEST}"}, + CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:measurement}", } require.NoError(t, p.Compile()) m, err := p.ParseLine("4 5 hello") require.NoError(t, err) - log.Printf("m: %v", m) - t.Error() + require.Equal(t, m.Name(), "hello") } From b7ed886e446a14f9188194a52c5f6c964ea5330f Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 17 Jul 2018 09:47:06 -0700 Subject: [PATCH 37/51] unfinished config changes --- docs/DATA_FORMATS_INPUT.md | 21 ++++++++++++++++ plugins/parsers/grok/parser.go | 32 +++++++++++++++-------- 
plugins/parsers/grok/parser_test.go | 39 +++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 11 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 24335a4531ad0..84e6accf03b8e 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -663,6 +663,26 @@ For more information about the dropwizard json format see #### Grok Parse logstash-style "grok" patterns. Patterns can be added to patterns, or custom patterns read from custom_pattern_files. +Modifiers can be appended to the end of a grok field to specify how that field should be handled. +There are also timestamp modifiers, which can be used to specify the format of time data. +Available modifiers can be found below. + +The 'measurement' modifier has two separate use cases, one for static measurement names and one for +dynamic measurement names. + +For setting a static measurement name, apply the 'measurement' modifier to the 'patterns' field. +If grok matches the pattern, the measurement name will be changed to the specified name. +So the config: `patterns = ["%{TEST:test_name:measurement}"]` would output a metric named "test_name" if grok +matches the pattern. It is important to only specify one pattern per element in the patterns array field +or an error will be thrown. +So the config: `patterns = ["%{TEST:test_name:measurement}|%{TEST2:test2_name:measurement}"]` would need to be changed +to: `patterns = ["%{TEST:test_name:measurement}","%{TEST2:test2_name:measurement}"]` + +For setting a dynamic measurement name, simply apply the 'measurement' modifier to a value in a custom pattern. +If the pattern is matched, the measurement name will be set to the value of the field it was applied to. +Each pattern should only have one 'measurement' modifier applied to it. The modifier should only apply to fields +of a single value type, not to another grok pattern. + # View logstash grok pattern docs here: # https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html # All default logstash patterns are supported, these can be viewed here: @@ -675,6 +695,7 @@ Parse logstash-style "grok" patterns.
Patterns can be added to patterns, or cust # duration (ie, 5.23ms gets converted to int nanoseconds) # tag (converts the field into a tag) # drop (drops the field completely) +# measurement (sets the metric name to designated field) # Timestamp modifiers: # ts-ansic ("Mon Jan _2 15:04:05 2006") # ts-unix ("Mon Jan _2 15:04:05 MST 2006") diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index ba9e4a8f998da..ebed7d3e171f3 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -143,18 +143,28 @@ func (p *Parser) Compile() error { for i, pattern := range p.Patterns { pattern = strings.TrimSpace(pattern) - //extract any modifiers off pattern - pattern = strings.Trim(pattern, "%{}") - splitPattern := strings.SplitN(pattern, ":", 3) - if pattern == "" { - continue - } - name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) + //checks that there is only one named field in pattern and 2 ':' indicating a modifier + //then extract any modifiers off pattern + if strings.Count(pattern, "%") == 1 && strings.Count(pattern, ":") == 2 { + pattern = strings.Trim(pattern, "%{}") + splitPattern := strings.SplitN(pattern, ":", 3) + if pattern == "" { + continue + } + name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) - //map pattern modifiers by name - p.patternModifiers["%{"+name+"}"] = splitPattern[1:3] - p.CustomPatterns += "\n" + name + " " + "%{" + splitPattern[0] + "}" + "\n" - p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") + //map pattern modifiers by name + p.patternModifiers["%{"+name+"}"] = splitPattern[1:3] + p.CustomPatterns += "\n" + name + " " + "%{" + splitPattern[0] + "}" + "\n" + p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") + } else { + if pattern == "" { + continue + } + name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) + p.CustomPatterns += "\n" + name + " " + pattern + "\n" + p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") + } } if len(p.NamedPatterns) == 0 { diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index d8624cf7f2b95..a8a657ffe7f77 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -971,3 +971,42 @@ func TestDynamicMeasurementModifier(t *testing.T) { require.NoError(t, err) require.Equal(t, m.Name(), "hello") } + +func TestStaticMeasurementModifier(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST:test_name:measurement}"}, + CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:string}", + } + + require.NoError(t, p.Compile()) + m, err := p.ParseLine("4 5 hello") + require.NoError(t, err) + require.Equal(t, m.Name(), "test_name") +} + +func TestStaticAndDynamicMeasurementModifier(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST:test_name:measurement}"}, + CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:measurement}", + } + + require.NoError(t, p.Compile()) + m, err := p.ParseLine("4 5 hello") + require.NoError(t, err) + require.Equal(t, m.Name(), "test_name") +} + +func TestMultipleMeasurementModifier(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST:test_name:measurement}", "%{TEST2:test2_name:measurement}"}, + CustomPatterns: `TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var_string:string} + TEST2 %{WORD:stringer1:tag} %{NUMBER:var2:float} %{NUMBER:var3:float}`, + } + + require.NoError(t, p.Compile()) + m, err := p.ParseLine("4 5 hello") + m2, err := p.ParseLine("mystr 5 9.5") + require.NoError(t, err) + require.Equal(t, m.Name(),
"test_name") + require.Equal(t, m2.Name(), "test2_name") +} From 903a9779ab5a2cd8a410de1247d38315167d82e1 Mon Sep 17 00:00:00 2001 From: Max U Date: Tue, 17 Jul 2018 13:09:08 -0700 Subject: [PATCH 38/51] additional test cases and README updated --- docs/DATA_FORMATS_INPUT.md | 10 ++++++---- plugins/parsers/grok/parser.go | 17 ++++++++++++++--- plugins/parsers/grok/parser_test.go | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 84e6accf03b8e..ee0a9ab1cf5a9 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -670,18 +670,20 @@ Available modifiers can be found below. The 'measurement' modifier has two separate use cases, one for static measurement names and one for dynamic measurement names. -For setting a static measurement name, apply the 'measurement' modifier to the 'patterns' field. -If grok matches the pattern, the measurement name will be changed to the specified name. +For setting a static measurement name, apply the 'measurement' modifier to a single pattern in the +'patterns' field. If grok matches the pattern, the measurement name will be changed to the specified name. So the config: `patterns = ["%{TEST:test_name:measurement}"]` would output a metric named "test_name" if grok matches the pattern. It is important to only specify one pattern per element in the patterns array field or an error will be thrown. So the config: `patterns = ["%{TEST:test_name:measurement}|%{TEST2:test2_name:measurement}"]` would need to be changed to: `patterns = ["%{TEST:test_name:measurement}","%{TEST2:test2_name:measurement}"]` -For setting a dynamic measurement name, simply apply the 'measurement' modifier to a value in a custom pattern. +For setting a dynamic measurement name, apply the 'measurement' modifier to a value in a custom pattern. If the pattern is matched, the measurement name will be set to the value of the field it was applied to. Each pattern should only have one 'measurement' modifier applied to it. The modifier should only apply to fields -of a single value type, not to another grok pattern. +of a single value type, not to another grok pattern. The name of the field with the measurement modifier applied +will be ignored, so these formats are the same: `custom_patterns = {"TEST %{WORD:ignored_name:measurement}"}` +`custom_patterns = {"TEST %{WORD::measurement}"}` # View logstash grok pattern docs here: # https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index ba9e4a8f998da..ebed7d3e171f3 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -57,7 +57,7 @@ var ( // %{IPORHOST:clientip:tag} // %{HTTPDATE:ts1:ts-http} // %{HTTPDATE:ts2:ts-"02 Jan 06 15:04"} - modifierRe = regexp.MustCompile(`%{\w+:(\w+):(ts-".+"|t?s?-?\w+)}`) + modifierRe = regexp.MustCompile(`%{\w+:(\w+|):(ts-".+"|t?s?-?\w+)}`) // matches a plain pattern name.
ie, %{NUMBER} patternOnlyRe = regexp.MustCompile(`%{(\w+)}`) ) @@ -158,6 +158,9 @@ func (p *Parser) Compile() error { p.CustomPatterns += "\n" + name + " " + "%{" + splitPattern[0] + "}" + "\n" p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") } else { + if strings.Count(pattern, ":measurement}") > 0 { + return fmt.Errorf("pattern with measurement modifier must have own 'pattern' field") + } if pattern == "" { continue } @@ -235,7 +238,8 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { timestamp := time.Now() for k, v := range values { - if k == "" || v == "" { + if (k == "" || v == "") && p.typeMap[patternName][k] != "measurement" { + log.Printf("skipping key: %v", k) continue } @@ -262,6 +266,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { switch t { case MEASUREMENT: p.Measurement = v + log.Printf("measurement") case INT: iv, err := strconv.ParseInt(v, 10, 64) if err != nil { @@ -382,7 +387,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { } if len(fields) == 0 { - return nil, fmt.Errorf("logparser_grok: must have one or more fields") + return nil, fmt.Errorf("grok: must have one or more fields") } return metric.New(p.Measurement, tags, fields, p.tsModder.tsMod(timestamp)) @@ -486,6 +491,12 @@ func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) { } hasTimestamp = true } else { + //for handling measurement tag with no name + if match[1] == "" && match[2] == "measurement" { + match[1] = "measurement_name" + //add "measurement_name" to pattern so it is valid grok + pattern = strings.Replace(pattern, "::measurement", ":measurement_name:measurement", 1) + } p.typeMap[patternName][match[1]] = match[2] } diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index a8a657ffe7f77..7af416b4ef7d8 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1010,3 +1010,25 @@ func TestMultipleMeasurementModifier(t *testing.T) { require.Equal(t, m.Name(), "test_name") require.Equal(t, m2.Name(), "test2_name") } + +func TestMeasurementModifierNoName(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST}"}, + CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD::measurement}", + } + + require.NoError(t, p.Compile()) + m, err := p.ParseLine("4 5 hello") + require.NoError(t, err) + require.Equal(t, m.Name(), "hello") +} + +func TestMeasurementErrors(t *testing.T) { + p := &Parser{ + Patterns: []string{"%{TEST:test_name:measurement}|%{TEST2:test2_name}"}, + CustomPatterns: `TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var_string:string} + TEST2 %{WORD:stringer1:tag} %{NUMBER:var2:float} %{NUMBER:var3:float}`, + } + err := p.Compile() + require.Error(t, err) +} From 0040530ecd18ced05262efcea44dc1ddf71397f4 Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 27 Jul 2018 11:01:41 -0700 Subject: [PATCH 39/51] address greg's comments --- plugins/inputs/logparser/grok/testdata/.DS_Store | Bin 6148 -> 0 bytes plugins/inputs/reader/README.md | 2 +- plugins/inputs/reader/dev/docker-compose.yml | 2 +- plugins/inputs/reader/dev/json_a.log | 14 -------------- plugins/inputs/reader/reader.go | 9 ++++----- plugins/inputs/reader/reader_test.go | 6 +++--- plugins/inputs/reader/testfiles/grok_a.log | 2 -- plugins/inputs/reader/testfiles/json_a.log | 14 -------------- plugins/parsers/grok/parser.go | 5 ++--- plugins/parsers/grok/parser_test.go | 6 ++++++ plugins/parsers/grok/testdata/.DS_Store | Bin 6148 -> 0 bytes 11 files changed, 17 
insertions(+), 43 deletions(-) delete mode 100644 plugins/inputs/logparser/grok/testdata/.DS_Store delete mode 100644 plugins/inputs/reader/dev/json_a.log delete mode 100644 plugins/inputs/reader/testfiles/grok_a.log delete mode 100644 plugins/inputs/reader/testfiles/json_a.log delete mode 100644 plugins/parsers/grok/testdata/.DS_Store diff --git a/plugins/inputs/logparser/grok/testdata/.DS_Store b/plugins/inputs/logparser/grok/testdata/.DS_Store deleted file mode 100644 index 15d123ef0efe733881b859864ca23e55a26587a3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKyH3PF47A~j6DOqWpxlxMBx;)IbQIKl01!Yl5jkbEMnQdlg!@R)0kED}c?uHyW`G(Ve!Fm3`vB}b<0;xbM zkP4&%KcWEcY}$0@m_8Lq1yX^H0z4lI3b6^Sj&^jwSqcE0pxF#{<`U$Tz$UOdq6el< z1v*tD#ZafCy+m9SSRI`%8p(&o%x}qy+Uyv=Sh#58m_8Lq1%?VR_l3=Q|DWQM8BOvn zBwZ?y3j9|Ebh12OF0i|}TR-iScWptrM4>URvI0YW@DqR+o+JC#>HURu#5IA{k)yEP Q=)n9DFhSC#0>7ZZD@y@41poj5 diff --git a/plugins/inputs/reader/README.md b/plugins/inputs/reader/README.md index 39adb253d1782..26e82e8e2d940 100644 --- a/plugins/inputs/reader/README.md +++ b/plugins/inputs/reader/README.md @@ -12,7 +12,7 @@ This plugin can parse any "data_format" formats. ## ** as a "super asterisk". ie: ## /var/log/**.log -> recursively find all .log files in /var/log ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log -## /var/log/apache.log -> only tail the apache log file +## /var/log/apache.log -> only read the apache log file files = ["/var/log/apache/access.log"] ## The dataformat to be read from files diff --git a/plugins/inputs/reader/dev/docker-compose.yml b/plugins/inputs/reader/dev/docker-compose.yml index 3c16fca909ebd..ac3e87b083b0d 100644 --- a/plugins/inputs/reader/dev/docker-compose.yml +++ b/plugins/inputs/reader/dev/docker-compose.yml @@ -6,7 +6,7 @@ services: volumes: - ./telegraf.conf:/telegraf.conf - ../../../../telegraf:/telegraf - - ./json_a.log:/var/log/test.log + - ./testfiles/json_a.log:/var/log/test.log entrypoint: - /telegraf - --config diff --git a/plugins/inputs/reader/dev/json_a.log b/plugins/inputs/reader/dev/json_a.log deleted file mode 100644 index 0f52e9d1e3b57..0000000000000 --- a/plugins/inputs/reader/dev/json_a.log +++ /dev/null @@ -1,14 +0,0 @@ -{ -"parent": { - "child": 3.0, - "ignored_child": "hi" -}, -"ignored_null": null, -"integer": 4, -"list": [3, 4], -"ignored_parent": { - "another_ignored_null": null, - "ignored_string": "hello, world!" -}, -"another_list": [4] -} diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go index 745592df24bbb..27ff485f6e312 100644 --- a/plugins/inputs/reader/reader.go +++ b/plugins/inputs/reader/reader.go @@ -11,9 +11,8 @@ import ( ) type Reader struct { - Files []string `toml:"files"` - FromBeginning bool - parser parsers.Parser + Files []string `toml:"files"` + parser parsers.Parser filenames []string } @@ -23,7 +22,7 @@ const sampleConfig = `## Files to parse each interval. ## ** as a "super asterisk". ie: ## /var/log/**.log -> recursively find all .log files in /var/log ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log -## /var/log/apache.log -> only tail the apache log file +## /var/log/apache.log -> only read the apache log file files = ["/var/log/apache/access.log"] ## The dataformat to be read from files @@ -39,7 +38,7 @@ func (r *Reader) SampleConfig() string { } func (r *Reader) Description() string { - return "reload and gather from file[s] on telegraf's interval" + return "Reload and gather from file[s] on telegraf's interval." 
} func (r *Reader) Gather(acc telegraf.Accumulator) error { diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go index b2441eee538d2..dec43c97efdee 100644 --- a/plugins/inputs/reader/reader_test.go +++ b/plugins/inputs/reader/reader_test.go @@ -13,7 +13,7 @@ import ( func TestRefreshFilePaths(t *testing.T) { testDir := getPluginDir() r := Reader{ - Files: []string{testDir + "/reader/testfiles/**.log"}, + Files: []string{testDir + "/reader/dev/testfiles/**.log"}, } r.refreshFilePaths() @@ -23,7 +23,7 @@ func TestJSONParserCompile(t *testing.T) { testDir := getPluginDir() var acc testutil.Accumulator r := Reader{ - Files: []string{testDir + "/reader/testfiles/json_a.log"}, + Files: []string{testDir + "/reader/dev/testfiles/json_a.log"}, } parserConfig := parsers.Config{ DataFormat: "json", @@ -42,7 +42,7 @@ func TestGrokParser(t *testing.T) { testDir := getPluginDir() var acc testutil.Accumulator r := Reader{ - Files: []string{testDir + "/reader/testfiles/grok_a.log"}, + Files: []string{testDir + "/reader/dev/testfiles/grok_a.log"}, } parserConfig := parsers.Config{ diff --git a/plugins/inputs/reader/testfiles/grok_a.log b/plugins/inputs/reader/testfiles/grok_a.log deleted file mode 100644 index 5295fcb75152a..0000000000000 --- a/plugins/inputs/reader/testfiles/grok_a.log +++ /dev/null @@ -1,2 +0,0 @@ -127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 -128.0.0.1 user-identifier tony [10/Oct/2000:13:55:36 -0800] "GET /apache_pb.gif HTTP/1.0" 300 45 \ No newline at end of file diff --git a/plugins/inputs/reader/testfiles/json_a.log b/plugins/inputs/reader/testfiles/json_a.log deleted file mode 100644 index 739fd65d89ca1..0000000000000 --- a/plugins/inputs/reader/testfiles/json_a.log +++ /dev/null @@ -1,14 +0,0 @@ -{ - "parent": { - "child": 3.0, - "ignored_child": "hi" - }, - "ignored_null": null, - "integer": 4, - "list": [3, 4], - "ignored_parent": { - "another_ignored_null": null, - "ignored_string": "hello, world!" - }, - "another_list": [4] - } \ No newline at end of file diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index 76cd23635d9ee..aadb1b9f43eda 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -158,7 +158,7 @@ func (p *Parser) Compile() error { p.CustomPatterns += "\n" + name + " " + "%{" + splitPattern[0] + "}" + "\n" p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") } else { - if strings.Count(pattern, ":measurement}") > 0 { + if strings.Contains(pattern, ":measurement}") { return fmt.Errorf("pattern with measurement modifier must have own 'pattern' field") } if pattern == "" { @@ -239,7 +239,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { timestamp := time.Now() for k, v := range values { if (k == "" || v == "") && p.typeMap[patternName][k] != "measurement" { - log.Printf("skipping key: %v", k) + log.Printf("D! 
skipping key: %v", k) continue } @@ -266,7 +266,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { switch t { case MEASUREMENT: p.Measurement = v - log.Printf("measurement") case INT: iv, err := strconv.ParseInt(v, 10, 64) if err != nil { diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 7af416b4ef7d8..4a722d9496faf 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1,6 +1,8 @@ package grok import ( + "log" + "strings" "testing" "time" @@ -1031,4 +1033,8 @@ func TestMeasurementErrors(t *testing.T) { } err := p.Compile() require.Error(t, err) + str := strings.Trim("%{pattern}:name:measurement", "%{") + str = strings.Trim(str, "}") + log.Printf("{pattern}:name:measurement under trim: %v", str) + t.Error() } diff --git a/plugins/parsers/grok/testdata/.DS_Store b/plugins/parsers/grok/testdata/.DS_Store deleted file mode 100644 index 15d123ef0efe733881b859864ca23e55a26587a3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKyH3PF47A~j6DOqWpxlxMBx;)IbQIKl01!Yl5jkbEMnQdlg!@R)0kED}c?uHyW`G(Ve!Fm3`vB}b<0;xbM zkP4&%KcWEcY}$0@m_8Lq1yX^H0z4lI3b6^Sj&^jwSqcE0pxF#{<`U$Tz$UOdq6el< z1v*tD#ZafCy+m9SSRI`%8p(&o%x}qy+Uyv=Sh#58m_8Lq1%?VR_l3=Q|DWQM8BOvn zBwZ?y3j9|Ebh12OF0i|}TR-iScWptrM4>URvI0YW@DqR+o+JC#>HURu#5IA{k)yEP Q=)n9DFhSC#0>7ZZD@y@41poj5 From 054c20e3816e926a54bcd4de4395aaffbfe3d7c4 Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 27 Jul 2018 11:02:52 -0700 Subject: [PATCH 40/51] fix a unit test --- plugins/parsers/grok/parser_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 4a722d9496faf..42f3781d6027c 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1,7 +1,6 @@ package grok import ( - "log" "strings" "testing" "time" @@ -1035,6 +1034,4 @@ func TestMeasurementErrors(t *testing.T) { require.Error(t, err) str := strings.Trim("%{pattern}:name:measurement", "%{") str = strings.Trim(str, "}") - log.Printf("{pattern}:name:measurement under trim: %v", str) - t.Error() } From 0e5e1153c323acce64540169b934986db822afab Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 27 Jul 2018 11:03:32 -0700 Subject: [PATCH 41/51] whips... 
--- plugins/parsers/grok/parser_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 42f3781d6027c..7af416b4ef7d8 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1,7 +1,6 @@ package grok import ( - "strings" "testing" "time" @@ -1032,6 +1031,4 @@ func TestMeasurementErrors(t *testing.T) { } err := p.Compile() require.Error(t, err) - str := strings.Trim("%{pattern}:name:measurement", "%{") - str = strings.Trim(str, "}") } From 1b8ce4af7cd42b15a3cbe9aec861084d62f89c23 Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 27 Jul 2018 14:00:01 -0700 Subject: [PATCH 42/51] addresses comments and merges with master --- plugins/inputs/file/README.md | 2 +- plugins/inputs/file/{ => dev}/testfiles/grok_a.log | 0 plugins/inputs/file/{ => dev}/testfiles/json_a.log | 0 plugins/inputs/file/file.go | 9 ++++----- plugins/inputs/file/file_test.go | 6 +++--- 5 files changed, 8 insertions(+), 9 deletions(-) rename plugins/inputs/file/{ => dev}/testfiles/grok_a.log (100%) rename plugins/inputs/file/{ => dev}/testfiles/json_a.log (100%) diff --git a/plugins/inputs/file/README.md b/plugins/inputs/file/README.md index 73a3a2362e0f0..4358b67ad2668 100644 --- a/plugins/inputs/file/README.md +++ b/plugins/inputs/file/README.md @@ -14,7 +14,7 @@ use the [tail input plugin](/plugins/inputs/tail) instead. ## ** as a "super asterisk". ie: ## /var/log/**.log -> recursively find all .log files in /var/log ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log - ## /var/log/apache.log -> only tail the apache log file + ## /var/log/apache.log -> only read the apache log file files = ["/var/log/apache/access.log"] ## Data format to consume. diff --git a/plugins/inputs/file/testfiles/grok_a.log b/plugins/inputs/file/dev/testfiles/grok_a.log similarity index 100% rename from plugins/inputs/file/testfiles/grok_a.log rename to plugins/inputs/file/dev/testfiles/grok_a.log diff --git a/plugins/inputs/file/testfiles/json_a.log b/plugins/inputs/file/dev/testfiles/json_a.log similarity index 100% rename from plugins/inputs/file/testfiles/json_a.log rename to plugins/inputs/file/dev/testfiles/json_a.log diff --git a/plugins/inputs/file/file.go b/plugins/inputs/file/file.go index 2779561fc2ffb..d6714301eaed2 100644 --- a/plugins/inputs/file/file.go +++ b/plugins/inputs/file/file.go @@ -11,9 +11,8 @@ import ( ) type File struct { - Files []string `toml:"files"` - FromBeginning bool - parser parsers.Parser + Files []string `toml:"files"` + parser parsers.Parser filenames []string } @@ -24,7 +23,7 @@ const sampleConfig = ` ## ** as a "super asterisk". ie: ## /var/log/**.log -> recursively find all .log files in /var/log ## /var/log/*/*.log -> find all .log files with a parent dir in /var/log - ## /var/log/apache.log -> only tail the apache log file + ## /var/log/apache.log -> only read the apache log file files = ["/var/log/apache/access.log"] ## The dataformat to be read from files @@ -40,7 +39,7 @@ func (f *File) SampleConfig() string { } func (f *File) Description() string { - return "reload and gather from file[s] on telegraf's interval" + return "Reload and gather from file[s] on telegraf's interval." 
} func (f *File) Gather(acc telegraf.Accumulator) error { diff --git a/plugins/inputs/file/file_test.go b/plugins/inputs/file/file_test.go index 28105664615a1..cae3078d920cd 100644 --- a/plugins/inputs/file/file_test.go +++ b/plugins/inputs/file/file_test.go @@ -14,7 +14,7 @@ import ( func TestRefreshFilePaths(t *testing.T) { wd, err := os.Getwd() r := File{ - Files: []string{filepath.Join(wd, "testfiles/**.log")}, + Files: []string{filepath.Join(wd, "dev/testfiles/**.log")}, } err = r.refreshFilePaths() @@ -25,7 +25,7 @@ func TestJSONParserCompile(t *testing.T) { var acc testutil.Accumulator wd, _ := os.Getwd() r := File{ - Files: []string{filepath.Join(wd, "testfiles/json_a.log")}, + Files: []string{filepath.Join(wd, "dev/testfiles/json_a.log")}, } parserConfig := parsers.Config{ DataFormat: "json", @@ -44,7 +44,7 @@ func TestGrokParser(t *testing.T) { wd, _ := os.Getwd() var acc testutil.Accumulator r := File{ - Files: []string{filepath.Join(wd, "testfiles/grok_a.log")}, + Files: []string{filepath.Join(wd, "dev/testfiles/grok_a.log")}, } parserConfig := parsers.Config{ From 797f9bd7e232c1f8a33324de15555cf9ce90cf09 Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 27 Jul 2018 14:30:24 -0700 Subject: [PATCH 43/51] remove reader directory --- plugins/inputs/file/dev/docker-compose.yml | 2 +- plugins/inputs/file/dev/json_a.log | 14 --- plugins/inputs/file/file_test.go | 6 +- plugins/inputs/reader/README.md | 23 ----- plugins/inputs/reader/dev/docker-compose.yml | 13 --- plugins/inputs/reader/dev/telegraf.conf | 7 -- plugins/inputs/reader/reader.go | 94 -------------------- plugins/inputs/reader/reader_test.go | 64 ------------- 8 files changed, 4 insertions(+), 219 deletions(-) delete mode 100644 plugins/inputs/file/dev/json_a.log delete mode 100644 plugins/inputs/reader/README.md delete mode 100644 plugins/inputs/reader/dev/docker-compose.yml delete mode 100644 plugins/inputs/reader/dev/telegraf.conf delete mode 100644 plugins/inputs/reader/reader.go delete mode 100644 plugins/inputs/reader/reader_test.go diff --git a/plugins/inputs/file/dev/docker-compose.yml b/plugins/inputs/file/dev/docker-compose.yml index 3c16fca909ebd..efce389f78424 100644 --- a/plugins/inputs/file/dev/docker-compose.yml +++ b/plugins/inputs/file/dev/docker-compose.yml @@ -6,7 +6,7 @@ services: volumes: - ./telegraf.conf:/telegraf.conf - ../../../../telegraf:/telegraf - - ./json_a.log:/var/log/test.log + - ./dev/json_a.log:/var/log/test.log entrypoint: - /telegraf - --config diff --git a/plugins/inputs/file/dev/json_a.log b/plugins/inputs/file/dev/json_a.log deleted file mode 100644 index 0f52e9d1e3b57..0000000000000 --- a/plugins/inputs/file/dev/json_a.log +++ /dev/null @@ -1,14 +0,0 @@ -{ -"parent": { - "child": 3.0, - "ignored_child": "hi" -}, -"ignored_null": null, -"integer": 4, -"list": [3, 4], -"ignored_parent": { - "another_ignored_null": null, - "ignored_string": "hello, world!" 
-}, -"another_list": [4] -} diff --git a/plugins/inputs/file/file_test.go b/plugins/inputs/file/file_test.go index cae3078d920cd..43322c2e84cf9 100644 --- a/plugins/inputs/file/file_test.go +++ b/plugins/inputs/file/file_test.go @@ -19,7 +19,7 @@ func TestRefreshFilePaths(t *testing.T) { err = r.refreshFilePaths() require.NoError(t, err) - assert.Equal(t, len(r.filenames), 2) + assert.Equal(t, 2, len(r.filenames)) } func TestJSONParserCompile(t *testing.T) { var acc testutil.Accumulator @@ -32,8 +32,8 @@ func TestJSONParserCompile(t *testing.T) { TagKeys: []string{"parent_ignored_child"}, } nParser, err := parsers.NewParser(&parserConfig) - r.parser = nParser assert.NoError(t, err) + r.parser = nParser r.Gather(&acc) assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags) @@ -57,5 +57,5 @@ func TestGrokParser(t *testing.T) { assert.NoError(t, err) err = r.Gather(&acc) - assert.Equal(t, 2, len(acc.Metrics)) + assert.Equal(t, len(acc.Metrics), 2) } diff --git a/plugins/inputs/reader/README.md b/plugins/inputs/reader/README.md deleted file mode 100644 index 26e82e8e2d940..0000000000000 --- a/plugins/inputs/reader/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# Reader Input Plugin - -The Reader Plugin updates a list of files every interval and parses the data inside. -Files will always be read from the beginning. -This plugin can parse any "data_format" formats. - -### Configuration: -```toml -[[inputs.reader]] -## Files to parse each interval. -## These accept standard unix glob matching rules, but with the addition of -## ** as a "super asterisk". ie: -## /var/log/**.log -> recursively find all .log files in /var/log -## /var/log/*/*.log -> find all .log files with a parent dir in /var/log -## /var/log/apache.log -> only read the apache log file -files = ["/var/log/apache/access.log"] - -## The dataformat to be read from files -## Each data format has its own unique set of configuration options, read -## more about them here: -## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -data_format = "" -``` \ No newline at end of file diff --git a/plugins/inputs/reader/dev/docker-compose.yml b/plugins/inputs/reader/dev/docker-compose.yml deleted file mode 100644 index ac3e87b083b0d..0000000000000 --- a/plugins/inputs/reader/dev/docker-compose.yml +++ /dev/null @@ -1,13 +0,0 @@ -version: '3' - -services: - telegraf: - image: glinton/scratch - volumes: - - ./telegraf.conf:/telegraf.conf - - ../../../../telegraf:/telegraf - - ./testfiles/json_a.log:/var/log/test.log - entrypoint: - - /telegraf - - --config - - /telegraf.conf diff --git a/plugins/inputs/reader/dev/telegraf.conf b/plugins/inputs/reader/dev/telegraf.conf deleted file mode 100644 index 4d50bd5e036a3..0000000000000 --- a/plugins/inputs/reader/dev/telegraf.conf +++ /dev/null @@ -1,7 +0,0 @@ -[[inputs.reader]] - files = ["/var/log/test.log"] - data_format = "json" - name_override = "json_reader" - -[[outputs.file]] - files = ["stdout"] diff --git a/plugins/inputs/reader/reader.go b/plugins/inputs/reader/reader.go deleted file mode 100644 index 27ff485f6e312..0000000000000 --- a/plugins/inputs/reader/reader.go +++ /dev/null @@ -1,94 +0,0 @@ -package reader - -import ( - "fmt" - "io/ioutil" - - "github.com/influxdata/telegraf" - "github.com/influxdata/telegraf/internal/globpath" - "github.com/influxdata/telegraf/plugins/inputs" - "github.com/influxdata/telegraf/plugins/parsers" -) - -type Reader struct { - Files []string `toml:"files"` - parser 
parsers.Parser - - filenames []string -} - -const sampleConfig = `## Files to parse each interval. -## These accept standard unix glob matching rules, but with the addition of -## ** as a "super asterisk". ie: -## /var/log/**.log -> recursively find all .log files in /var/log -## /var/log/*/*.log -> find all .log files with a parent dir in /var/log -## /var/log/apache.log -> only read the apache log file -files = ["/var/log/apache/access.log"] - -## The dataformat to be read from files -## Each data format has its own unique set of configuration options, read -## more about them here: -## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md -data_format = "" -` - -// SampleConfig returns the default configuration of the Input -func (r *Reader) SampleConfig() string { - return sampleConfig -} - -func (r *Reader) Description() string { - return "Reload and gather from file[s] on telegraf's interval." -} - -func (r *Reader) Gather(acc telegraf.Accumulator) error { - r.refreshFilePaths() - for _, k := range r.filenames { - metrics, err := r.readMetric(k) - if err != nil { - return err - } - - for _, m := range metrics { - acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) - } - } - return nil -} - -func (r *Reader) SetParser(p parsers.Parser) { - r.parser = p -} - -func (r *Reader) refreshFilePaths() error { - var allFiles []string - for _, filepath := range r.Files { - g, err := globpath.Compile(filepath) - if err != nil { - return fmt.Errorf("could not compile glob %v: %v", filepath, err) - } - files := g.Match() - - for k := range files { - allFiles = append(allFiles, k) - } - } - - r.filenames = allFiles - return nil -} - -func (r *Reader) readMetric(filename string) ([]telegraf.Metric, error) { - fileContents, err := ioutil.ReadFile(filename) - if err != nil { - return nil, fmt.Errorf("E! 
Error file: %v could not be read, %s", filename, err) - } - return r.parser.Parse(fileContents) - -} - -func init() { - inputs.Add("reader", func() telegraf.Input { - return &Reader{} - }) -} diff --git a/plugins/inputs/reader/reader_test.go b/plugins/inputs/reader/reader_test.go deleted file mode 100644 index dec43c97efdee..0000000000000 --- a/plugins/inputs/reader/reader_test.go +++ /dev/null @@ -1,64 +0,0 @@ -package reader - -import ( - "runtime" - "strings" - "testing" - - "github.com/influxdata/telegraf/plugins/parsers" - "github.com/influxdata/telegraf/testutil" - "github.com/stretchr/testify/assert" -) - -func TestRefreshFilePaths(t *testing.T) { - testDir := getPluginDir() - r := Reader{ - Files: []string{testDir + "/reader/dev/testfiles/**.log"}, - } - - r.refreshFilePaths() - assert.Equal(t, len(r.filenames), 2) -} -func TestJSONParserCompile(t *testing.T) { - testDir := getPluginDir() - var acc testutil.Accumulator - r := Reader{ - Files: []string{testDir + "/reader/dev/testfiles/json_a.log"}, - } - parserConfig := parsers.Config{ - DataFormat: "json", - TagKeys: []string{"parent_ignored_child"}, - } - nParser, err := parsers.NewParser(&parserConfig) - r.parser = nParser - assert.NoError(t, err) - - r.Gather(&acc) - assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags) - assert.Equal(t, 5, len(acc.Metrics[0].Fields)) -} - -func TestGrokParser(t *testing.T) { - testDir := getPluginDir() - var acc testutil.Accumulator - r := Reader{ - Files: []string{testDir + "/reader/dev/testfiles/grok_a.log"}, - } - - parserConfig := parsers.Config{ - DataFormat: "grok", - GrokPatterns: []string{"%{COMMON_LOG_FORMAT}"}, - } - - nParser, err := parsers.NewParser(&parserConfig) - r.parser = nParser - assert.NoError(t, err) - - err = r.Gather(&acc) - assert.Equal(t, 2, len(acc.Metrics)) -} - -func getPluginDir() string { - _, filename, _, _ := runtime.Caller(1) - return strings.Replace(filename, "/reader/reader_test.go", "", 1) -} From 255e596421187e77456f243cc5570e87d0d37a9c Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 27 Jul 2018 14:47:33 -0700 Subject: [PATCH 44/51] remove reader from all.go --- plugins/inputs/all/all.go | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index e9b504d0aeb1d..8594db0a91361 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -95,7 +95,6 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/puppetagent" _ "github.com/influxdata/telegraf/plugins/inputs/rabbitmq" _ "github.com/influxdata/telegraf/plugins/inputs/raindrops" - _ "github.com/influxdata/telegraf/plugins/inputs/reader" _ "github.com/influxdata/telegraf/plugins/inputs/redis" _ "github.com/influxdata/telegraf/plugins/inputs/rethinkdb" _ "github.com/influxdata/telegraf/plugins/inputs/riak" From 34075e34aa9cae21e2a3548a30ee4ef4de835bd9 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 16 Aug 2018 11:07:46 -0700 Subject: [PATCH 45/51] readme changes --- docs/DATA_FORMATS_INPUT.md | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index e7b173b42f6b7..a84d56d591160 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -672,22 +672,20 @@ There are also timestamp modifiers, which can be used to specify the format of t Available modifiers can be found below. 
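To make the modifier behavior concrete, here is a minimal self-contained Go sketch of the parser usage that the unit tests in this series exercise; the pattern, the sample line, and the field names are illustrative assumptions rather than anything taken from these patches:

```go
package main

import (
	"fmt"
	"log"

	"github.com/influxdata/telegraf/plugins/parsers/grok"
)

func main() {
	// One modifier of each common kind: "tag" turns the capture into a tag,
	// "float" coerces the value into a float field, and "string" keeps it a
	// plain string field. Measurement and pattern names are hypothetical.
	p := &grok.Parser{
		Measurement:    "example_log",
		Patterns:       []string{"%{TEST}"},
		CustomPatterns: "TEST %{NUMBER:code:tag} %{NUMBER:elapsed:float} %{WORD:msg:string}",
	}
	if err := p.Compile(); err != nil {
		log.Fatal(err)
	}

	m, err := p.ParseLine("200 0.42 ok")
	if err != nil {
		log.Fatal(err)
	}
	// Expected shape: one tag (code=200) and two fields (elapsed=0.42, msg="ok").
	fmt.Println(m.Name(), m.Tags(), m.Fields())
}
```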
The 'measurement' modifier has two separate use cases, one for static measurement names and one for -dynamic measurement names. +dynamic measurement names. Static measurement names need a semantic name for a pattern, while dynamic +measurement names are left empty. +Static measurement modifier: `patterns = ["%{WORD:test_name:measurement}"]` +Dynamic measurement modifier: `patterns = ["%{WORD::measurement}"]` -For setting a static measurement name, apply the 'measurement' modifier to a single pattern in the +For setting a static measurement name, apply the 'measurement' modifier with a semantic name attached to the field. If grok matches the pattern, the measurement name will be changed to the specified name. So the config: `patterns = ["%{TEST:test_name:measurement}"]` would output a metric named "test_name" if grok matches the pattern. It is important to only specify one pattern per element in the patterns array field or an error will be thrown. So the config: `patterns = ["%{TEST:test_name:measurement}|%{TEST2:test2_name:measurement}"]` would need to be changed to: `patterns = ["%{TEST:test_name:measurement}","%{TEST2:test2_name:measurement}"]` -For setting a dynamic measurement name, apply the 'measurement' modifier to a specific field in a pattern without a semantic name: `custom_patterns = {"TEST %{WORD::measurement}"}`. If the pattern is matched, the measurement name will be set to the value of the field it was applied to. Each pattern should only have one 'measurement' modifier applied to it. The modifier should only apply to fields of a single value type, not to another grok pattern.
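The dynamic case reduces to a short test; the following sketch is adapted from TestDynamicMeasurementModifier elsewhere in this series, with "4 5 hello" as an illustrative input line:

```go
package grok_test

import (
	"testing"

	"github.com/influxdata/telegraf/plugins/parsers/grok"
	"github.com/stretchr/testify/require"
)

// Sketch adapted from TestDynamicMeasurementModifier in this patch series.
func TestDynamicMeasurementSketch(t *testing.T) {
	p := &grok.Parser{
		Patterns:       []string{"%{TEST}"},
		CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:measurement}",
	}
	require.NoError(t, p.Compile())

	m, err := p.ParseLine("4 5 hello")
	require.NoError(t, err)
	// The matched value of the WORD capture ("hello"), not its semantic
	// name ("var3"), becomes the metric name.
	require.Equal(t, "hello", m.Name())
}
```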
The best way to get acquainted with grok patterns is to read the logstash docs, which are available here: From a246c11d8a24c597ad2cff0162652fb1bacf09d8 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 16 Aug 2018 16:59:48 -0700 Subject: [PATCH 46/51] breaking stuff --- plugins/parsers/grok/parser.go | 12 +++++++----- plugins/parsers/grok/parser_test.go | 11 +++++++---- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index 5d624f3e0cc3c..0560ce6f5fc9e 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -159,9 +159,6 @@ func (p *Parser) Compile() error { p.CustomPatterns += "\n" + name + " " + "%{" + splitPattern[0] + "}" + "\n" p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") } else { - if strings.Contains(pattern, ":measurement}") { - return fmt.Errorf("pattern with measurement modifier must have own 'pattern' field") - } if pattern == "" { continue } @@ -266,7 +263,11 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { switch t { case MEASUREMENT: - p.Measurement = v + if k == "measurement_name" { + p.Measurement = v + } else { + p.Measurement = k + } case INT: iv, err := strconv.ParseInt(v, 10, 64) if err != nil { @@ -381,7 +382,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { modifiers, ok := p.patternModifiers[patternName] if ok && modifiers[1] == "measurement" { if p.patternModifiers[patternName][0] == "" { - return nil, fmt.Errorf("pattern: %v must be named to use 'measurement' modifier", patternName) + //return nil, fmt.Errorf("pattern: %v must be a field to use dynamic 'measurement' modifier", patternName) } p.Measurement = p.patternModifiers[patternName][0] } @@ -499,6 +500,7 @@ func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) { //add "measurement_name" to pattern so it is valid grok pattern = strings.Replace(pattern, "::measurement", ":measurement_name:measurement", 1) } + log.Printf("typed: %v, name: %v, modifier: %v", patternName, match[1], match[2]) p.typeMap[patternName][match[1]] = match[2] } diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 7af416b4ef7d8..34e8c271fa53b 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1,6 +1,7 @@ package grok import ( + "log" "testing" "time" @@ -969,21 +970,23 @@ func TestDynamicMeasurementModifier(t *testing.T) { require.NoError(t, p.Compile()) m, err := p.ParseLine("4 5 hello") require.NoError(t, err) - require.Equal(t, m.Name(), "hello") + require.Equal(t, m.Name(), "var3") } func TestStaticMeasurementModifier(t *testing.T) { p := &Parser{ - Patterns: []string{"%{TEST:test_name:measurement}"}, - CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:string}", + Patterns: []string{"%{NUMBER:hi:string} %{WORD:hi:string}"}, + //CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:tag}", } require.NoError(t, p.Compile()) - m, err := p.ParseLine("4 5 hello") + m, err := p.ParseLine("42 hi") + log.Printf("%v", m) require.NoError(t, err) require.Equal(t, m.Name(), "test_name") } +// tests that the top level measurement name is used func TestStaticAndDynamicMeasurementModifier(t *testing.T) { p := &Parser{ Patterns: []string{"%{TEST:test_name:measurement}"}, From 4ae64bd16df3a71f7be8b0a687ab5a9b0b7db2a1 Mon Sep 17 00:00:00 2001 From: Max U Date: Thu, 16 Aug 2018 17:14:00 -0700 Subject: [PATCH 47/51] get rid of static measurement names, 
only dynamic --- docs/DATA_FORMATS_INPUT.md | 18 ++-------- plugins/parsers/grok/parser.go | 47 ++++------------------------- plugins/parsers/grok/parser_test.go | 42 +++++--------------------- 3 files changed, 17 insertions(+), 90 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index a84d56d591160..40ff635c3aa8a 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -671,21 +671,9 @@ Modifiers can be appended to the end of a grok field to specify how that field s There are also timestamp modifiers, which can be used to specify the format of time data. Available modifiers can be found below. -The 'measurement' modifier has two separate use cases, one for static measurement names and one for -dynamic measurement names. Static measurement names need a semantic name for a pattern, while dynamic -measurement names are left empty. -Static measurement modifier: `patterns = ["%{WORD:test_name:measurement}"]` -Dynamic measurement modifier: `patterns = ["%{WORD::measurement}"]` - -For setting a static measurement name, apply the 'measurement' modifier with a semantic name attached to the -field. If grok matches the pattern, the measurement name will be changed to the specified name. -So the config: `patterns = ["%{TEST:test_name:measurement}"]` would output a metric named "test_name" if grok -matches the pattern. It is important to only specify one pattern per element in the patterns array field -or an error will be thrown. -So the config: `patterns = ["%{TEST:test_name:measurement}|%{TEST2:test2_name:measurement}"]` would need to be changed -to: `patterns = ["%{TEST:test_name:measurement}","%{TEST2:test2_name:measurement}"]` - -For setting a dynamic measurement name, apply the 'measurement' modifier to a specific field in a pattern without a semantic name: `custom_patterns = {"TEST %{WORD::measurement}"}`. If the pattern is matched, the measurement name will be set to the value of the field it was applied to. Each pattern should only have one 'measurement' modifier applied to it. The modifier should only apply to fields of a single value type, not to another grok pattern. +The 'measurement' modifier is used for dynamic measurement names. For setting a dynamic measurement name, +apply the 'measurement' modifier to a specific field in a pattern. +The configuration: `custom_patterns = {"TEST %{WORD::measurement}"}` is treated the same as `custom_patterns = {"TEST %{WORD:ignored_name:measurement}"}`, as semantic names are ignored. If the pattern is matched, the measurement name will be set to the value of the field it was applied to. Each pattern should only have one 'measurement' modifier applied to it. The modifier should only apply to fields of a single value type, not to another grok pattern. The best way to get acquainted with grok patterns is to read the logstash docs, which are available here: diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index 0560ce6f5fc9e..bbefe2bc6fbab 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -76,9 +76,6 @@ type Parser struct { Measurement string DefaultTags map[string]string - //holds any modifiers set on named user patterns - patternModifiers map[string][]string - // Timezone is an optional component to help render log dates to // your chosen zone. // Default: "" which renders UTC
// Default: "" which renders UTC @@ -131,7 +128,6 @@ func (p *Parser) Compile() error { p.tsMap = make(map[string]map[string]string) p.patterns = make(map[string]string) p.tsModder = &tsModder{} - p.patternModifiers = make(map[string][]string) var err error p.g, err = grok.NewWithConfig(&grok.Config{NamedCapturesOnly: true}) if err != nil { @@ -143,29 +139,12 @@ func (p *Parser) Compile() error { p.NamedPatterns = make([]string, 0, len(p.Patterns)) for i, pattern := range p.Patterns { pattern = strings.TrimSpace(pattern) - - //checks that there is only one named field in pattern and 2 ':' indicating a modifier - //then extract any modifiers off pattern - if strings.Count(pattern, "%") == 1 && strings.Count(pattern, ":") == 2 { - pattern = strings.Trim(pattern, "%{}") - splitPattern := strings.SplitN(pattern, ":", 3) - if pattern == "" { - continue - } - name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) - - //map pattern modifiers by name - p.patternModifiers["%{"+name+"}"] = splitPattern[1:3] - p.CustomPatterns += "\n" + name + " " + "%{" + splitPattern[0] + "}" + "\n" - p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") - } else { - if pattern == "" { - continue - } - name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) - p.CustomPatterns += "\n" + name + " " + pattern + "\n" - p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") + if pattern == "" { + continue } + name := fmt.Sprintf("GROK_INTERNAL_PATTERN_%d", i) + p.CustomPatterns += "\n" + name + " " + pattern + "\n" + p.NamedPatterns = append(p.NamedPatterns, "%{"+name+"}") } if len(p.NamedPatterns) == 0 { @@ -263,11 +242,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { switch t { case MEASUREMENT: - if k == "measurement_name" { - p.Measurement = v - } else { - p.Measurement = k - } + p.Measurement = v case INT: iv, err := strconv.ParseInt(v, 10, 64) if err != nil { @@ -378,15 +353,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { } } - //check the modifiers on the pattern - modifiers, ok := p.patternModifiers[patternName] - if ok && modifiers[1] == "measurement" { - if p.patternModifiers[patternName][0] == "" { - //return nil, fmt.Errorf("pattern: %v must be a field to use dynamic 'measurement' modifier", patternName) - } - p.Measurement = p.patternModifiers[patternName][0] - } - if len(fields) == 0 { return nil, fmt.Errorf("grok: must have one or more fields") } @@ -500,7 +466,6 @@ func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) { //add "measurement_name" to pattern so it is valid grok pattern = strings.Replace(pattern, "::measurement", ":measurement_name:measurement", 1) } - log.Printf("typed: %v, name: %v, modifier: %v", patternName, match[1], match[2]) p.typeMap[patternName][match[1]] = match[2] } diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 34e8c271fa53b..f1a8e0bafb7ee 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -964,54 +964,38 @@ func TestReplaceTimestampComma(t *testing.T) { func TestDynamicMeasurementModifier(t *testing.T) { p := &Parser{ Patterns: []string{"%{TEST}"}, - CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:measurement}", + CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD::measurement}", } require.NoError(t, p.Compile()) m, err := p.ParseLine("4 5 hello") require.NoError(t, err) - require.Equal(t, m.Name(), "var3") + require.Equal(t, m.Name(), "hello") } func TestStaticMeasurementModifier(t 
*testing.T) { p := &Parser{ - Patterns: []string{"%{NUMBER:hi:string} %{WORD:hi:string}"}, - //CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:tag}", + Patterns: []string{"%{WORD:hi:measurement} %{NUMBER:num:string}"}, } require.NoError(t, p.Compile()) - m, err := p.ParseLine("42 hi") + m, err := p.ParseLine("test_name 42") log.Printf("%v", m) require.NoError(t, err) - require.Equal(t, m.Name(), "test_name") + require.Equal(t, "test_name", m.Name()) } // tests that the top level measurement name is used -func TestStaticAndDynamicMeasurementModifier(t *testing.T) { +func TestTwoMeasurementModifier(t *testing.T) { p := &Parser{ Patterns: []string{"%{TEST:test_name:measurement}"}, - CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var3:measurement}", - } - - require.NoError(t, p.Compile()) - m, err := p.ParseLine("4 5 hello") - require.NoError(t, err) - require.Equal(t, m.Name(), "test_name") -} - -func TestMultipleMeasurementModifier(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST:test_name:measurement}", "%{TEST2:test2_name:measurement"}, - CustomPatterns: `TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var_string:string} - TEST2 %{WORD:stringer1:tag} %{NUMBER:var2:float} %{NUMBER:var3:float}`, + CustomPatterns: "TEST %{NUMBER:var1:tag} %{NUMBER:var2:measurement} %{WORD:var3:measurement}", } require.NoError(t, p.Compile()) m, err := p.ParseLine("4 5 hello") - m2, err := p.ParseLine("mystr 5 9.5") require.NoError(t, err) - require.Equal(t, m.Name(), "test_name") - require.Equal(t, m2.Name(), "test2_name") + require.Equal(t, m.Name(), "4 5 hello") } func TestMeasurementModifierNoName(t *testing.T) { @@ -1025,13 +1009,3 @@ func TestMeasurementModifierNoName(t *testing.T) { require.NoError(t, err) require.Equal(t, m.Name(), "hello") } - -func TestMeasurementErrors(t *testing.T) { - p := &Parser{ - Patterns: []string{"%{TEST:test_name:measurement}|%{TEST2:test2_name}"}, - CustomPatterns: `TEST %{NUMBER:var1:tag} %{NUMBER:var2:float} %{WORD:var_string:string} - TEST2 %{WORD:stringer1:tag} %{NUMBER:var2:float} %{NUMBER:var3:float}`, - } - err := p.Compile() - require.Error(t, err) -} From c019cfa33766c4f75cb6275f314d0d7f06e52d8d Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 17 Aug 2018 10:50:42 -0700 Subject: [PATCH 48/51] no longer accepts no semantic name --- docs/DATA_FORMATS_INPUT.md | 3 +-- plugins/parsers/grok/parser.go | 2 +- plugins/parsers/grok/parser_test.go | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index 40ff635c3aa8a..d546e6937c344 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -672,8 +672,7 @@ There are also timestamp modifiers, which can be used to specify the format of t Available modifiers can be found below. The 'measurement' modifier is used for dynamic measurement names. For setting a dynamic measurement name, -apply the 'measurement' modifier to a specific field in a pattern. -The configuration: `custom_patterns = {"TEST %{WORD::measurement}"}` is treated the same as `custom_patterns = {"TEST %{WORD:ignored_name:measurement}"}`, as semantic names are ignored. If the pattern is matched, the measurement name will be set to the value of the field it was applied to. Each pattern should only have one 'measurement' modifier applied to it. The modifier should only apply to fields of a single value type, not to another grok pattern. +apply the 'measurement' modifier to a specific field in a pattern. 
From 7e20044513350101c34713dc0a7ed9cda59cc00f Mon Sep 17 00:00:00 2001
From: Daniel Nelson
Date: Fri, 17 Aug 2018 11:43:12 -0700
Subject: [PATCH 49/51] Fix documentation

---
 docs/DATA_FORMATS_INPUT.md                 | 144 +++++++--------------
 plugins/inputs/logparser/logparser.go      |   2 +-
 plugins/inputs/logparser/logparser_test.go |   2 +-
 3 files changed, 50 insertions(+), 98 deletions(-)

diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md
index d546e6937c344..ded0170ec80d2 100644
--- a/docs/DATA_FORMATS_INPUT.md
+++ b/docs/DATA_FORMATS_INPUT.md
@@ -661,108 +661,15 @@ For more information about the dropwizard json format see
 # tag2 = "tags.tag2"
 ```
 
-# Grok
+# Grok:
+
 The grok data format parses line delimited data using a regular expression
 like language.
-Patterns can be added to patterns, or custom patterns read from custom_pattern_files.
 
-Modifiers can be appended to the end of a grok field to specify how that field should be handled.
-There are also timestamp modifiers, which can be used to specify the format of time data.
-Available modifiers can be found below.
-
-The 'measurement' modifier is used for dynamic measurement names. For setting a dynamic measurement name,
-apply the 'measurement' modifier to a specific field in a pattern. The semantic name will be ignored. If the pattern is matched, the measurement name will be set to the value of the field it was applied to. Each pattern should only have one 'measurement' modifier applied to it.
-
 The best way to get acquainted with grok patterns is to read the logstash docs,
 which are available here:
   https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
 
-Available modifiers:
-- string (default if nothing is specified)
-- int
-- float
-- duration (ie, 5.23ms gets converted to int nanoseconds)
-- tag (converts the field into a tag)
-- drop (drops the field completely)
-- measurement (sets the metric name to designated field)
-Timestamp modifiers:
-- ts-ansic ("Mon Jan _2 15:04:05 2006")
-- ts-unix ("Mon Jan _2 15:04:05 MST 2006")
-- ts-ruby ("Mon Jan 02 15:04:05 -0700 2006")
-- ts-rfc822 ("02 Jan 06 15:04 MST")
-- ts-rfc822z ("02 Jan 06 15:04 -0700")
-- ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST")
-- ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST")
-- ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700")
-- ts-rfc3339 ("2006-01-02T15:04:05Z07:00")
-- ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00")
-- ts-httpd ("02/Jan/2006:15:04:05 -0700")
-- ts-epoch (seconds since unix epoch)
-- ts-epochnano (nanoseconds since unix epoch)
-- ts-"CUSTOM"
-CUSTOM time layouts must be within quotes and be the representation of the
-"reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006
-See https://golang.org/pkg/time/#Parse for more details.
-
-Breakdown of the DURATION pattern below:
-`NUMBER` is a builtin logstash grok pattern matching float & int numbers.
-`[nuµm]?` is a regex specifying 0 or 1 of the characters within brackets.
-`s` is also regex, this pattern must end in "s".
-so DURATION will match something like '5.324ms' or '6.1µs' or '10s'
-DURATION %{NUMBER}[nuµm]?s
-RESPONSE_CODE %{NUMBER:response_code:tag}
-RESPONSE_TIME %{DURATION:response_time_ns:duration}
-EXAMPLE_LOG \[%{HTTPDATE:ts:ts-httpd}\] %{NUMBER:myfloat:float} %{RESPONSE_CODE} %{IPORHOST:clientip} %{RESPONSE_TIME}
-
-The best way to get acquainted with grok patterns is to read the logstash docs,
-which are available here:
-  https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html
-
-#### Grok Configuration:
-```toml
-[[inputs.file]]
-  ## Files to parse each interval.
-  ## These accept standard unix glob matching rules, but with the addition of
-  ## ** as a "super asterisk". ie:
-  ##   /var/log/**.log     -> recursively find all .log files in /var/log
-  ##   /var/log/*/*.log    -> find all .log files with a parent dir in /var/log
-  ##   /var/log/apache.log -> only tail the apache log file
-  files = ["/var/log/apache/access.log"]
-
-  ## The dataformat to be read from files
-  ## Each data format has its own unique set of configuration options, read
-  ## more about them here:
-  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
-  data_format = "grok"
-
-  ## This is a list of patterns to check the given log file(s) for.
-  ## Note that adding patterns here increases processing time. The most
-  ## efficient configuration is to have one pattern.
-  ## Other common built-in patterns are:
-  ##   %{COMMON_LOG_FORMAT}   (plain apache & nginx access logs)
-  ##   %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
-  grok_patterns = ["%{COMBINED_LOG_FORMAT}"]
-
-  ## Full path(s) to custom pattern files.
-  grok_custom_pattern_files = []
-
-  ## Custom patterns can also be defined here. Put one pattern per line.
-  grok_custom_patterns = '''
-  '''
-
-  ## Timezone allows you to provide an override for timestamps that
-  ## don't already include an offset
-  ## e.g. 04/06/2016 12:41:45 data one two 5.43µs
-  ##
-  ## Default: "" which renders UTC
-  ## Options are as follows:
-  ##   1. Local             -- interpret based on machine localtime
-  ##   2. "Canada/Eastern"  -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
-  ##   3. UTC               -- or blank/unspecified, will return timestamp in UTC
-  grok_timezone = "Canada/Eastern"
-```
-
 The grok parser uses a slightly modified version of logstash "grok"
 patterns, with the format:
 
@@ -789,6 +696,7 @@ You must capture at least one field per line.
   - duration (ie, 5.23ms gets converted to int nanoseconds)
   - tag (converts the field into a tag)
   - drop (drops the field completely)
+  - measurement (use the matched text as the measurement name)
 - Timestamp modifiers:
   - ts (This will auto-learn the timestamp format)
   - ts-ansic ("Mon Jan _2 15:04:05 2006")
@@ -808,7 +716,7 @@ You must capture at least one field per line.
   - ts-"CUSTOM"
 
 CUSTOM time layouts must be within quotes and be the representation of the
-"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`.
+"reference time", which is `Mon Jan 2 15:04:05 -0700 MST 2006`. To match a comma decimal point you can use a period. For example `%{TIMESTAMP:timestamp:ts-"2006-01-02 15:04:05.000"}` can be used to match `"2018-01-02 15:04:05,000"`
 To match a comma decimal point you can use a period in the pattern string.
 See https://golang.org/pkg/time/#Parse for more details.
 
@@ -822,6 +730,50 @@ logstash patterns that depend on these are not supported._
 If you need help building patterns to match your logs, you will find the
 https://grokdebug.herokuapp.com application quite useful!
 
+#### Grok Configuration:
+```toml
+[[inputs.file]]
+  ## Files to parse each interval.
+  ## These accept standard unix glob matching rules, but with the addition of
+  ## ** as a "super asterisk". ie:
+  ##   /var/log/**.log     -> recursively find all .log files in /var/log
+  ##   /var/log/*/*.log    -> find all .log files with a parent dir in /var/log
+  ##   /var/log/apache.log -> only tail the apache log file
+  files = ["/var/log/apache/access.log"]
+
+  ## The dataformat to be read from files
+  ## Each data format has its own unique set of configuration options, read
+  ## more about them here:
+  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
+  data_format = "grok"
+
+  ## This is a list of patterns to check the given log file(s) for.
+  ## Note that adding patterns here increases processing time. The most
+  ## efficient configuration is to have one pattern.
+  ## Other common built-in patterns are:
+  ##   %{COMMON_LOG_FORMAT}   (plain apache & nginx access logs)
+  ##   %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
+  grok_patterns = ["%{COMBINED_LOG_FORMAT}"]
+
+  ## Full path(s) to custom pattern files.
+  grok_custom_pattern_files = []
+
+  ## Custom patterns can also be defined here. Put one pattern per line.
+  grok_custom_patterns = '''
+  '''
+
+  ## Timezone allows you to provide an override for timestamps that
+  ## don't already include an offset
+  ## e.g. 04/06/2016 12:41:45 data one two 5.43µs
+  ##
+  ## Default: "" which renders UTC
+  ## Options are as follows:
+  ##   1. Local             -- interpret based on machine localtime
+  ##   2. "Canada/Eastern"  -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
+  ##   3. UTC               -- or blank/unspecified, will return timestamp in UTC
+  grok_timezone = "Canada/Eastern"
+```
+
 #### Timestamp Examples
 
 This example input and config parses a file using a custom timestamp conversion:
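Note: for readers following along, this is roughly how the documented 'measurement' modifier is exercised, mirroring the tests earlier in the series. A sketch only — the import path is the upstream telegraf module as of this series, and the commented output is what the tests imply rather than captured output:

```go
package main

import (
	"fmt"

	"github.com/influxdata/telegraf/plugins/parsers/grok"
)

func main() {
	// The WORD capture carries the 'measurement' modifier; its semantic
	// name ("name") is mandatory since PATCH 48 but otherwise ignored.
	p := &grok.Parser{
		Patterns:       []string{"%{TEST}"},
		CustomPatterns: "TEST %{WORD:name:measurement} %{NUMBER:value:float}",
	}
	if err := p.Compile(); err != nil {
		panic(err)
	}
	m, err := p.ParseLine("cpu_usage 42.5")
	if err != nil {
		panic(err)
	}
	fmt.Println(m.Name())   // cpu_usage
	fmt.Println(m.Fields()) // map[value:42.5]
}
```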
diff --git a/plugins/inputs/logparser/logparser.go b/plugins/inputs/logparser/logparser.go
index 2dc99b2f47544..b6ce72546992b 100644
--- a/plugins/inputs/logparser/logparser.go
+++ b/plugins/inputs/logparser/logparser.go
@@ -22,12 +22,12 @@ const (
 
 // LogParser in the primary interface for the plugin
 type GrokConfig struct {
+	MeasurementName    string `toml:"measurement"`
 	Patterns           []string
 	NamedPatterns      []string
 	CustomPatterns     string
 	CustomPatternFiles []string
 	TimeZone           string
-	MeasurementName    string `toml:"measurement"`
 }
 
 type logEntry struct {
diff --git a/plugins/inputs/logparser/logparser_test.go b/plugins/inputs/logparser/logparser_test.go
index a6fda07e581ee..3f0ab4daab9e9 100644
--- a/plugins/inputs/logparser/logparser_test.go
+++ b/plugins/inputs/logparser/logparser_test.go
@@ -44,9 +44,9 @@ func TestGrokParseLogFiles(t *testing.T) {
 
 	logparser := &LogParserPlugin{
 		GrokConfig: GrokConfig{
+			MeasurementName:    "logparser_grok",
 			Patterns:           []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"},
 			CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"},
-			MeasurementName:    "logparser_grok",
 		},
 		FromBeginning: true,
 		Files:         []string{thisdir + "grok/testdata/*.log"},
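Note: the struct reordering above is cosmetic, but the `toml:"measurement"` tag it moves is what wires the logparser's `measurement` config key to GrokConfig.MeasurementName. A standalone illustration of that mapping using the BurntSushi/toml decoder — telegraf's own config loader does the equivalent, and the `grokConfig` mirror below is ours, not the plugin's:

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"
)

// grokConfig mirrors the relevant slice of logparser's GrokConfig; the
// `toml:"measurement"` tag maps the `measurement` key from the config
// file onto MeasurementName.
type grokConfig struct {
	MeasurementName string   `toml:"measurement"`
	Patterns        []string `toml:"patterns"`
}

func main() {
	blob := `
measurement = "logparser_grok"
patterns = ["%{TEST_LOG_A}"]
`
	var cfg grokConfig
	if _, err := toml.Decode(blob, &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", cfg) // {MeasurementName:logparser_grok Patterns:[%{TEST_LOG_A}]}
}
```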
From 0be9d85a24601b6e92c82eedc6622ff5fcc749a1 Mon Sep 17 00:00:00 2001
From: Max U
Date: Fri, 17 Aug 2018 11:47:24 -0700
Subject: [PATCH 50/51] remove more support for no semantic name

---
 plugins/parsers/grok/parser.go | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go
index 4b8e2b87629d7..42708ea7ad659 100644
--- a/plugins/parsers/grok/parser.go
+++ b/plugins/parsers/grok/parser.go
@@ -215,11 +215,6 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 
 	timestamp := time.Now()
 	for k, v := range values {
-		if (k == "" || v == "") && p.typeMap[patternName][k] != "measurement" {
-			log.Printf("D! skipping key: %v", k)
-			continue
-		}
-
 		// t is the modifier of the field
 		var t string
 		// check if pattern has some modifiers
@@ -460,12 +455,6 @@ func (p *Parser) parseTypedCaptures(name, pattern string) (string, error) {
 			}
 			hasTimestamp = true
 		} else {
-			//for handling measurement tag with no name
-			if match[1] == "" && match[2] == "measurement" {
-				match[1] = "measurement_name"
-				//add "measurement_name" to pattern so it is valid grok
-				pattern = strings.Replace(pattern, "::measurement", ":measurement_name:measurement", 1)
-			}
 			p.typeMap[patternName][match[1]] = match[2]
 		}
 
From 5b1bbbdd108771bf33ce1ea524e125e3fb086394 Mon Sep 17 00:00:00 2001
From: Max U
Date: Fri, 17 Aug 2018 13:24:26 -0700
Subject: [PATCH 51/51] small fix

---
 plugins/parsers/grok/parser.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go
index 42708ea7ad659..bc65588eb9841 100644
--- a/plugins/parsers/grok/parser.go
+++ b/plugins/parsers/grok/parser.go
@@ -215,6 +215,9 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
 	timestamp := time.Now()
 	for k, v := range values {
+		if k == "" || v == "" {
+			continue
+		}
 		// t is the modifier of the field
 		var t string
 		// check if pattern has some modifiers
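Note: PATCH 50 deleted the old skip logic wholesale, so for one commit empty captures flowed straight into the typed conversions; PATCH 51 restores a plain guard with no measurement special case. Where those empty captures come from, illustrated with the standard library's regexp rather than the grok package:

```go
package main

import (
	"fmt"
	"regexp"
)

// With alternation, named groups on the branch that did not match still
// appear in the capture list with empty values. Feeding "" into typed
// conversions such as strconv.ParseInt would fail, which is why
// ParseLine now drops empty keys and values up front.
func main() {
	re := regexp.MustCompile(`(?P<word>[a-z]+)|(?P<num>[0-9]+)`)
	match := re.FindStringSubmatch("hello")
	for i, name := range re.SubexpNames() {
		if i == 0 || name == "" {
			continue
		}
		if v := match[i]; v == "" {
			fmt.Printf("skipping empty capture %q\n", name) // num never matched
		} else {
			fmt.Printf("%s = %q\n", name, v)
		}
	}
}
```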