-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/3484 - set metric name with 'measurement' modifier for grok parser #4433
Changes from 41 commits
e12eced
08a11d7
9c4b522
4e24a1b
ec7f131
504d978
542c030
554b960
36a23ea
f40371e
9c84595
cc40629
79d9ea4
bbd68b3
bf7220d
a931eb1
e450b26
001658a
7fa27f4
1be2a8e
aa750ec
892c95a
04f09d6
8063b38
bfc13a7
67db143
8a9da28
cafa95e
c6087ab
e4b6f23
d224673
f52ceeb
285cf0b
0c3ac29
74900ed
d0f5389
b10f592
441bc41
d1e0c7c
b7ed886
903a977
0040530
054c20e
0e5e115
e3d9ca0
1b8ce4a
797f9bd
255e596
6d49188
34075e3
a246c11
4ae64bd
c019cfa
7e20044
0be9d85
5b1bbbd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,12 +22,12 @@ const ( | |
|
||
// LogParser in the primary interface for the plugin | ||
type GrokConfig struct { | ||
MeasurementName string `toml:"measurement"` | ||
Patterns []string | ||
NamedPatterns []string | ||
CustomPatterns string | ||
CustomPatternFiles []string | ||
TimeZone string | ||
MeasurementName string `toml:"measurement"` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Revert change so there is less churn in the git history and to keep the real changes clear. |
||
} | ||
|
||
type logEntry struct { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,9 +44,9 @@ func TestGrokParseLogFiles(t *testing.T) { | |
|
||
logparser := &LogParserPlugin{ | ||
GrokConfig: GrokConfig{ | ||
MeasurementName: "logparser_grok", | ||
Patterns: []string{"%{TEST_LOG_A}", "%{TEST_LOG_B}"}, | ||
CustomPatternFiles: []string{thisdir + "grok/testdata/test-patterns"}, | ||
MeasurementName: "logparser_grok", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Revert change |
||
}, | ||
FromBeginning: true, | ||
Files: []string{thisdir + "grok/testdata/*.log"}, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Reader Input Plugin | ||
|
||
The Reader Plugin updates a list of files every interval and parses the data inside. | ||
Files will always be read from the beginning. | ||
This plugin can parse any supported "data_format". | ||
|
||
### Configuration: | ||
```toml | ||
[[inputs.reader]] | ||
## Files to parse each interval. | ||
## These accept standard unix glob matching rules, but with the addition of | ||
## ** as a "super asterisk". ie: | ||
## /var/log/**.log -> recursively find all .log files in /var/log | ||
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log | ||
## /var/log/apache.log -> only tail the apache log file | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may cause some slight confusion, |
||
files = ["/var/log/apache/access.log"] | ||
|
||
## The dataformat to be read from files | ||
## Each data format has its own unique set of configuration options, read | ||
## more about them here: | ||
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md | ||
data_format = "" | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
version: '3' | ||
|
||
services: | ||
telegraf: | ||
image: glinton/scratch | ||
volumes: | ||
- ./telegraf.conf:/telegraf.conf | ||
- ../../../../telegraf:/telegraf | ||
- ./json_a.log:/var/log/test.log | ||
entrypoint: | ||
- /telegraf | ||
- --config | ||
- /telegraf.conf |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
"parent": { | ||
"child": 3.0, | ||
"ignored_child": "hi" | ||
}, | ||
"ignored_null": null, | ||
"integer": 4, | ||
"list": [3, 4], | ||
"ignored_parent": { | ||
"another_ignored_null": null, | ||
"ignored_string": "hello, world!" | ||
}, | ||
"another_list": [4] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
[[inputs.reader]] | ||
files = ["/var/log/test.log"] | ||
data_format = "json" | ||
name_override = "json_reader" | ||
|
||
[[outputs.file]] | ||
files = ["stdout"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
package reader | ||
|
||
import ( | ||
"fmt" | ||
"io/ioutil" | ||
|
||
"github.com/influxdata/telegraf" | ||
"github.com/influxdata/telegraf/internal/globpath" | ||
"github.com/influxdata/telegraf/plugins/inputs" | ||
"github.com/influxdata/telegraf/plugins/parsers" | ||
) | ||
|
||
// Reader is the input plugin type registered under the name "reader"; it | ||
// parses the files matched by Files with the parser supplied via SetParser. | ||
type Reader struct { | ||
// Files holds the glob patterns expanded by refreshFilePaths. | ||
Files []string `toml:"files"` | ||
// FromBeginning is not referenced by any method visible in this file. | ||
FromBeginning bool | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove as |
||
// parser is set by SetParser and used by readMetric to parse file contents. | ||
parser parsers.Parser | ||
|
||
// filenames is the list of matched paths, rebuilt by refreshFilePaths. | ||
filenames []string | ||
} | ||
|
||
// sampleConfig is the example TOML configuration returned by SampleConfig. | ||
// NOTE(review): the example text says "tail", but readMetric reads each | ||
// matched file in full on every Gather call - confirm the intended wording. | ||
const sampleConfig = `## Files to parse each interval. | ||
## These accept standard unix glob matching rules, but with the addition of | ||
## ** as a "super asterisk". ie: | ||
## /var/log/**.log -> recursively find all .log files in /var/log | ||
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log | ||
## /var/log/apache.log -> only tail the apache log file | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again, |
||
files = ["/var/log/apache/access.log"] | ||
|
||
## The dataformat to be read from files | ||
## Each data format has its own unique set of configuration options, read | ||
## more about them here: | ||
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md | ||
data_format = "" | ||
` | ||
|
||
// SampleConfig returns the sample TOML configuration held in sampleConfig. | ||
func (r *Reader) SampleConfig() string { | ||
return sampleConfig | ||
} | ||
|
||
// Description returns a one-line summary of what the plugin does. | ||
func (r *Reader) Description() string { | ||
return "reload and gather from file[s] on telegraf's interval" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should the description be sentence-like (capitalization, punctuation)? |
||
} | ||
|
||
func (r *Reader) Gather(acc telegraf.Accumulator) error { | ||
r.refreshFilePaths() | ||
for _, k := range r.filenames { | ||
metrics, err := r.readMetric(k) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
for _, m := range metrics { | ||
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// SetParser stores p as the parser that readMetric uses on file contents. | ||
func (r *Reader) SetParser(p parsers.Parser) { | ||
r.parser = p | ||
} | ||
|
||
func (r *Reader) refreshFilePaths() error { | ||
var allFiles []string | ||
for _, filepath := range r.Files { | ||
g, err := globpath.Compile(filepath) | ||
if err != nil { | ||
return fmt.Errorf("could not compile glob %v: %v", filepath, err) | ||
} | ||
files := g.Match() | ||
|
||
for k := range files { | ||
allFiles = append(allFiles, k) | ||
} | ||
} | ||
|
||
r.filenames = allFiles | ||
return nil | ||
} | ||
|
||
// readMetric reads the whole of filename into memory and passes the bytes to | ||
// r.parser.Parse, returning whatever metrics and error the parser produces. | ||
// A read failure is returned as a formatted error naming the file. | ||
func (r *Reader) readMetric(filename string) ([]telegraf.Metric, error) { | ||
fileContents, err := ioutil.ReadFile(filename) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems dangerous in the right conditions, as it reads the entire contents of the file into memory, then passes it onto be |
||
if err != nil { | ||
return nil, fmt.Errorf("E! Error file: %v could not be read, %s", filename, err) | ||
} | ||
return r.parser.Parse(fileContents) | ||
|
||
} | ||
|
||
// init registers a factory for an empty Reader under the input name "reader". | ||
func init() { | ||
inputs.Add("reader", func() telegraf.Input { | ||
return &Reader{} | ||
}) | ||
} | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package reader | ||
|
||
import ( | ||
"runtime" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/influxdata/telegraf/plugins/parsers" | ||
"github.com/influxdata/telegraf/testutil" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
// TestRefreshFilePaths checks that expanding the "**.log" glob under the | ||
// reader/testfiles directory leaves exactly two entries in r.filenames. | ||
// NOTE(review): the error returned by refreshFilePaths is not checked here. | ||
func TestRefreshFilePaths(t *testing.T) { | ||
testDir := getPluginDir() | ||
r := Reader{ | ||
Files: []string{testDir + "/reader/testfiles/**.log"}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd combine the |
||
} | ||
|
||
r.refreshFilePaths() | ||
assert.Equal(t, len(r.filenames), 2) | ||
} | ||
func TestJSONParserCompile(t *testing.T) { | ||
testDir := getPluginDir() | ||
var acc testutil.Accumulator | ||
r := Reader{ | ||
Files: []string{testDir + "/reader/testfiles/json_a.log"}, | ||
} | ||
parserConfig := parsers.Config{ | ||
DataFormat: "json", | ||
TagKeys: []string{"parent_ignored_child"}, | ||
} | ||
nParser, err := parsers.NewParser(&parserConfig) | ||
r.parser = nParser | ||
assert.NoError(t, err) | ||
|
||
r.Gather(&acc) | ||
assert.Equal(t, map[string]string{"parent_ignored_child": "hi"}, acc.Metrics[0].Tags) | ||
assert.Equal(t, 5, len(acc.Metrics[0].Fields)) | ||
} | ||
|
||
func TestGrokParser(t *testing.T) { | ||
testDir := getPluginDir() | ||
var acc testutil.Accumulator | ||
r := Reader{ | ||
Files: []string{testDir + "/reader/testfiles/grok_a.log"}, | ||
} | ||
|
||
parserConfig := parsers.Config{ | ||
DataFormat: "grok", | ||
GrokPatterns: []string{"%{COMMON_LOG_FORMAT}"}, | ||
} | ||
|
||
nParser, err := parsers.NewParser(&parserConfig) | ||
r.parser = nParser | ||
assert.NoError(t, err) | ||
|
||
err = r.Gather(&acc) | ||
assert.Equal(t, 2, len(acc.Metrics)) | ||
} | ||
|
||
// getPluginDir returns the source file path of its caller with the first
// occurrence of "/reader/reader_test.go" removed. If the caller's path does
// not contain that substring, the path is returned unchanged.
func getPluginDir() string {
	const testFileSuffix = "/reader/reader_test.go"

	// Skip one frame so the path reported is the caller's, not this helper's.
	_, callerFile, _, _ := runtime.Caller(1)
	return strings.Replace(callerFile, testFileSuffix, "", 1)
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
127.0.0.1 user-identifier frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. combine |
||
128.0.0.1 user-identifier tony [10/Oct/2000:13:55:36 -0800] "GET /apache_pb.gif HTTP/1.0" 300 45 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. combine |
||
"parent": { | ||
"child": 3.0, | ||
"ignored_child": "hi" | ||
}, | ||
"ignored_null": null, | ||
"integer": 4, | ||
"list": [3, 4], | ||
"ignored_parent": { | ||
"another_ignored_null": null, | ||
"ignored_string": "hello, world!" | ||
}, | ||
"another_list": [4] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the patterns should behave the same regardless of whether they are in `pattern` or `custom_pattern`. Can we just say `%{WORD::measurement}` will use the matched text and `%{TEST:test_name:measurement}` will use the static value `test_name`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I think that should be made clear. The issue with that would be if people use the dynamic name on an entire pattern, i.e. not a single field: it would add the text of the whole pattern as the metric name.