[pkg/stanza] Remove SplitterConfig (#26537)
Subset of #26241

This finally removes the `SplitterConfig` struct, which at this point is
only a wrapper around `MultilineConfig`.
djaglowski authored Sep 8, 2023
1 parent 841e692 commit b78c41f
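
For downstream API users the migration is mechanical: drop the wrapper and set `MultilineConfig` directly on the owning config. A minimal sketch, assuming the pkg/stanza import paths used in the files below (the date pattern is illustrative, not taken from this commit):

import (
	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer"
	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
)

func migrateExample() {
	cfg := fileconsumer.NewConfig()

	// Before this commit, multiline splitting was configured through the wrapper:
	//   cfg.Splitter = tokenize.NewSplitterConfig()
	//   cfg.Splitter.Multiline.LineStartPattern = `^\d{4}-\d{2}-\d{2}`

	// After this commit, MultilineConfig is set directly:
	cfg.Multiline = tokenize.MultilineConfig{
		LineStartPattern: `^\d{4}-\d{2}-\d{2}`, // illustrative pattern
	}
	_ = cfg
}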
Showing 10 changed files with 122 additions and 140 deletions.
27 changes: 27 additions & 0 deletions .chloggen/pkg-stanza-rm-splitterconfig.yaml
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: breaking

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Remove tokenize.SplitterConfig

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [26537]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [api]
36 changes: 18 additions & 18 deletions pkg/stanza/fileconsumer/config.go
@@ -53,7 +53,7 @@ func NewConfig() *Config {
IncludeFileNameResolved: false,
IncludeFilePathResolved: false,
PollInterval: 200 * time.Millisecond,
Splitter: tokenize.NewSplitterConfig(),
Multiline: tokenize.NewMultilineConfig(),
Encoding: defaultEncoding,
StartAt: "end",
FingerprintSize: fingerprint.DefaultSize,
@@ -66,22 +66,22 @@ func NewConfig() *Config {
// Config is the configuration of a file input operator
type Config struct {
matcher.Criteria `mapstructure:",squash"`
IncludeFileName bool `mapstructure:"include_file_name,omitempty"`
IncludeFilePath bool `mapstructure:"include_file_path,omitempty"`
IncludeFileNameResolved bool `mapstructure:"include_file_name_resolved,omitempty"`
IncludeFilePathResolved bool `mapstructure:"include_file_path_resolved,omitempty"`
PollInterval time.Duration `mapstructure:"poll_interval,omitempty"`
StartAt string `mapstructure:"start_at,omitempty"`
FingerprintSize helper.ByteSize `mapstructure:"fingerprint_size,omitempty"`
MaxLogSize helper.ByteSize `mapstructure:"max_log_size,omitempty"`
MaxConcurrentFiles int `mapstructure:"max_concurrent_files,omitempty"`
MaxBatches int `mapstructure:"max_batches,omitempty"`
DeleteAfterRead bool `mapstructure:"delete_after_read,omitempty"`
Splitter tokenize.SplitterConfig `mapstructure:",squash,omitempty"`
TrimConfig trim.Config `mapstructure:",squash,omitempty"`
Encoding string `mapstructure:"encoding,omitempty"`
FlushPeriod time.Duration `mapstructure:"force_flush_period,omitempty"`
Header *HeaderConfig `mapstructure:"header,omitempty"`
IncludeFileName bool `mapstructure:"include_file_name,omitempty"`
IncludeFilePath bool `mapstructure:"include_file_path,omitempty"`
IncludeFileNameResolved bool `mapstructure:"include_file_name_resolved,omitempty"`
IncludeFilePathResolved bool `mapstructure:"include_file_path_resolved,omitempty"`
PollInterval time.Duration `mapstructure:"poll_interval,omitempty"`
StartAt string `mapstructure:"start_at,omitempty"`
FingerprintSize helper.ByteSize `mapstructure:"fingerprint_size,omitempty"`
MaxLogSize helper.ByteSize `mapstructure:"max_log_size,omitempty"`
MaxConcurrentFiles int `mapstructure:"max_concurrent_files,omitempty"`
MaxBatches int `mapstructure:"max_batches,omitempty"`
DeleteAfterRead bool `mapstructure:"delete_after_read,omitempty"`
Multiline tokenize.MultilineConfig `mapstructure:"multiline,omitempty"`
TrimConfig trim.Config `mapstructure:",squash,omitempty"`
Encoding string `mapstructure:"encoding,omitempty"`
FlushPeriod time.Duration `mapstructure:"force_flush_period,omitempty"`
Header *HeaderConfig `mapstructure:"header,omitempty"`
}

type HeaderConfig struct {
@@ -101,7 +101,7 @@ func (c Config) Build(logger *zap.SugaredLogger, emit emit.Callback) (*Manager,
}

// Ensure that splitter is buildable
factory := splitter.NewMultilineFactory(c.Splitter, enc, int(c.MaxLogSize), c.TrimConfig.Func(), c.FlushPeriod)
factory := splitter.NewMultilineFactory(c.Multiline, enc, int(c.MaxLogSize), c.TrimConfig.Func(), c.FlushPeriod)
if _, err := factory.Build(); err != nil {
return nil, err
}
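
Because the removed wrapper was squashed into `Config` while its inner field already carried the `multiline` tag, the user-facing YAML shape should be unchanged by this commit. A sketch, assuming the usual stanza key names (`line_start_pattern` / `line_end_pattern`) rather than anything shown in this diff:

multiline:
  line_start_pattern: '^\d{4}-\d{2}-\d{2}'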
45 changes: 19 additions & 26 deletions pkg/stanza/fileconsumer/config_test.go
@@ -280,39 +280,39 @@ func TestUnmarshal(t *testing.T) {
Name: "multiline_line_start_string",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
newSplit := tokenize.NewSplitterConfig()
newSplit.Multiline.LineStartPattern = "Start"
cfg.Splitter = newSplit
cfg.Multiline = tokenize.MultilineConfig{
LineStartPattern: "Start",
}
return newMockOperatorConfig(cfg)
}(),
},
{
Name: "multiline_line_start_special",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
newSplit := tokenize.NewSplitterConfig()
newSplit.Multiline.LineStartPattern = "%"
cfg.Splitter = newSplit
cfg.Multiline = tokenize.MultilineConfig{
LineStartPattern: "%",
}
return newMockOperatorConfig(cfg)
}(),
},
{
Name: "multiline_line_end_string",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
newSplit := tokenize.NewSplitterConfig()
newSplit.Multiline.LineEndPattern = "Start"
cfg.Splitter = newSplit
cfg.Multiline = tokenize.MultilineConfig{
LineEndPattern: "Start",
}
return newMockOperatorConfig(cfg)
}(),
},
{
Name: "multiline_line_end_special",
Expect: func() *mockOperatorConfig {
cfg := NewConfig()
newSplit := tokenize.NewSplitterConfig()
newSplit.Multiline.LineEndPattern = "%"
cfg.Splitter = newSplit
cfg.Multiline = tokenize.MultilineConfig{
LineEndPattern: "%",
}
return newMockOperatorConfig(cfg)
}(),
},
@@ -452,8 +452,7 @@ func TestBuild(t *testing.T) {
{
"MultilineConfiguredStartAndEndPatterns",
func(f *Config) {
f.Splitter = tokenize.NewSplitterConfig()
f.Splitter.Multiline = tokenize.MultilineConfig{
f.Multiline = tokenize.MultilineConfig{
LineEndPattern: "Exists",
LineStartPattern: "Exists",
}
@@ -464,8 +463,7 @@ func TestBuild(t *testing.T) {
{
"MultilineConfiguredStartPattern",
func(f *Config) {
f.Splitter = tokenize.NewSplitterConfig()
f.Splitter.Multiline = tokenize.MultilineConfig{
f.Multiline = tokenize.MultilineConfig{
LineStartPattern: "START.*",
}
},
@@ -475,8 +473,7 @@ func TestBuild(t *testing.T) {
{
"MultilineConfiguredEndPattern",
func(f *Config) {
f.Splitter = tokenize.NewSplitterConfig()
f.Splitter.Multiline = tokenize.MultilineConfig{
f.Multiline = tokenize.MultilineConfig{
LineEndPattern: "END.*",
}
},
@@ -494,8 +491,7 @@ func TestBuild(t *testing.T) {
{
"LineStartAndEnd",
func(f *Config) {
f.Splitter = tokenize.NewSplitterConfig()
f.Splitter.Multiline = tokenize.MultilineConfig{
f.Multiline = tokenize.MultilineConfig{
LineStartPattern: ".*",
LineEndPattern: ".*",
}
@@ -506,17 +502,15 @@ func TestBuild(t *testing.T) {
{
"NoLineStartOrEnd",
func(f *Config) {
f.Splitter = tokenize.NewSplitterConfig()
f.Splitter.Multiline = tokenize.MultilineConfig{}
f.Multiline = tokenize.MultilineConfig{}
},
require.NoError,
func(t *testing.T, f *Manager) {},
},
{
"InvalidLineStartRegex",
func(f *Config) {
f.Splitter = tokenize.NewSplitterConfig()
f.Splitter.Multiline = tokenize.MultilineConfig{
f.Multiline = tokenize.MultilineConfig{
LineStartPattern: "(",
}
},
@@ -526,8 +520,7 @@ func TestBuild(t *testing.T) {
{
"InvalidLineEndRegex",
func(f *Config) {
f.Splitter = tokenize.NewSplitterConfig()
f.Splitter.Multiline = tokenize.MultilineConfig{
f.Multiline = tokenize.MultilineConfig{
LineEndPattern: "(",
}
},
2 changes: 1 addition & 1 deletion pkg/stanza/fileconsumer/file_test.go
@@ -547,7 +547,7 @@ func TestNoNewline(t *testing.T) {
tempDir := t.TempDir()
cfg := NewConfig().includeDir(tempDir)
cfg.StartAt = "beginning"
cfg.Splitter = tokenize.NewSplitterConfig()
cfg.Multiline = tokenize.NewMultilineConfig()
cfg.FlushPeriod = time.Nanosecond
operator, emitCalls := buildTestManager(t, cfg)

24 changes: 12 additions & 12 deletions pkg/stanza/fileconsumer/internal/splitter/multiline.go
@@ -15,34 +15,34 @@ import (
)

type multilineFactory struct {
splitterCfg tokenize.SplitterConfig
encoding encoding.Encoding
maxLogSize int
trimFunc trim.Func
flushPeriod time.Duration
multilineCfg tokenize.MultilineConfig
encoding encoding.Encoding
maxLogSize int
trimFunc trim.Func
flushPeriod time.Duration
}

var _ Factory = (*multilineFactory)(nil)

func NewMultilineFactory(
splitterCfg tokenize.SplitterConfig,
multilineCfg tokenize.MultilineConfig,
encoding encoding.Encoding,
maxLogSize int,
trimFunc trim.Func,
flushPeriod time.Duration,
) Factory {
return &multilineFactory{
splitterCfg: splitterCfg,
encoding: encoding,
maxLogSize: maxLogSize,
trimFunc: trimFunc,
flushPeriod: flushPeriod,
multilineCfg: multilineCfg,
encoding: encoding,
maxLogSize: maxLogSize,
trimFunc: trimFunc,
flushPeriod: flushPeriod,
}
}

// Build builds Multiline Splitter struct
func (f *multilineFactory) Build() (bufio.SplitFunc, error) {
splitFunc, err := f.splitterCfg.Build(f.encoding, false, f.maxLogSize, f.trimFunc)
splitFunc, err := f.multilineCfg.Build(f.encoding, false, f.maxLogSize, f.trimFunc)
if err != nil {
return nil, err
}
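
A usage sketch of the updated factory signature. Note that `splitter` is an internal package, so this compiles only within pkg/stanza itself; the size and period values are illustrative:

import (
	"bufio"
	"time"

	"golang.org/x/text/encoding/unicode"

	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/fileconsumer/internal/splitter"
	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize"
	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim"
)

func buildSplitFunc() (bufio.SplitFunc, error) {
	// MultilineConfig is passed directly; there is no SplitterConfig wrapper anymore.
	factory := splitter.NewMultilineFactory(
		tokenize.NewMultilineConfig(), // default: plain newline splitting
		unicode.UTF8,
		1024*1024,            // maxLogSize (illustrative)
		trim.Whitespace,      // same trim func reader_test.go uses
		100*time.Millisecond, // flushPeriod
	)
	return factory.Build()
}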
34 changes: 16 additions & 18 deletions pkg/stanza/fileconsumer/internal/splitter/multiline_test.go
@@ -17,28 +17,26 @@ import (

func TestMultilineBuild(t *testing.T) {
tests := []struct {
name string
splitterConfig tokenize.SplitterConfig
encoding encoding.Encoding
maxLogSize int
flushPeriod time.Duration
wantErr bool
name string
multilineCfg tokenize.MultilineConfig
encoding encoding.Encoding
maxLogSize int
flushPeriod time.Duration
wantErr bool
}{
{
name: "default configuration",
splitterConfig: tokenize.NewSplitterConfig(),
encoding: unicode.UTF8,
maxLogSize: 1024,
flushPeriod: 100 * time.Millisecond,
wantErr: false,
name: "default configuration",
multilineCfg: tokenize.NewMultilineConfig(),
encoding: unicode.UTF8,
maxLogSize: 1024,
flushPeriod: 100 * time.Millisecond,
wantErr: false,
},
{
name: "Multiline error",
splitterConfig: tokenize.SplitterConfig{
Multiline: tokenize.MultilineConfig{
LineStartPattern: "START",
LineEndPattern: "END",
},
multilineCfg: tokenize.MultilineConfig{
LineStartPattern: "START",
LineEndPattern: "END",
},
flushPeriod: 100 * time.Millisecond,
encoding: unicode.UTF8,
@@ -48,7 +46,7 @@
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
factory := NewMultilineFactory(tt.splitterConfig, tt.encoding, tt.maxLogSize, trim.Nop, tt.flushPeriod)
factory := NewMultilineFactory(tt.multilineCfg, tt.encoding, tt.maxLogSize, trim.Nop, tt.flushPeriod)
got, err := factory.Build()
if (err != nil) != tt.wantErr {
t.Errorf("Build() error = %v, wantErr %v", err, tt.wantErr)
18 changes: 9 additions & 9 deletions pkg/stanza/fileconsumer/reader_test.go
@@ -24,7 +24,7 @@ import (

func TestPersistFlusher(t *testing.T) {
flushPeriod := 100 * time.Millisecond
f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), defaultMaxLogSize, flushPeriod)
f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), defaultMaxLogSize, flushPeriod)

temp := openTemp(t, t.TempDir())
fp, err := f.newFingerprint(temp)
@@ -110,7 +110,7 @@ func TestTokenization(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.testName, func(t *testing.T) {
f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), defaultMaxLogSize, defaultFlushPeriod)
f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), defaultMaxLogSize, defaultFlushPeriod)

temp := openTemp(t, t.TempDir())
_, err := temp.Write(tc.fileContent)
@@ -140,7 +140,7 @@ func TestTokenizationTooLong(t *testing.T) {
[]byte("aaa"),
}

f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), 10, defaultFlushPeriod)
f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), 10, defaultFlushPeriod)

temp := openTemp(t, t.TempDir())
_, err := temp.Write(fileContent)
@@ -170,9 +170,9 @@ func TestTokenizationTooLongWithLineStartPattern(t *testing.T) {
[]byte("2023-01-01 2"),
}

sCfg := tokenize.NewSplitterConfig()
sCfg.Multiline.LineStartPattern = `\d+-\d+-\d+`
f, emitChan := testReaderFactory(t, sCfg, 15, defaultFlushPeriod)
mCfg := tokenize.NewMultilineConfig()
mCfg.LineStartPattern = `\d+-\d+-\d+`
f, emitChan := testReaderFactory(t, mCfg, 15, defaultFlushPeriod)

temp := openTemp(t, t.TempDir())
_, err := temp.Write(fileContent)
@@ -195,7 +195,7 @@ func TestHeaderFingerprintIncluded(t *testing.T) {
func TestHeaderFingerprintIncluded(t *testing.T) {
fileContent := []byte("#header-line\naaa\n")

f, _ := testReaderFactory(t, tokenize.NewSplitterConfig(), 10, defaultFlushPeriod)
f, _ := testReaderFactory(t, tokenize.NewMultilineConfig(), 10, defaultFlushPeriod)

regexConf := regex.NewConfig()
regexConf.Regex = "^#(?P<header>.*)"
@@ -223,7 +223,7 @@ func TestHeaderFingerprintIncluded(t *testing.T) {
require.Equal(t, []byte("#header-line\naaa\n"), r.Fingerprint.FirstBytes)
}

func testReaderFactory(t *testing.T, sCfg tokenize.SplitterConfig, maxLogSize int, flushPeriod time.Duration) (*readerFactory, chan *emitParams) {
func testReaderFactory(t *testing.T, mCfg tokenize.MultilineConfig, maxLogSize int, flushPeriod time.Duration) (*readerFactory, chan *emitParams) {
emitChan := make(chan *emitParams, 100)
enc, err := decode.LookupEncoding(defaultEncoding)
trimFunc := trim.Whitespace
@@ -236,7 +236,7 @@ func testReaderFactory(t *testing.T, sCfg tokenize.SplitterConfig, maxLogSize in
emit: testEmitFunc(emitChan),
},
fromBeginning: true,
splitterFactory: splitter.NewMultilineFactory(sCfg, enc, maxLogSize, trimFunc, flushPeriod),
splitterFactory: splitter.NewMultilineFactory(mCfg, enc, maxLogSize, trimFunc, flushPeriod),
encoding: enc,
}, emitChan
}
(3 more changed files not shown)
