diff --git a/.chloggen/pkg-stanza-rm-splitterconfig.yaml b/.chloggen/pkg-stanza-rm-splitterconfig.yaml new file mode 100755 index 000000000000..ff7e24f1c772 --- /dev/null +++ b/.chloggen/pkg-stanza-rm-splitterconfig.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: breaking + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/stanza + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Remove tokenize.SplitterConfig + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [26537] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [api] diff --git a/pkg/stanza/fileconsumer/config.go b/pkg/stanza/fileconsumer/config.go index 25b39075f030..fa27bb103de1 100644 --- a/pkg/stanza/fileconsumer/config.go +++ b/pkg/stanza/fileconsumer/config.go @@ -53,7 +53,7 @@ func NewConfig() *Config { IncludeFileNameResolved: false, IncludeFilePathResolved: false, PollInterval: 200 * time.Millisecond, - Splitter: tokenize.NewSplitterConfig(), + Multiline: tokenize.NewMultilineConfig(), Encoding: defaultEncoding, StartAt: "end", FingerprintSize: fingerprint.DefaultSize, @@ -66,22 +66,22 @@ func NewConfig() *Config { // Config is the configuration of a file input operator type Config struct { matcher.Criteria `mapstructure:",squash"` - IncludeFileName bool `mapstructure:"include_file_name,omitempty"` - IncludeFilePath bool `mapstructure:"include_file_path,omitempty"` - IncludeFileNameResolved bool `mapstructure:"include_file_name_resolved,omitempty"` - IncludeFilePathResolved bool `mapstructure:"include_file_path_resolved,omitempty"` - PollInterval time.Duration `mapstructure:"poll_interval,omitempty"` - StartAt string `mapstructure:"start_at,omitempty"` - FingerprintSize helper.ByteSize `mapstructure:"fingerprint_size,omitempty"` - MaxLogSize helper.ByteSize `mapstructure:"max_log_size,omitempty"` - MaxConcurrentFiles int `mapstructure:"max_concurrent_files,omitempty"` - MaxBatches int `mapstructure:"max_batches,omitempty"` - DeleteAfterRead bool `mapstructure:"delete_after_read,omitempty"` - Splitter tokenize.SplitterConfig `mapstructure:",squash,omitempty"` - TrimConfig trim.Config `mapstructure:",squash,omitempty"` - Encoding string `mapstructure:"encoding,omitempty"` - FlushPeriod time.Duration `mapstructure:"force_flush_period,omitempty"` - Header *HeaderConfig `mapstructure:"header,omitempty"` + IncludeFileName bool `mapstructure:"include_file_name,omitempty"` + IncludeFilePath bool `mapstructure:"include_file_path,omitempty"` + IncludeFileNameResolved bool `mapstructure:"include_file_name_resolved,omitempty"` + IncludeFilePathResolved bool `mapstructure:"include_file_path_resolved,omitempty"` + PollInterval time.Duration `mapstructure:"poll_interval,omitempty"` + StartAt string `mapstructure:"start_at,omitempty"` + FingerprintSize helper.ByteSize `mapstructure:"fingerprint_size,omitempty"` + MaxLogSize helper.ByteSize `mapstructure:"max_log_size,omitempty"` + MaxConcurrentFiles int `mapstructure:"max_concurrent_files,omitempty"` + MaxBatches int `mapstructure:"max_batches,omitempty"` + DeleteAfterRead bool `mapstructure:"delete_after_read,omitempty"` + Multiline tokenize.MultilineConfig `mapstructure:"multiline,omitempty"` + TrimConfig trim.Config `mapstructure:",squash,omitempty"` + Encoding string `mapstructure:"encoding,omitempty"` + FlushPeriod time.Duration `mapstructure:"force_flush_period,omitempty"` + Header *HeaderConfig `mapstructure:"header,omitempty"` } type HeaderConfig struct { @@ -101,7 +101,7 @@ func (c Config) Build(logger *zap.SugaredLogger, emit emit.Callback) (*Manager, } // Ensure that splitter is buildable - factory := splitter.NewMultilineFactory(c.Splitter, enc, int(c.MaxLogSize), c.TrimConfig.Func(), c.FlushPeriod) + factory := splitter.NewMultilineFactory(c.Multiline, enc, int(c.MaxLogSize), c.TrimConfig.Func(), c.FlushPeriod) if _, err := factory.Build(); err != nil { return nil, err } diff --git a/pkg/stanza/fileconsumer/config_test.go b/pkg/stanza/fileconsumer/config_test.go index 48b82470e127..90df522e59bd 100644 --- a/pkg/stanza/fileconsumer/config_test.go +++ b/pkg/stanza/fileconsumer/config_test.go @@ -280,9 +280,9 @@ func TestUnmarshal(t *testing.T) { Name: "multiline_line_start_string", Expect: func() *mockOperatorConfig { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineStartPattern = "Start" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineStartPattern: "Start", + } return newMockOperatorConfig(cfg) }(), }, @@ -290,9 +290,9 @@ func TestUnmarshal(t *testing.T) { Name: "multiline_line_start_special", Expect: func() *mockOperatorConfig { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineStartPattern = "%" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineStartPattern: "%", + } return newMockOperatorConfig(cfg) }(), }, @@ -300,9 +300,9 @@ func TestUnmarshal(t *testing.T) { Name: "multiline_line_end_string", Expect: func() *mockOperatorConfig { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineEndPattern = "Start" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineEndPattern: "Start", + } return newMockOperatorConfig(cfg) }(), }, @@ -310,9 +310,9 @@ func TestUnmarshal(t *testing.T) { Name: "multiline_line_end_special", Expect: func() *mockOperatorConfig { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineEndPattern = "%" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineEndPattern: "%", + } return newMockOperatorConfig(cfg) }(), }, @@ -452,8 +452,7 @@ func TestBuild(t *testing.T) { { "MultilineConfiguredStartAndEndPatterns", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineEndPattern: "Exists", LineStartPattern: "Exists", } @@ -464,8 +463,7 @@ func TestBuild(t *testing.T) { { "MultilineConfiguredStartPattern", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineStartPattern: "START.*", } }, @@ -475,8 +473,7 @@ func TestBuild(t *testing.T) { { "MultilineConfiguredEndPattern", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineEndPattern: "END.*", } }, @@ -494,8 +491,7 @@ func TestBuild(t *testing.T) { { "LineStartAndEnd", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineStartPattern: ".*", LineEndPattern: ".*", } @@ -506,8 +502,7 @@ func TestBuild(t *testing.T) { { "NoLineStartOrEnd", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{} + f.Multiline = tokenize.MultilineConfig{} }, require.NoError, func(t *testing.T, f *Manager) {}, @@ -515,8 +510,7 @@ func TestBuild(t *testing.T) { { "InvalidLineStartRegex", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineStartPattern: "(", } }, @@ -526,8 +520,7 @@ func TestBuild(t *testing.T) { { "InvalidLineEndRegex", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineEndPattern: "(", } }, diff --git a/pkg/stanza/fileconsumer/file_test.go b/pkg/stanza/fileconsumer/file_test.go index 5d34cd04d799..509ee20714fd 100644 --- a/pkg/stanza/fileconsumer/file_test.go +++ b/pkg/stanza/fileconsumer/file_test.go @@ -547,7 +547,7 @@ func TestNoNewline(t *testing.T) { tempDir := t.TempDir() cfg := NewConfig().includeDir(tempDir) cfg.StartAt = "beginning" - cfg.Splitter = tokenize.NewSplitterConfig() + cfg.Multiline = tokenize.NewMultilineConfig() cfg.FlushPeriod = time.Nanosecond operator, emitCalls := buildTestManager(t, cfg) diff --git a/pkg/stanza/fileconsumer/internal/splitter/multiline.go b/pkg/stanza/fileconsumer/internal/splitter/multiline.go index 258883e5e6f1..870a8c22f63a 100644 --- a/pkg/stanza/fileconsumer/internal/splitter/multiline.go +++ b/pkg/stanza/fileconsumer/internal/splitter/multiline.go @@ -15,34 +15,34 @@ import ( ) type multilineFactory struct { - splitterCfg tokenize.SplitterConfig - encoding encoding.Encoding - maxLogSize int - trimFunc trim.Func - flushPeriod time.Duration + multilineCfg tokenize.MultilineConfig + encoding encoding.Encoding + maxLogSize int + trimFunc trim.Func + flushPeriod time.Duration } var _ Factory = (*multilineFactory)(nil) func NewMultilineFactory( - splitterCfg tokenize.SplitterConfig, + multilineCfg tokenize.MultilineConfig, encoding encoding.Encoding, maxLogSize int, trimFunc trim.Func, flushPeriod time.Duration, ) Factory { return &multilineFactory{ - splitterCfg: splitterCfg, - encoding: encoding, - maxLogSize: maxLogSize, - trimFunc: trimFunc, - flushPeriod: flushPeriod, + multilineCfg: multilineCfg, + encoding: encoding, + maxLogSize: maxLogSize, + trimFunc: trimFunc, + flushPeriod: flushPeriod, } } // Build builds Multiline Splitter struct func (f *multilineFactory) Build() (bufio.SplitFunc, error) { - splitFunc, err := f.splitterCfg.Build(f.encoding, false, f.maxLogSize, f.trimFunc) + splitFunc, err := f.multilineCfg.Build(f.encoding, false, f.maxLogSize, f.trimFunc) if err != nil { return nil, err } diff --git a/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go b/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go index 9fac28465e9b..efdd227e74f8 100644 --- a/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go +++ b/pkg/stanza/fileconsumer/internal/splitter/multiline_test.go @@ -17,28 +17,26 @@ import ( func TestMultilineBuild(t *testing.T) { tests := []struct { - name string - splitterConfig tokenize.SplitterConfig - encoding encoding.Encoding - maxLogSize int - flushPeriod time.Duration - wantErr bool + name string + multilineCfg tokenize.MultilineConfig + encoding encoding.Encoding + maxLogSize int + flushPeriod time.Duration + wantErr bool }{ { - name: "default configuration", - splitterConfig: tokenize.NewSplitterConfig(), - encoding: unicode.UTF8, - maxLogSize: 1024, - flushPeriod: 100 * time.Millisecond, - wantErr: false, + name: "default configuration", + multilineCfg: tokenize.NewMultilineConfig(), + encoding: unicode.UTF8, + maxLogSize: 1024, + flushPeriod: 100 * time.Millisecond, + wantErr: false, }, { name: "Multiline error", - splitterConfig: tokenize.SplitterConfig{ - Multiline: tokenize.MultilineConfig{ - LineStartPattern: "START", - LineEndPattern: "END", - }, + multilineCfg: tokenize.MultilineConfig{ + LineStartPattern: "START", + LineEndPattern: "END", }, flushPeriod: 100 * time.Millisecond, encoding: unicode.UTF8, @@ -48,7 +46,7 @@ func TestMultilineBuild(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - factory := NewMultilineFactory(tt.splitterConfig, tt.encoding, tt.maxLogSize, trim.Nop, tt.flushPeriod) + factory := NewMultilineFactory(tt.multilineCfg, tt.encoding, tt.maxLogSize, trim.Nop, tt.flushPeriod) got, err := factory.Build() if (err != nil) != tt.wantErr { t.Errorf("Build() error = %v, wantErr %v", err, tt.wantErr) diff --git a/pkg/stanza/fileconsumer/reader_test.go b/pkg/stanza/fileconsumer/reader_test.go index ec444834fc97..84464d225b36 100644 --- a/pkg/stanza/fileconsumer/reader_test.go +++ b/pkg/stanza/fileconsumer/reader_test.go @@ -24,7 +24,7 @@ import ( func TestPersistFlusher(t *testing.T) { flushPeriod := 100 * time.Millisecond - f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), defaultMaxLogSize, flushPeriod) + f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), defaultMaxLogSize, flushPeriod) temp := openTemp(t, t.TempDir()) fp, err := f.newFingerprint(temp) @@ -110,7 +110,7 @@ func TestTokenization(t *testing.T) { for _, tc := range testCases { t.Run(tc.testName, func(t *testing.T) { - f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), defaultMaxLogSize, defaultFlushPeriod) + f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), defaultMaxLogSize, defaultFlushPeriod) temp := openTemp(t, t.TempDir()) _, err := temp.Write(tc.fileContent) @@ -140,7 +140,7 @@ func TestTokenizationTooLong(t *testing.T) { []byte("aaa"), } - f, emitChan := testReaderFactory(t, tokenize.NewSplitterConfig(), 10, defaultFlushPeriod) + f, emitChan := testReaderFactory(t, tokenize.NewMultilineConfig(), 10, defaultFlushPeriod) temp := openTemp(t, t.TempDir()) _, err := temp.Write(fileContent) @@ -170,9 +170,9 @@ func TestTokenizationTooLongWithLineStartPattern(t *testing.T) { []byte("2023-01-01 2"), } - sCfg := tokenize.NewSplitterConfig() - sCfg.Multiline.LineStartPattern = `\d+-\d+-\d+` - f, emitChan := testReaderFactory(t, sCfg, 15, defaultFlushPeriod) + mCfg := tokenize.NewMultilineConfig() + mCfg.LineStartPattern = `\d+-\d+-\d+` + f, emitChan := testReaderFactory(t, mCfg, 15, defaultFlushPeriod) temp := openTemp(t, t.TempDir()) _, err := temp.Write(fileContent) @@ -195,7 +195,7 @@ func TestTokenizationTooLongWithLineStartPattern(t *testing.T) { func TestHeaderFingerprintIncluded(t *testing.T) { fileContent := []byte("#header-line\naaa\n") - f, _ := testReaderFactory(t, tokenize.NewSplitterConfig(), 10, defaultFlushPeriod) + f, _ := testReaderFactory(t, tokenize.NewMultilineConfig(), 10, defaultFlushPeriod) regexConf := regex.NewConfig() regexConf.Regex = "^#(?P
.*)" @@ -223,7 +223,7 @@ func TestHeaderFingerprintIncluded(t *testing.T) { require.Equal(t, []byte("#header-line\naaa\n"), r.Fingerprint.FirstBytes) } -func testReaderFactory(t *testing.T, sCfg tokenize.SplitterConfig, maxLogSize int, flushPeriod time.Duration) (*readerFactory, chan *emitParams) { +func testReaderFactory(t *testing.T, mCfg tokenize.MultilineConfig, maxLogSize int, flushPeriod time.Duration) (*readerFactory, chan *emitParams) { emitChan := make(chan *emitParams, 100) enc, err := decode.LookupEncoding(defaultEncoding) trimFunc := trim.Whitespace @@ -236,7 +236,7 @@ func testReaderFactory(t *testing.T, sCfg tokenize.SplitterConfig, maxLogSize in emit: testEmitFunc(emitChan), }, fromBeginning: true, - splitterFactory: splitter.NewMultilineFactory(sCfg, enc, maxLogSize, trimFunc, flushPeriod), + splitterFactory: splitter.NewMultilineFactory(mCfg, enc, maxLogSize, trimFunc, flushPeriod), encoding: enc, }, emitChan } diff --git a/pkg/stanza/operator/input/file/config_test.go b/pkg/stanza/operator/input/file/config_test.go index b6778f5693be..3c0ef5c171fb 100644 --- a/pkg/stanza/operator/input/file/config_test.go +++ b/pkg/stanza/operator/input/file/config_test.go @@ -315,9 +315,9 @@ func TestUnmarshal(t *testing.T) { ExpectErr: false, Expect: func() *Config { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineStartPattern = "Start" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineStartPattern: "Start", + } return cfg }(), }, @@ -326,9 +326,9 @@ func TestUnmarshal(t *testing.T) { ExpectErr: false, Expect: func() *Config { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineStartPattern = "%" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineStartPattern: "%", + } return cfg }(), }, @@ -337,9 +337,9 @@ func TestUnmarshal(t *testing.T) { ExpectErr: false, Expect: func() *Config { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineEndPattern = "Start" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineEndPattern: "Start", + } return cfg }(), }, @@ -348,9 +348,9 @@ func TestUnmarshal(t *testing.T) { ExpectErr: false, Expect: func() *Config { cfg := NewConfig() - newSplit := tokenize.NewSplitterConfig() - newSplit.Multiline.LineEndPattern = "%" - cfg.Splitter = newSplit + cfg.Multiline = tokenize.MultilineConfig{ + LineEndPattern: "%", + } return cfg }(), }, @@ -476,8 +476,7 @@ func TestBuild(t *testing.T) { { "MultilineConfiguredStartAndEndPatterns", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineEndPattern: "Exists", LineStartPattern: "Exists", } @@ -488,8 +487,7 @@ func TestBuild(t *testing.T) { { "MultilineConfiguredStartPattern", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineStartPattern: "START.*", } }, @@ -499,8 +497,7 @@ func TestBuild(t *testing.T) { { "MultilineConfiguredEndPattern", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineEndPattern: "END.*", } }, @@ -518,8 +515,7 @@ func TestBuild(t *testing.T) { { "LineStartAndEnd", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineStartPattern: ".*", LineEndPattern: ".*", } @@ -530,8 +526,7 @@ func TestBuild(t *testing.T) { { "NoLineStartOrEnd", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{} + f.Multiline = tokenize.MultilineConfig{} }, require.NoError, func(t *testing.T, f *Input) {}, @@ -539,8 +534,7 @@ func TestBuild(t *testing.T) { { "InvalidLineStartRegex", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineStartPattern: "(", } }, @@ -550,8 +544,7 @@ func TestBuild(t *testing.T) { { "InvalidLineEndRegex", func(f *Config) { - f.Splitter = tokenize.NewSplitterConfig() - f.Splitter.Multiline = tokenize.MultilineConfig{ + f.Multiline = tokenize.MultilineConfig{ LineEndPattern: "(", } }, diff --git a/pkg/stanza/tokenize/splitter.go b/pkg/stanza/tokenize/splitter.go deleted file mode 100644 index c2d8a7444344..000000000000 --- a/pkg/stanza/tokenize/splitter.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package tokenize // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize" - -import ( - "bufio" - - "golang.org/x/text/encoding" - - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/trim" -) - -// SplitterConfig consolidates MultilineConfig and FlusherConfig -type SplitterConfig struct { - Multiline MultilineConfig `mapstructure:"multiline,omitempty"` -} - -// NewSplitterConfig returns default SplitterConfig -func NewSplitterConfig() SplitterConfig { - return SplitterConfig{ - Multiline: NewMultilineConfig(), - } -} - -// Build builds bufio.SplitFunc based on the config -func (c *SplitterConfig) Build(enc encoding.Encoding, flushAtEOF bool, maxLogSize int, trimFunc trim.Func) (bufio.SplitFunc, error) { - return c.Multiline.Build(enc, flushAtEOF, maxLogSize, trimFunc) -} diff --git a/receiver/otlpjsonfilereceiver/file_test.go b/receiver/otlpjsonfilereceiver/file_test.go index 7183f7dfc016..e9656dca43c8 100644 --- a/receiver/otlpjsonfilereceiver/file_test.go +++ b/receiver/otlpjsonfilereceiver/file_test.go @@ -124,7 +124,7 @@ func testdataConfigYamlAsMap() *Config { IncludeFileNameResolved: false, IncludeFilePathResolved: false, PollInterval: 200 * time.Millisecond, - Splitter: tokenize.NewSplitterConfig(), + Multiline: tokenize.NewMultilineConfig(), Encoding: "utf-8", StartAt: "end", FingerprintSize: 1000,