From ad1722c88152f3b07fa712e66d12fa7aaa67624c Mon Sep 17 00:00:00 2001
From: Michal Pristas
Date: Tue, 27 Oct 2020 11:30:30 +0100
Subject: [PATCH 1/4] [Ingest Manager] Skip flaky gateway tests #22177

[Ingest Manager] Skip flaky gateway tests #22177
---
 x-pack/elastic-agent/pkg/agent/application/fleet_gateway_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway_test.go b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway_test.go
index cfcd1f46994..0d079cdf858 100644
--- a/x-pack/elastic-agent/pkg/agent/application/fleet_gateway_test.go
+++ b/x-pack/elastic-agent/pkg/agent/application/fleet_gateway_test.go
@@ -162,6 +162,7 @@ func wrapStrToResp(code int, body string) *http.Response {
}

func TestFleetGateway(t *testing.T) {
+	t.Skip("Flaky when CI is slower")
	agentInfo := &testAgentInfo{}
	settings := &fleetGatewaySettings{

From 367714988baa73e1cc76874556baeb8d96e8df8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?=
Date: Tue, 27 Oct 2020 11:41:50 +0100
Subject: [PATCH 2/4] Add documentation of filestream input (#21615)

---
 filebeat/docs/filebeat-options.asciidoc | 2 +
 .../input-filestream-file-options.asciidoc | 394 ++++++++++++++++++
 .../input-filestream-reader-options.asciidoc | 143 +++++++
 .../docs/inputs/input-filestream.asciidoc | 165 ++++++++
 4 files changed, 704 insertions(+)
 create mode 100644 filebeat/docs/inputs/input-filestream-file-options.asciidoc
 create mode 100644 filebeat/docs/inputs/input-filestream-reader-options.asciidoc
 create mode 100644 filebeat/docs/inputs/input-filestream.asciidoc

diff --git a/filebeat/docs/filebeat-options.asciidoc b/filebeat/docs/filebeat-options.asciidoc
index 8bbd06ec7f8..4cfa0961a20 100644
--- a/filebeat/docs/filebeat-options.asciidoc
+++ b/filebeat/docs/filebeat-options.asciidoc
@@ -94,6 +94,8 @@ include::inputs/input-container.asciidoc[]

include::inputs/input-docker.asciidoc[]

+include::inputs/input-filestream.asciidoc[]
+
include::../../x-pack/filebeat/docs/inputs/input-google-pubsub.asciidoc[]

include::../../x-pack/filebeat/docs/inputs/input-http-endpoint.asciidoc[]

diff --git a/filebeat/docs/inputs/input-filestream-file-options.asciidoc b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
new file mode 100644
index 00000000000..768960323f9
--- /dev/null
+++ b/filebeat/docs/inputs/input-filestream-file-options.asciidoc
@@ -0,0 +1,394 @@
//////////////////////////////////////////////////////////////////////////
//// This content is shared by Filebeat inputs that use this input
//// to process files on disk (includes options for managing physical files)
//// If you add IDs to sections, make sure you use attributes to create
//// unique IDs for each input that includes this file. Use the format:
//// [id="{beatname_lc}-input-{type}-option-name"]
//////////////////////////////////////////////////////////////////////////

[float]
[id="{beatname_lc}-input-{type}-exclude-files"]
=== Prospector options

The prospector runs a file system watcher that looks for the files specified
in the `paths` option. At the moment only simple file system scanning is
supported.

==== Scanner options

The scanner watches the configured paths. It scans the file system periodically
and returns the file system events to the Prospector.

===== `prospector.scanner.exclude_files`

A list of regular expressions to match the files that you want {beatname_uc} to
ignore. By default, no files are excluded.

The following example configures {beatname_uc} to ignore all the files that have
a `gz` extension:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  ...
  prospector.scanner.exclude_files: ['\.gz$']
----

See <> for a list of supported regexp patterns.

===== `prospector.scanner.symlinks`

The `symlinks` option allows {beatname_uc} to harvest symlinks in addition to
regular files. When harvesting symlinks, {beatname_uc} opens and reads the
original file even though it reports the path of the symlink.

When you configure a symlink for harvesting, make sure the original path is
excluded. If a single input is configured to harvest both the symlink and
the original file, {beatname_uc} will detect the problem and only process the
first file it finds. However, if two different inputs are configured (one
to read the symlink and the other the original path), both paths will be
harvested, causing {beatname_uc} to send duplicate data and the inputs to
overwrite each other's state.

The `symlinks` option can be useful if symlinks to the log files have additional
metadata in the file name, and you want to process the metadata in Logstash.
This is, for example, the case for Kubernetes log files.

Because this option may lead to data loss, it is disabled by default.


[float]
[id="{beatname_lc}-input-{type}-scan-frequency"]
===== `prospector.scanner.check_interval`

How often {beatname_uc} checks for new files in the paths that are specified
for harvesting. For example, if you specify a glob like `/var/log/*`, the
directory is scanned for files using the frequency specified by
`check_interval`. Specify 1s to scan the directory as frequently as possible.
We do not recommend setting this value to less than 1s.

If you require log lines to be sent in near real time, do not use a very low
`check_interval`. Instead, adjust `close.on_state_change.inactive` so the file
handler stays open and constantly polls your files.

The default setting is 10s.

[float]
[id="{beatname_lc}-input-{type}-ignore-older"]
===== `ignore_older`

If this option is enabled, {beatname_uc} ignores any files that were modified
before the specified timespan. Configuring `ignore_older` can be especially
useful if you keep log files for a long time. For example, if you want to start
{beatname_uc}, but only want to send the newest files and files from last week,
you can configure this option.

You can use time strings like 2h (2 hours) and 5m (5 minutes). The default is 0,
which disables the setting. Commenting out the config has the same effect as
setting it to 0.
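The following example (an illustrative sketch; the path and the one-week window
are hypothetical choices) configures {beatname_uc} to ignore all files that were
last modified more than a week ago:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  paths:
    - /var/log/*.log
  # Files whose modification time is older than 168 hours (7 days) are ignored.
  ignore_older: 168h
----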
IMPORTANT: You must set `ignore_older` to be greater than
`close.on_state_change.inactive`.

The files affected by this setting fall into two categories:

* Files that were never harvested
* Files that were harvested but weren't updated for longer than `ignore_older`

For files which were never seen before, the offset state is set to the end of
the file. If a state already exists, the offset is not changed. If the file is
updated again later, reading continues at the set offset position.

The `ignore_older` setting relies on the modification time of the file to
determine whether a file is ignored. If the modification time of the file is not
updated when lines are written to a file (which can happen on Windows), the
`ignore_older` setting may cause {beatname_uc} to ignore files even though
content was added at a later time.

To remove the state of previously harvested files from the registry file, use
the `clean_inactive` configuration option.

Before a file can be ignored by {beatname_uc}, the file must be closed. To
ensure a file is no longer being harvested when it is ignored, you must set
`ignore_older` to a longer duration than `close.on_state_change.inactive`.

If a file that's currently being harvested falls under `ignore_older`, the
harvester will first finish reading the file and close it after
`close.on_state_change.inactive` is reached. Then, after that, the file will be
ignored.

[float]
[id="{beatname_lc}-input-{type}-close-options"]
===== `close.*`

The `close.*` configuration options are used to close the harvester when certain
criteria are met or a timeout elapses. Closing the harvester means closing the
file handler. If a file is updated after the harvester is closed, the file will
be picked up again after `prospector.scanner.check_interval` has elapsed.
However, if the file is moved or deleted while the harvester is closed,
{beatname_uc} will not be able to pick up the file again, and any data that the
harvester hasn't read will be lost.

The `close.on_state_change.*` settings are applied asynchronously from reading
a file. This means that if {beatname_uc} is in a blocked state due to blocked
output, a full queue, or another issue, a file that meets the close criteria is
closed regardless.


[float]
[id="{beatname_lc}-input-{type}-close-inactive"]
===== `close.on_state_change.inactive`

When this option is enabled, {beatname_uc} closes the file handle if a file has
not been harvested for the specified duration. The counter for the defined
period starts when the last log line was read by the harvester. It is not based
on the modification time of the file. If the closed file changes again, a new
harvester is started and the latest changes will be picked up after
`prospector.scanner.check_interval` has elapsed.

We recommend that you set `close.on_state_change.inactive` to a value that is
larger than the least frequent updates to your log files. For example, if your
log files get updated every few seconds, you can safely set
`close.on_state_change.inactive` to `1m`. If there are log files with very
different update rates, you can use multiple configurations with different
values.

Setting `close.on_state_change.inactive` to a lower value means that file
handles are closed sooner. However, this has the side effect that new log lines
are not sent in near real time if the harvester is closed.

The timestamp for closing a file does not depend on the modification time of the
file. Instead, {beatname_uc} uses an internal timestamp that reflects when the
file was last harvested. For example, if `close.on_state_change.inactive` is set
to 5 minutes, the countdown for the 5 minutes starts after the harvester reads
the last line of the file.

You can use time strings like 2h (2 hours) and 5m (5 minutes). The default is
5m.
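For example, for log files that are appended to every few seconds, the `1m`
value recommended above could be applied like this (a minimal sketch; the path
is hypothetical):

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  paths:
    - /var/log/app/*.log
  # Close the file handle after one minute without new lines; the file is
  # picked up again if it changes later.
  close.on_state_change.inactive: 1m
----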
[float]
[id="{beatname_lc}-input-{type}-close-renamed"]
===== `close.on_state_change.renamed`

WARNING: Only use this option if you understand that data loss is a potential
side effect.

When this option is enabled, {beatname_uc} closes the file handler when a file
is renamed. This happens, for example, when rotating files. By default, the
harvester stays open and keeps reading the file because the file handler does
not depend on the file name. If the `close.on_state_change.renamed` option is
enabled and the file is renamed or moved in such a way that it's no longer
matched by the file patterns specified for the input, the file will not be
picked up again. {beatname_uc} will not finish reading the file.

Do not use this option when `path`-based `file_identity` is configured. It does
not make sense to enable the option, as {beatname_uc} cannot detect renames when
it uses path names as unique identifiers.

WINDOWS: If your Windows log rotation system shows errors because it can't
rotate the files, you should enable this option.

[float]
[id="{beatname_lc}-input-{type}-close-removed"]
===== `close.on_state_change.removed`

When this option is enabled, {beatname_uc} closes the harvester when a file is
removed. Normally a file should only be removed after it's inactive for the
duration specified by `close.on_state_change.inactive`. However, if a file is
removed early and you don't enable `close.on_state_change.removed`,
{beatname_uc} keeps the file open to make sure the harvester has completed. If
this setting results in files that are not completely read because they are
removed from disk too early, disable this option.

This option is enabled by default. If you disable this option, you must also
disable `clean_removed`.

WINDOWS: If your Windows log rotation system shows errors because it can't
rotate files, make sure this option is enabled.

[float]
[id="{beatname_lc}-input-{type}-close-eof"]
===== `close.reader.eof`

WARNING: Only use this option if you understand that data loss is a potential
side effect.

When this option is enabled, {beatname_uc} closes a file as soon as the end of a
file is reached. This is useful when your files are only written once and not
updated from time to time. For example, this happens when you are writing every
single log event to a new file. This option is disabled by default.

[float]
[id="{beatname_lc}-input-{type}-close-timeout"]
===== `close.reader.after_interval`

WARNING: Only use this option if you understand that data loss is a potential
side effect. Another side effect is that multiline events might not be
completely sent before the timeout expires.

When this option is enabled, {beatname_uc} gives every harvester a predefined
lifetime. Regardless of where the reader is in the file, reading will stop after
the `close.reader.after_interval` period has elapsed. This option can be useful
for older log files when you want to spend only a predefined amount of time on
the files. While `close.reader.after_interval` will close the file after the
predefined timeout, if the file is still being updated, {beatname_uc} will start
a new harvester again per the defined `prospector.scanner.check_interval`. The
`close.reader.after_interval` countdown for the new harvester then starts again.

This option is particularly useful in case the output is blocked, which makes
{beatname_uc} keep open file handlers even for files that were deleted from the
disk. Setting `close.reader.after_interval` to `5m` ensures that the files are
periodically closed so they can be freed up by the operating system.

If you set `close.reader.after_interval` to equal `ignore_older`, the file will
not be picked up if it's modified while the harvester is closed. This
combination of settings normally leads to data loss, and the complete file is
not sent.

When you use `close.reader.after_interval` for logs that contain multiline
events, the harvester might stop in the middle of a multiline event, which means
that only parts of the event will be sent. If the harvester is started again and
the file still exists, only the second part of the event will be sent.

This option is set to 0 by default, which means it is disabled.
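For example, the following sketch (hypothetical path, illustrative value)
ensures that file handles are released periodically even if the output is
blocked:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  paths:
    - /var/log/legacy/*.log
  # Stop every harvester after 5 minutes, regardless of the reader position,
  # so the operating system can free the file handle.
  close.reader.after_interval: 5m
----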

[float]
[id="{beatname_lc}-input-{type}-clean-options"]
===== `clean_*`

The `clean_*` options are used to clean up the state entries in the registry
file. These settings help to reduce the size of the registry file and can
prevent a potential <>.

[float]
[id="{beatname_lc}-input-{type}-clean-inactive"]
===== `clean_inactive`

WARNING: Only use this option if you understand that data loss is a potential
side effect.

When this option is enabled, {beatname_uc} removes the state of a file after the
specified period of inactivity has elapsed. The state can only be removed if
the file is already ignored by {beatname_uc} (the file is older than
`ignore_older`). The `clean_inactive` setting must be greater than
`ignore_older + prospector.scanner.check_interval` to make sure that no states
are removed while a file is still being harvested. Otherwise, the setting could
result in {beatname_uc} resending the full content constantly because
`clean_inactive` removes state for files that are still detected by
{beatname_uc}. If a file is updated or appears again, the file is read from the
beginning.

The `clean_inactive` configuration option is useful to reduce the size of the
registry file, especially if a large number of new files are generated every
day.

This config option is also useful to prevent {beatname_uc} problems resulting
from inode reuse on Linux. For more information, see <>.

NOTE: Every time a file is renamed, the file state is updated and the counter
for `clean_inactive` starts at 0 again.

TIP: During testing, you might notice that the registry contains state entries
that should be removed based on the `clean_inactive` setting. This happens
because {beatname_uc} doesn't remove the entries until it opens the registry
again to read a different file. If you are testing the `clean_inactive` setting,
make sure {beatname_uc} is configured to read from more than one file, or the
file state will never be removed from the registry.
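For example, the following sketch (the durations are arbitrary illustrations
that satisfy the constraint described above) removes the state of files that
have been inactive for three days:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  paths:
    - /var/log/rotated/*.log
  # Ignore files that were not modified in the last 48 hours...
  ignore_older: 48h
  # ...and remove their registry state after 72 hours, which is greater than
  # ignore_older plus the scanner check interval.
  clean_inactive: 72h
----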
[float]
[id="{beatname_lc}-input-{type}-clean-removed"]
===== `clean_removed`

When this option is enabled, {beatname_uc} cleans files from the registry if
they cannot be found on disk anymore under the last known name. This also means
that files which were renamed after the harvester finished will be removed from
the registry. This option is enabled by default.

If a shared drive disappears for a short period and appears again, all files
will be read again from the beginning because the states were removed from the
registry file. In such cases, we recommend that you disable the `clean_removed`
option.

You must disable this option if you also disable
`close.on_state_change.removed`.

[float]
===== `backoff.*`

The backoff options specify how aggressively {beatname_uc} crawls open files for
updates. You can use the default values in most cases.

[float]
===== `backoff.init`

The `backoff.init` option defines how long {beatname_uc} waits before checking a
file again after EOF is reached. The default is 1s, which means the file is
checked every second if new lines were added. This enables near real-time
crawling. Every time a new line appears in the file, the backoff value is reset
to the initial value.

[float]
===== `backoff.max`

The maximum time for {beatname_uc} to wait before checking a file again after
EOF is reached. After having backed off multiple times from checking the file,
the wait time will never exceed `backoff.max`. Because it can take up to 10s to
pick up a new line, specifying 10s for `backoff.max` means that, at the worst, a
new line is read 10 seconds after it was added to the log file if {beatname_uc}
has backed off multiple times. The default is 10s.

Requirement: Set `backoff.max` to be greater than or equal to `backoff.init` and
less than or equal to `prospector.scanner.check_interval`
(`backoff.init <= backoff.max <= prospector.scanner.check_interval`).
If `backoff.max` needs to be higher, it is recommended to close the file handler
instead and let {beatname_uc} pick up the file again.
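Spelled out explicitly, the defaults described above look like this (shown only
for illustration; these values do not need to be set):

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  paths:
    - /var/log/*.log
  # Wait 1s after EOF before checking the file for new lines again...
  backoff.init: 1s
  # ...and back off to at most 10s between checks.
  backoff.max: 10s
----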
[float]
===== `file_identity`

Different `file_identity` methods can be configured to suit the
environment where you are collecting log messages.


*`native`*:: The default behaviour of {beatname_uc} is to differentiate
between files using their inodes and device ids.

[source,yaml]
----
file_identity.native: ~
----

*`path`*:: To identify files based on their paths use this strategy.

WARNING: Only use this strategy if your log files are rotated to a folder
outside of the scope of your input or not at all. Otherwise you end up
with duplicated events.

WARNING: This strategy does not support renaming files.
If an input file is renamed, {beatname_uc} will read it again if the new path
matches the settings of the input.

[source,yaml]
----
file_identity.path: ~
----

*`inode_marker`*:: If the device id changes from time to time, you must use
this method to distinguish files. This option is not supported on Windows.

Set the location of the marker file the following way:

[source,yaml]
----
file_identity.inode_marker.path: /logs/.filebeat-marker
----

diff --git a/filebeat/docs/inputs/input-filestream-reader-options.asciidoc b/filebeat/docs/inputs/input-filestream-reader-options.asciidoc
new file mode 100644
index 00000000000..8b365f1ede2
--- /dev/null
+++ b/filebeat/docs/inputs/input-filestream-reader-options.asciidoc
@@ -0,0 +1,143 @@
//////////////////////////////////////////////////////////////////////////
//// This content is shared by Filebeat inputs that use this input
//// but do not process files (the options for managing files
//// on disk are not relevant)
//// If you add IDs to sections, make sure you use attributes to create
//// unique IDs for each input that includes this file. Use the format:
//// [id="{beatname_lc}-input-{type}-option-name"]
//////////////////////////////////////////////////////////////////////////

[float]
===== `encoding`

The file encoding to use for reading data that contains international
characters. See the encoding names http://www.w3.org/TR/encoding/[recommended by
the W3C for use in HTML5].

Valid encodings:

  * `plain`: plain ASCII encoding
  * `utf-8` or `utf8`: UTF-8 encoding
  * `gbk`: simplified Chinese characters
  * `iso8859-6e`: ISO8859-6E, Latin/Arabic
  * `iso8859-6i`: ISO8859-6I, Latin/Arabic
  * `iso8859-8e`: ISO8859-8E, Latin/Hebrew
  * `iso8859-8i`: ISO8859-8I, Latin/Hebrew
  * `iso8859-1`: ISO8859-1, Latin-1
  * `iso8859-2`: ISO8859-2, Latin-2
  * `iso8859-3`: ISO8859-3, Latin-3
  * `iso8859-4`: ISO8859-4, Latin-4
  * `iso8859-5`: ISO8859-5, Latin/Cyrillic
  * `iso8859-6`: ISO8859-6, Latin/Arabic
  * `iso8859-7`: ISO8859-7, Latin/Greek
  * `iso8859-8`: ISO8859-8, Latin/Hebrew
  * `iso8859-9`: ISO8859-9, Latin-5
  * `iso8859-10`: ISO8859-10, Latin-6
  * `iso8859-13`: ISO8859-13, Latin-7
  * `iso8859-14`: ISO8859-14, Latin-8
  * `iso8859-15`: ISO8859-15, Latin-9
  * `iso8859-16`: ISO8859-16, Latin-10
  * `cp437`: IBM CodePage 437
  * `cp850`: IBM CodePage 850
  * `cp852`: IBM CodePage 852
  * `cp855`: IBM CodePage 855
  * `cp858`: IBM CodePage 858
  * `cp860`: IBM CodePage 860
  * `cp862`: IBM CodePage 862
  * `cp863`: IBM CodePage 863
  * `cp865`: IBM CodePage 865
  * `cp866`: IBM CodePage 866
  * `ebcdic-037`: IBM CodePage 037
  * `ebcdic-1140`: IBM CodePage 1140
  * `ebcdic-1047`: IBM CodePage 1047
  * `koi8r`: KOI8-R, Russian (Cyrillic)
  * `koi8u`: KOI8-U, Ukrainian (Cyrillic)
  * `macintosh`: Macintosh encoding
  * `macintosh-cyrillic`: Macintosh Cyrillic encoding
  * `windows1250`: Windows1250, Central and Eastern European
  * `windows1251`: Windows1251, Russian, Serbian (Cyrillic)
  * `windows1252`: Windows1252, Legacy
  * `windows1253`: Windows1253, Modern Greek
  * `windows1254`: Windows1254, Turkish
  * `windows1255`: Windows1255, Hebrew
  * `windows1256`: Windows1256, Arabic
  * `windows1257`: Windows1257, Estonian, Latvian, Lithuanian
  * `windows1258`: Windows1258, Vietnamese
  * `windows874`: Windows874, ISO/IEC 8859-11, Latin/Thai
  * `utf-16-bom`: UTF-16 with required BOM
  * `utf-16be-bom`: big endian UTF-16 with required BOM
  * `utf-16le-bom`: little endian UTF-16 with required BOM

The `plain` encoding is special, because it does not validate or transform any
input.
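For example, to read a log file written in a legacy Windows code page (the path
is hypothetical; `windows1252` is one of the valid encodings listed above):

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  paths:
    - /var/log/legacy-app/*.log
  # Decode lines using the Windows-1252 encoding before processing.
  encoding: windows1252
----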
[float]
[id="{beatname_lc}-input-{type}-exclude-lines"]
===== `exclude_lines`

A list of regular expressions to match the lines that you want {beatname_uc} to
exclude. {beatname_uc} drops any lines that match a regular expression in the
list. By default, no lines are dropped. Empty lines are ignored.

The following example configures {beatname_uc} to drop any lines that start with
`DBG`:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  ...
  exclude_lines: ['^DBG']
----

See <> for a list of supported regexp patterns.

[float]
[id="{beatname_lc}-input-{type}-include-lines"]
===== `include_lines`

A list of regular expressions to match the lines that you want {beatname_uc} to
include. {beatname_uc} exports only the lines that match a regular expression in
the list. By default, all lines are exported. Empty lines are ignored.

The following example configures {beatname_uc} to export any lines that start
with `ERR` or `WARN`:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  ...
  include_lines: ['^ERR', '^WARN']
----

NOTE: If both `include_lines` and `exclude_lines` are defined, {beatname_uc}
executes `include_lines` first and then executes `exclude_lines`. The order in
which the two options are defined doesn't matter: `include_lines` is always
executed before `exclude_lines`, even if `exclude_lines` appears earlier in the
config file.

The following example exports all log lines that contain `sometext`,
except for lines that begin with `DBG` (debug messages):

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  ...
  include_lines: ['sometext']
  exclude_lines: ['^DBG']
----

See <> for a list of supported regexp patterns.

[float]
===== `buffer_size`

The size in bytes of the buffer that each harvester uses when fetching a file.
The default is 16384.

[float]
===== `message_max_bytes`

The maximum number of bytes that a single log message can have. All bytes after
`message_max_bytes` are discarded and not sent. The default is 10MB (10485760).
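A sketch combining both reader limits (the values are arbitrary illustrations,
not recommendations):

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: {type}
  paths:
    - /var/log/*.log
  # Read the file in 32 KiB chunks instead of the default 16 KiB.
  buffer_size: 32768
  # Discard the remainder of any message larger than 1 MiB.
  message_max_bytes: 1048576
----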
diff --git a/filebeat/docs/inputs/input-filestream.asciidoc b/filebeat/docs/inputs/input-filestream.asciidoc
new file mode 100644
index 00000000000..0a02a865465
--- /dev/null
+++ b/filebeat/docs/inputs/input-filestream.asciidoc
@@ -0,0 +1,165 @@
:type: filestream

[id="{beatname_lc}-input-{type}"]
=== filestream input

experimental[]

++++
filestream
++++

Use the `filestream` input to read lines from active log files. It is the
new, improved alternative to the `log` input. However, a few features are
missing from it, e.g. `multiline` or other special parsing capabilities.
These missing options are probably going to be added again. We strive to
achieve feature parity, if possible.

To configure this input, specify a list of glob-based <>
that must be crawled to locate and fetch the log lines.

Example configuration:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: filestream
  paths:
    - /var/log/messages
    - /var/log/*.log
----


You can apply additional
<<{beatname_lc}-input-{type}-options,configuration settings>> (such as `fields`,
`include_lines`, `exclude_lines` and so on) to the lines harvested
from these files. The options that you specify are applied to all the files
harvested by this input.

To apply different configuration settings to different files, you need to define
multiple input sections:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: filestream <1>
  paths:
    - /var/log/system.log
    - /var/log/wifi.log
- type: filestream <2>
  paths:
    - "/var/log/apache2/*"
  fields:
    apache: true
----

<1> Harvests lines from two files: `system.log` and
`wifi.log`.
<2> Harvests lines from every file in the `apache2` directory, and uses the
`fields` configuration option to add a field called `apache` to the output.


[[filestream-file-identity]]
==== Reading files on network shares and cloud providers

WARNING: Filebeat does not support reading from network shares and cloud
providers.

However, one of the limitations of these data sources can be mitigated
if you configure Filebeat adequately.

By default, {beatname_uc} identifies files based on their inodes and
device IDs. However, on network shares and cloud providers these
values might change during the lifetime of the file. If this happens,
{beatname_uc} thinks that the file is new and resends the whole content
of the file. To solve this problem you can configure the `file_identity`
option. Possible values besides the default `native` (based on inode and
device ID) are `path` and `inode_marker`.

Selecting `path` instructs {beatname_uc} to identify files based on their
paths. This is a quick way to avoid rereading files if inode and device ids
might change. However, keep in mind that if the files are rotated (renamed),
they will be reread and resubmitted.

The option `inode_marker` can be used if the inodes stay the same even if
the device id is changed. If possible, you should choose this method instead
of `path` when your files are rotated. You have to configure a marker file
readable by {beatname_uc} and set the path in the option `path` of
`inode_marker`.

The content of this file must be unique to the device. You can put the
UUID of the device or mountpoint where the input is stored. Please note that
you should not use this option on Windows, as file identifiers might be
more volatile. The following example one-liner generates a hidden marker file
for the selected mountpoint `/logs`:

["source","sh",subs="attributes"]
----
$ lsblk -o MOUNTPOINT,UUID | grep /logs | awk '{print $2}' >> /logs/.filebeat-marker
----

To set the generated file as a marker for `file_identity` you should configure
the input the following way:

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: filestream
  paths:
    - /logs/*.log
  file_identity.inode_marker.path: /logs/.filebeat-marker
----


[[filestream-rotating-logs]]
==== Reading from rotating logs

When dealing with file rotation, avoid harvesting symlinks. Instead
use the <> setting to point to the original file, and specify
a pattern that matches the file you want to harvest and all of its rotated
files. Also make sure your log rotation strategy prevents lost or duplicate
messages. For more information, see <>.

Furthermore, to avoid duplicates of rotated log messages, do not use the
`path` method for `file_identity`. Alternatively, exclude the rotated files
with the `exclude_files` option. An illustrative sketch follows.
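For example, a sketch for a classic numbered rotation scheme (the path pattern
is hypothetical; adapt it to your rotation setup):

["source","yaml",subs="attributes"]
----
{beatname_lc}.inputs:
- type: filestream
  paths:
    # Matches the active file and its rotated copies,
    # e.g. app.log, app.log.1, app.log.2
    - /var/log/app/app.log*
  # Keep the default file identity so renamed (rotated) files are not
  # read again.
  file_identity.native: ~
----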
[id="{beatname_lc}-input-{type}-options"]
==== Prospector options

The `filestream` input supports the following configuration options plus the
<<{beatname_lc}-input-{type}-common-options>> described later.

[float]
[[filestream-input-paths]]
===== `paths`

A list of glob-based paths that will be crawled and fetched. All patterns
supported by https://golang.org/pkg/path/filepath/#Glob[Go Glob] are also
supported here. For example, to fetch all files from a predefined level of
subdirectories, the following pattern can be used: `/var/log/*/*.log`. This
fetches all `.log` files from the subfolders of `/var/log`. It does not
fetch log files from the `/var/log` folder itself.
It is possible to recursively fetch all files in all subdirectories of a
directory using the optional <> settings.

{beatname_uc} starts a harvester for each file that it finds under the specified
paths. You can specify one path per line. Each line begins with a dash (-).

[float]
[[filestream-recursive-glob]]
===== `prospector.scanner.recursive_glob`

Enable expanding `**` into recursive glob patterns. With this feature enabled,
the rightmost `**` in each path is expanded into a fixed number of glob
patterns. For example: `/foo/**` expands to `/foo`, `/foo/*`, `/foo/*/*`, and so
on. If enabled, it expands a single `**` into an 8-level deep `*` pattern.

This feature is enabled by default. Set `prospector.scanner.recursive_glob` to
false to disable it.

include::../inputs/input-filestream-reader-options.asciidoc[]

include::../inputs/input-filestream-file-options.asciidoc[]

[id="{beatname_lc}-input-{type}-common-options"]
include::../inputs/input-common-options.asciidoc[]

:type!:

From d671e5275520c8b2d95ab4d67de9e6cfacb1a054 Mon Sep 17 00:00:00 2001
From: Marc Guasch
Date: Tue, 27 Oct 2020 12:28:48 +0100
Subject: [PATCH 3/4] [filebeat][okta] Make cursor optional for okta and update
 docs (#22091)

* Make cursor optional for okta and update docs

* Remove keep_state flag
---
 CHANGELOG.next.asciidoc | 1 +
 filebeat/docs/modules/okta.asciidoc | 15 +++++++++------
 x-pack/filebeat/module/okta/_meta/docs.asciidoc | 15 +++++++++------
 3 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index ae48f268977..99008bf7181 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -640,6 +640,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- Adding support for FIPS in s3 input {pull}21446[21446]
- Add SSL option to checkpoint module {pull}19560[19560]
- Add max_number_of_messages config into s3 input. {pull}21993[21993]
+- Update Okta documentation for new stateful restarts. {pull}22091[22091]

*Heartbeat*

diff --git a/filebeat/docs/modules/okta.asciidoc b/filebeat/docs/modules/okta.asciidoc
index 038f6d088dd..d1f8e6ea2ec 100644
--- a/filebeat/docs/modules/okta.asciidoc
+++ b/filebeat/docs/modules/okta.asciidoc
@@ -32,12 +32,6 @@ the logs while honoring any
https://developer.okta.com/docs/reference/rate-limits/[rate-limiting] headers
sent by Okta.

-NOTE: This module does not persist the timestamp of the last read event in
-order to facilitate resuming on restart. This feature will be coming in a future
-version. When you restart the module will read events from the beginning of the
-log. To minimize duplicates documents the module uses the event's Okta UUID
-value as the Elasticsearch `_id`.
-
This is an example configuration for the module.

[source,yaml]
@@ -99,6 +93,15 @@ information.
  supported_protocols: [TLSv1.2]
----

+*`var.initial_interval`*::
+
+An initial interval can be defined. The first time the module starts, it will fetch events from the current moment minus the initial interval value. Subsequent restarts will fetch events starting from the last event read. It defaults to `24h`.
++
+[source,yaml]
+----
+  var.initial_interval: 24h # will fetch events starting 24h ago.
+----
+
[float]
=== Example dashboard

diff --git a/x-pack/filebeat/module/okta/_meta/docs.asciidoc b/x-pack/filebeat/module/okta/_meta/docs.asciidoc
index 1ea5cc6a66d..297a8644987 100644
--- a/x-pack/filebeat/module/okta/_meta/docs.asciidoc
+++ b/x-pack/filebeat/module/okta/_meta/docs.asciidoc
@@ -27,12 +27,6 @@ the logs while honoring any
https://developer.okta.com/docs/reference/rate-limits/[rate-limiting] headers
sent by Okta.

-NOTE: This module does not persist the timestamp of the last read event in
-order to facilitate resuming on restart. This feature will be coming in a future
-version. When you restart the module will read events from the beginning of the
-log. To minimize duplicates documents the module uses the event's Okta UUID
-value as the Elasticsearch `_id`.
-

This is an example configuration for the module.

[source,yaml]
@@ -94,6 +88,15 @@ information.
  supported_protocols: [TLSv1.2]
----

+*`var.initial_interval`*::
+
+An initial interval can be defined. The first time the module starts, it will fetch events from the current moment minus the initial interval value. Subsequent restarts will fetch events starting from the last event read. It defaults to `24h`.
++
+[source,yaml]
+----
+  var.initial_interval: 24h # will fetch events starting 24h ago.
+----
+
[float]
=== Example dashboard

From f0da6811f95298dedf3cbbfe9fca8591b1365c11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?=
Date: Tue, 27 Oct 2020 13:01:02 +0100
Subject: [PATCH 4/4] Add new licence status: expired (#22180)

## What does this PR do?

This PR adds a new licence state named `Expired`. Previously, an expired
licence prevented Beats from connecting to ES.

## Why is it important?

Beats were not able to parse expired licences. This problem prevented users
from using the features of the software which do not require a licence.

## Related issues

Closes #21112
---
 CHANGELOG.next.asciidoc | 1 +
 x-pack/libbeat/licenser/license.go | 5 +++++
 x-pack/libbeat/licenser/license_test.go | 5 +++++
 .../xpack-with-relax-expired-license-uuid.json | 13 +++++++++++++
 x-pack/libbeat/licenser/types.go | 2 ++
 5 files changed, 26 insertions(+)
 create mode 100644 x-pack/libbeat/licenser/testdata/xpack-with-relax-expired-license-uuid.json

diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc
index 99008bf7181..1235422f1dc 100644
--- a/CHANGELOG.next.asciidoc
+++ b/CHANGELOG.next.asciidoc
@@ -188,6 +188,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- The `o365input` and `o365` module now recover from an authentication problem or other fatal errors, instead of terminating. {pull}21259[21258]
- Orderly close processors when processing pipelines are not needed anymore to release their resources. {pull}16349[16349]
- Fix memory leak and events duplication in docker autodiscover and add_docker_metadata. {pull}21851[21851]
+- Fix parsing of expired licences. {issue}21112[21112] {pull}22180[22180]

*Auditbeat*

diff --git a/x-pack/libbeat/licenser/license.go b/x-pack/libbeat/licenser/license.go
index e1c64fb314b..179c1c2f088 100644
--- a/x-pack/libbeat/licenser/license.go
+++ b/x-pack/libbeat/licenser/license.go
@@ -68,6 +68,11 @@ func (l *License) IsActive() bool {
	return l.Status == Active
}

+// IsExpired returns true if the licence has expired.
+func (l *License) IsExpired() bool {
+	return l.Status == Expired
+}
+
// IsTrial returns true if the remote cluster is in trial mode.
func (l *License) IsTrial() bool { return l.Type == Trial diff --git a/x-pack/libbeat/licenser/license_test.go b/x-pack/libbeat/licenser/license_test.go index d8c8882c2fb..f21e6931e9b 100644 --- a/x-pack/libbeat/licenser/license_test.go +++ b/x-pack/libbeat/licenser/license_test.go @@ -132,6 +132,11 @@ func TestIsActive(t *testing.T) { l: License{Status: Inactive}, expected: false, }, + { + name: "expired", + l: License{Status: Expired}, + expected: false, + }, } for _, test := range tests { diff --git a/x-pack/libbeat/licenser/testdata/xpack-with-relax-expired-license-uuid.json b/x-pack/libbeat/licenser/testdata/xpack-with-relax-expired-license-uuid.json new file mode 100644 index 00000000000..9a933ca3de9 --- /dev/null +++ b/x-pack/libbeat/licenser/testdata/xpack-with-relax-expired-license-uuid.json @@ -0,0 +1,13 @@ +{ + "build": { + "hash": "053779d", + "date": "2018-07-20T05:25:16.206115Z" + }, + "license": { + "uid": "hello-license", + "type": "platinum", + "mode": "platinum", + "status": "expired", + "expiry_date_in_millis": 1588261199999 + } +} diff --git a/x-pack/libbeat/licenser/types.go b/x-pack/libbeat/licenser/types.go index 0e819275808..f0d3b64898d 100644 --- a/x-pack/libbeat/licenser/types.go +++ b/x-pack/libbeat/licenser/types.go @@ -25,11 +25,13 @@ type State int const ( Inactive State = iota Active + Expired ) var stateLookup = map[string]State{ "inactive": Inactive, "active": Active, + "expired": Expired, } var licenseLookup = map[string]LicenseType{