Skip to content

Commit

Permalink
Merge pull request #379 from observIQ/w3c-quotes
Browse files Browse the repository at this point in the history
Fix w3c quote support
  • Loading branch information
jmwilliams89 authored Oct 28, 2021
2 parents 03eb518 + d171ec0 commit 2b95246
Showing 1 changed file with 3 additions and 27 deletions.
30 changes: 3 additions & 27 deletions plugins/w3c.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Plugin Info
version: 0.0.3
version: 0.0.4
title: W3C
description: File Input W3C Parser
min_stanza_version: 1.2.0
min_stanza_version: 1.2.12
parameters:
- name: file_log_path
label: File Path
Expand Down Expand Up @@ -140,37 +140,13 @@ pipeline:
- type: filter
expr: '$record matches "^#"'

- type: router
default: csv_parser
routes:
- output: quote_handler_parser
expr: $record matches '.*".*".*' and not ($record matches "^#")

# Some example log entries have quotes. This will cause an error.
# This parses only the first set of quotes. If more than one set of quotes exists, it will still error.
# All examples from logs seen at time of this comment have only contained one set of quotes.
# Example:
# #Fields: date time c-ip cs-username s-sitename s-computername s-ip s-port cs-method cs-uri-stem cs-uri-query sc-status sc-win32-status sc-bytes cs-bytes time-taken cs-version cs-host cs(User-Agent) cs(Cookie) cs(Referer)
# 2021-06-14 18:36:47 0.0.0.0 OutboundConnectionResponse SMTPSVC1 TSIC-PHOENIX - 25 - - 354+3.0.0+continue.++finished+with+"\r\n.\r\n" 0 0 46 0 531 SMTP - - - -
- id: quote_handler_parser
type: regex_parser
parse_from: $record
regex: '(?P<message1>[^"]*)(?P<first_quote>[\"])(?P<message2>[^"]*)(?P<second_quote>[\"])(?P<message3>.*)'
output: quote_handler_restructurer

# This will remove the first set of parsed quotes.
- id: quote_handler_restructurer
type: add
field: $record
value: 'EXPR($record.message1 + $record.message2 + $record.message3)'
output: csv_parser

# Leverage CSV parser's dynamic field name detection by specifying
# delimiter, header_delimiter, and header_label
- type: csv_parser
delimiter: '{{ $delimiter }}'
header_delimiter: '{{ $header_delimiter }}'
header_label: '{{ $fields_header }}'
lazy_quotes: true

- type: remove
field: '$labels.{{ $fields_header }}'
Expand Down

0 comments on commit 2b95246

Please sign in to comment.