-
Notifications
You must be signed in to change notification settings - Fork 4.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Azure] Sanitize message in case of malformed json (#34874)
* Add sanitization function and test for azure input (cherry picked from commit 4096f9b)
- Loading branch information
1 parent
5814b50
commit b744964
Showing
5 changed files
with
202 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
// or more contributor license agreements. Licensed under the Elastic License; | ||
// you may not use this file except in compliance with the Elastic License. | ||
|
||
//go:build !aix | ||
// +build !aix | ||
|
||
package azureeventhub | ||
|
||
import ( | ||
"bytes" | ||
"errors" | ||
) | ||
|
||
// sanitizationOption names a clean-up step that sanitize can apply to a
// message.
type sanitizationOption string

// Sanitization options recognized by sanitize.
const (
	// newLines selects sanitizeNewLines.
	newLines = sanitizationOption("NEW_LINES")
	// singleQuotes selects sanitizeSingleQuotes.
	singleQuotes = sanitizationOption("SINGLE_QUOTES")
)
|
||
// sanitizeOptionsValidate reports whether s names a supported sanitization
// option. It returns nil for "NEW_LINES" and "SINGLE_QUOTES" and an error for
// any other value; the comparison is exact and case-sensitive.
func sanitizeOptionsValidate(s string) error {
	if s == "NEW_LINES" || s == "SINGLE_QUOTES" {
		return nil
	}

	return errors.New("invalid sanitization option")
}
|
||
// sanitize applies the sanitization options specified in the config | ||
// if no sanitization options are provided, the message remains unchanged | ||
func sanitize(jsonByte []byte, opts ...string) []byte { | ||
res := jsonByte | ||
|
||
for _, opt := range opts { | ||
switch sanitizationOption(opt) { | ||
case newLines: | ||
res = sanitizeNewLines(res) | ||
case singleQuotes: | ||
res = sanitizeSingleQuotes(res) | ||
} | ||
} | ||
|
||
return res | ||
} | ||
|
||
// sanitizeNewLines returns a copy of jsonByte with every line feed ('\n')
// byte removed, wherever it appears in the message.
func sanitizeNewLines(jsonByte []byte) []byte {
	lineFeed := []byte{'\n'}

	// A nil replacement deletes each match.
	return bytes.ReplaceAll(jsonByte, lineFeed, nil)
}
|
||
// sanitizeSingleQuotes returns a copy of jsonByte in which every single quote
// found outside a double-quoted string is replaced with a double quote.
// Single quotes inside double-quoted strings are left untouched.
//
// A double quote is treated as escaped (and therefore neither opens nor
// closes a string) only when it is preceded by an odd number of consecutive
// backslashes. An even run, as in "a\\", is a sequence of escaped backslashes,
// so the quote that follows it is a real string delimiter.
func sanitizeSingleQuotes(jsonByte []byte) []byte {
	var result bytes.Buffer
	// Each input byte yields exactly one output byte.
	result.Grow(len(jsonByte))

	inDoubleQuotes := false
	// backslashes counts the consecutive backslashes directly before the
	// current byte.
	backslashes := 0

	for _, b := range jsonByte {
		if b == '"' && backslashes%2 == 0 {
			inDoubleQuotes = !inDoubleQuotes
		}

		if b == '\'' && !inDoubleQuotes {
			result.WriteByte('"')
		} else {
			result.WriteByte(b)
		}

		if b == '\\' {
			backslashes++
		} else {
			backslashes = 0
		}
	}

	return result.Bytes()
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
// or more contributor license agreements. Licensed under the Elastic License; | ||
// you may not use this file except in compliance with the Elastic License. | ||
|
||
//go:build !aix | ||
// +build !aix | ||
|
||
package azureeventhub | ||
|
||
import ( | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
|
||
"github.com/elastic/elastic-agent-libs/logp" | ||
) | ||
|
||
func TestParseMultipleMessagesSanitization(t *testing.T) { | ||
msg := "{\"records\":[{'test':\"this is some message\",\n\n\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + | ||
"{\"test\":\"this is '2nd' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}," + | ||
"{\"time\": \"2023-04-11T13:35:20Z\", \"resourceId\": \"/SUBSCRIPTIONS/REDACTED/RESOURCEGROUPS/ELASTIC-FUNCTION-TEST/PROVIDERS/MICROSOFT.WEB/SITES/REDACTED\", \"category\": \"FunctionAppLogs\", \"operationName\": \"Microsoft.Web/sites/functions/log\", \"level\": \"Informational\", \"location\": \"West Europe\", \"properties\": {'appName':'REDACTED','roleInstance':'REDACTED','message':'Elastic Test Function Trigger. ---- West Europe West Europe West Europe West Europe West Europe ','category':'Function.HttpTriggerJava.User','hostVersion':'4.16.5.5','functionInvocationId':'REDACTED','functionName':'HttpTriggerJava','hostInstanceId':'REDACTED','level':'Information','levelId':2,'processId':62}}]}" | ||
msgs := []string{ | ||
"{\"test\":\"this is some message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", | ||
"{\"test\":\"this is '2nd' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}", | ||
"{\"category\":\"FunctionAppLogs\",\"level\":\"Informational\",\"location\":\"West Europe\",\"operationName\":\"Microsoft.Web/sites/functions/log\",\"properties\":{\"appName\":\"REDACTED\",\"category\":\"Function.HttpTriggerJava.User\",\"functionInvocationId\":\"REDACTED\",\"functionName\":\"HttpTriggerJava\",\"hostInstanceId\":\"REDACTED\",\"hostVersion\":\"4.16.5.5\",\"level\":\"Information\",\"levelId\":2,\"message\":\"Elastic Test Function Trigger. ---- West Europe West Europe West Europe West Europe West Europe \",\"processId\":62,\"roleInstance\":\"REDACTED\"},\"resourceId\":\"/SUBSCRIPTIONS/REDACTED/RESOURCEGROUPS/ELASTIC-FUNCTION-TEST/PROVIDERS/MICROSOFT.WEB/SITES/REDACTED\",\"time\":\"2023-04-11T13:35:20Z\"}", | ||
} | ||
|
||
input := azureInput{ | ||
log: logp.NewLogger(fmt.Sprintf("%s test for input", inputName)), | ||
config: azureInputConfig{ | ||
SanitizeOptions: []string{"SINGLE_QUOTES", "NEW_LINES"}, | ||
}, | ||
} | ||
|
||
messages := input.parseMultipleMessages([]byte(msg)) | ||
assert.NotNil(t, messages) | ||
assert.Equal(t, len(messages), 3) | ||
for _, ms := range messages { | ||
assert.Contains(t, msgs, ms) | ||
} | ||
} | ||
|
||
func TestSanitize(t *testing.T) { | ||
jsonByte := []byte("{'test':\"this is 'some' message\n\",\n\"time\":\"2019-12-17T13:43:44.4946995Z\"}") | ||
|
||
testCases := []struct { | ||
name string | ||
opts []string | ||
expected []byte | ||
}{ | ||
{ | ||
name: "no options", | ||
opts: []string{}, | ||
expected: jsonByte, | ||
}, | ||
{ | ||
name: "NEW_LINES option", | ||
opts: []string{"NEW_LINES"}, | ||
expected: []byte("{'test':\"this is 'some' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}"), | ||
}, | ||
{ | ||
name: "SINGLE_QUOTES option", | ||
opts: []string{"SINGLE_QUOTES"}, | ||
expected: []byte("{\"test\":\"this is 'some' message\n\",\n\"time\":\"2019-12-17T13:43:44.4946995Z\"}"), | ||
}, | ||
{ | ||
name: "both options", | ||
opts: []string{"NEW_LINES", "SINGLE_QUOTES"}, | ||
expected: []byte("{\"test\":\"this is 'some' message\",\"time\":\"2019-12-17T13:43:44.4946995Z\"}"), | ||
}, | ||
} | ||
|
||
// Run test cases | ||
for _, tc := range testCases { | ||
tc := tc | ||
t.Run(tc.name, func(t *testing.T) { | ||
res := sanitize(jsonByte, tc.opts...) | ||
assert.Equal(t, tc.expected, res) | ||
}) | ||
} | ||
} |