diff --git a/.chloggen/ottl-parse-simple-xml.yaml b/.chloggen/ottl-parse-simple-xml.yaml
new file mode 100644
index 000000000000..7bd4d39a6c5c
--- /dev/null
+++ b/.chloggen/ottl-parse-simple-xml.yaml
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: pkg/ottl
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Add ParseSimplifiedXML Converter
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [35421]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: []
diff --git a/pkg/ottl/e2e/e2e_test.go b/pkg/ottl/e2e/e2e_test.go
index 97ff3ad107ef..9720a9c2306d 100644
--- a/pkg/ottl/e2e/e2e_test.go
+++ b/pkg/ottl/e2e/e2e_test.go
@@ -609,6 +609,15 @@ func Test_e2e_converters(t *testing.T) {
m.PutStr("k2", "v2__!__v2")
},
},
+ {
+ statement: `set(attributes["test"], ParseSimplifiedXML("1This is a log message!"))`,
+ want: func(tCtx ottllog.TransformContext) {
+ attr := tCtx.GetLogRecord().Attributes().PutEmptyMap("test")
+ log := attr.PutEmptyMap("Log")
+ log.PutStr("id", "1")
+ log.PutStr("Message", "This is a log message!")
+ },
+ },
{
statement: `set(attributes["test"], ParseXML("This is a log message!"))`,
want: func(tCtx ottllog.TransformContext) {
diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md
index b391cd82c4b2..eb976fcd07c2 100644
--- a/pkg/ottl/ottlfuncs/README.md
+++ b/pkg/ottl/ottlfuncs/README.md
@@ -1250,8 +1250,7 @@ An element has "extraneous text content" when it contains both text and element
#### Parsing logic
-1. The Converter will NOT error due to the presence of attributes or extraneous text content.
- However, it will omit those values from the result.
+1. Declaration elements, attributes, comments, and extraneous text content are ignored.
2. Elements which contain a value are converted into key/value pairs.
e.g. `<foo>bar</foo>` becomes `"foo": "bar"`
3. Elements which contain child elements are converted into a key/value pair where the value is a map.
@@ -1326,6 +1325,23 @@ Parse a Simplified XML document with multiple elements of the same tag:
}
```
+Parse a Simplified XML document with CDATA element:
+
+```xml
+<a>
+  <b>1</b>
+  <b><![CDATA[2]]></b>
+</a>
+```
+
+```json
+{
+ "a": {
+ "b": ["1", "2"]
+ }
+}
+```
+
### ParseXML
`ParseXML(target)`
diff --git a/pkg/ottl/ottlfuncs/func_parse_simplified_xml.go b/pkg/ottl/ottlfuncs/func_parse_simplified_xml.go
new file mode 100644
index 000000000000..9e3dd381f962
--- /dev/null
+++ b/pkg/ottl/ottlfuncs/func_parse_simplified_xml.go
@@ -0,0 +1,135 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
+
+import (
+ "context"
+ "fmt"
+
+ "github.com/antchfx/xmlquery"
+ "go.opentelemetry.io/collector/pdata/pcommon"
+
+ "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
+)
+
+type ParseSimplifiedXMLArguments[K any] struct { // arguments struct handed to ottl.NewFactory for the ParseSimplifiedXML converter
+ Target ottl.StringGetter[K] // getter for the XML document string that gets parsed
+}
+
+func NewParseSimplifiedXMLFactory[K any]() ottl.Factory[K] { // returns the ottl.Factory for the "ParseSimplifiedXML" converter
+ return ottl.NewFactory("ParseSimplifiedXML", &ParseSimplifiedXMLArguments[K]{}, createParseSimplifiedXMLFunction[K]) // passes the name, an empty arguments struct, and the function constructor
+}
+
+// createParseSimplifiedXMLFunction checks that oArgs is a *ParseSimplifiedXMLArguments[K] and returns the expression function built from its Target.
+func createParseSimplifiedXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
+	args, ok := oArgs.(*ParseSimplifiedXMLArguments[K])
+	if !ok {
+		// The message names the expected arguments type so a mismatch is easy to diagnose.
+		return nil, fmt.Errorf("ParseSimplifiedXML args must be of type *ParseSimplifiedXMLArguments[K]")
+	}
+	return parseSimplifiedXML(args.Target), nil
+}
+
+// parseSimplifiedXML returns a XML formatted string that is a result of removing all matching nodes from the target XML.
+// This currently supports removal of elements, attributes, text values, comments, and CharData.
+func parseSimplifiedXML[K any](target ottl.StringGetter[K]) ottl.ExprFunc[K] {
+ return func(ctx context.Context, tCtx K) (any, error) {
+ var doc *xmlquery.Node
+ if targetVal, err := target.Get(ctx, tCtx); err != nil {
+ return nil, err
+ } else if doc, err = parseNodesXML(targetVal); err != nil {
+ return nil, err
+ }
+
+ docMap := pcommon.NewMap()
+ parseElement(doc, &docMap)
+ return docMap, nil
+ }
+}
+
+// parseElement converts the element children of parent into entries of parentMap.
+//
+// A tag that occurs once among the children becomes a string entry when the element
+// has leaf text (see leafValueFromElement) and a nested map otherwise. A tag that occurs
+// more than once becomes a slice that receives one string or map per occurrence; the
+// slice is created on the first occurrence that has child nodes, before it is known
+// whether any value will be added. Non-element children and elements without child nodes
+// add nothing, although childless elements still count toward how often a tag occurs.
+func parseElement(parent *xmlquery.Node, parentMap *pcommon.Map) {
+	// First pass: tally each element tag so the second pass knows which tags need a slice.
+	tagCounts := make(map[string]int)
+	for n := parent.FirstChild; n != nil; n = n.NextSibling {
+		if n.Type == xmlquery.ElementNode {
+			tagCounts[n.Data]++
+		}
+	}
+	if len(tagCounts) == 0 {
+		return
+	}
+
+	// Second pass: emit every element with content as a string, a nested map, or a slice member.
+	for n := parent.FirstChild; n != nil; n = n.NextSibling {
+		if n.Type != xmlquery.ElementNode || n.FirstChild == nil {
+			continue
+		}
+
+		text := leafValueFromElement(n)
+
+		if tagCounts[n.Data] <= 1 {
+			// Unique tag: store the text directly when there is any.
+			if text != "" {
+				parentMap.PutStr(n.Data, text)
+				continue
+			}
+
+			// No text: recurse into a scratch map and keep it only if it gained entries.
+			nested := pcommon.NewMap()
+			parseElement(n, &nested)
+			if nested.Len() > 0 {
+				nested.CopyTo(parentMap.PutEmptyMap(n.Data))
+			}
+			continue
+		}
+
+		// Repeated tag: reuse the slice created by an earlier sibling, or create it now.
+		var siblings pcommon.Slice
+		if existing, found := parentMap.Get(n.Data); found {
+			siblings = existing.Slice()
+		} else {
+			siblings = parentMap.PutEmptySlice(n.Data)
+		}
+
+		// Text content is appended to the slice as a string.
+		if text != "" {
+			siblings.AppendEmpty().SetStr(text)
+			continue
+		}
+
+		// Otherwise recurse into a scratch map first, so that an element yielding no
+		// entries does not add an empty map to the slice.
+		nested := pcommon.NewMap()
+		parseElement(n, &nested)
+		if nested.Len() > 0 {
+			nested.CopyTo(siblings.AppendEmpty().SetEmptyMap())
+		}
+	}
+}
+
+// leafValueFromElement returns the Data of the first text or CDATA child of node.
+// It returns "" if node has any child element (extraneous text is ignored) or no such child.
+func leafValueFromElement(node *xmlquery.Node) string {
+	text, found := "", false
+	for c := node.FirstChild; c != nil; c = c.NextSibling {
+		switch c.Type {
+		case xmlquery.ElementNode:
+			// Any child element disqualifies node as a leaf, wherever it appears.
+			return ""
+		case xmlquery.TextNode, xmlquery.CharDataNode:
+			if !found {
+				text, found = c.Data, true
+			}
+		}
+	}
+	return text
+}
diff --git a/pkg/ottl/ottlfuncs/func_parse_simplified_xml_test.go b/pkg/ottl/ottlfuncs/func_parse_simplified_xml_test.go
new file mode 100644
index 000000000000..f95a302cff10
--- /dev/null
+++ b/pkg/ottl/ottlfuncs/func_parse_simplified_xml_test.go
@@ -0,0 +1,278 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
+
+import (
+ "context"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "go.opentelemetry.io/collector/pdata/pcommon"
+
+ "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
+)
+
+func Test_ParseSimplifiedXML(t *testing.T) {
+ tests := []struct {
+ name string // subtest name passed to t.Run
+ document string // input string returned by the target getter
+ want pcommon.Map // expected result of the expression function
+ }{
+ {
+ name: "single leaf",
+ document: `b`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ m.PutStr("a", "b")
+ return m
+ }(),
+ },
+ {
+ name: "double leaf",
+ document: `bc`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ b := m.PutEmptySlice("a")
+ b.AppendEmpty().SetStr("b")
+ b.AppendEmpty().SetStr("c")
+ return m
+ }(),
+ },
+ {
+ name: "nested maps",
+ document: `1`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("b", "1")
+ return m
+ }(),
+ },
+ {
+ name: "mixed slice",
+ document: `13`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptySlice("a")
+ a.AppendEmpty().SetStr("1")
+ a.AppendEmpty().SetStr("2")
+ b := a.AppendEmpty().SetEmptyMap()
+ b.PutStr("b", "3")
+ return m
+ }(),
+ },
+ {
+ name: "char data leaf",
+ document: ``,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ m.PutStr("a", "b")
+ return m
+ }(),
+ },
+ {
+ name: "ignore attributes",
+ document: `c`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("b", "c")
+ return m
+ }(),
+ },
+ {
+ name: "ignore declaration",
+ document: `b`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ m.PutStr("a", "b")
+ return m
+ }(),
+ },
+ {
+ name: "ignore comments",
+ document: `b`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ m.PutStr("a", "b")
+ return m
+ }(),
+ },
+ {
+ name: "ignore empty other than comment",
+ document: `2`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("b", "2")
+ return m
+ }(),
+ },
+ {
+ name: "empty other than comment forces slice",
+ document: `24`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("b", "2")
+ c := a.PutEmptySlice("c")
+ c.AppendEmpty().SetStr("4")
+ return m
+ }(),
+ },
+ {
+ name: "ignore extraneous text",
+ document: `extra13extra2`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("b", "3")
+ return m
+ }(),
+ },
+ {
+ name: "ignore extraneous CDATA",
+ document: `3`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("b", "3")
+ return m
+ }(),
+ },
+ {
+ name: "ignore single empty element",
+ document: `3`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("b", "3")
+ return m
+ }(),
+ },
+ {
+ name: "empty element cascade",
+ document: `2`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ a.PutStr("d", "2")
+ return m
+ }(),
+ },
+ {
+ name: "empty element forces slice",
+ document: `3`,
+ want: func() pcommon.Map {
+ m := pcommon.NewMap()
+ a := m.PutEmptyMap("a")
+ b := a.PutEmptySlice("b")
+ b.AppendEmpty().SetStr("3")
+ return m
+ }(),
+ },
+ {
+ // Intended usage: ParseSimplifiedXML(ElementizeAttributesXML(ElementizeValuesXML("..."))); presumably the document below is the already-elementized input - confirm.
+ name: "Simplified WEL",
+ document: `
+ http://schemas.microsoft.com/win/2004/08/events/event
+
+ Microsoft-Windows-Security-Auditing{54849625-5478-4994-a5ba-3e3b0328c30d}
+ 4625
+ 0
+ 0
+ 12544
+ 0
+ 0x8010000000000000
+ 2024-09-04T08:38:09.7477579Z
+ 1361885
+ {b67ee0c2-a671-0001-5f6b-82e8c1eeda01}
+ 6562276
+ Security
+ samuel-vahala
+
+
+
+ SubjectUserSidS-1-0-0
+ TargetUserSidS-1-0-0
+ Status0xc000006d
+ WorkstationNameD-508
+
+`,
+ want: func() pcommon.Map {
+ result := pcommon.NewMap()
+ event := result.PutEmptyMap("Event")
+ event.PutStr("xmlns", "http://schemas.microsoft.com/win/2004/08/events/event")
+ system := event.PutEmptyMap("System")
+ provider := system.PutEmptyMap("Provider")
+ provider.PutStr("Name", "Microsoft-Windows-Security-Auditing")
+ provider.PutStr("Guid", "{54849625-5478-4994-a5ba-3e3b0328c30d}")
+ system.PutStr("EventID", "4625")
+ system.PutStr("Version", "0")
+ system.PutStr("Level", "0")
+ system.PutStr("Task", "12544")
+ system.PutStr("Opcode", "0")
+ system.PutStr("Keywords", "0x8010000000000000")
+ timeCreated := system.PutEmptyMap("TimeCreated")
+ timeCreated.PutStr("SystemTime", "2024-09-04T08:38:09.7477579Z")
+ system.PutStr("EventRecordID", "1361885")
+ correlation := system.PutEmptyMap("Correlation")
+ correlation.PutStr("ActivityID", "{b67ee0c2-a671-0001-5f6b-82e8c1eeda01}")
+ execution := system.PutEmptyMap("Execution")
+ execution.PutStr("ProcessID", "656")
+ execution.PutStr("ThreadID", "2276")
+ system.PutStr("Channel", "Security")
+ system.PutStr("Computer", "samuel-vahala")
+ eventData := event.PutEmptyMap("EventData")
+ data := eventData.PutEmptySlice("Data")
+ data1 := data.AppendEmpty().SetEmptyMap()
+ data1.PutStr("Name", "SubjectUserSid")
+ data1.PutStr("value", "S-1-0-0")
+ data2 := data.AppendEmpty().SetEmptyMap()
+ data2.PutStr("Name", "TargetUserSid")
+ data2.PutStr("value", "S-1-0-0")
+ data3 := data.AppendEmpty().SetEmptyMap()
+ data3.PutStr("Name", "Status")
+ data3.PutStr("value", "0xc000006d")
+ data4 := data.AppendEmpty().SetEmptyMap()
+ data4.PutStr("Name", "WorkstationName")
+ data4.PutStr("value", "D-508")
+ return result
+ }(),
+ },
+ }
+ for _, tt := range tests { // one subtest per table entry
+ t.Run(tt.name, func(t *testing.T) {
+ target := ottl.StandardStringGetter[any]{
+ Getter: func(_ context.Context, _ any) (any, error) {
+ return tt.document, nil
+ },
+ }
+ exprFunc := parseSimplifiedXML(target)
+ result, err := exprFunc(context.Background(), nil)
+ assert.NoError(t, err)
+ assert.Equal(t, tt.want, result)
+ })
+ }
+}
+
+// TestCreateParseSimplifiedXMLFunc checks factory creation: a bad argument type is rejected up front,
+// while invalid XML is reported only once the created function is executed.
+func TestCreateParseSimplifiedXMLFunc(t *testing.T) {
+	factory := NewParseSimplifiedXMLFactory[any]()
+
+	// nil is not a *ParseSimplifiedXMLArguments, so creation must fail and return no function.
+	fn, err := factory.CreateFunction(ottl.FunctionContext{}, nil)
+	assert.Error(t, err)
+	assert.Nil(t, fn)
+
+	// Creation succeeds with an invalid-XML target; the error appears when the function runs.
+	badArgs := &ParseSimplifiedXMLArguments[any]{Target: invalidXMLGetter()}
+	fn, err = factory.CreateFunction(ottl.FunctionContext{}, badArgs)
+	assert.NoError(t, err)
+	assert.NotNil(t, fn)
+
+	_, err = fn(context.Background(), nil)
+	assert.Error(t, err)
+}
diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go
index ca165bf1b999..39a71d48ad0a 100644
--- a/pkg/ottl/ottlfuncs/functions.go
+++ b/pkg/ottl/ottlfuncs/functions.go
@@ -69,6 +69,7 @@ func converters[K any]() []ottl.Factory[K] {
NewParseCSVFactory[K](),
NewParseJSONFactory[K](),
NewParseKeyValueFactory[K](),
+ NewParseSimplifiedXMLFactory[K](),
NewParseXMLFactory[K](),
NewRemoveXMLFactory[K](),
NewSecondsFactory[K](),