Skip to content

Commit

Permalink
[pkg/ottl] Add ConvertTextToElementsXML Converter (#35364)
Browse files Browse the repository at this point in the history
This adds a converter called `ConvertTextToElementsXML`. This serves as
one of the granular transformations described in
#35281
which will allow users to migrate any arbitrary XML document into a
JSON-equivalent state.

Also see #35328
  • Loading branch information
djaglowski authored Oct 10, 2024
1 parent d7e5154 commit db7020c
Show file tree
Hide file tree
Showing 6 changed files with 305 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .chloggen/wrap-hanging-values-xml.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/ottl

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add ConvertTextToElementsXML Converter

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35364]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
12 changes: 12 additions & 0 deletions pkg/ottl/e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,18 @@ func Test_e2e_converters(t *testing.T) {
tCtx.GetLogRecord().Attributes().PutStr("test", `<Log><Message>This is a log message!</Message><id>1</id></Log>`)
},
},
{
statement: `set(body, ConvertTextToElementsXML("<a><b/>foo</a>"))`,
want: func(tCtx ottllog.TransformContext) {
tCtx.GetLogRecord().Body().SetStr("<a><b></b><value>foo</value></a>")
},
},
{
statement: `set(body, ConvertTextToElementsXML("<a><b/>foo</a><c><b/>bar</c>", "/a", "custom"))`,
want: func(tCtx ottllog.TransformContext) {
tCtx.GetLogRecord().Body().SetStr("<a><b></b><custom>foo</custom></a><c><b></b>bar</c>")
},
},
{
statement: `set(attributes["test"], Double(1.0))`,
want: func(tCtx ottllog.TransformContext) {
Expand Down
31 changes: 31 additions & 0 deletions pkg/ottl/ottlfuncs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ Available Converters:
- [Concat](#concat)
- [ConvertCase](#convertcase)
- [ConvertAttributesToElementsXML](#convertattributestoelementsxml)
- [ConvertTextToElementsXML](#converttexttoelementsxml)
- [Day](#day)
- [Double](#double)
- [Duration](#duration)
Expand Down Expand Up @@ -572,6 +573,36 @@ Convert only attributes within "Record" elements

- `ConvertAttributesToElementsXML(body, "/Log/Record")`

### ConvertTextToElementsXML

`ConvertTextToElementsXML(target, Optional[xpath], Optional[elementName])`

The `ConvertTextToElementsXML` Converter returns an edited version of an XML string where all text belongs to a dedicated element.

`target` is a Getter that returns a string. This string should be in XML format.
If `target` is nil, is not a string, or cannot be parsed as XML, `ConvertTextToElementsXML` will return an error.

`xpath` (optional) is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that
selects one or more elements. Content will only be converted within the result(s) of the xpath. The default is `/`.

`elementName` (optional) is a string that is used for any element tags that are created to wrap content.
The default is `"value"`.

For example, `<a><b>foo</b>bar</a>` will be converted to `<a><b>foo</b><value>bar</value></a>`.

Examples:

Ensure all text content in a document is wrapped in a dedicated element

- `ConvertTextToElementsXML(body)`

Use a custom name for any new elements

- `ConvertTextToElementsXML(body, elementName = "custom")`

Convert only part of the document

- `ConvertTextToElementsXML(body, "/some/part", "value")`

### Day

Expand Down
107 changes: 107 additions & 0 deletions pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"fmt"

"github.com/antchfx/xmlquery"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// ConvertTextToElementsXMLArguments holds the arguments accepted by the ConvertTextToElementsXML Converter.
type ConvertTextToElementsXMLArguments[K any] struct {
// Target supplies the XML document, as a string, that will be converted.
Target ottl.StringGetter[K]
// XPath optionally selects the nodes within which text is converted.
// An empty value is replaced with "/" when the function is created.
XPath ottl.Optional[string]
// ElementName optionally sets the tag name of the elements created to wrap text.
// An empty value is replaced with "value" when the function is created.
ElementName ottl.Optional[string]
}

// NewConvertTextToElementsXMLFactory returns the ottl.Factory for the Converter named
// "ConvertTextToElementsXML", pairing its argument struct with its function constructor.
func NewConvertTextToElementsXMLFactory[K any]() ottl.Factory[K] {
	const converterName = "ConvertTextToElementsXML"
	return ottl.NewFactory(
		converterName,
		&ConvertTextToElementsXMLArguments[K]{},
		createConvertTextToElementsXMLFunction[K],
	)
}

// createConvertTextToElementsXMLFunction builds the ConvertTextToElementsXML expression from its
// parsed arguments.
//
// When XPath is empty it falls back to "/"; otherwise the expression is checked with validateXPath so
// that a bad XPath is reported at creation time rather than on every execution. When ElementName is
// empty it falls back to "value".
//
// It returns an error if oArgs is not a *ConvertTextToElementsXMLArguments[K] or if the XPath fails
// validation.
func createConvertTextToElementsXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
	args, ok := oArgs.(*ConvertTextToElementsXMLArguments[K])
	if !ok {
		return nil, fmt.Errorf("ConvertTextToElementsXML args must be of type *ConvertTextToElementsXMLArguments[K]")
	}

	xPath := args.XPath.Get()
	if xPath == "" {
		xPath = "/"
	} else if err := validateXPath(xPath); err != nil {
		return nil, err
	}

	elementName := args.ElementName.Get()
	if elementName == "" {
		elementName = "value"
	}

	return convertTextToElementsXML(args.Target, xPath, elementName), nil
}

// convertTextToElementsXML returns an expression that reads the target string, parses it as XML,
// wraps hanging text inside every node selected by xPath in an element named elementName, and
// returns the re-serialized document. Errors from reading or parsing the target are returned as-is.
func convertTextToElementsXML[K any](target ottl.StringGetter[K], xPath string, elementName string) ottl.ExprFunc[K] {
	return func(ctx context.Context, tCtx K) (any, error) {
		raw, err := target.Get(ctx, tCtx)
		if err != nil {
			return nil, err
		}

		root, err := parseNodesXML(raw)
		if err != nil {
			return nil, err
		}

		matches := xmlquery.Find(root, xPath)
		for i := range matches {
			convertTextToElementsForNode(matches[i], elementName)
		}
		return root.OutputXML(false), nil
	}
}

// convertTextToElementsForNode recursively rewrites the subtree rooted at parent so that any text
// node which has element or text siblings is wrapped in an element named elementName. A node whose
// only text/element child is a single text node is left untouched.
//
// Only element and document nodes are processed; any other node type is ignored.
func convertTextToElementsForNode(parent *xmlquery.Node, elementName string) {
	if parent.Type != xmlquery.ElementNode && parent.Type != xmlquery.DocumentNode {
		return
	}
	if parent.FirstChild == nil {
		return
	}

	// Recurse into child elements first, tallying text and element children along the way.
	var textCount, elementCount int
	for child := parent.FirstChild; child != nil; child = child.NextSibling {
		switch child.Type {
		case xmlquery.ElementNode:
			convertTextToElementsForNode(child, elementName)
			elementCount++
		case xmlquery.TextNode:
			textCount++
		}
	}

	// Nothing to wrap when there is no text at all, or when a lone text node is the only
	// text/element child of parent.
	if textCount == 0 || elementCount+textCount <= 1 {
		return
	}

	// At this point there are either multiple text nodes or a mix of text and elements,
	// so every text node must be wrapped.
	for child := parent.FirstChild; child != nil; child = child.NextSibling {
		if child.Type != xmlquery.TextNode {
			continue
		}

		// Move the text into a new node that hangs off the existing one. Parent is set so the
		// tree stays consistent for any parent-based navigation.
		wrapped := &xmlquery.Node{
			Type:   xmlquery.TextNode,
			Data:   child.Data,
			Parent: child,
		}

		// Turn the existing node into the wrapper element in place, which keeps its sibling
		// links (and therefore this iteration) intact.
		child.Type = xmlquery.ElementNode
		child.Data = elementName
		child.FirstChild = wrapped
		child.LastChild = wrapped
	}
}
127 changes: 127 additions & 0 deletions pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"

import (
"context"
"testing"

"github.com/stretchr/testify/assert"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
)

// Test_ConvertTextToElementsXML runs the Converter against a table of documents, covering inputs
// that must pass through unchanged, hanging text at one or several levels, a custom element name,
// and conversion restricted by an XPath.
func Test_ConvertTextToElementsXML(t *testing.T) {
	tests := []struct {
		name        string
		document    string
		xPath       string
		elementName string
		want        string
	}{
		{
			name:     "nop",
			document: `<a><b/></a>`,
			want:     `<a><b></b></a>`,
		},
		{
			name:     "nop declaration",
			document: `<?xml version="1.0" encoding="UTF-8"?><a><b/></a>`,
			want:     `<?xml version="1.0" encoding="UTF-8"?><a><b></b></a>`,
		},
		{
			name:     "nop attributes",
			document: `<a foo="bar" hello="world"/>`,
			want:     `<a foo="bar" hello="world"></a>`,
		},
		{
			name:     "nop wrapped text",
			document: `<a>hello world</a>`,
			want:     `<a>hello world</a>`,
		},
		{
			name:     "simple hanging",
			document: `<a><b/>foo</a>`,
			want:     `<a><b></b><value>foo</value></a>`,
		},
		{
			name:        "simple hanging with tag name",
			elementName: "bar",
			document:    `<a><b/>foo</a>`,
			want:        `<a><b></b><bar>foo</bar></a>`,
		},
		{
			name:     "multiple hanging same level",
			document: `<a>foo<b/>bar</a>`,
			want:     `<a><value>foo</value><b></b><value>bar</value></a>`,
		},
		{
			name:        "multiple hanging multiple levels",
			document:    `<a>foo<b/>bar<c/>1<d>not</d>2<e><f/><f/></e></a>`,
			elementName: "v",
			want:        `<a><v>foo</v><b></b><v>bar</v><c></c><v>1</v><d>not</d><v>2</v><e><f></f><f></f></e></a>`,
		},
		{
			name:     "xpath select some",
			document: `<a><b><c/>foo</b><d><c/>bar</d><b><c/>baz</b></a>`,
			xPath:    "/a/b",
			want:     `<a><b><c></c><value>foo</value></b><d><c></c>bar</d><b><c></c><value>baz</value></b></a>`,
		},
		{
			name:        "xpath with element name",
			document:    `<a><b><c/>foo</b><d><c/>bar</d><b><c/>baz</b></a>`,
			xPath:       "/a/b",
			elementName: "V",
			want:        `<a><b><c></c><V>foo</V></b><d><c></c>bar</d><b><c></c><V>baz</V></b></a>`,
		},
	}
	factory := NewConvertTextToElementsXMLFactory[any]()
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			args := &ConvertTextToElementsXMLArguments[any]{
				Target: ottl.StandardStringGetter[any]{
					Getter: func(_ context.Context, _ any) (any, error) {
						return tt.document, nil
					},
				},
				XPath:       ottl.NewTestingOptional(tt.xPath),
				ElementName: ottl.NewTestingOptional(tt.elementName),
			}
			exprFunc, err := factory.CreateFunction(ottl.FunctionContext{}, args)
			// Stop here on a creation failure: calling a nil exprFunc below would panic and
			// hide the real error.
			if !assert.NoError(t, err) {
				return
			}

			result, err := exprFunc(context.Background(), nil)
			assert.NoError(t, err)
			assert.Equal(t, tt.want, result)
		})
	}
}

// TestCreateConvertTextToElementsXMLFunc checks the failure modes of the factory: a wrong argument
// type and an invalid XPath must fail at creation, while invalid XML must fail only at execution.
func TestCreateConvertTextToElementsXMLFunc(t *testing.T) {
	factory := NewConvertTextToElementsXMLFactory[any]()
	fCtx := ottl.FunctionContext{}

	// Invalid arg type
	exprFunc, err := factory.CreateFunction(fCtx, nil)
	assert.Error(t, err)
	assert.Nil(t, exprFunc)

	// Invalid XPath should error on function creation
	exprFunc, err = factory.CreateFunction(
		fCtx, &ConvertTextToElementsXMLArguments[any]{
			XPath: ottl.NewTestingOptional("!"),
		})
	assert.Error(t, err)
	assert.Nil(t, exprFunc)

	// Invalid XML should error on function execution
	exprFunc, err = factory.CreateFunction(
		fCtx, &ConvertTextToElementsXMLArguments[any]{
			Target: invalidXMLGetter(),
		})
	assert.NoError(t, err)
	// Stop here if no function was produced: calling a nil exprFunc would panic instead of
	// reporting a test failure.
	if !assert.NotNil(t, exprFunc) {
		return
	}
	_, err = exprFunc(context.Background(), nil)
	assert.Error(t, err)
}
1 change: 1 addition & 0 deletions pkg/ottl/ottlfuncs/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ func converters[K any]() []ottl.Factory[K] {
NewConcatFactory[K](),
NewConvertCaseFactory[K](),
NewConvertAttributesToElementsXMLFactory[K](),
NewConvertTextToElementsXMLFactory[K](),
NewDayFactory[K](),
NewDoubleFactory[K](),
NewDurationFactory[K](),
Expand Down

0 comments on commit db7020c

Please sign in to comment.