From db7020c95b94a4512eb0a1ca851f7b737f43ddd6 Mon Sep 17 00:00:00 2001 From: Daniel Jaglowski Date: Thu, 10 Oct 2024 13:36:18 -0400 Subject: [PATCH] [pkg/ottl] Add ConvertTextToElementsXML Converter (#35364) This adds a converter called `ConvertTextToElementsXML `. This serves as one of the granular transformations described in https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/35281 which will allow users to migrate any arbitrary XML document into a JSON-equivalent state. Also see #35328 --- .chloggen/wrap-hanging-values-xml.yaml | 27 ++++ pkg/ottl/e2e/e2e_test.go | 12 ++ pkg/ottl/ottlfuncs/README.md | 31 +++++ .../func_convert_text_to_elements_xml.go | 107 +++++++++++++++ .../func_convert_text_to_elements_xml_test.go | 127 ++++++++++++++++++ pkg/ottl/ottlfuncs/functions.go | 1 + 6 files changed, 305 insertions(+) create mode 100644 .chloggen/wrap-hanging-values-xml.yaml create mode 100644 pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml.go create mode 100644 pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml_test.go diff --git a/.chloggen/wrap-hanging-values-xml.yaml b/.chloggen/wrap-hanging-values-xml.yaml new file mode 100644 index 000000000000..3b0c86fdf865 --- /dev/null +++ b/.chloggen/wrap-hanging-values-xml.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: pkg/ottl + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add ConvertTextToElements Converter + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [35364] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/pkg/ottl/e2e/e2e_test.go b/pkg/ottl/e2e/e2e_test.go index f7c3fe7324fe..dcf89bd0c5e2 100644 --- a/pkg/ottl/e2e/e2e_test.go +++ b/pkg/ottl/e2e/e2e_test.go @@ -353,6 +353,18 @@ func Test_e2e_converters(t *testing.T) { tCtx.GetLogRecord().Attributes().PutStr("test", `This is a log message!1`) }, }, + { + statement: `set(body, ConvertTextToElementsXML("foo"))`, + want: func(tCtx ottllog.TransformContext) { + tCtx.GetLogRecord().Body().SetStr("foo") + }, + }, + { + statement: `set(body, ConvertTextToElementsXML("foobar", "/a", "custom"))`, + want: func(tCtx ottllog.TransformContext) { + tCtx.GetLogRecord().Body().SetStr("foobar") + }, + }, { statement: `set(attributes["test"], Double(1.0))`, want: func(tCtx ottllog.TransformContext) { diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 11a61d101691..6df4154bd9a9 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -414,6 +414,7 @@ Available Converters: - [Concat](#concat) - [ConvertCase](#convertcase) - [ConvertAttributesToElementsXML](#convertattributestoelementsxml) +- [ConvertTextToElementsXML](#converttexttoelementsxml) - [Day](#day) - [Double](#double) - [Duration](#duration) @@ -572,6 +573,36 @@ Convert only attributes within "Record" elements - `ConvertAttributesToElementsXML(body, "/Log/Record")` +### ConvertTextToElementsXML + +`ConvertTextToElementsXML(target, Optional[xpath], Optional[elementName])` + +The `ConvertTextToElementsXML` Converter returns an edited version of an XML string where all text belongs to a dedicated element. + +`target` is a Getter that returns a string. This string should be in XML format. +If `target` is not a string, nil, or cannot be parsed as XML, `ConvertTextToElementsXML` will return an error. + +`xpath` (optional) is a string that specifies an [XPath](https://www.w3.org/TR/1999/REC-xpath-19991116/) expression that +selects one or more elements. Content will only be converted within the result(s) of the xpath. The default is `/`. + +`elementName` (optional) is a string that is used for any element tags that are created to wrap content. +The default is `"value"`. + +For example, `foobar` will be converted to `foobar`. + +Examples: + +Ensure all text content in a document is wrapped in a dedicated element + +- `ConvertTextToElementsXML(body)` + +Use a custom name for any new elements + +- `ConvertTextToElementsXML(body, elementName = "custom")` + +Convert only part of the document + +- `ConvertTextToElementsXML(body, "/some/part/", "value")` ### Day diff --git a/pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml.go b/pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml.go new file mode 100644 index 000000000000..a0fb108c4069 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml.go @@ -0,0 +1,107 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + +import ( + "context" + "fmt" + + "github.com/antchfx/xmlquery" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" +) + +type ConvertTextToElementsXMLArguments[K any] struct { + Target ottl.StringGetter[K] + XPath ottl.Optional[string] + ElementName ottl.Optional[string] +} + +func NewConvertTextToElementsXMLFactory[K any]() ottl.Factory[K] { + return ottl.NewFactory("ConvertTextToElementsXML", &ConvertTextToElementsXMLArguments[K]{}, createConvertTextToElementsXMLFunction[K]) +} + +func createConvertTextToElementsXMLFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) { + args, ok := oArgs.(*ConvertTextToElementsXMLArguments[K]) + + if !ok { + return nil, fmt.Errorf("ConvertTextToElementsXML args must be of type *ConvertTextToElementsXMLAguments[K]") + } + + xPath := args.XPath.Get() + if xPath == "" { + xPath = "/" + } else if err := validateXPath(xPath); err != nil { + return nil, err + } + + elementName := args.ElementName.Get() + if elementName == "" { + elementName = "value" + } + + return convertTextToElementsXML(args.Target, xPath, elementName), nil +} + +// convertTextToElementsXML returns a string that is a result of wrapping any extraneous text nodes with a dedicated element. +func convertTextToElementsXML[K any](target ottl.StringGetter[K], xPath string, elementName string) ottl.ExprFunc[K] { + return func(ctx context.Context, tCtx K) (any, error) { + var doc *xmlquery.Node + if targetVal, err := target.Get(ctx, tCtx); err != nil { + return nil, err + } else if doc, err = parseNodesXML(targetVal); err != nil { + return nil, err + } + for _, n := range xmlquery.Find(doc, xPath) { + convertTextToElementsForNode(n, elementName) + } + return doc.OutputXML(false), nil + } +} + +func convertTextToElementsForNode(parent *xmlquery.Node, elementName string) { + switch parent.Type { + case xmlquery.ElementNode: // ok + case xmlquery.DocumentNode: // ok + default: + return + } + + if parent.FirstChild == nil { + return + } + + // Convert any child nodes and count text and element nodes. + var valueCount, elementCount int + for child := parent.FirstChild; child != nil; child = child.NextSibling { + if child.Type == xmlquery.ElementNode { + convertTextToElementsForNode(child, elementName) + elementCount++ + } else if child.Type == xmlquery.TextNode { + valueCount++ + } + } + + // If there are no values to wrap, or if there is exactly one value OR one element, this node is all set. + if valueCount == 0 || elementCount+valueCount <= 1 { + return + } + + // At this point, we either have multiple values, or a mix of values and elements. + // Either way, we need to wrap the values. + for child := parent.FirstChild; child != nil; child = child.NextSibling { + if child.Type != xmlquery.TextNode { + continue + } + newTextNode := &xmlquery.Node{ + Type: xmlquery.TextNode, + Data: child.Data, + } + // Change this node into an element + child.Type = xmlquery.ElementNode + child.Data = elementName + child.FirstChild = newTextNode + child.LastChild = newTextNode + } +} diff --git a/pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml_test.go b/pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml_test.go new file mode 100644 index 000000000000..7e3b10bcdf12 --- /dev/null +++ b/pkg/ottl/ottlfuncs/func_convert_text_to_elements_xml_test.go @@ -0,0 +1,127 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" +) + +func Test_ConvertTextToElementsXML(t *testing.T) { + tests := []struct { + name string + document string + xPath string + elementName string + want string + }{ + { + name: "nop", + document: ``, + want: ``, + }, + { + name: "nop declaration", + document: ``, + want: ``, + }, + { + name: "nop attributes", + document: ``, + want: ``, + }, + { + name: "nop wrapped text", + document: `hello world`, + want: `hello world`, + }, + { + name: "simple hanging", + document: `foo`, + want: `foo`, + }, + { + name: "simple hanging with tag name", + elementName: "bar", + document: `foo`, + want: `foo`, + }, + { + name: "multiple hanging same level", + document: `foobar`, + want: `foobar`, + }, + { + name: "multiple hanging multiple levels", + document: `foobar1not2`, + elementName: "v", + want: `foobar1not2`, + }, + { + name: "xpath select some", + document: `foobarbaz`, + xPath: "/a/b", + want: `foobarbaz`, + }, + { + name: "xpath with element name", + document: `foobarbaz`, + xPath: "/a/b", + elementName: "V", + want: `foobarbaz`, + }, + } + factory := NewConvertTextToElementsXMLFactory[any]() + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + args := &ConvertTextToElementsXMLArguments[any]{ + Target: ottl.StandardStringGetter[any]{ + Getter: func(_ context.Context, _ any) (any, error) { + return tt.document, nil + }, + }, + XPath: ottl.NewTestingOptional(tt.xPath), + ElementName: ottl.NewTestingOptional(tt.elementName), + } + exprFunc, err := factory.CreateFunction(ottl.FunctionContext{}, args) + assert.NoError(t, err) + + result, err := exprFunc(context.Background(), nil) + assert.NoError(t, err) + assert.Equal(t, tt.want, result) + }) + } +} + +func TestCreateConvertTextToElementsXMLFunc(t *testing.T) { + factory := NewConvertTextToElementsXMLFactory[any]() + fCtx := ottl.FunctionContext{} + + // Invalid arg type + exprFunc, err := factory.CreateFunction(fCtx, nil) + assert.Error(t, err) + assert.Nil(t, exprFunc) + + // Invalid XPath should error on function creation + exprFunc, err = factory.CreateFunction( + fCtx, &ConvertTextToElementsXMLArguments[any]{ + XPath: ottl.NewTestingOptional("!"), + }) + assert.Error(t, err) + assert.Nil(t, exprFunc) + + // Invalid XML should error on function execution + exprFunc, err = factory.CreateFunction( + fCtx, &ConvertTextToElementsXMLArguments[any]{ + Target: invalidXMLGetter(), + }) + assert.NoError(t, err) + assert.NotNil(t, exprFunc) + _, err = exprFunc(context.Background(), nil) + assert.Error(t, err) +} diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go index b30c950ed947..1e5086be59a8 100644 --- a/pkg/ottl/ottlfuncs/functions.go +++ b/pkg/ottl/ottlfuncs/functions.go @@ -41,6 +41,7 @@ func converters[K any]() []ottl.Factory[K] { NewConcatFactory[K](), NewConvertCaseFactory[K](), NewConvertAttributesToElementsXMLFactory[K](), + NewConvertTextToElementsXMLFactory[K](), NewDayFactory[K](), NewDoubleFactory[K](), NewDurationFactory[K](),