From 30a2d27974e3b507aa21f57438c1de22c3bdc558 Mon Sep 17 00:00:00 2001 From: Joshua Li Date: Fri, 21 Oct 2022 17:29:18 -0700 Subject: [PATCH] Fix result order of parse with other run time fields (#934) Signed-off-by: Joshua Li --- .../sql/planner/physical/ProjectOperator.java | 32 +++++---- .../planner/physical/ProjectOperatorTest.java | 49 ++++++++++++- .../opensearch/sql/ppl/ParseCommandIT.java | 69 +++++++++++++++++++ 3 files changed, 133 insertions(+), 17 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java diff --git a/core/src/main/java/org/opensearch/sql/planner/physical/ProjectOperator.java b/core/src/main/java/org/opensearch/sql/planner/physical/ProjectOperator.java index 6e21c969db..496e4e6ddb 100644 --- a/core/src/main/java/org/opensearch/sql/planner/physical/ProjectOperator.java +++ b/core/src/main/java/org/opensearch/sql/planner/physical/ProjectOperator.java @@ -10,6 +10,7 @@ import com.google.common.collect.ImmutableMap.Builder; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.stream.Collectors; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -55,32 +56,33 @@ public boolean hasNext() { public ExprValue next() { ExprValue inputValue = input.next(); ImmutableMap.Builder mapBuilder = new Builder<>(); + + // ParseExpression will always override NamedExpression when identifier conflicts + // TODO needs a better implementation, see https://github.com/opensearch-project/sql/issues/458 for (NamedExpression expr : projectList) { ExprValue exprValue = expr.valueOf(inputValue.bindingTuples()); - if (namedParseExpressions.stream() - .noneMatch(parsed -> parsed.getNameOrAlias().equals(expr.getNameOrAlias()))) { + Optional optionalParseExpression = namedParseExpressions.stream() + .filter(parseExpr -> parseExpr.getNameOrAlias().equals(expr.getNameOrAlias())) + .findFirst(); + if (optionalParseExpression.isEmpty()) { mapBuilder.put(expr.getNameOrAlias(), exprValue); - } - } - // ParseExpression will always override NamedExpression when identifier conflicts - // TODO needs a better implementation, see https://github.com/opensearch-project/sql/issues/458 - for (NamedExpression expr : namedParseExpressions) { - if (projectList.stream() - .noneMatch(field -> field.getNameOrAlias().equals(expr.getNameOrAlias()))) { continue; } + + NamedExpression parseExpression = optionalParseExpression.get(); ExprValue sourceFieldValue = inputValue.bindingTuples() - .resolve(((ParseExpression) expr.getDelegated()).getSourceField()); + .resolve(((ParseExpression) parseExpression.getDelegated()).getSourceField()); if (sourceFieldValue.isMissing()) { // source field will be missing after stats command, read from inputValue if it exists // otherwise do nothing since it should not appear as a field - ExprValue exprValue = ExprValueUtils.getTupleValue(inputValue).get(expr.getNameOrAlias()); - if (exprValue != null) { - mapBuilder.put(expr.getNameOrAlias(), exprValue); + ExprValue tupleValue = + ExprValueUtils.getTupleValue(inputValue).get(parseExpression.getNameOrAlias()); + if (tupleValue != null) { + mapBuilder.put(parseExpression.getNameOrAlias(), tupleValue); } } else { - ExprValue parsedValue = expr.valueOf(inputValue.bindingTuples()); - mapBuilder.put(expr.getNameOrAlias(), parsedValue); + ExprValue parsedValue = parseExpression.valueOf(inputValue.bindingTuples()); + mapBuilder.put(parseExpression.getNameOrAlias(), parsedValue); } } return ExprTupleValue.fromExprValueMap(mapBuilder.build()); diff --git a/core/src/test/java/org/opensearch/sql/planner/physical/ProjectOperatorTest.java b/core/src/test/java/org/opensearch/sql/planner/physical/ProjectOperatorTest.java index 3fa312d4c2..24be5eb2b8 100644 --- a/core/src/test/java/org/opensearch/sql/planner/physical/ProjectOperatorTest.java +++ b/core/src/test/java/org/opensearch/sql/planner/physical/ProjectOperatorTest.java @@ -118,7 +118,26 @@ public void project_fields_with_parse_expressions() { DSL.literal("action"))), DSL.named("response", DSL.regex(DSL.ref("response", STRING), DSL.literal("(?\\w+) (?\\d+)"), - DSL.literal("response"))), DSL.named("ignored", + DSL.literal("response")))) + ); + List result = execute(plan); + + assertThat( + result, + allOf( + iterableWithSize(1), + hasItems( + ExprValueUtils.tupleValue(ImmutableMap.of("action", "GET", "response", "200"))))); + } + + @Test + public void project_fields_with_unused_parse_expressions() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn(ExprValueUtils.tupleValue(ImmutableMap.of("response", "GET 200"))); + PhysicalPlan plan = + project(inputPlan, ImmutableList.of(DSL.named("response", DSL.ref("response", STRING))), + ImmutableList.of(DSL.named("ignored", DSL.regex(DSL.ref("response", STRING), DSL.literal("(?\\w+) (?\\d+)"), DSL.literal("ignored")))) @@ -130,7 +149,33 @@ public void project_fields_with_parse_expressions() { allOf( iterableWithSize(1), hasItems( - ExprValueUtils.tupleValue(ImmutableMap.of("action", "GET", "response", "200"))))); + ExprValueUtils.tupleValue(ImmutableMap.of("response", "GET 200"))))); + } + + @Test + public void project_fields_with_parse_expressions_and_runtime_fields() { + when(inputPlan.hasNext()).thenReturn(true, false); + when(inputPlan.next()) + .thenReturn( + ExprValueUtils.tupleValue(ImmutableMap.of("response", "GET 200", "eval_field", 1))); + PhysicalPlan plan = + project(inputPlan, ImmutableList.of(DSL.named("response", DSL.ref("response", STRING)), + DSL.named("action", DSL.ref("action", STRING)), + DSL.named("eval_field", DSL.ref("eval_field", INTEGER))), + ImmutableList.of(DSL.named("action", + DSL.regex(DSL.ref("response", STRING), + DSL.literal("(?\\w+) (?\\d+)"), + DSL.literal("action")))) + ); + List result = execute(plan); + + assertThat( + result, + allOf( + iterableWithSize(1), + hasItems( + ExprValueUtils.tupleValue( + ImmutableMap.of("response", "GET 200", "action", "GET", "eval_field", 1))))); } @Test diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java new file mode 100644 index 0000000000..36fcb4bf3b --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ParseCommandIT.java @@ -0,0 +1,69 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyOrder; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; + +public class ParseCommandIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(Index.BANK); + } + + @Test + public void testParseCommand() throws IOException { + JSONObject result = executeQuery( + String.format("source=%s | parse email '.+@(?.+)' | fields email, host", + TEST_INDEX_BANK)); + verifyOrder( + result, + rows("amberduke@pyrami.com", "pyrami.com"), + rows("hattiebond@netagy.com", "netagy.com"), + rows("nanettebates@quility.com", "quility.com"), + rows("daleadams@boink.com", "boink.com"), + rows("elinorratliff@scentric.com", "scentric.com"), + rows("virginiaayala@filodyne.com", "filodyne.com"), + rows("dillardmcpherson@quailcom.com", "quailcom.com")); + } + + @Test + public void testParseCommandReplaceOriginalField() throws IOException { + JSONObject result = executeQuery( + String.format("source=%s | parse email '.+@(?.+)' | fields email", TEST_INDEX_BANK)); + verifyOrder( + result, + rows("pyrami.com"), + rows("netagy.com"), + rows("quility.com"), + rows("boink.com"), + rows("scentric.com"), + rows("filodyne.com"), + rows("quailcom.com")); + } + + @Test + public void testParseCommandWithOtherRunTimeFields() throws IOException { + JSONObject result = executeQuery(String.format("source=%s | parse email '.+@(?.+)' | " + + "eval eval_result=1 | fields host, eval_result", TEST_INDEX_BANK)); + verifyOrder( + result, + rows("pyrami.com", 1), + rows("netagy.com", 1), + rows("quility.com", 1), + rows("boink.com", 1), + rows("scentric.com", 1), + rows("filodyne.com", 1), + rows("quailcom.com", 1)); + } +}