Skip to content

Commit

Permalink
[ES|QL] Combine Disjunctive CIDRMatch (#111501)
Browse files Browse the repository at this point in the history
* support CIDRMatch in CombineDisjunctions
  • Loading branch information
fang-xing-esql authored Aug 8, 2024
1 parent f90dc31 commit 9c6a2e0
Show file tree
Hide file tree
Showing 11 changed files with 605 additions and 286 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/111501.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 111501
summary: "[ES|QL] Combine Disjunctive CIDRMatch"
area: ES|QL
type: enhancement
issues:
- 105143
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,12 @@ private static Query boolQuery(Source source, Query left, Query right, boolean i
}
List<Query> queries;
// check if either side is already a bool query to an extra bool query
if (left instanceof BoolQuery bool && bool.isAnd() == isAnd) {
queries = CollectionUtils.combine(bool.queries(), right);
if (left instanceof BoolQuery leftBool && leftBool.isAnd() == isAnd) {
if (right instanceof BoolQuery rightBool && rightBool.isAnd() == isAnd) {
queries = CollectionUtils.combine(leftBool.queries(), rightBool.queries());
} else {
queries = CollectionUtils.combine(leftBool.queries(), right);
}
} else if (right instanceof BoolQuery bool && bool.isAnd() == isAnd) {
queries = CollectionUtils.combine(bool.queries(), left);
} else {
Expand Down
72 changes: 72 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/ip.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,78 @@ str1:keyword |str2:keyword |ip1:ip |ip2:ip
// end::to_ip-result[]
;

cdirMatchOrsIPs
required_capability: combine_disjunctive_cidrmatches

FROM hosts
| WHERE CIDR_MATCH(ip1, "127.0.0.2/32") or CIDR_MATCH(ip0, "127.0.0.1") or CIDR_MATCH(ip1, "127.0.0.3/32") or CIDR_MATCH(ip0, "fe80::cae2:65ff:fece:feb9")
| KEEP card, host, ip0, ip1
| sort host, card, ip0, ip1
;
warning:Line 2:20: evaluation of [ip1] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:20: java.lang.IllegalArgumentException: single-value function encountered multi-value
warning:Line 2:55: evaluation of [ip0] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:55: java.lang.IllegalArgumentException: single-value function encountered multi-value

card:keyword |host:keyword |ip0:ip |ip1:ip
eth0 |alpha |127.0.0.1 |127.0.0.1
eth0 |beta |127.0.0.1 |::1
eth1 |beta |127.0.0.1 |127.0.0.2
eth1 |beta |127.0.0.1 |128.0.0.1
eth0 |gamma |fe80::cae2:65ff:fece:feb9|127.0.0.3
lo0 |gamma |fe80::cae2:65ff:fece:feb9|fe81::cae2:65ff:fece:feb9
;

cdirMatchEqualsInsOrs
required_capability: combine_disjunctive_cidrmatches

FROM hosts
| WHERE host == "alpha" OR host == "gamma" OR CIDR_MATCH(ip1, "127.0.0.2/32") OR CIDR_MATCH(ip0, "127.0.0.1") OR card IN ("eth0", "eth1") OR CIDR_MATCH(ip1, "127.0.0.3/32") OR card == "lo0" OR CIDR_MATCH(ip0, "fe80::cae2:65ff:fece:feb9") OR host == "beta"
| KEEP card, host, ip0, ip1
| sort host, card, ip0, ip1
;
warning:Line 2:58: evaluation of [ip1] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:58: java.lang.IllegalArgumentException: single-value function encountered multi-value
warning:Line 2:93: evaluation of [ip0] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:93: java.lang.IllegalArgumentException: single-value function encountered multi-value

card:keyword |host:keyword |ip0:ip |ip1:ip
eth0 |alpha |127.0.0.1 |127.0.0.1
eth1 |alpha |::1 |::1
eth0 |beta |127.0.0.1 |::1
eth1 |beta |127.0.0.1 |127.0.0.2
eth1 |beta |127.0.0.1 |128.0.0.1
eth0 |epsilon |[fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:fec0, fe80::cae2:65ff:fece:fec1] |fe80::cae2:65ff:fece:fec1
eth1 |epsilon |null |[127.0.0.1, 127.0.0.2, 127.0.0.3]
eth0 |gamma |fe80::cae2:65ff:fece:feb9 |127.0.0.3
lo0 |gamma |fe80::cae2:65ff:fece:feb9 |fe81::cae2:65ff:fece:feb9
;

cdirMatchEqualsInsOrsIPs
required_capability: combine_disjunctive_cidrmatches

FROM hosts
| WHERE host == "alpha" OR host == "gamma" OR CIDR_MATCH(ip1, "127.0.0.2/32") OR ip0 == "127.0.0.1" OR card IN ("eth0", "eth1") OR ip1 IN ("127.0.0.3", "127.0.0.1") OR card == "lo0" OR CIDR_MATCH(ip0, "fe80::cae2:65ff:fece:feb9") OR host == "beta"
| KEEP card, host, ip0, ip1
| sort host, card, ip0, ip1
;
warning:Line 2:58: evaluation of [ip1] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:58: java.lang.IllegalArgumentException: single-value function encountered multi-value
warning:Line 2:82: evaluation of [ip0] failed, treating result as null. Only first 20 failures recorded.
warning:Line 2:82: java.lang.IllegalArgumentException: single-value function encountered multi-value

card:keyword |host:keyword |ip0:ip |ip1:ip
eth0 |alpha |127.0.0.1 |127.0.0.1
eth1 |alpha |::1 |::1
eth0 |beta |127.0.0.1 |::1
eth1 |beta |127.0.0.1 |127.0.0.2
eth1 |beta |127.0.0.1 |128.0.0.1
eth0 |epsilon |[fe80::cae2:65ff:fece:feb9, fe80::cae2:65ff:fece:fec0, fe80::cae2:65ff:fece:fec1] |fe80::cae2:65ff:fece:fec1
eth1 |epsilon |null |[127.0.0.1, 127.0.0.2, 127.0.0.3]
eth0 |gamma |fe80::cae2:65ff:fece:feb9 |127.0.0.3
lo0 |gamma |fe80::cae2:65ff:fece:feb9 |fe81::cae2:65ff:fece:feb9
;

pushDownIP
from hosts | where ip1 == to_ip("::1") | keep card, host, ip0, ip1;
ignoreOrder:true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,12 @@ public enum Cap {
/**
* Support for nanosecond dates as a data type
*/
DATE_NANOS_TYPE(EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG);
DATE_NANOS_TYPE(EsqlCorePlugin.DATE_NANOS_FEATURE_FLAG),

/**
* Support CIDRMatch in CombineDisjunctions rule.
*/
COMBINE_DISJUNCTIVE_CIDRMATCHES;

private final boolean snapshotOnly;
private final FeatureFlag featureFlag;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import org.elasticsearch.xpack.esql.optimizer.rules.BooleanFunctionEqualsElimination;
import org.elasticsearch.xpack.esql.optimizer.rules.BooleanSimplification;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineBinaryComparisons;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineDisjunctionsToIn;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineDisjunctions;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineEvals;
import org.elasticsearch.xpack.esql.optimizer.rules.CombineProjections;
import org.elasticsearch.xpack.esql.optimizer.rules.ConstantFolding;
Expand Down Expand Up @@ -209,7 +209,7 @@ protected static Batch<LogicalPlan> operators() {
new PropagateNullable(),
new BooleanFunctionEqualsElimination(),
new CombineBinaryComparisons(),
new CombineDisjunctionsToIn(),
new CombineDisjunctions(),
new SimplifyComparisonsArithmetics(DataType::areCompatible),
// prune/elimination
new PruneFilters(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.esql.optimizer.rules;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.Literal;
import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Or;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.expression.function.scalar.ip.CIDRMatch;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals;
import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In;

import java.time.ZoneId;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.elasticsearch.xpack.esql.core.expression.predicate.Predicates.combineOr;
import static org.elasticsearch.xpack.esql.core.expression.predicate.Predicates.splitOr;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.ipToString;

/**
* Combine disjunctive Equals, In or CIDRMatch expressions on the same field into an In or CIDRMatch expression.
* This rule looks for both simple equalities:
* 1. a == 1 OR a == 2 becomes a IN (1, 2)
* and combinations of In
* 2. a == 1 OR a IN (2) becomes a IN (1, 2)
* 3. a IN (1) OR a IN (2) becomes a IN (1, 2)
* and combinations of CIDRMatch
* 4. CIDRMatch(a, ip1) OR CIDRMatch(a, ip2) OR a == ip3 or a IN (ip4, ip5) becomes CIDRMatch(a, ip1, ip2, ip3, ip4, ip5)
* <p>
* This rule does NOT check for type compatibility as that phase has been
* already be verified in the analyzer.
*/
public final class CombineDisjunctions extends OptimizerRules.OptimizerExpressionRule<Or> {
public CombineDisjunctions() {
super(OptimizerRules.TransformDirection.UP);
}

protected static In createIn(Expression key, List<Expression> values, ZoneId zoneId) {
return new In(key.source(), key, values);
}

protected static Equals createEquals(Expression k, Set<Expression> v, ZoneId finalZoneId) {
return new Equals(k.source(), k, v.iterator().next(), finalZoneId);
}

protected static CIDRMatch createCIDRMatch(Expression k, List<Expression> v) {
return new CIDRMatch(k.source(), k, v);
}

@Override
public Expression rule(Or or) {
Expression e = or;
// look only at equals, In and CIDRMatch
List<Expression> exps = splitOr(e);

Map<Expression, Set<Expression>> ins = new LinkedHashMap<>();
Map<Expression, Set<Expression>> cidrs = new LinkedHashMap<>();
Map<Expression, Set<Expression>> ips = new LinkedHashMap<>();
ZoneId zoneId = null;
List<Expression> ors = new LinkedList<>();
boolean changed = false;
for (Expression exp : exps) {
if (exp instanceof Equals eq) {
// consider only equals against foldables
if (eq.right().foldable()) {
ins.computeIfAbsent(eq.left(), k -> new LinkedHashSet<>()).add(eq.right());
if (eq.left().dataType() == DataType.IP) {
Object value = eq.right().fold();
// ImplicitCasting and ConstantFolding(includes explicit casting) are applied before CombineDisjunctions.
// They fold the input IP string to an internal IP format. These happen to Equals and IN, but not for CIDRMatch,
// as CIDRMatch takes strings as input, ImplicitCasting does not apply to it, and the first input to CIDRMatch is a
// field, ConstantFolding does not apply to it either.
// If the data type is IP, convert the internal IP format in Equals and IN to the format that is compatible with
// CIDRMatch, and store them in a separate map, so that they can be combined into existing CIDRMatch later.
if (value instanceof BytesRef bytesRef) {
value = ipToString(bytesRef);
}
ips.computeIfAbsent(eq.left(), k -> new LinkedHashSet<>()).add(new Literal(Source.EMPTY, value, DataType.IP));
}
} else {
ors.add(exp);
}
if (zoneId == null) {
zoneId = eq.zoneId();
}
} else if (exp instanceof In in) {
ins.computeIfAbsent(in.value(), k -> new LinkedHashSet<>()).addAll(in.list());
if (in.value().dataType() == DataType.IP) {
List<Expression> values = new ArrayList<>(in.list().size());
for (Expression i : in.list()) {
Object value = i.fold();
// Same as Equals.
if (value instanceof BytesRef bytesRef) {
value = ipToString(bytesRef);
}
values.add(new Literal(Source.EMPTY, value, DataType.IP));
}
ips.computeIfAbsent(in.value(), k -> new LinkedHashSet<>()).addAll(values);
}
} else if (exp instanceof CIDRMatch cm) {
cidrs.computeIfAbsent(cm.ipField(), k -> new LinkedHashSet<>()).addAll(cm.matches());
} else {
ors.add(exp);
}
}

if (cidrs.isEmpty() == false) {
for (Expression f : ips.keySet()) {
cidrs.computeIfAbsent(f, k -> new LinkedHashSet<>()).addAll(ips.get(f));
ins.remove(f);
}
}

if (ins.isEmpty() == false) {
// combine equals alongside the existing ors
final ZoneId finalZoneId = zoneId;
ins.forEach(
(k, v) -> { ors.add(v.size() == 1 ? createEquals(k, v, finalZoneId) : createIn(k, new ArrayList<>(v), finalZoneId)); }
);

changed = true;
}

if (cidrs.isEmpty() == false) {
cidrs.forEach((k, v) -> { ors.add(createCIDRMatch(k, new ArrayList<>(v))); });
changed = true;
}

if (changed) {
// TODO: this makes a QL `or`, not an ESQL `or`
Expression combineOr = combineOr(ors);
// check the result semantically since the result might different in order
// but be actually the same which can trigger a loop
// e.g. a == 1 OR a == 2 OR null --> null OR a in (1,2) --> literalsOnTheRight --> cycle
if (e.semanticEquals(combineOr) == false) {
e = combineOr;
}
}

return e;
}
}

This file was deleted.

Loading

0 comments on commit 9c6a2e0

Please sign in to comment.