Skip to content

Commit

Permalink
ESQL: Rewrite TO_UPPER/TO_LOWER comparisons (#118870)
Browse files Browse the repository at this point in the history
This adds an optimization rule that rewrites TO_UPPER/TO_LOWER comparisons
against a string into an InsensitiveEquals comparison. The rewrite can
also fold directly into TRUE/FALSE when the case of the string doesn't
match the case produced by the function.

This also allows later pushing down the predicate to lucene as a
case-insensitive term-query.

Fixes #118304.
  • Loading branch information
bpintea authored Dec 23, 2024
1 parent 22990df commit d521f89
Show file tree
Hide file tree
Showing 14 changed files with 735 additions and 264 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/118870.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 118870
summary: Rewrite TO_UPPER/TO_LOWER comparisons
area: ES|QL
type: enhancement
issues:
- 118304
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.core.type.EsField;

import java.util.regex.Pattern;

import static java.util.Collections.emptyMap;
import static org.elasticsearch.test.ESTestCase.randomAlphaOfLength;
import static org.elasticsearch.test.ESTestCase.randomBoolean;
Expand All @@ -26,6 +28,8 @@
public final class TestUtils {
private TestUtils() {}

private static final Pattern WS_PATTERN = Pattern.compile("\\s");

/** Wraps {@code value} in a {@link Literal}, delegating to the two-argument overload with {@link Source#EMPTY}. */
public static Literal of(Object value) {
    final Source noSource = Source.EMPTY;
    return of(noSource, value);
}
Expand Down Expand Up @@ -59,4 +63,9 @@ public static FieldAttribute getFieldAttribute(String name) {
/**
 * Creates a {@link FieldAttribute} called {@code name} with an empty source.
 * The backing {@link EsField} is named {@code name + "f"}, has the given
 * {@code dataType} and no sub-properties (empty map).
 */
public static FieldAttribute getFieldAttribute(String name, DataType dataType) {
    final String esFieldName = name + "f";
    final EsField esField = new EsField(esFieldName, dataType, emptyMap(), true);
    return new FieldAttribute(EMPTY, name, esField);
}

/**
 * Removes whitespace from anywhere in {@code input}, whereas {@link String#strip()}
 * only touches the leading and trailing ends. Every match of {@code WS_PATTERN}
 * (the regex {@code \s}) is replaced with {@code StringUtils.EMPTY}.
 *
 * @param input the string to remove whitespace from
 * @return {@code input} with every {@code WS_PATTERN} match replaced
 */
public static String stripThrough(String input) {
    final String replacement = StringUtils.EMPTY;
    return WS_PATTERN.matcher(input).replaceAll(replacement);
}
}
183 changes: 183 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -1231,6 +1231,189 @@ a:keyword | upper:keyword | lower:keyword
π/2 + a + B + Λ ºC | Π/2 + A + B + Λ ºC | π/2 + a + b + λ ºc
;

equalsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
from employees
| where to_upper(first_name) == "GEORGI"
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
10001 | Georgi
;

equalsToUpperNestedPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
from employees
| where to_upper(to_upper(to_lower(first_name))) == "GEORGI"
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
10001 | Georgi
;

negatedEqualsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
from employees
| sort emp_no
| where not(to_upper(first_name) == "GEORGI")
| keep emp_no, first_name
| limit 1
;

emp_no:integer | first_name:keyword
10002 | Bezalel
;

notEqualsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
from employees
| sort emp_no
| where to_upper(first_name) != "GEORGI"
| keep emp_no, first_name
| limit 1
;

emp_no:integer | first_name:keyword
10002 | Bezalel
;

negatedNotEqualsToUpperPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
from employees
| sort emp_no
| where not(to_upper(first_name) != "GEORGI")
| keep emp_no, first_name
| limit 1
;

emp_no:integer | first_name:keyword
10001 | Georgi
;

equalsToUpperFolded
from employees
| where to_upper(first_name) == "Georgi"
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
;

negatedEqualsToUpperFolded
from employees
| where not(to_upper(first_name) == "Georgi")
| stats c = count()
;

c:long
90
;

equalsToUpperNullFolded
from employees
| where to_upper(null) == "Georgi"
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
;

equalsNullToUpperFolded
from employees
| where to_upper(first_name) == null::keyword
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
;

notEqualsToUpperNullFolded
from employees
| where to_upper(null) != "Georgi"
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
;

notEqualsNullToUpperFolded
from employees
| where to_upper(first_name) != null::keyword
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
;

notEqualsToUpperFolded
from employees
| where to_upper(first_name) != "Georgi"
| stats c = count()
;

c:long
90
;

negatedNotEqualsToUpperFolded
from employees
| where not(to_upper(first_name) != "Georgi")
| stats c = count()
;

c:long
0
;

equalsToLowerPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
from employees
| where to_lower(first_name) == "georgi"
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
10001 | Georgi
;

notEqualsToLowerPushedDown[skip:-8.12.99, reason:case insensitive operators implemented in v 8.13]
from employees
| sort emp_no
| where to_lower(first_name) != "georgi"
| keep emp_no, first_name
| limit 1
;

emp_no:integer | first_name:keyword
10002 | Bezalel
;

equalsToLowerFolded
from employees
| where to_lower(first_name) == "Georgi"
| keep emp_no, first_name
;

emp_no:integer | first_name:keyword
;

notEqualsToLowerFolded
from employees
| where to_lower(first_name) != "Georgi"
| stats c = count()
;

c:long
90
;

equalsToLowerWithUnico(rn|d)s
from employees
| where to_lower(concat(first_name, "🦄🦄")) != "georgi🦄🦄"
| stats c = count()
;

// 10 null first names
c:long
89
;

reverse
required_capability: fn_reverse
from employees | sort emp_no | eval name_reversed = REVERSE(first_name) | keep emp_no, first_name, name_reversed | limit 1;
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit d521f89

Please sign in to comment.