diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index dc376a16a91c5..93e73764d8e92 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -25,6 +25,56 @@ setup: - '{"index": {"_index": "test", "_id": "6"}}' - '{"text" : "that is some cold cold rain"}' +--- +"Test regexp": + - skip: + version: " - 1.2.99" + reason: "regexp introduced in 1.3" + - do: + search: + index: test + body: + query: + intervals: + text: + regexp: + pattern: "at[a-z]{2,}here" + - match: { hits.total.value: 1 } + +--- +"Test regexp, explicit case sensitive": + - skip: + version: " - 1.99.99" + reason: "case_insensitive introduced in 2.0" + - do: + search: + index: test + body: + query: + intervals: + text: + regexp: + pattern: "AT[a-z]{2,}HERE" + case_insensitive: false + - match: { hits.total.value: 0 } + +--- +"Test regexp, explicit case insensitive": + - skip: + version: " - 1.99.99" + reason: "case_insensitive introduced in 2.0" + - do: + search: + index: test + body: + query: + intervals: + text: + regexp: + pattern: "AT[a-z]{2,}HERE" + case_insensitive: true + - match: { hits.total.value: 1 } + --- "Test ordered matching with via mode": - skip: diff --git a/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java index b610883e777a8..ab2e65b3b65c7 100644 --- a/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java @@ -40,6 +40,7 @@ import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.apache.lucene.util.automaton.RegExp; import org.opensearch.LegacyESVersion; import org.opensearch.Version; import org.opensearch.common.ParseField; @@ -687,12 +688,20 @@ public static class Regexp extends IntervalsSourceProvider { private final int flags; private final String useField; private final Integer maxExpansions; - - public Regexp(String pattern, int flags, String useField, Integer maxExpansions) { + private final boolean caseInsensitive; + + /** + * Constructor + * + * {@code flags} is Lucene's syntax flags + * and {@code caseInsensitive} enables Lucene's only matching flag. + */ + public Regexp(String pattern, int flags, String useField, Integer maxExpansions, boolean caseInsensitive) { this.pattern = pattern; this.flags = flags; this.useField = useField; this.maxExpansions = (maxExpansions != null && maxExpansions > 0) ? maxExpansions : null; + this.caseInsensitive = caseInsensitive; } public Regexp(StreamInput in) throws IOException { @@ -700,11 +709,20 @@ public Regexp(StreamInput in) throws IOException { this.flags = in.readVInt(); this.useField = in.readOptionalString(); this.maxExpansions = in.readOptionalVInt(); + if (in.getVersion().onOrAfter(Version.V_2_0_0)) { + this.caseInsensitive = in.readBoolean(); + } else { + this.caseInsensitive = false; + } } @Override public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) { - final org.apache.lucene.util.automaton.RegExp regexp = new org.apache.lucene.util.automaton.RegExp(pattern, flags); + final org.apache.lucene.util.automaton.RegExp regexp = new org.apache.lucene.util.automaton.RegExp( + pattern, + flags, + caseInsensitive ? RegExp.ASCII_CASE_INSENSITIVE : 0 + ); final CompiledAutomaton automaton = new CompiledAutomaton(regexp.toAutomaton()); if (useField != null) { @@ -745,12 +763,13 @@ public boolean equals(Object o) { return Objects.equals(pattern, regexp.pattern) && Objects.equals(flags, regexp.flags) && Objects.equals(useField, regexp.useField) - && Objects.equals(maxExpansions, regexp.maxExpansions); + && Objects.equals(maxExpansions, regexp.maxExpansions) + && Objects.equals(caseInsensitive, regexp.caseInsensitive); } @Override public int hashCode() { - return Objects.hash(pattern, flags, useField, maxExpansions); + return Objects.hash(pattern, flags, useField, maxExpansions, caseInsensitive); } @Override @@ -764,6 +783,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVInt(flags); out.writeOptionalString(useField); out.writeOptionalVInt(maxExpansions); + if (out.getVersion().onOrAfter(Version.V_2_0_0)) { + out.writeBoolean(caseInsensitive); + } } @Override @@ -779,6 +801,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (maxExpansions != null) { builder.field("max_expansions", maxExpansions); } + if (caseInsensitive) { + builder.field("case_insensitive", caseInsensitive); + } builder.endObject(); return builder; } @@ -789,13 +814,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws Integer flagsValue = (Integer) args[2]; String useField = (String) args[3]; Integer maxExpansions = (Integer) args[4]; + boolean caseInsensitive = args[5] != null && (boolean) args[5]; if (flagsValue != null) { - return new Regexp(pattern, flagsValue, useField, maxExpansions); + return new Regexp(pattern, flagsValue, useField, maxExpansions, caseInsensitive); } else if (flags != null) { - return new Regexp(pattern, RegexpFlag.resolveValue(flags), useField, maxExpansions); + return new Regexp(pattern, RegexpFlag.resolveValue(flags), useField, maxExpansions, caseInsensitive); } else { - return new Regexp(pattern, DEFAULT_FLAGS_VALUE, useField, maxExpansions); + return new Regexp(pattern, DEFAULT_FLAGS_VALUE, useField, maxExpansions, caseInsensitive); } }); static { @@ -804,6 +830,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws PARSER.declareInt(optionalConstructorArg(), new ParseField("flags_value")); PARSER.declareString(optionalConstructorArg(), new ParseField("use_field")); PARSER.declareInt(optionalConstructorArg(), new ParseField("max_expansions")); + PARSER.declareBoolean(optionalConstructorArg(), new ParseField("case_insensitive")); } public static Regexp fromXContent(XContentParser parser) throws IOException { @@ -825,6 +852,10 @@ String getUseField() { Integer getMaxExpansions() { return maxExpansions; } + + boolean isCaseInsensitive() { + return caseInsensitive; + } } public static class Wildcard extends IntervalsSourceProvider { diff --git a/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java index 66035c2fbd17e..d7f57eef5e039 100644 --- a/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java @@ -846,7 +846,11 @@ public void testWildcard() throws IOException { } private static IntervalsSource buildRegexpSource(String pattern, int flags, Integer maxExpansions) { - final RegExp regexp = new RegExp(pattern, flags); + return buildRegexpSource(pattern, flags, 0, maxExpansions); + } + + private static IntervalsSource buildRegexpSource(String pattern, int flags, int matchFlags, Integer maxExpansions) { + final RegExp regexp = new RegExp(pattern, flags, matchFlags); CompiledAutomaton automaton = new CompiledAutomaton(regexp.toAutomaton()); if (maxExpansions != null) { @@ -922,6 +926,15 @@ public void testRegexp() throws IOException { expected = new IntervalQuery(TEXT_FIELD_NAME, buildRegexpSource("te.m", DEFAULT_FLAGS, 500)); assertEquals(expected, builder.toQuery(createShardContext())); + String regexp_case_insensitive_json = "{ \"intervals\" : { \"" + + TEXT_FIELD_NAME + + "\": { " + + "\"regexp\" : { \"pattern\" : \"TE.M\", \"case_insensitive\" : true } } } }"; + + builder = (IntervalQueryBuilder) parseQuery(regexp_case_insensitive_json); + expected = new IntervalQuery(TEXT_FIELD_NAME, buildRegexpSource("TE.M", DEFAULT_FLAGS, RegExp.ASCII_CASE_INSENSITIVE, null)); + assertEquals(expected, builder.toQuery(createShardContext())); + String regexp_neg_max_expand_json = "{ \"intervals\" : { \"" + TEXT_FIELD_NAME + "\": { " diff --git a/server/src/test/java/org/opensearch/index/query/RegexpIntervalsSourceProviderTests.java b/server/src/test/java/org/opensearch/index/query/RegexpIntervalsSourceProviderTests.java index ba97bdddf52ff..bf6809e5cb446 100644 --- a/server/src/test/java/org/opensearch/index/query/RegexpIntervalsSourceProviderTests.java +++ b/server/src/test/java/org/opensearch/index/query/RegexpIntervalsSourceProviderTests.java @@ -32,7 +32,8 @@ static Regexp createRandomRegexp() { randomAlphaOfLengthBetween(0, 3) + (randomBoolean() ? ".*?" : "." + randomAlphaOfLength(4)) + randomAlphaOfLengthBetween(0, 5), randomBoolean() ? RegexpFlag.resolveValue(randomFrom(FLAGS)) : RegexpFlag.ALL.value(), randomBoolean() ? randomAlphaOfLength(10) : null, - randomBoolean() ? randomIntBetween(-1, Integer.MAX_VALUE) : null + randomBoolean() ? randomIntBetween(-1, Integer.MAX_VALUE) : null, + randomBoolean() ); } @@ -42,7 +43,9 @@ protected Regexp mutateInstance(Regexp instance) throws IOException { int flags = instance.getFlags(); String useField = instance.getUseField(); Integer maxExpansions = instance.getMaxExpansions(); - int ran = between(0, 3); + boolean caseInsensitive = instance.isCaseInsensitive(); + + int ran = between(0, 4); switch (ran) { case 0: pattern += randomBoolean() ? ".*?" : randomAlphaOfLength(5); @@ -56,10 +59,13 @@ protected Regexp mutateInstance(Regexp instance) throws IOException { case 3: maxExpansions = maxExpansions == null ? randomIntBetween(1, Integer.MAX_VALUE) : null; break; + case 4: + caseInsensitive = !caseInsensitive; + break; default: throw new AssertionError("Illegal randomisation branch"); } - return new Regexp(pattern, flags, useField, maxExpansions); + return new Regexp(pattern, flags, useField, maxExpansions, caseInsensitive); } @Override