diff --git a/docs/changelog/99818.yaml b/docs/changelog/99818.yaml new file mode 100644 index 000000000000..8835bcf28e05 --- /dev/null +++ b/docs/changelog/99818.yaml @@ -0,0 +1,6 @@ +pr: 99818 +summary: Add checks in term and terms queries that input terms are not too long +area: Search +type: enhancement +issues: + - 99802 diff --git a/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java b/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java index 78fd5a76045c..7bae1976327d 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java @@ -8,6 +8,7 @@ package org.elasticsearch.common.lucene; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.BytesRef; public class BytesRefs { @@ -39,4 +40,34 @@ public static BytesRef toBytesRef(Object value) { return new BytesRef(value.toString()); } + /** + * Checks that the input is not longer than {@link IndexWriter#MAX_TERM_LENGTH} + * @param input a BytesRef + * @return the same BytesRef, if no exception has been thrown + * @throws IllegalArgumentException if the input is too long + */ + public static BytesRef checkIndexableLength(BytesRef input) { + if (input.length > IndexWriter.MAX_TERM_LENGTH) { + throw new IllegalArgumentException( + "Term is longer than maximum indexable length, term starting with [" + safeStringPrefix(input, 10) + ); + } + return input; + } + + /** + * Produces a UTF-string prefix of the input BytesRef. If the prefix cutoff would produce + * ill-formed UTF, it falls back to the hexadecimal representation. + * @param input an input BytesRef + * @return a String prefix + */ + private static String safeStringPrefix(BytesRef input, int prefixLength) { + BytesRef prefix = new BytesRef(input.bytes, input.offset, prefixLength); + try { + return prefix.utf8ToString(); + } catch (Exception e) { + return prefix.toString(); + } + } + } diff --git a/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java index 1a9fea929a20..cad6fb77b048 100644 --- a/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java @@ -211,9 +211,9 @@ public final int hashCode() { */ static Object maybeConvertToBytesRef(Object obj) { if (obj instanceof String) { - return BytesRefs.toBytesRef(obj); + return BytesRefs.checkIndexableLength(BytesRefs.toBytesRef(obj)); } else if (obj instanceof CharBuffer) { - return new BytesRef((CharBuffer) obj); + return BytesRefs.checkIndexableLength(new BytesRef((CharBuffer) obj)); } else if (obj instanceof BigInteger) { return BytesRefs.toBytesRef(obj); } diff --git a/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java index b5e7dd774139..3bf52eacb6df 100644 --- a/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.IndexWriter; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.search.SearchModule; @@ -22,6 +23,7 @@ import static java.util.Collections.emptyList; import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder; +import static org.hamcrest.Matchers.containsString; public class AbstractQueryBuilderTests extends ESTestCase { @@ -84,4 +86,10 @@ protected NamedXContentRegistry xContentRegistry() { return xContentRegistry; } + public void testMaybeConvertToBytesRefLongTerm() { + String longTerm = "a".repeat(IndexWriter.MAX_TERM_LENGTH + 1); + Exception e = expectThrows(IllegalArgumentException.class, () -> AbstractQueryBuilder.maybeConvertToBytesRef(longTerm)); + assertThat(e.getMessage(), containsString("term starting with [aaaaa")); + } + } diff --git a/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java index 364a4f52f0fc..c8bfbe304918 100644 --- a/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -20,9 +21,11 @@ import org.elasticsearch.xcontent.json.JsonStringEncoder; import java.io.IOException; +import java.util.Locale; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; public class TermQueryBuilderTests extends AbstractTermQueryTestCase { @@ -227,4 +230,11 @@ public void testMustRewrite() throws IOException { IllegalStateException e = expectThrows(IllegalStateException.class, () -> queryBuilder.toQuery(context)); assertEquals("Rewrite first", e.getMessage()); } + + public void testLongTerm() throws IOException { + String longTerm = "a".repeat(IndexWriter.MAX_TERM_LENGTH + 1); + Exception e = expectThrows(IllegalArgumentException.class, () -> parseQuery(String.format(Locale.ROOT, """ + { "term" : { "foo" : "%s" } }""", longTerm))); + assertThat(e.getMessage(), containsString("term starting with [aaaaa")); + } } diff --git a/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java index 746331be581f..6d43276c7bd2 100644 --- a/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/TermsQueryBuilderTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -35,6 +36,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Locale; import java.util.Objects; import static org.hamcrest.Matchers.containsString; @@ -71,7 +73,7 @@ protected TermsQueryBuilder doCreateTestQueryBuilder() { || choice.equals(INT_RANGE_FIELD_NAME) || choice.equals(DATE_RANGE_FIELD_NAME) || choice.equals(DATE_NANOS_FIELD_NAME), // TODO: needs testing for date_nanos type - () -> getRandomFieldName() + AbstractQueryTestCase::getRandomFieldName ); Object[] values = new Object[randomInt(5)]; for (int i = 0; i < values.length; i++) { @@ -95,7 +97,7 @@ private TermsLookup randomTermsLookup() { protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query, SearchExecutionContext context) throws IOException { if (queryBuilder.termsLookup() == null && (queryBuilder.values() == null || queryBuilder.values().isEmpty())) { assertThat(query, instanceOf(MatchNoDocsQuery.class)); - } else if (queryBuilder.termsLookup() != null && randomTerms.size() == 0) { + } else if (queryBuilder.termsLookup() != null && randomTerms.isEmpty()) { assertThat(query, instanceOf(MatchNoDocsQuery.class)); } else { assertThat( @@ -138,14 +140,14 @@ protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query, } } - public void testEmtpyFieldName() { + public void testEmptyFieldName() { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new TermsQueryBuilder(null, "term")); assertEquals("field name cannot be null.", e.getMessage()); e = expectThrows(IllegalArgumentException.class, () -> new TermsQueryBuilder("", "term")); assertEquals("field name cannot be null.", e.getMessage()); } - public void testEmtpyTermsLookup() { + public void testEmptyTermsLookup() { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new TermsQueryBuilder("field", (TermsLookup) null)); assertEquals("No value or termsLookup specified for terms query", e.getMessage()); } @@ -194,7 +196,7 @@ public GetResponse executeGet(GetRequest getRequest) { try { XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint(); builder.startObject(); - builder.array(termsPath, randomTerms.toArray(new Object[randomTerms.size()])); + builder.array(termsPath, randomTerms.toArray(Object[]::new)); builder.endObject(); json = Strings.toString(builder); } catch (IOException ex) { @@ -262,7 +264,7 @@ public void testMustRewrite() throws IOException { assertEquals("query must be rewritten first", e.getMessage()); // terms lookup removes null values - List nonNullTerms = randomTerms.stream().filter(x -> x != null).toList(); + List nonNullTerms = randomTerms.stream().filter(Objects::nonNull).toList(); QueryBuilder expected; if (nonNullTerms.isEmpty()) { expected = new MatchNoneQueryBuilder(); @@ -308,6 +310,13 @@ public void testRewriteIndexQueryToNotMatchNone() throws IOException { } } + public void testLongTerm() throws IOException { + String longTerm = "a".repeat(IndexWriter.MAX_TERM_LENGTH + 1); + Exception e = expectThrows(IllegalArgumentException.class, () -> parseQuery(String.format(Locale.getDefault(), """ + { "terms" : { "foo" : [ "q", "%s" ] } }""", longTerm))); + assertThat(e.getMessage(), containsString("term starting with [aaaaa")); + } + @Override protected QueryBuilder parseQuery(XContentParser parser) throws IOException { QueryBuilder query = super.parseQuery(parser);