Add checks in term and terms queries that input terms are not too long (elastic#99818)

Lucene indexes do not allow terms longer than 32k bytes. Any query that contains
a term exceeding this length will by definition match nothing, yet it can still
cause cluster instability by consuming large amounts of heap. Such terms are also
almost always a user error (for example, a terms query that concatenates all its
inputs into a single string rather than splitting them into a JSON array).
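
As a rough illustration of that user error (the field name, values, and demo class below are invented, not taken from this change): joining every input into one string produces a single term whose byte length grows with the input and can blow past Lucene's 32k-byte limit, while the intended query sends one term per value.

import java.util.List;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

public class TermsQueryUserError {
    public static void main(String[] args) {
        // Imagine tens of thousands of ids instead of three.
        List<String> ids = List.of("id-0001", "id-0002", "id-0003");
        // Wrong: all values collapsed into ONE term, whose length grows with the input.
        QueryBuilder wrong = QueryBuilders.termsQuery("user_id", String.join(",", ids));
        // Right: one term per value, each comfortably under the limit.
        QueryBuilder right = QueryBuilders.termsQuery("user_id", ids);
    }
}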

This commit adds checks to the Term and Terms query builders that throw an
exception if any of their input terms is longer than the maximum length allowed
by the Lucene IndexWriter (IndexWriter.MAX_TERM_LENGTH).

Fixes elastic#99802
romseygeek committed Sep 25, 2023
1 parent 06aad5a commit 23099a4
Showing 6 changed files with 72 additions and 8 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/99818.yaml
@@ -0,0 +1,6 @@
pr: 99818
summary: Add checks in term and terms queries that input terms are not too long
area: Search
type: enhancement
issues:
- 99802
org/elasticsearch/common/lucene/BytesRefs.java
@@ -8,6 +8,7 @@

package org.elasticsearch.common.lucene;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;

public class BytesRefs {
@@ -39,4 +40,34 @@ public static BytesRef toBytesRef(Object value) {
return new BytesRef(value.toString());
}

/**
* Checks that the input is not longer than {@link IndexWriter#MAX_TERM_LENGTH}
* @param input a BytesRef
* @return the same BytesRef, if no exception has been thrown
* @throws IllegalArgumentException if the input is too long
*/
public static BytesRef checkIndexableLength(BytesRef input) {
if (input.length > IndexWriter.MAX_TERM_LENGTH) {
throw new IllegalArgumentException(
"Term is longer than maximum indexable length, term starting with [" + safeStringPrefix(input, 10)
);
}
return input;
}

/**
* Produces a UTF-8 string prefix of the input BytesRef. If cutting the prefix at
* {@code prefixLength} bytes would produce ill-formed UTF-8, it falls back to the
* hexadecimal representation of the prefix.
* @param input an input BytesRef
* @param prefixLength the length of the prefix, in bytes
* @return a String prefix
*/
private static String safeStringPrefix(BytesRef input, int prefixLength) {
BytesRef prefix = new BytesRef(input.bytes, input.offset, prefixLength);
try {
return prefix.utf8ToString();
} catch (Exception e) {
return prefix.toString();
}
}

}
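
A minimal usage sketch of the new helper (not part of the diff; the oversized term is built with String.repeat purely for illustration):

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.BytesRefs;

public class CheckIndexableLengthDemo {
    public static void main(String[] args) {
        // A term of ordinary length is returned unchanged.
        BytesRef ok = BytesRefs.checkIndexableLength(new BytesRef("elasticsearch"));

        // A term longer than IndexWriter.MAX_TERM_LENGTH is rejected up front.
        BytesRef tooLong = new BytesRef("a".repeat(IndexWriter.MAX_TERM_LENGTH + 1));
        try {
            BytesRefs.checkIndexableLength(tooLong);
        } catch (IllegalArgumentException e) {
            // The message contains a short, safely truncated prefix of the offending term.
            System.out.println(e.getMessage());
        }
    }
}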
org/elasticsearch/index/query/AbstractQueryBuilder.java
@@ -211,9 +211,9 @@ public final int hashCode() {
*/
static Object maybeConvertToBytesRef(Object obj) {
if (obj instanceof String) {
- return BytesRefs.toBytesRef(obj);
+ return BytesRefs.checkIndexableLength(BytesRefs.toBytesRef(obj));
} else if (obj instanceof CharBuffer) {
- return new BytesRef((CharBuffer) obj);
+ return BytesRefs.checkIndexableLength(new BytesRef((CharBuffer) obj));
} else if (obj instanceof BigInteger) {
return BytesRefs.toBytesRef(obj);
}
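
Because query builders funnel string terms through maybeConvertToBytesRef, an oversized term should now be rejected when the query is built or parsed rather than at search time. A minimal sketch under that assumption (the constructor path is implied by the parse-time tests further down, not shown in this diff):

import org.apache.lucene.index.IndexWriter;
import org.elasticsearch.index.query.TermQueryBuilder;

public class LongTermRejected {
    public static void main(String[] args) {
        String longTerm = "a".repeat(IndexWriter.MAX_TERM_LENGTH + 1);
        try {
            // Assumed to throw because the builder converts its value via maybeConvertToBytesRef.
            new TermQueryBuilder("foo", longTerm);
        } catch (IllegalArgumentException e) {
            System.out.println(e.getMessage()); // "... term starting with [aaaaaaaaaa"
        }
    }
}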
org/elasticsearch/index/query/AbstractQueryBuilderTests.java
@@ -8,6 +8,7 @@

package org.elasticsearch.index.query;

import org.apache.lucene.index.IndexWriter;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.search.SearchModule;
@@ -22,6 +23,7 @@

import static java.util.Collections.emptyList;
import static org.elasticsearch.index.query.AbstractQueryBuilder.parseInnerQueryBuilder;
import static org.hamcrest.Matchers.containsString;

public class AbstractQueryBuilderTests extends ESTestCase {

@@ -84,4 +86,10 @@ protected NamedXContentRegistry xContentRegistry() {
return xContentRegistry;
}

public void testMaybeConvertToBytesRefLongTerm() {
String longTerm = "a".repeat(IndexWriter.MAX_TERM_LENGTH + 1);
Exception e = expectThrows(IllegalArgumentException.class, () -> AbstractQueryBuilder.maybeConvertToBytesRef(longTerm));
assertThat(e.getMessage(), containsString("term starting with [aaaaa"));
}

}
org/elasticsearch/index/query/TermQueryBuilderTests.java
@@ -8,6 +8,7 @@

package org.elasticsearch.index.query;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
@@ -20,9 +21,11 @@
import org.elasticsearch.xcontent.json.JsonStringEncoder;

import java.io.IOException;
import java.util.Locale;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.either;

public class TermQueryBuilderTests extends AbstractTermQueryTestCase<TermQueryBuilder> {
@@ -227,4 +230,11 @@ public void testMustRewrite() throws IOException {
IllegalStateException e = expectThrows(IllegalStateException.class, () -> queryBuilder.toQuery(context));
assertEquals("Rewrite first", e.getMessage());
}

public void testLongTerm() throws IOException {
String longTerm = "a".repeat(IndexWriter.MAX_TERM_LENGTH + 1);
Exception e = expectThrows(IllegalArgumentException.class, () -> parseQuery(String.format(Locale.ROOT, """
{ "term" : { "foo" : "%s" } }""", longTerm)));
assertThat(e.getMessage(), containsString("term starting with [aaaaa"));
}
}
org/elasticsearch/index/query/TermsQueryBuilderTests.java
@@ -8,6 +8,7 @@

package org.elasticsearch.index.query;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
@@ -35,6 +36,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Objects;

import static org.hamcrest.Matchers.containsString;
@@ -71,7 +73,7 @@ protected TermsQueryBuilder doCreateTestQueryBuilder() {
|| choice.equals(INT_RANGE_FIELD_NAME)
|| choice.equals(DATE_RANGE_FIELD_NAME)
|| choice.equals(DATE_NANOS_FIELD_NAME), // TODO: needs testing for date_nanos type
- () -> getRandomFieldName()
+ AbstractQueryTestCase::getRandomFieldName
);
Object[] values = new Object[randomInt(5)];
for (int i = 0; i < values.length; i++) {
@@ -95,7 +97,7 @@ private TermsLookup randomTermsLookup() {
protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query, SearchExecutionContext context) throws IOException {
if (queryBuilder.termsLookup() == null && (queryBuilder.values() == null || queryBuilder.values().isEmpty())) {
assertThat(query, instanceOf(MatchNoDocsQuery.class));
- } else if (queryBuilder.termsLookup() != null && randomTerms.size() == 0) {
+ } else if (queryBuilder.termsLookup() != null && randomTerms.isEmpty()) {
assertThat(query, instanceOf(MatchNoDocsQuery.class));
} else {
assertThat(
@@ -138,14 +140,14 @@ protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query,
}
}

- public void testEmtpyFieldName() {
+ public void testEmptyFieldName() {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new TermsQueryBuilder(null, "term"));
assertEquals("field name cannot be null.", e.getMessage());
e = expectThrows(IllegalArgumentException.class, () -> new TermsQueryBuilder("", "term"));
assertEquals("field name cannot be null.", e.getMessage());
}

- public void testEmtpyTermsLookup() {
+ public void testEmptyTermsLookup() {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new TermsQueryBuilder("field", (TermsLookup) null));
assertEquals("No value or termsLookup specified for terms query", e.getMessage());
}
@@ -194,7 +196,7 @@ public GetResponse executeGet(GetRequest getRequest) {
try {
XContentBuilder builder = XContentFactory.jsonBuilder().prettyPrint();
builder.startObject();
- builder.array(termsPath, randomTerms.toArray(new Object[randomTerms.size()]));
+ builder.array(termsPath, randomTerms.toArray(Object[]::new));
builder.endObject();
json = Strings.toString(builder);
} catch (IOException ex) {
@@ -262,7 +264,7 @@ public void testMustRewrite() throws IOException {
assertEquals("query must be rewritten first", e.getMessage());

// terms lookup removes null values
- List<Object> nonNullTerms = randomTerms.stream().filter(x -> x != null).toList();
+ List<Object> nonNullTerms = randomTerms.stream().filter(Objects::nonNull).toList();
QueryBuilder expected;
if (nonNullTerms.isEmpty()) {
expected = new MatchNoneQueryBuilder();
@@ -308,6 +310,13 @@ public void testRewriteIndexQueryToNotMatchNone() throws IOException {
}
}

public void testLongTerm() throws IOException {
String longTerm = "a".repeat(IndexWriter.MAX_TERM_LENGTH + 1);
Exception e = expectThrows(IllegalArgumentException.class, () -> parseQuery(String.format(Locale.getDefault(), """
{ "terms" : { "foo" : [ "q", "%s" ] } }""", longTerm)));
assertThat(e.getMessage(), containsString("term starting with [aaaaa"));
}

@Override
protected QueryBuilder parseQuery(XContentParser parser) throws IOException {
QueryBuilder query = super.parseQuery(parser);
