Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configure IndexSearcher.maxClauseCount() based on Node characteristics #81525

Merged
merged 15 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@

package org.elasticsearch.search.aggregations.bucket;

import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.bucket.adjacency.AdjacencyMatrix;
import org.elasticsearch.search.aggregations.bucket.adjacency.AdjacencyMatrix.Bucket;
Expand Down Expand Up @@ -280,7 +279,7 @@ public void testTooLargeMatrix() throws Exception {

// Create more filters than is permitted by Lucene Bool clause settings.
MapBuilder filtersMap = new MapBuilder();
int maxFilters = SearchModule.INDICES_MAX_CLAUSE_COUNT_SETTING.get(Settings.EMPTY);
int maxFilters = IndexSearcher.getMaxClauseCount();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This number might be super high now, how slow is the test? Maybe we need to set an artificially low limit for this test to keep it reasonable?

for (int i = 0; i <= maxFilters; i++) {
filtersMap.add("tag" + i, termQuery("tag", "tag" + i));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.search.query;

import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
Expand All @@ -17,12 +18,10 @@
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.junit.Before;
import org.junit.BeforeClass;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -42,28 +41,13 @@

public class QueryStringIT extends ESIntegTestCase {

private static int CLUSTER_MAX_CLAUSE_COUNT;

@BeforeClass
public static void createRandomClusterSetting() {
CLUSTER_MAX_CLAUSE_COUNT = randomIntBetween(50, 100);
}

@Before
public void setup() throws Exception {
String indexBody = copyToStringFromClasspath("/org/elasticsearch/search/query/all-query-index.json");
prepareCreate("test").setSource(indexBody, XContentType.JSON).get();
ensureGreen("test");
}

@Override
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
return Settings.builder()
.put(super.nodeSettings(nodeOrdinal, otherSettings))
.put(SearchModule.INDICES_MAX_CLAUSE_COUNT_SETTING.getKey(), CLUSTER_MAX_CLAUSE_COUNT)
.build();
}

public void testBasicAllQuery() throws Exception {
List<IndexRequestBuilder> reqs = new ArrayList<>();
reqs.add(client().prepareIndex("test").setId("1").setSource("f1", "foo bar baz"));
Expand Down Expand Up @@ -256,7 +240,7 @@ public void testLimitOnExpandedFieldsButIgnoreUnmappedFields() throws Exception
builder.startObject();
builder.startObject("_doc");
builder.startObject("properties");
for (int i = 0; i < CLUSTER_MAX_CLAUSE_COUNT; i++) {
for (int i = 0; i < IndexSearcher.getMaxClauseCount() - 100; i++) {
builder.startObject("field" + i).field("type", "text").endObject();
}
builder.endObject(); // properties
Expand All @@ -276,14 +260,17 @@ public void testLimitOnExpandedFieldsButIgnoreUnmappedFields() throws Exception
}

public void testLimitOnExpandedFields() throws Exception {

final int maxClauseCount = randomIntBetween(50, 100);

XContentBuilder builder = jsonBuilder();
builder.startObject();
{
builder.startObject("_doc");
{
builder.startObject("properties");
{
for (int i = 0; i < CLUSTER_MAX_CLAUSE_COUNT; i++) {
for (int i = 0; i < maxClauseCount; i++) {
builder.startObject("field_A" + i).field("type", "text").endObject();
builder.startObject("field_B" + i).field("type", "text").endObject();
}
Expand All @@ -296,25 +283,34 @@ public void testLimitOnExpandedFields() throws Exception {

assertAcked(
prepareCreate("testindex").setSettings(
Settings.builder().put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), CLUSTER_MAX_CLAUSE_COUNT + 100)
Settings.builder().put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), maxClauseCount + 100)
).setMapping(builder)
);

client().prepareIndex("testindex").setId("1").setSource("field_A0", "foo bar baz").get();
refresh();

// single field shouldn't trigger the limit
doAssertOneHitForQueryString("field_A0:foo");
// expanding to the limit should work
doAssertOneHitForQueryString("field_A\\*:foo");
int originalMaxClauses = IndexSearcher.getMaxClauseCount();
try {

IndexSearcher.setMaxClauseCount(maxClauseCount);

// single field shouldn't trigger the limit
doAssertOneHitForQueryString("field_A0:foo");
// expanding to the limit should work
doAssertOneHitForQueryString("field_A\\*:foo");

// adding a non-existing field on top shouldn't overshoot the limit
doAssertOneHitForQueryString("field_A\\*:foo unmapped:something");
// adding a non-existing field on top shouldn't overshoot the limit
doAssertOneHitForQueryString("field_A\\*:foo unmapped:something");

// the following should exceed the limit
doAssertLimitExceededException("foo", CLUSTER_MAX_CLAUSE_COUNT * 2, "*");
doAssertLimitExceededException("*:foo", CLUSTER_MAX_CLAUSE_COUNT * 2, "*");
doAssertLimitExceededException("field_\\*:foo", CLUSTER_MAX_CLAUSE_COUNT * 2, "field_*");
// the following should exceed the limit
doAssertLimitExceededException("foo", IndexSearcher.getMaxClauseCount() * 2, "*");
doAssertLimitExceededException("*:foo", IndexSearcher.getMaxClauseCount() * 2, "*");
doAssertLimitExceededException("field_\\*:foo", IndexSearcher.getMaxClauseCount() * 2, "field_*");

} finally {
IndexSearcher.setMaxClauseCount(originalMaxClauses);
}
}

private void doAssertOneHitForQueryString(String queryString) {
Expand All @@ -340,7 +336,7 @@ private void doAssertLimitExceededException(String queryString, int exceedingFie
"field expansion for ["
+ inputFieldPattern
+ "] matches too many fields, limit: "
+ CLUSTER_MAX_CLAUSE_COUNT
+ IndexSearcher.getMaxClauseCount()
+ ", got: "
+ exceedingFieldCount
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
Expand All @@ -29,13 +30,11 @@
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xcontent.XContentType;
import org.junit.BeforeClass;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -69,21 +68,6 @@
*/
public class SimpleQueryStringIT extends ESIntegTestCase {

private static int CLUSTER_MAX_CLAUSE_COUNT;

@BeforeClass
public static void createRandomClusterSetting() {
CLUSTER_MAX_CLAUSE_COUNT = randomIntBetween(60, 100);
}

@Override
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
return Settings.builder()
.put(super.nodeSettings(nodeOrdinal, otherSettings))
.put(SearchModule.INDICES_MAX_CLAUSE_COUNT_SETTING.getKey(), CLUSTER_MAX_CLAUSE_COUNT)
.build();
}

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Collections.singletonList(MockAnalysisPlugin.class);
Expand Down Expand Up @@ -578,11 +562,14 @@ public void testAllFieldsWithSpecifiedLeniency() throws IOException {
}

public void testLimitOnExpandedFields() throws Exception {

final int maxClauseCount = randomIntBetween(50, 100);

XContentBuilder builder = jsonBuilder();
builder.startObject();
builder.startObject("_doc");
builder.startObject("properties");
for (int i = 0; i < CLUSTER_MAX_CLAUSE_COUNT + 1; i++) {
for (int i = 0; i < maxClauseCount + 1; i++) {
builder.startObject("field" + i).field("type", "text").endObject();
}
builder.endObject(); // properties
Expand All @@ -591,15 +578,21 @@ public void testLimitOnExpandedFields() throws Exception {

assertAcked(
prepareCreate("toomanyfields").setSettings(
Settings.builder().put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), CLUSTER_MAX_CLAUSE_COUNT + 100)
Settings.builder().put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.getKey(), maxClauseCount + 100)
).setMapping(builder)
);

client().prepareIndex("toomanyfields").setId("1").setSource("field1", "foo bar baz").get();
refresh();

doAssertLimitExceededException("*", CLUSTER_MAX_CLAUSE_COUNT + 1);
doAssertLimitExceededException("field*", CLUSTER_MAX_CLAUSE_COUNT + 1);
int originalMaxClauses = IndexSearcher.getMaxClauseCount();
try {
IndexSearcher.setMaxClauseCount(maxClauseCount);
doAssertLimitExceededException("*", maxClauseCount + 1);
doAssertLimitExceededException("field*", maxClauseCount + 1);
} finally {
IndexSearcher.setMaxClauseCount(originalMaxClauses);
}
}

private void doAssertLimitExceededException(String field, int exceedingFieldCount) {
Expand All @@ -610,7 +603,9 @@ private void doAssertLimitExceededException(String field, int exceedingFieldCoun
});
assertThat(
ExceptionsHelper.unwrap(e, IllegalArgumentException.class).getMessage(),
containsString("field expansion matches too many fields, limit: " + CLUSTER_MAX_CLAUSE_COUNT + ", got: " + exceedingFieldCount)
containsString(
"field expansion matches too many fields, limit: " + IndexSearcher.getMaxClauseCount() + ", got: " + exceedingFieldCount
)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@

package org.elasticsearch.index.search;

import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.search.SearchModule;

import java.util.Collection;
import java.util.HashMap;
Expand Down Expand Up @@ -152,7 +152,7 @@ static Map<String, Float> resolveMappingField(
}

static void checkForTooManyFields(int numberOfFields, SearchExecutionContext context, @Nullable String inputPattern) {
Integer limit = SearchModule.INDICES_MAX_CLAUSE_COUNT_SETTING.get(context.getIndexSettings().getSettings());
int limit = IndexSearcher.getMaxClauseCount();
if (numberOfFields > limit) {
StringBuilder errorMsg = new StringBuilder("field expansion ");
if (inputPattern != null) {
Expand Down
2 changes: 2 additions & 0 deletions server/src/main/java/org/elasticsearch/node/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.SearchUtils;
import org.elasticsearch.search.aggregations.support.AggregationUsageService;
import org.elasticsearch.search.fetch.FetchPhase;
import org.elasticsearch.shutdown.PluginShutdownService;
Expand Down Expand Up @@ -472,6 +473,7 @@ protected Node(
final UsageService usageService = new UsageService();

SearchModule searchModule = new SearchModule(settings, pluginsService.filterPlugins(SearchPlugin.class));
SearchUtils.configureMaxClauses(threadPool);
List<NamedWriteableRegistry.Entry> namedWriteables = Stream.of(
NetworkModule.getNamedWriteables().stream(),
IndicesModule.getNamedWriteables().stream(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

package org.elasticsearch.search;

import org.apache.lucene.search.BooleanQuery;
import org.elasticsearch.common.CheckedBiConsumer;
import org.elasticsearch.common.NamedRegistry;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
Expand Down Expand Up @@ -268,7 +267,8 @@ public class SearchModule {
4096,
1,
Integer.MAX_VALUE,
Setting.Property.NodeScope
Setting.Property.NodeScope,
Setting.Property.DeprecatedWarning
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When would we raise a deprecation warning for node settings, only on startup?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have confirmed with @pgomulka that this will only be emitted on startup, yes.

);

public static final Setting<Integer> INDICES_MAX_NESTED_DEPTH_SETTING = Setting.intSetting(
Expand All @@ -292,8 +292,6 @@ public class SearchModule {
/**
* Constructs a new SearchModule object
*
* NOTE: This constructor should not be called in production unless an accurate {@link Settings} object is provided.
* When constructed, a static flag is set in Lucene {@link BooleanQuery#setMaxClauseCount} according to the settings.
* @param settings Current settings
* @param plugins List of included {@link SearchPlugin} objects.
*/
Expand Down Expand Up @@ -1057,7 +1055,6 @@ private void registerQueryParsers(List<SearchPlugin> plugins) {
registerQuery(new QuerySpec<>(MatchAllQueryBuilder.NAME, MatchAllQueryBuilder::new, MatchAllQueryBuilder::fromXContent));
registerQuery(new QuerySpec<>(QueryStringQueryBuilder.NAME, QueryStringQueryBuilder::new, QueryStringQueryBuilder::fromXContent));
registerQuery(new QuerySpec<>(BoostingQueryBuilder.NAME, BoostingQueryBuilder::new, BoostingQueryBuilder::fromXContent));
BooleanQuery.setMaxClauseCount(INDICES_MAX_CLAUSE_COUNT_SETTING.get(settings));
registerBoolQuery(new ParseField(BoolQueryBuilder.NAME), BoolQueryBuilder::new);
BoolQueryBuilder.setMaxNestedDepth(INDICES_MAX_NESTED_DEPTH_SETTING.get(settings));
registerQuery(new QuerySpec<>(TermQueryBuilder.NAME, TermQueryBuilder::new, TermQueryBuilder::fromXContent));
Expand Down
36 changes: 36 additions & 0 deletions server/src/main/java/org/elasticsearch/search/SearchUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.search;

import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.monitor.jvm.JvmStats;
import org.elasticsearch.threadpool.ThreadPool;

public final class SearchUtils {

/**
* Configure the maximum lucene query clause count based on the size of the search thread pool and maximum heap
*/
public static void configureMaxClauses(ThreadPool threadPool) {
int searchThreadPoolSize = threadPool.info(ThreadPool.Names.SEARCH).getMax();
long heapSize = JvmStats.jvmStats().getMem().getHeapMax().getGb();
configureMaxClauses(searchThreadPoolSize, heapSize);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if you start Elasticsearch with 512MB of heap, does it mean you get zero max clauses?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If heapSize is zero then we stick with the default value from lucene, currently 1024. But maybe we should stick with the current ES default of 4096 instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have updated this so that we will always return a minimum of 4096, or larger if the heap/cpu size permits.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this work if we used the heap size in GB as a fractional number instead?

}

static void configureMaxClauses(long threadPoolSize, long heapInGb) {
if (threadPoolSize <= 0 || heapInGb <= 0) {
return; // If we don't know how to size things, keep the lucene default
}

int maxClauseCount = Math.toIntExact(heapInGb * 65_536 / threadPoolSize);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add some explanation as to where this 65,536 number comes from?

IndexSearcher.setMaxClauseCount(maxClauseCount);
}

private SearchUtils() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@

package org.elasticsearch.search.aggregations.bucket.adjacency;

import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.Rewriteable;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.aggregations.AbstractAggregationBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.AggregatorFactories.Builder;
Expand Down Expand Up @@ -204,17 +203,10 @@ protected AdjacencyMatrixAggregationBuilder doRewrite(QueryRewriteContext queryR
@Override
protected AggregatorFactory doBuild(AggregationContext context, AggregatorFactory parent, Builder subFactoriesBuilder)
throws IOException {
int maxFilters = BooleanQuery.getMaxClauseCount();
int maxFilters = IndexSearcher.getMaxClauseCount();
if (filters.size() > maxFilters) {
throw new IllegalArgumentException(
"Number of filters is too large, must be less than or equal to: ["
+ maxFilters
+ "] but was ["
+ filters.size()
+ "]."
+ "This limit can be set by changing the ["
+ SearchModule.INDICES_MAX_CLAUSE_COUNT_SETTING.getKey()
+ "] setting."
"Number of filters is too large, must be less than or equal to: [" + maxFilters + "] but was [" + filters.size() + "]."
);
}
return new AdjacencyMatrixAggregatorFactory(name, filters, separator, context, parent, subFactoriesBuilder, metadata);
Expand Down
Loading