Skip to content

Commit

Permalink
Expose shard field stats (elastic#111525)
Browse files Browse the repository at this point in the history
Previously, we returned the number of segments and the total number of 
fields in those segments in NodeMappingStats (see elastic#111123). However, the
total number of fields returned in that PR might be very inaccurate for
indices having large mappings but only a small number of actual fields.

This change returns a more accurate total number of fields using the 
Lucene FieldInfos from those segments. Since we need to acquire a
searcher to compute this stats, we opt to compute it after a shard is
refreshed and cache the result.

Relates elastic#111123
  • Loading branch information
dnhatn authored Aug 2, 2024
1 parent d0253a6 commit a0480ce
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.search.QueryCachingPolicy;
import org.apache.lucene.search.ReferenceManager;
Expand Down Expand Up @@ -224,6 +225,8 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
private final IndexStorePlugin.SnapshotCommitSupplier snapshotCommitSupplier;
private final Engine.IndexCommitListener indexCommitListener;
private FieldInfos fieldInfos;
private volatile ShardFieldStats shardFieldStats;

// sys prop to disable the field has value feature, defaults to true (enabled) if set to false (disabled) the
// field caps always returns empty fields ignoring the value of the query param `field_caps_empty_fields_filter`.
private final boolean enableFieldHasValue = Booleans.parseBoolean(
Expand Down Expand Up @@ -3489,7 +3492,7 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) {
cachingPolicy,
translogConfig,
IndexingMemoryController.SHARD_INACTIVE_TIME_SETTING.get(indexSettings.getSettings()),
List.of(refreshListeners, refreshPendingLocationListener, refreshFieldHasValueListener),
List.of(refreshListeners, refreshPendingLocationListener, refreshFieldHasValueListener, new RefreshShardFieldStatsListener()),
Collections.singletonList(new RefreshMetricUpdater(refreshMetric)),
indexSort,
circuitBreakerService,
Expand Down Expand Up @@ -4060,6 +4063,38 @@ public void afterRefresh(boolean didRefresh) {
}
}

/**
* Returns the shard-level field stats, which includes the number of segments in the latest NRT reader of this shard
* and the total number of fields across those segments.
*/
public ShardFieldStats getShardFieldStats() {
return shardFieldStats;
}

private class RefreshShardFieldStatsListener implements ReferenceManager.RefreshListener {
@Override
public void beforeRefresh() {

}

@Override
public void afterRefresh(boolean didRefresh) {
if (shardFieldStats == null || didRefresh) {
try (var searcher = getEngine().acquireSearcher("shard_field_stats", Engine.SearcherScope.INTERNAL)) {
int numSegments = 0;
int totalFields = 0;
for (LeafReaderContext leaf : searcher.getLeafContexts()) {
numSegments++;
totalFields += leaf.reader().getFieldInfos().size();
}
shardFieldStats = new ShardFieldStats(numSegments, totalFields);
} catch (AlreadyClosedException ignored) {

}
}
}
}

/**
* Ensures this shard is search active before invoking the provided listener.
* <p>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.index.shard;

/**
* A per shard stats including the number of segments and total fields across those segments.
* These stats should be recomputed whenever the shard is refreshed.
*
* @param numSegments the number of segments
* @param totalFields the total number of fields across the segments
*/
public record ShardFieldStats(int numSegments, int totalFields) {

}
Original file line number Diff line number Diff line change
Expand Up @@ -1784,6 +1784,57 @@ public void testExternalRefreshMetric() throws IOException {
closeShards(shard);
}

public void testShardFieldStats() throws IOException {
Settings settings = Settings.builder().put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), TimeValue.MINUS_ONE).build();
IndexShard shard = newShard(true, settings);
assertNull(shard.getShardFieldStats());
recoverShardFromStore(shard);
ShardFieldStats stats = shard.getShardFieldStats();
assertNotNull(stats);
assertThat(stats.numSegments(), equalTo(0));
assertThat(stats.totalFields(), equalTo(0));
// index some documents
int numDocs = between(1, 10);
for (int i = 0; i < numDocs; i++) {
indexDoc(shard, "_doc", "first_" + i, """
{
"f1": "foo",
"f2": "bar"
}
""");
}
assertThat(shard.getShardFieldStats(), sameInstance(stats));
shard.refresh("test");
stats = shard.getShardFieldStats();
assertThat(stats.numSegments(), equalTo(1));
// _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword,
assertThat(stats.totalFields(), equalTo(9));
// don't re-compute on refresh without change
shard.refresh("test");
assertThat(shard.getShardFieldStats(), sameInstance(stats));
// index more docs
numDocs = between(1, 10);
for (int i = 0; i < numDocs; i++) {
indexDoc(shard, "_doc", "first_" + i, """
{
"f1": "foo",
"f2": "bar",
"f3": "foobar"
}
""");
}
shard.refresh("test");
stats = shard.getShardFieldStats();
assertThat(stats.numSegments(), equalTo(2));
// 9 + _id, _source, _version, _primary_term, _seq_no, f1, f1.keyword, f2, f2.keyword, f3, f3.keyword
assertThat(stats.totalFields(), equalTo(21));
shard.forceMerge(new ForceMergeRequest().maxNumSegments(1).flush(true));
stats = shard.getShardFieldStats();
assertThat(stats.numSegments(), equalTo(1));
assertThat(stats.totalFields(), equalTo(12));
closeShards(shard);
}

public void testIndexingOperationsListeners() throws IOException {
IndexShard shard = newStartedShard(true);
indexDoc(shard, "_doc", "0", "{\"foo\" : \"bar\"}");
Expand Down

0 comments on commit a0480ce

Please sign in to comment.