Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Lucene segment-level fields stats #111123

Merged
merged 10 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/111123.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 111123
summary: Add Lucene segment-level fields stats
area: Mapping
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,92 @@
- match: { nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead_in_bytes: 28672 }

---
"Lucene segment level fields stats":

- requires:
cluster_features: ["mapper.segment_level_fields_stats"]
reason: "segment level fields stats"

- do:
indices.create:
index: index1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
routing.rebalance.enable: none
mappings:
runtime:
a_source_field:
type: keyword
properties:
"@timestamp":
type: date
authors:
properties:
age:
type: long
company:
type: text
fields:
keyword:
type: keyword
ignore_above: 256
name:
properties:
first_name:
type: keyword
full_name:
type: text
last_name:
type: keyword
link:
type: alias
path: url
title:
type: text
url:
type: keyword
- do:
search_shards:
index: index1
- set:
shards.0.0.node: node_id

- do:
nodes.stats: { metric: _all, level: "indices", human: true }

- do:
index:
index: index1
body: { "title": "foo", "@timestamp": "2023-10-15T14:12:12" }
- do:
indices.flush:
index: index1
- do:
nodes.stats: { metric: _all, level: "indices", human: true }

- match: { nodes.$node_id.indices.mappings.total_count: 28 }
- match: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 28672 }
- match: { nodes.$node_id.indices.mappings.total_segment_fields: 28 }
- match: { nodes.$node_id.indices.mappings.average_fields_per_segment: 28 }

- do:
index:
index: index1
body: { "title": "bar", "@timestamp": "2023-11-15T14:12:12" }
- do:
indices.flush:
index: index1
- do:
nodes.stats: { metric: _all, level: "indices", human: true }

- match: { nodes.$node_id.indices.mappings.total_count: 28 }
- match: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 28672 }
- match: { nodes.$node_id.indices.mappings.total_segment_fields: 56 }
- match: { nodes.$node_id.indices.mappings.average_fields_per_segment: 28 }
---

"indices mappings does not exist in shards level":

- requires:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ static TransportVersion def(int id) {
public static final TransportVersion ENTERPRISE_GEOIP_DOWNLOADER = def(8_708_00_0);
public static final TransportVersion NODES_STATS_ENUM_SET = def(8_709_00_0);
public static final TransportVersion MASTER_NODE_METRICS = def(8_710_00_0);
public static final TransportVersion SEGMENT_LEVEL_FIELDS_STATS = def(8_711_00_0);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
16 changes: 12 additions & 4 deletions server/src/main/java/org/elasticsearch/index/IndexService.java
Original file line number Diff line number Diff line change
Expand Up @@ -332,10 +332,18 @@ public NodeMappingStats getNodeMappingStats() {
if (mapperService == null) {
return null;
}
long totalCount = mapperService().mappingLookup().getTotalMapperCount();
long totalEstimatedOverhead = totalCount * 1024L; // 1KiB estimated per mapping
NodeMappingStats indexNodeMappingStats = new NodeMappingStats(totalCount, totalEstimatedOverhead);
return indexNodeMappingStats;
long numFields = mapperService().mappingLookup().getTotalMapperCount();
long totalEstimatedOverhead = numFields * 1024L; // 1KiB estimated per mapping
// Assume all segments have the same mapping; otherwise, we need to acquire searchers to count the actual fields.
int numLeaves = 0;
for (IndexShard shard : shards.values()) {
try {
numLeaves += shard.commitStats().getNumLeaves();
} catch (AlreadyClosedException ignored) {

}
}
return new NodeMappingStats(numFields, totalEstimatedOverhead, numLeaves, numLeaves * numFields);
}

public Set<Integer> shardIds() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package org.elasticsearch.index.engine;

import org.apache.lucene.index.SegmentInfos;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
Expand All @@ -27,6 +28,7 @@ public final class CommitStats implements Writeable, ToXContentFragment {
private final long generation;
private final String id; // lucene commit id in base 64;
private final int numDocs;
private final int numLeaves;

public CommitStats(SegmentInfos segmentInfos) {
// clone the map to protect against concurrent changes
Expand All @@ -35,26 +37,32 @@ public CommitStats(SegmentInfos segmentInfos) {
generation = segmentInfos.getLastGeneration();
id = Base64.getEncoder().encodeToString(segmentInfos.getId());
numDocs = Lucene.getNumDocs(segmentInfos);
numLeaves = segmentInfos.size();
}

CommitStats(StreamInput in) throws IOException {
userData = in.readImmutableMap(StreamInput::readString);
generation = in.readLong();
id = in.readOptionalString();
numDocs = in.readInt();
numLeaves = in.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS) ? in.readVInt() : 0;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
CommitStats that = (CommitStats) o;
return userData.equals(that.userData) && generation == that.generation && Objects.equals(id, that.id) && numDocs == that.numDocs;
return userData.equals(that.userData)
&& generation == that.generation
&& Objects.equals(id, that.id)
&& numDocs == that.numDocs
&& numLeaves == that.numLeaves;
}

@Override
public int hashCode() {
return Objects.hash(userData, generation, id, numDocs);
return Objects.hash(userData, generation, id, numDocs, numLeaves);
}

public static CommitStats readOptionalCommitStatsFrom(StreamInput in) throws IOException {
Expand All @@ -81,12 +89,19 @@ public int getNumDocs() {
return numDocs;
}

public int getNumLeaves() {
return numLeaves;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeMap(userData, StreamOutput::writeString);
out.writeLong(generation);
out.writeOptionalString(id);
out.writeInt(numDocs);
if (out.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) {
out.writeVInt(numLeaves);
}
}

static final class Fields {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ public Set<NodeFeature> getFeatures() {
DenseVectorFieldMapper.BIT_VECTORS,
DocumentMapper.INDEX_SORTING_ON_NESTED,
KeywordFieldMapper.KEYWORD_DIMENSION_IGNORE_ABOVE,
IndexModeFieldMapper.QUERYING_INDEX_MODE
IndexModeFieldMapper.QUERYING_INDEX_MODE,
NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@

package org.elasticsearch.index.mapper;

import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.xcontent.ToXContentFragment;
import org.elasticsearch.xcontent.XContentBuilder;

Expand All @@ -25,15 +27,21 @@
*/
public class NodeMappingStats implements Writeable, ToXContentFragment {

public static final NodeFeature SEGMENT_LEVEL_FIELDS_STATS = new NodeFeature("mapper.segment_level_fields_stats");

private static final class Fields {
static final String MAPPINGS = "mappings";
static final String TOTAL_COUNT = "total_count";
static final String TOTAL_ESTIMATED_OVERHEAD = "total_estimated_overhead";
static final String TOTAL_ESTIMATED_OVERHEAD_IN_BYTES = "total_estimated_overhead_in_bytes";
static final String TOTAL_SEGMENT_FIELDS = "total_segment_fields";
static final String AVERAGE_FIELDS_PER_SEGMENT = "average_fields_per_segment";
}

private long totalCount;
private long totalEstimatedOverhead;
private long totalSegments;
private long totalSegmentFields;

public NodeMappingStats() {

Expand All @@ -42,17 +50,25 @@ public NodeMappingStats() {
public NodeMappingStats(StreamInput in) throws IOException {
totalCount = in.readVLong();
totalEstimatedOverhead = in.readVLong();
if (in.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) {
totalSegments = in.readVLong();
totalSegmentFields = in.readVLong();
}
}

public NodeMappingStats(long totalCount, long totalEstimatedOverhead) {
public NodeMappingStats(long totalCount, long totalEstimatedOverhead, long totalSegments, long totalSegmentFields) {
this.totalCount = totalCount;
this.totalEstimatedOverhead = totalEstimatedOverhead;
this.totalSegments = totalSegments;
this.totalSegmentFields = totalSegmentFields;
}

public void add(@Nullable NodeMappingStats other) {
if (other == null) return;
this.totalCount += other.totalCount;
this.totalEstimatedOverhead += other.totalEstimatedOverhead;
this.totalSegments += other.totalSegments;
this.totalSegmentFields += other.totalSegmentFields;
}

public long getTotalCount() {
Expand All @@ -63,17 +79,31 @@ public ByteSizeValue getTotalEstimatedOverhead() {
return ByteSizeValue.ofBytes(totalEstimatedOverhead);
}

public long getTotalSegments() {
return totalSegments;
}

public long getTotalSegmentFields() {
return totalSegmentFields;
}

@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(totalCount);
out.writeVLong(totalEstimatedOverhead);
if (out.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) {
out.writeVLong(totalSegments);
out.writeVLong(totalSegmentFields);
}
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(Fields.MAPPINGS);
builder.field(Fields.TOTAL_COUNT, getTotalCount());
builder.humanReadableField(Fields.TOTAL_ESTIMATED_OVERHEAD_IN_BYTES, Fields.TOTAL_ESTIMATED_OVERHEAD, getTotalEstimatedOverhead());
builder.field(Fields.TOTAL_SEGMENT_FIELDS, totalSegmentFields);
builder.field(Fields.AVERAGE_FIELDS_PER_SEGMENT, totalSegments == 0 ? 0 : totalSegmentFields / totalSegments);
builder.endObject();
return builder;
}
Expand All @@ -83,11 +113,14 @@ public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
NodeMappingStats that = (NodeMappingStats) o;
return totalCount == that.totalCount && totalEstimatedOverhead == that.totalEstimatedOverhead;
return totalCount == that.totalCount
&& totalEstimatedOverhead == that.totalEstimatedOverhead
&& totalSegments == that.totalSegments
&& totalSegmentFields == that.totalSegmentFields;
}

@Override
public int hashCode() {
return Objects.hash(totalCount, totalEstimatedOverhead);
return Objects.hash(totalCount, totalEstimatedOverhead, totalSegments, totalSegmentFields);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,12 @@ private static int expectedChunks(@Nullable NodeIndicesStats nodeIndicesStats, N

private static CommonStats createIndexLevelCommonStats() {
CommonStats stats = new CommonStats(new CommonStatsFlags().clear().set(CommonStatsFlags.Flag.Mappings, true));
stats.nodeMappings = new NodeMappingStats(randomNonNegativeLong(), randomNonNegativeLong());
stats.nodeMappings = new NodeMappingStats(
randomNonNegativeLong(),
randomNonNegativeLong(),
randomNonNegativeLong(),
randomNonNegativeLong()
);
return stats;
}

Expand Down
Loading