diff --git a/docs/changelog/111123.yaml b/docs/changelog/111123.yaml new file mode 100644 index 0000000000000..605b8607f4082 --- /dev/null +++ b/docs/changelog/111123.yaml @@ -0,0 +1,5 @@ +pr: 111123 +summary: Add Lucene segment-level fields stats +area: Mapping +type: enhancement +issues: [] diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc index 084ff471367ce..f188a5f2ddf04 100644 --- a/docs/reference/cluster/nodes-stats.asciidoc +++ b/docs/reference/cluster/nodes-stats.asciidoc @@ -808,6 +808,14 @@ This is not shown for the `shards` level, since mappings may be shared across th `total_estimated_overhead_in_bytes`:: (integer) Estimated heap overhead, in bytes, of mappings on this node, which allows for 1kiB of heap for every mapped field. +`total_segments`:: +(integer) Estimated number of Lucene segments on this node + +`total_segment_fields`:: +(integer) Estimated number of fields at the segment level on this node + +`average_fields_per_segment`:: +(integer) Estimated average number of fields per segment on this node ======= `dense_vector`:: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml index 35089cc4c85a7..9948fbb04a6c7 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml @@ -509,6 +509,94 @@ - match: { nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead_in_bytes: 28672 } --- +"Lucene segment level fields stats": + + - requires: + cluster_features: ["mapper.segment_level_fields_stats"] + reason: "segment level fields stats" + + - do: + indices.create: + index: index1 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + routing.rebalance.enable: none + mappings: + runtime: + a_source_field: + type: keyword + properties: + "@timestamp": + type: date + authors: + properties: + age: + type: long + company: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + name: + properties: + first_name: + type: keyword + full_name: + type: text + last_name: + type: keyword + link: + type: alias + path: url + title: + type: text + url: + type: keyword + - do: + search_shards: + index: index1 + - set: + shards.0.0.node: node_id + + - do: + nodes.stats: { metric: _all, level: "indices", human: true } + + - do: + index: + index: index1 + body: { "title": "foo", "@timestamp": "2023-10-15T14:12:12" } + - do: + indices.flush: + index: index1 + - do: + nodes.stats: { metric: _all, level: "indices", human: true } + + - match: { nodes.$node_id.indices.mappings.total_count: 28 } + - match: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 28672 } + - match: { nodes.$node_id.indices.mappings.total_segments: 1 } + - match: { nodes.$node_id.indices.mappings.total_segment_fields: 28 } + - match: { nodes.$node_id.indices.mappings.average_fields_per_segment: 28 } + + - do: + index: + index: index1 + body: { "title": "bar", "@timestamp": "2023-11-15T14:12:12" } + - do: + indices.flush: + index: index1 + - do: + nodes.stats: { metric: _all, level: "indices", human: true } + + - match: { nodes.$node_id.indices.mappings.total_count: 28 } + - match: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 28672 } + - match: { nodes.$node_id.indices.mappings.total_segments: 2 } + - match: { nodes.$node_id.indices.mappings.total_segment_fields: 56 } + - match: { nodes.$node_id.indices.mappings.average_fields_per_segment: 28 } +--- + "indices mappings does not exist in shards level": - requires: diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index afd4aa4f11d0d..25dee690a1a32 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -218,6 +218,7 @@ static TransportVersion def(int id) { public static final TransportVersion ENTERPRISE_GEOIP_DOWNLOADER = def(8_708_00_0); public static final TransportVersion NODES_STATS_ENUM_SET = def(8_709_00_0); public static final TransportVersion MASTER_NODE_METRICS = def(8_710_00_0); + public static final TransportVersion SEGMENT_LEVEL_FIELDS_STATS = def(8_711_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/IndexService.java b/server/src/main/java/org/elasticsearch/index/IndexService.java index 3d81cccc92dc7..413a31c0ccd6d 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexService.java +++ b/server/src/main/java/org/elasticsearch/index/IndexService.java @@ -332,10 +332,18 @@ public NodeMappingStats getNodeMappingStats() { if (mapperService == null) { return null; } - long totalCount = mapperService().mappingLookup().getTotalMapperCount(); - long totalEstimatedOverhead = totalCount * 1024L; // 1KiB estimated per mapping - NodeMappingStats indexNodeMappingStats = new NodeMappingStats(totalCount, totalEstimatedOverhead); - return indexNodeMappingStats; + long numFields = mapperService().mappingLookup().getTotalMapperCount(); + long totalEstimatedOverhead = numFields * 1024L; // 1KiB estimated per mapping + // Assume all segments have the same mapping; otherwise, we need to acquire searchers to count the actual fields. + int numLeaves = 0; + for (IndexShard shard : shards.values()) { + try { + numLeaves += shard.commitStats().getNumLeaves(); + } catch (AlreadyClosedException ignored) { + + } + } + return new NodeMappingStats(numFields, totalEstimatedOverhead, numLeaves, numLeaves * numFields); } public Set shardIds() { diff --git a/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java b/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java index 16b58e001dcbe..1a03cf6c7bf98 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java +++ b/server/src/main/java/org/elasticsearch/index/engine/CommitStats.java @@ -8,6 +8,7 @@ package org.elasticsearch.index.engine; import org.apache.lucene.index.SegmentInfos; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -27,6 +28,7 @@ public final class CommitStats implements Writeable, ToXContentFragment { private final long generation; private final String id; // lucene commit id in base 64; private final int numDocs; + private final int numLeaves; public CommitStats(SegmentInfos segmentInfos) { // clone the map to protect against concurrent changes @@ -35,6 +37,7 @@ public CommitStats(SegmentInfos segmentInfos) { generation = segmentInfos.getLastGeneration(); id = Base64.getEncoder().encodeToString(segmentInfos.getId()); numDocs = Lucene.getNumDocs(segmentInfos); + numLeaves = segmentInfos.size(); } CommitStats(StreamInput in) throws IOException { @@ -42,6 +45,7 @@ public CommitStats(SegmentInfos segmentInfos) { generation = in.readLong(); id = in.readOptionalString(); numDocs = in.readInt(); + numLeaves = in.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS) ? in.readVInt() : 0; } @Override @@ -49,12 +53,16 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; CommitStats that = (CommitStats) o; - return userData.equals(that.userData) && generation == that.generation && Objects.equals(id, that.id) && numDocs == that.numDocs; + return userData.equals(that.userData) + && generation == that.generation + && Objects.equals(id, that.id) + && numDocs == that.numDocs + && numLeaves == that.numLeaves; } @Override public int hashCode() { - return Objects.hash(userData, generation, id, numDocs); + return Objects.hash(userData, generation, id, numDocs, numLeaves); } public static CommitStats readOptionalCommitStatsFrom(StreamInput in) throws IOException { @@ -81,12 +89,19 @@ public int getNumDocs() { return numDocs; } + public int getNumLeaves() { + return numLeaves; + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeMap(userData, StreamOutput::writeString); out.writeLong(generation); out.writeOptionalString(id); out.writeInt(numDocs); + if (out.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) { + out.writeVInt(numLeaves); + } } static final class Fields { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index a5f54ab89602b..ac3275c5e7119 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -29,7 +29,8 @@ public Set getFeatures() { DenseVectorFieldMapper.BIT_VECTORS, DocumentMapper.INDEX_SORTING_ON_NESTED, KeywordFieldMapper.KEYWORD_DIMENSION_IGNORE_ABOVE, - IndexModeFieldMapper.QUERYING_INDEX_MODE + IndexModeFieldMapper.QUERYING_INDEX_MODE, + NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NodeMappingStats.java b/server/src/main/java/org/elasticsearch/index/mapper/NodeMappingStats.java index 87a4f1367e108..77f86816d6fb7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NodeMappingStats.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NodeMappingStats.java @@ -8,11 +8,13 @@ package org.elasticsearch.index.mapper; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.Nullable; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; @@ -25,15 +27,22 @@ */ public class NodeMappingStats implements Writeable, ToXContentFragment { + public static final NodeFeature SEGMENT_LEVEL_FIELDS_STATS = new NodeFeature("mapper.segment_level_fields_stats"); + private static final class Fields { static final String MAPPINGS = "mappings"; static final String TOTAL_COUNT = "total_count"; static final String TOTAL_ESTIMATED_OVERHEAD = "total_estimated_overhead"; static final String TOTAL_ESTIMATED_OVERHEAD_IN_BYTES = "total_estimated_overhead_in_bytes"; + static final String TOTAL_SEGMENTS = "total_segments"; + static final String TOTAL_SEGMENT_FIELDS = "total_segment_fields"; + static final String AVERAGE_FIELDS_PER_SEGMENT = "average_fields_per_segment"; } private long totalCount; private long totalEstimatedOverhead; + private long totalSegments; + private long totalSegmentFields; public NodeMappingStats() { @@ -42,17 +51,25 @@ public NodeMappingStats() { public NodeMappingStats(StreamInput in) throws IOException { totalCount = in.readVLong(); totalEstimatedOverhead = in.readVLong(); + if (in.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) { + totalSegments = in.readVLong(); + totalSegmentFields = in.readVLong(); + } } - public NodeMappingStats(long totalCount, long totalEstimatedOverhead) { + public NodeMappingStats(long totalCount, long totalEstimatedOverhead, long totalSegments, long totalSegmentFields) { this.totalCount = totalCount; this.totalEstimatedOverhead = totalEstimatedOverhead; + this.totalSegments = totalSegments; + this.totalSegmentFields = totalSegmentFields; } public void add(@Nullable NodeMappingStats other) { if (other == null) return; this.totalCount += other.totalCount; this.totalEstimatedOverhead += other.totalEstimatedOverhead; + this.totalSegments += other.totalSegments; + this.totalSegmentFields += other.totalSegmentFields; } public long getTotalCount() { @@ -63,10 +80,22 @@ public ByteSizeValue getTotalEstimatedOverhead() { return ByteSizeValue.ofBytes(totalEstimatedOverhead); } + public long getTotalSegments() { + return totalSegments; + } + + public long getTotalSegmentFields() { + return totalSegmentFields; + } + @Override public void writeTo(StreamOutput out) throws IOException { out.writeVLong(totalCount); out.writeVLong(totalEstimatedOverhead); + if (out.getTransportVersion().onOrAfter(TransportVersions.SEGMENT_LEVEL_FIELDS_STATS)) { + out.writeVLong(totalSegments); + out.writeVLong(totalSegmentFields); + } } @Override @@ -74,6 +103,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(Fields.MAPPINGS); builder.field(Fields.TOTAL_COUNT, getTotalCount()); builder.humanReadableField(Fields.TOTAL_ESTIMATED_OVERHEAD_IN_BYTES, Fields.TOTAL_ESTIMATED_OVERHEAD, getTotalEstimatedOverhead()); + builder.field(Fields.TOTAL_SEGMENTS, totalSegments); + builder.field(Fields.TOTAL_SEGMENT_FIELDS, totalSegmentFields); + builder.field(Fields.AVERAGE_FIELDS_PER_SEGMENT, totalSegments == 0 ? 0 : totalSegmentFields / totalSegments); builder.endObject(); return builder; } @@ -83,11 +115,14 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; NodeMappingStats that = (NodeMappingStats) o; - return totalCount == that.totalCount && totalEstimatedOverhead == that.totalEstimatedOverhead; + return totalCount == that.totalCount + && totalEstimatedOverhead == that.totalEstimatedOverhead + && totalSegments == that.totalSegments + && totalSegmentFields == that.totalSegmentFields; } @Override public int hashCode() { - return Objects.hash(totalCount, totalEstimatedOverhead); + return Objects.hash(totalCount, totalEstimatedOverhead, totalSegments, totalSegmentFields); } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index 82f22a2572c1d..09b0a78849cd9 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -561,7 +561,12 @@ private static int expectedChunks(@Nullable NodeIndicesStats nodeIndicesStats, N private static CommonStats createIndexLevelCommonStats() { CommonStats stats = new CommonStats(new CommonStatsFlags().clear().set(CommonStatsFlags.Flag.Mappings, true)); - stats.nodeMappings = new NodeMappingStats(randomNonNegativeLong(), randomNonNegativeLong()); + stats.nodeMappings = new NodeMappingStats( + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong() + ); return stats; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/NodeMappingStatsTests.java b/server/src/test/java/org/elasticsearch/index/mapper/NodeMappingStatsTests.java index 166e2c33fe263..563a860b084ea 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/NodeMappingStatsTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/NodeMappingStatsTests.java @@ -7,52 +7,62 @@ */ package org.elasticsearch.index.mapper; -import org.elasticsearch.common.io.stream.BytesStreamOutput; -import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; import org.elasticsearch.test.ESTestCase; import java.io.IOException; -public class NodeMappingStatsTests extends ESTestCase { - - public void testSerialize() throws IOException { - NodeMappingStats stats = randomNodeMappingStats(); - BytesStreamOutput out = new BytesStreamOutput(); - stats.writeTo(out); - StreamInput input = out.bytes().streamInput(); - NodeMappingStats read = new NodeMappingStats(input); - assertEquals(-1, input.read()); - assertEquals(stats.getTotalCount(), read.getTotalCount()); - assertEquals(stats.getTotalEstimatedOverhead(), read.getTotalEstimatedOverhead()); - } +public class NodeMappingStatsTests extends AbstractWireSerializingTestCase { - public void testEqualityAndHashCode() { - NodeMappingStats stats = randomNodeMappingStats(); - assertEquals(stats, stats); - assertEquals(stats.hashCode(), stats.hashCode()); + @Override + protected Writeable.Reader instanceReader() { + return NodeMappingStats::new; + } - NodeMappingStats stats1 = new NodeMappingStats(1L, 2L); - NodeMappingStats stats2 = new NodeMappingStats(3L, 5L); - NodeMappingStats stats3 = new NodeMappingStats(3L, 5L); + @Override + protected NodeMappingStats createTestInstance() { + return new NodeMappingStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()); + } - assertNotEquals(stats1, stats2); - assertNotEquals(stats1, stats3); - assertEquals(stats2, stats3); + @Override + protected NodeMappingStats mutateInstance(NodeMappingStats in) throws IOException { + return switch (between(0, 3)) { + case 0 -> new NodeMappingStats( + randomValueOtherThan(in.getTotalCount(), ESTestCase::randomNonNegativeLong), + in.getTotalEstimatedOverhead().getBytes(), + in.getTotalSegments(), + in.getTotalSegmentFields() + ); + case 1 -> new NodeMappingStats( + in.getTotalCount(), + randomValueOtherThan(in.getTotalCount(), ESTestCase::randomNonNegativeLong), + in.getTotalSegments(), + in.getTotalSegmentFields() + ); + case 2 -> new NodeMappingStats( + in.getTotalCount(), + in.getTotalEstimatedOverhead().getBytes(), + randomValueOtherThan(in.getTotalSegments(), ESTestCase::randomNonNegativeLong), + in.getTotalSegmentFields() + ); + case 3 -> new NodeMappingStats( + in.getTotalCount(), + in.getTotalEstimatedOverhead().getBytes(), + in.getTotalSegments(), + randomValueOtherThan(in.getTotalSegmentFields(), ESTestCase::randomNonNegativeLong) + ); + default -> throw new AssertionError("invalid option"); + }; } public void testAdd() { - NodeMappingStats stats1 = new NodeMappingStats(1L, 2L); - NodeMappingStats stats2 = new NodeMappingStats(2L, 3L); - NodeMappingStats stats3 = new NodeMappingStats(3L, 5L); + NodeMappingStats stats1 = new NodeMappingStats(1L, 2L, 4L, 6L); + NodeMappingStats stats2 = new NodeMappingStats(2L, 3L, 10L, 20L); + NodeMappingStats stats3 = new NodeMappingStats(3L, 5L, 14L, 26L); stats1.add(stats2); assertEquals(stats1, stats3); assertEquals(stats1.hashCode(), stats3.hashCode()); } - - private static NodeMappingStats randomNodeMappingStats() { - long totalCount = randomIntBetween(1, 100); - long estimatedOverhead = totalCount * 1024; - return new NodeMappingStats(totalCount, estimatedOverhead); - } }