Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand segment sorter for all timeseries indices #78639

Merged
merged 4 commits into from
Oct 5, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
---
"Test that index segments are sorted on timestamp field if @timestamp field is defined in mapping":
- skip:
version: " - 7.99.99"
reason: "sorting segments was added in 7.16"
features: allowed_warnings

- do:
indices.create:
index: test_index1
body:
mappings:
properties:
"@timestamp":
type: date
settings:
number_of_shards: 1
number_of_replicas: 0

# 1st segment
- do:
index:
index: test_index1
body: { "foo": "bar1", "@timestamp": "2021-08-01" }
refresh: true

# 2nd segment
- do:
index:
index: test_index1
body: { "foo": "bar2", "@timestamp": "2021-08-02" }
refresh: true

# test that segments are sorted by @timestamp DESC
- do:
search:
index: test_index1
body:
fields: [{ "field":"@timestamp", "format":"yyyy-MM-dd" }]
- match: { hits.total.value: 2 }
- match: { hits.hits.0.fields.@timestamp: ["2021-08-02"] }
- match: { hits.hits.1.fields.@timestamp: ["2021-08-01"] }

---
"Test that index segments are NOT sorted on timestamp field when @timestamp field is dynamically added":
- skip:
version: " - 7.15.99"
reason: "sorting segments was added in 7.16"
features: allowed_warnings

- do:
indices.create:
index: test_index2
body:
settings:
number_of_shards: 1
number_of_replicas: 0

# 1st segment
- do:
index:
index: test_index2
body: { "foo": "bar1", "@timestamp": "2021-08-01" }
refresh: true

# 2nd segment
- do:
index:
index: test_index2
body: { "foo": "bar2", "@timestamp": "2021-08-02" }
refresh: true

# test that segments are NOT sorted by @timestamp DESC as the field was not defined in the mapping when the index was created
- do:
search:
index: test_index2
body:
fields: [{ "field":"@timestamp", "format":"yyyy-MM-dd" }]
- match: { hits.hits.0.fields.@timestamp: ["2021-08-01"] }
- match: { hits.hits.1.fields.@timestamp: ["2021-08-02"] }

# test that after we reopen the index, segments are sorted by @timestamp DESC
- do:
indices.close:
index: test_index2
- is_true: acknowledged
- do:
indices.open:
index: test_index2
- is_true: acknowledged
- do:
search:
index: test_index2
body:
fields: [{ "field":"@timestamp", "format":"yyyy-MM-dd" }]
- match: { hits.total.value: 2 }
- match: { hits.hits.0.fields.@timestamp: ["2021-08-02"] }
- match: { hits.hits.1.fields.@timestamp: ["2021-08-01"] }

---
"Test if segments are missing @timestamp field we don't get errors":
- skip:
version: " - 7.15.99"
reason: "sorting segments was added in 7.16"
features: allowed_warnings

- do:
indices.create:
index: test_index3
body:
mappings:
properties:
"@timestamp":
type: date
settings:
number_of_shards: 1
number_of_replicas: 0

# 1st segment missing @timestamp field
- do:
index:
index: test_index3
body: { "foo": "bar1"}
refresh: true

# 2nd segment
- do:
index:
index: test_index3
body: { "foo": "bar2", "@timestamp": "2021-08-02" }
refresh: true

- do:
search:
index: test_index3
body:
fields: [{ "field":"@timestamp", "format":"yyyy-MM-dd" }]
- match: { hits.hits.0.fields.@timestamp: ["2021-08-02"] }
- is_false: hits.hits.1.fields.@timestamp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PointValues;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cluster.AbstractDiffable;
import org.elasticsearch.cluster.Diff;
import org.elasticsearch.common.Strings;
Expand Down Expand Up @@ -42,23 +43,26 @@ public final class DataStream extends AbstractDiffable<DataStream> implements To

public static final String BACKING_INDEX_PREFIX = ".ds-";
public static final DateFormatter DATE_FORMATTER = DateFormatter.forPattern("uuuu.MM.dd");
// Datastreams' leaf readers should be sorted by desc order of their timestamp field, as it allows search time optimizations
public static Comparator<LeafReader> DATASTREAM_LEAF_READERS_SORTER =
// Timeseries indices' leaf readers should be sorted by desc order of their timestamp field, as it allows search time optimizations
public static Comparator<LeafReader> TIMESERIES_LEAF_READERS_SORTER =
Comparator.comparingLong(
(LeafReader r) -> {
try {
PointValues points = r.getPointValues(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD);
if (points != null) {
byte[] sortValue = points.getMaxPackedValue();
return LongPoint.decodeDimension(sortValue, 0);
} else if (r.numDocs() == 0) {
// points can be null if the segment contains only deleted documents
} else {
// As we apply this segment sorter to all timeseries indices,
// there is no guarantee that every doc contains the @timestamp field.
// Segments in which no doc has the @timestamp field are
// sorted last.
return Long.MIN_VALUE;
}
} catch (IOException e) {
throw new ElasticsearchException("Can't access [" +
DataStream.TimestampField.FIXED_TIMESTAMP_FIELD + "] field for the index!", e);
}
throw new IllegalStateException("Can't access [" +
DataStream.TimestampField.FIXED_TIMESTAMP_FIELD + "] field for the data stream!");
})
.reversed();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.codecs.PostingsFormat;
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
Expand Down Expand Up @@ -389,6 +390,19 @@ public boolean isDataStreamTimestampFieldEnabled() {
return dtfm != null && dtfm.isEnabled();
}

/**
 * Checks whether this mapping defines the fixed timestamp field as a date field
 * that is both searchable and backed by doc values.
 * @return {@code true} when such a timestamp field is mapped, {@code false} otherwise
 *         (including when the field is absent or is not a date field).
 */
public boolean hasTimestampField() {
    final MappedFieldType timestampType = fieldTypesLookup().get(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD);
    // instanceof is false for null, so a missing field falls through to false
    return timestampType instanceof DateFieldMapper.DateFieldType
        && timestampType.isSearchable()
        && timestampType.hasDocValues();
}

/**
* Key for the lookup to be used in caches.
*/
Expand Down
13 changes: 11 additions & 2 deletions server/src/main/java/org/elasticsearch/index/shard/IndexShard.java
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.elasticsearch.cluster.metadata.DataStream.DATASTREAM_LEAF_READERS_SORTER;
import static org.elasticsearch.cluster.metadata.DataStream.TIMESERIES_LEAF_READERS_SORTER;
import static org.elasticsearch.index.seqno.RetentionLeaseActions.RETAIN_ALL;
import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO;

Expand Down Expand Up @@ -408,6 +408,14 @@ public Sort getIndexSort() {
return indexSortSupplier.get();
}

/**
 * Reports whether this shard is part of a data stream.
 * @return {@code true} if this shard is part of a data stream, {@code false} otherwise
 */
public boolean isDataStreamIndex() {
    return this.isDataStreamIndex;
}

/**
 * Returns the {@link ShardGetService} held by this shard.
 */
public ShardGetService getService() {
    return getService;
}
Expand Down Expand Up @@ -2905,6 +2913,7 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) {
this.warmer.warm(reader);
}
};
final boolean isTimeseriesIndex = mapperService == null ? false : mapperService.mappingLookup().hasTimestampField();
return new EngineConfig(
shardId,
threadPool,
Expand All @@ -2928,7 +2937,7 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) {
replicationTracker::getRetentionLeases,
this::getOperationPrimaryTerm,
snapshotCommitSupplier,
isDataStreamIndex ? DATASTREAM_LEAF_READERS_SORTER : null);
isTimeseriesIndex ? TIMESERIES_LEAF_READERS_SORTER : null);
}

/**
Expand Down

This file was deleted.

2 changes: 2 additions & 0 deletions x-pack/qa/runtime-fields/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ subprojects {
// The error messages are different
'search/330_fetch_fields/error includes field name',
'search/330_fetch_fields/error includes glob pattern',
// we need a @timestamp field to be defined in index mapping
'search/380_sort_segments_on_timestamp/*',
/////// NOT SUPPORTED ///////
].join(',')
}
Expand Down