From c756161abc86e9ca38c692003e7a204bc365eb89 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 7 Feb 2023 21:19:59 +0100 Subject: [PATCH] Sort segments on timestamp in read only engine Ordinary indices support sorting their segments on the timestamp when they have such a field defined in their mappings (see #75195). This was not initially supported in the read only engine, as a directory reader could not be created providing a leaf sorter. That is now possible in Lucene, so we can make use of it. --- .../index/engine/ReadOnlyEngine.java | 11 ++- .../searchable-snapshots/qa/rest/build.gradle | 2 +- .../test/sort_segments_on_timestamp.yml | 88 +++++++++++++++++++ 3 files changed, 96 insertions(+), 5 deletions(-) create mode 100644 x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/sort_segments_on_timestamp.yml diff --git a/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java b/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java index 7d66c96bd4c1e..e5710cd1917cf 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/ReadOnlyEngine.java @@ -213,13 +213,16 @@ protected final ElasticsearchDirectoryReader wrapReader( } protected DirectoryReader open(IndexCommit commit) throws IOException { - // TODO: provide engineConfig.getLeafSorter() when opening a DirectoryReader from a commit - // should be available from Lucene v 8.10 assert Transports.assertNotTransportThread("opening index commit of a read-only engine"); + DirectoryReader directoryReader = DirectoryReader.open( + commit, + org.apache.lucene.util.Version.MIN_SUPPORTED_MAJOR, + engineConfig.getLeafSorter() + ); if (lazilyLoadSoftDeletes) { - return new LazySoftDeletesDirectoryReaderWrapper(DirectoryReader.open(commit), Lucene.SOFT_DELETES_FIELD); + return new LazySoftDeletesDirectoryReaderWrapper(directoryReader, Lucene.SOFT_DELETES_FIELD); } else { - return new 
SoftDeletesDirectoryReaderWrapper(DirectoryReader.open(commit), Lucene.SOFT_DELETES_FIELD); + return new SoftDeletesDirectoryReaderWrapper(directoryReader, Lucene.SOFT_DELETES_FIELD); } } diff --git a/x-pack/plugin/searchable-snapshots/qa/rest/build.gradle b/x-pack/plugin/searchable-snapshots/qa/rest/build.gradle index a15f0b2b3c835..b8c0127d0586a 100644 --- a/x-pack/plugin/searchable-snapshots/qa/rest/build.gradle +++ b/x-pack/plugin/searchable-snapshots/qa/rest/build.gradle @@ -10,7 +10,7 @@ final File repoDir = file("$buildDir/testclusters/repo") restResources { restApi { - include 'indices', 'search', 'bulk', 'snapshot', 'nodes', '_common', 'searchable_snapshots', 'cluster', 'open_point_in_time', 'close_point_in_time', 'security' + include 'indices', 'search', 'bulk', 'index', 'snapshot', 'nodes', '_common', 'searchable_snapshots', 'cluster', 'open_point_in_time', 'close_point_in_time', 'security' } } diff --git a/x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/sort_segments_on_timestamp.yml b/x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/sort_segments_on_timestamp.yml new file mode 100644 index 0000000000000..da09a95b53827 --- /dev/null +++ b/x-pack/plugin/searchable-snapshots/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/sort_segments_on_timestamp.yml @@ -0,0 +1,88 @@ +--- +setup: + + - do: + indices.create: + index: test + body: + mappings: + properties: + "@timestamp": + type: date + settings: + number_of_shards: 1 + number_of_replicas: 0 + + # 1st segment + - do: + index: + index: test + body: { "foo": "bar1", "@timestamp": "2021-08-01" } + refresh: true + + # 2nd segment + - do: + index: + index: test + body: { "foo": "bar2", "@timestamp": "2021-08-02" } + refresh: true + + # 3rd segment missing @timestamp field + - do: + index: + index: test + body: { "foo": "bar1"} + refresh: true + + - do: + snapshot.create_repository: + repository: repository-fs + body: + type: 
fs + settings: + location: "repository-fs" + + # Remove the snapshot if a previous test failed to delete it. + # Useful for third party tests that runs the test against a real external service. + - do: + snapshot.delete: + repository: repository-fs + snapshot: snapshot + ignore: 404 + + - do: + snapshot.create: + repository: repository-fs + snapshot: snapshot + wait_for_completion: true + + - do: + indices.delete: + index: test + +--- +"Test that index segments are sorted on timestamp field if @timestamp field is defined in mapping": + + - do: + searchable_snapshots.mount: + repository: repository-fs + snapshot: snapshot + wait_for_completion: true + body: + index: test + renamed_index: test-default-storage + + - match: { snapshot.snapshot: snapshot } + - match: { snapshot.shards.failed: 0 } + - match: { snapshot.shards.successful: 1 } + + # test that segments are sorted by @timestamp DESC + - do: + search: + index: test-default-storage + body: + fields: [{ "field":"@timestamp", "format":"yyyy-MM-dd" }] + - match: { hits.total.value: 3 } + - match: { hits.hits.0.fields.@timestamp: ["2021-08-02"] } + - match: { hits.hits.1.fields.@timestamp: ["2021-08-01"] } + - is_false: hits.hits.2.fields.@timestamp