Skip to content

Commit

Permalink
Sort segments on timestamp in read only engine
Browse files Browse the repository at this point in the history
Ordinary indices support sorting their segments on the timestamp when they have such a field defined
in their mappings (see elastic#75195). This was not initially supported in the read-only engine because a directory reader could not be created while providing
a leaf sorter; that is now possible in Lucene, so we can make use of it.
  • Loading branch information
javanna committed Feb 7, 2023
1 parent 283f8ac commit c756161
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -213,13 +213,16 @@ protected final ElasticsearchDirectoryReader wrapReader(
}

/**
 * Opens a {@link DirectoryReader} on the given index commit and wraps it in a soft-deletes-aware reader keyed on
 * {@code Lucene.SOFT_DELETES_FIELD}.
 * <p>
 * The reader is opened with {@code engineConfig.getLeafSorter()}; per the commit description this is what lets
 * segments be sorted on the timestamp field (NOTE(review): the sorter's actual ordering is defined elsewhere - confirm).
 * <p>
 * NOTE(review): this listing looks like a diff rendering that shows removed and added lines side by side (the TODO
 * and the two {@code DirectoryReader.open(commit)} returns appear to be the pre-change lines) - confirm against the
 * real file before relying on it.
 *
 * @param commit the index commit to open the reader on
 * @return a {@code LazySoftDeletesDirectoryReaderWrapper} when {@code lazilyLoadSoftDeletes} is set, otherwise a
 *         {@code SoftDeletesDirectoryReaderWrapper}
 * @throws IOException declared by this method; presumably raised when opening or wrapping the reader fails
 */
protected DirectoryReader open(IndexCommit commit) throws IOException {
// TODO: provide engineConfig.getLeafSorter() when opening a DirectoryReader from a commit
// should be available from Lucene v 8.10
// Assertion-only guard; presumably ensures this is not executed on a transport thread.
assert Transports.assertNotTransportThread("opening index commit of a read-only engine");
// Open the reader once, passing the minimum supported Lucene major version and the configured leaf sorter.
DirectoryReader directoryReader = DirectoryReader.open(
commit,
org.apache.lucene.util.Version.MIN_SUPPORTED_MAJOR,
engineConfig.getLeafSorter()
);
if (lazilyLoadSoftDeletes) {
// Variant that opens its own reader without a leaf sorter (appears to be the pre-change line).
return new LazySoftDeletesDirectoryReaderWrapper(DirectoryReader.open(commit), Lucene.SOFT_DELETES_FIELD);
// Variant that reuses the leaf-sorted reader opened above.
return new LazySoftDeletesDirectoryReaderWrapper(directoryReader, Lucene.SOFT_DELETES_FIELD);
} else {
// Variant that opens its own reader without a leaf sorter (appears to be the pre-change line).
return new SoftDeletesDirectoryReaderWrapper(DirectoryReader.open(commit), Lucene.SOFT_DELETES_FIELD);
// Variant that reuses the leaf-sorted reader opened above.
return new SoftDeletesDirectoryReaderWrapper(directoryReader, Lucene.SOFT_DELETES_FIELD);
}
}

Expand Down
2 changes: 1 addition & 1 deletion x-pack/plugin/searchable-snapshots/qa/rest/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ final File repoDir = file("$buildDir/testclusters/repo")

// Lists the REST API groups included for this QA project (presumably the specs its YAML REST tests may call).
// NOTE(review): the two include lines look like the before/after of the diff; the second one adds 'index'.
restResources {
restApi {
include 'indices', 'search', 'bulk', 'snapshot', 'nodes', '_common', 'searchable_snapshots', 'cluster', 'open_point_in_time', 'close_point_in_time', 'security'
include 'indices', 'search', 'bulk', 'index', 'snapshot', 'nodes', '_common', 'searchable_snapshots', 'cluster', 'open_point_in_time', 'close_point_in_time', 'security'
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
---
# Setup: creates index "test" with a date-mapped "@timestamp" field (1 shard, 0 replicas), indexes three
# documents with a refresh after each one, snapshots everything into the "repository-fs" repository,
# and finally deletes the original index so that it can be mounted from the snapshot.
setup:

- do:
indices.create:
index: test
body:
mappings:
properties:
"@timestamp":
type: date
settings:
number_of_shards: 1
number_of_replicas: 0

# 1st segment
- do:
index:
index: test
body: { "foo": "bar1", "@timestamp": "2021-08-01" }
refresh: true

# 2nd segment
- do:
index:
index: test
body: { "foo": "bar2", "@timestamp": "2021-08-02" }
refresh: true

# 3rd segment missing @timestamp field
- do:
index:
index: test
body: { "foo": "bar1"}
refresh: true

# Register a shared file system ("fs") snapshot repository.
- do:
snapshot.create_repository:
repository: repository-fs
body:
type: fs
settings:
location: "repository-fs"

# Remove the snapshot if a previous test failed to delete it.
# Useful for third party tests that run the test against a real external service.
- do:
snapshot.delete:
repository: repository-fs
snapshot: snapshot
ignore: 404

# Take the snapshot and wait until it has completed.
- do:
snapshot.create:
repository: repository-fs
snapshot: snapshot
wait_for_completion: true

# Delete the source index; the test mounts it again from the snapshot.
- do:
indices.delete:
index: test

---
# Mounts the snapshot of index "test" as "test-default-storage" and checks the order in which a search
# without an explicit sort returns the three documents.
"Test that index segments are sorted on timestamp field if @timestamp field is defined in mapping":

# Mount the snapshotted index under a new name and wait for the mount to complete.
- do:
searchable_snapshots.mount:
repository: repository-fs
snapshot: snapshot
wait_for_completion: true
body:
index: test
renamed_index: test-default-storage

# The mount must have restored the single shard without failures.
- match: { snapshot.snapshot: snapshot }
- match: { snapshot.shards.failed: 0 }
- match: { snapshot.shards.successful: 1 }

# test that segments are sorted by @timestamp DESC
# Expected order: 2021-08-02, then 2021-08-01, then the document that has no @timestamp value.
- do:
search:
index: test-default-storage
body:
fields: [{ "field":"@timestamp", "format":"yyyy-MM-dd" }]
- match: { hits.total.value: 3 }
- match: { hits.hits.0.fields.@timestamp: ["2021-08-02"] }
- match: { hits.hits.1.fields.@timestamp: ["2021-08-01"] }
- is_false: hits.hits.2.fields.@timestamp

0 comments on commit c756161

Please sign in to comment.