From 6b9ba218dfc555cc6900904b9cdb37847f5af54b Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Jun 2023 13:40:03 +0200 Subject: [PATCH] [8.8] Fix NPE when indexing a document that just has been deleted in a tsdb index (#96476) Backporting #96461 to the 8.8 branch. Sometimes a segment only contains tombstone documents. In that case, loading the min and max @timestamp field values can result in an NPE, because these documents don't have a @timestamp field. This change fixes that by checking for the existence of the @timestamp field in the segment's field infos. --- docs/changelog/96461.yaml | 5 ++ .../rest-api-spec/test/delete/70_tsdb.yml | 73 +++++++++++++++++++ .../uid/PerThreadIDVersionAndSeqNoLookup.java | 4 +- .../common/lucene/uid/VersionLookupTests.java | 16 ++++ 4 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/96461.yaml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml diff --git a/docs/changelog/96461.yaml b/docs/changelog/96461.yaml new file mode 100644 index 0000000000000..57ec2a6af2da0 --- /dev/null +++ b/docs/changelog/96461.yaml @@ -0,0 +1,5 @@ +pr: 96461 +summary: Fix NPE when indexing a document that just has been deleted in a tsdb index +area: TSDB +type: bug +issues: [] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml new file mode 100644 index 0000000000000..4730415a3162c --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml @@ -0,0 +1,73 @@ +--- +"basic tsdb delete": + - skip: + version: " - 8.8.0" + reason: fixed in 8.8.1 + + - do: + indices.create: + index: weather_sensors + body: + settings: + index: + mode: time_series + routing_path: [sensor_id, location] + time_series: + start_time: 2000-01-01T00:00:00.000Z + end_time: 2099-12-31T23:59:59.999Z + number_of_replicas: 0 + 
number_of_shards: 1 + mappings: + properties: + "@timestamp": + type: date + humidity: + type: half_float + time_series_metric: gauge + location: + type: keyword + time_series_dimension: true + sensor_id: + type: keyword + time_series_dimension: true + temperature: + type: half_float + time_series_metric: gauge + + - do: + index: + index: weather_sensors + body: + "@timestamp": 2023-05-31T08:41:15.000Z + sensor_id: SYKENET-000001 + location: swamp + temperature: 32.4 + humidity: 88.9 + - match: { _id: crxuhC8WO3aVdhvtAAABiHD35_g } + - match: { result: created } + - match: { _version: 1 } + + - do: + delete: + index: weather_sensors + id: crxuhC8WO3aVdhvtAAABiHD35_g + - match: { _id: crxuhC8WO3aVdhvtAAABiHD35_g } + - match: { result: deleted } + - match: { _version: 2 } + + - do: + indices.flush: + index: weather_sensors + + - do: + index: + index: weather_sensors + body: + "@timestamp": 2023-05-31T08:41:15.000Z + sensor_id: SYKENET-000001 + location: swamp + temperature: 32.4 + humidity: 88.9 + - match: { _id: crxuhC8WO3aVdhvtAAABiHD35_g } + - match: { result: created } + - match: { _version: 3 } diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java index 10447f3882ae0..99a2c69985f79 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java @@ -94,7 +94,9 @@ final class PerThreadIDVersionAndSeqNoLookup { this.readerKey = readerKey; this.loadedTimestampRange = loadTimestampRange; - if (loadTimestampRange) { + // Also check for the existence of the timestamp field, because sometimes a segment can only contain tombstone documents, + // which don't have any mapped fields (also not the timestamp field) and just some meta fields like _id, _seq_no etc. 
+ if (loadTimestampRange && reader.getFieldInfos().fieldInfo(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD) != null) { PointValues tsPointValues = reader.getPointValues(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD); assert tsPointValues != null : "no timestamp field for reader:" + reader + " and parent:" + reader.getContext().parent.reader(); minTimestamp = LongPoint.decodeDimension(tsPointValues.getMinPackedValue(), 0); diff --git a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java index 4cb7e9552e834..60e5d399ca381 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SeqNoFieldMapper; import org.elasticsearch.index.mapper.VersionFieldMapper; import org.elasticsearch.test.ESTestCase; @@ -152,4 +153,19 @@ public void testLoadTimestampRange() throws Exception { writer.close(); dir.close(); } + + public void testLoadTimestampRangeWithDeleteTombstone() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER).setMergePolicy(NoMergePolicy.INSTANCE)); + writer.addDocument(ParsedDocument.deleteTombstone("_id").docs().get(0)); + DirectoryReader reader = DirectoryReader.open(writer); + LeafReaderContext segment = reader.leaves().get(0); + PerThreadIDVersionAndSeqNoLookup lookup = new PerThreadIDVersionAndSeqNoLookup(segment.reader(), IdFieldMapper.NAME, true); + assertTrue(lookup.loadedTimestampRange); + assertEquals(lookup.minTimestamp, 0L); + 
assertEquals(lookup.maxTimestamp, Long.MAX_VALUE); + reader.close(); + writer.close(); + dir.close(); + } }