From a5b5f947a64d6f562477c6cd2a81c731ce378846 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 May 2023 17:26:06 +0200 Subject: [PATCH 1/5] Skip empty segments when loading min/max timestamp in PerThreadIDVersionAndSeqNoLookup. It can happen that sometimes segments only contain delete tombstone documents. In that case, loading min and max `@timestamp` field values can result into NPE. Because these documents that have a `@timestamp` field. The change fixes that by checking whether numDocs is not zero. These tombstone documents are marked as deleted, so checking whether there are live documents fixes this bug. --- .../rest-api-spec/test/delete/70_tsdb.yml | 73 +++++++++++++++++++ .../uid/PerThreadIDVersionAndSeqNoLookup.java | 2 +- .../common/lucene/uid/VersionLookupTests.java | 13 ++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml new file mode 100644 index 0000000000000..d662ff035a591 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml @@ -0,0 +1,73 @@ +--- +"basic tsdb delete": + - skip: + version: " - 8.8.99" + reason: fixed in 8.9.0 + + - do: + indices.create: + index: weather_sensors-dev + body: + settings: + index: + mode: time_series + routing_path: [sensor_id, location] + time_series: + start_time: 2000-01-01T00:00:00.000Z + end_time: 2099-12-31T23:59:59.999Z + number_of_replicas: 0 + number_of_shards: 1 + mappings: + properties: + "@timestamp": + type: date + humidity: + type: half_float + time_series_metric: gauge + location: + type: keyword + time_series_dimension: true + sensor_id: + type: keyword + time_series_dimension: true + temperature: + type: half_float + time_series_metric: gauge + + - do: + index: + index: weather_sensors-dev + body: + "@timestamp": 2023-05-31T08:41:15.000Z + sensor_id: SYKENET-000001 + location: swamp + temperature: 32.4 + humidity: 88.9 + - match: { _id: crxuhC8WO3aVdhvtAAABiHD35_g } + - match: { result: created } + - match: { _version: 1 } + + - do: + delete: + index: weather_sensors-dev + id: crxuhC8WO3aVdhvtAAABiHD35_g + - match: { _id: crxuhC8WO3aVdhvtAAABiHD35_g } + - match: { result: deleted } + - match: { _version: 2 } + + - do: + indices.flush: + index: weather_sensors-dev + + - do: + index: + index: weather_sensors-dev + body: + "@timestamp": 2023-05-31T08:41:15.000Z + sensor_id: SYKENET-000001 + location: swamp + temperature: 32.4 + humidity: 88.9 + - match: { _id: crxuhC8WO3aVdhvtAAABiHD35_g } + - match: { result: created } + - match: { _version: 3 } diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java index 10447f3882ae0..3ffea0ce405ff 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java @@ -94,7 +94,7 @@ final class PerThreadIDVersionAndSeqNoLookup { this.readerKey = readerKey; this.loadedTimestampRange = loadTimestampRange; - if (loadTimestampRange) { + if (loadTimestampRange && reader.numDocs() != 0) { PointValues tsPointValues = reader.getPointValues(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD); assert tsPointValues != null : "no timestamp field for reader:" + reader + " and parent:" + reader.getContext().parent.reader(); minTimestamp = LongPoint.decodeDimension(tsPointValues.getMinPackedValue(), 0); diff --git a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java index 4cb7e9552e834..af4f297915d1d 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.uid.VersionsAndSeqNoResolver.DocIdAndVersion; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SeqNoFieldMapper; import org.elasticsearch.index.mapper.VersionFieldMapper; import org.elasticsearch.test.ESTestCase; @@ -152,4 +153,16 @@ public void testLoadTimestampRange() throws Exception { writer.close(); dir.close(); } + + public void testLoadTimestampRangeWithDeleteTombstone() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER).setMergePolicy(NoMergePolicy.INSTANCE)); + writer.addDocument(ParsedDocument.deleteTombstone("_id").docs().get(0)); + DirectoryReader reader = DirectoryReader.open(writer); + LeafReaderContext segment = reader.leaves().get(0); + PerThreadIDVersionAndSeqNoLookup lookup = new PerThreadIDVersionAndSeqNoLookup(segment.reader(), IdFieldMapper.NAME, true); + assertTrue(lookup.loadedTimestampRange); + assertEquals(lookup.minTimestamp, 0L); + assertEquals(lookup.maxTimestamp, Long.MAX_VALUE); + } } From d0469cd313e423abe1ba59e9b54258798dcd85fa Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 May 2023 17:30:55 +0200 Subject: [PATCH 2/5] Update docs/changelog/96461.yaml --- docs/changelog/96461.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/96461.yaml diff --git a/docs/changelog/96461.yaml b/docs/changelog/96461.yaml new file mode 100644 index 0000000000000..57ec2a6af2da0 --- /dev/null +++ b/docs/changelog/96461.yaml @@ -0,0 +1,5 @@ +pr: 96461 +summary: Fix NPE when indexing a document that just has been deleted in a tsdb index +area: TSDB +type: bug +issues: [] From ac8612549a9bf5d88306a5f39b0cc240586d7100 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 May 2023 17:58:53 +0200 Subject: [PATCH 3/5] Skip segments without `@timestamp field`. It can happen that sometimes segments only contain delete tombstone documents. In that case, loading min and max `@timestamp` field values can result into NPE. Because these documents that have a `@timestamp` field. This change fixes that by checking for the existence of the `@timestamp` field in the a segment's field infos. --- .../common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java | 2 +- .../elasticsearch/common/lucene/uid/VersionLookupTests.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java index 3ffea0ce405ff..7cee391b3a76a 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java @@ -94,7 +94,7 @@ final class PerThreadIDVersionAndSeqNoLookup { this.readerKey = readerKey; this.loadedTimestampRange = loadTimestampRange; - if (loadTimestampRange && reader.numDocs() != 0) { + if (loadTimestampRange && reader.getFieldInfos().fieldInfo(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD) != null) { PointValues tsPointValues = reader.getPointValues(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD); assert tsPointValues != null : "no timestamp field for reader:" + reader + " and parent:" + reader.getContext().parent.reader(); minTimestamp = LongPoint.decodeDimension(tsPointValues.getMinPackedValue(), 0); diff --git a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java index af4f297915d1d..60e5d399ca381 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/uid/VersionLookupTests.java @@ -164,5 +164,8 @@ public void testLoadTimestampRangeWithDeleteTombstone() throws Exception { assertTrue(lookup.loadedTimestampRange); assertEquals(lookup.minTimestamp, 0L); assertEquals(lookup.maxTimestamp, Long.MAX_VALUE); + reader.close(); + writer.close(); + dir.close(); } } From f3c81dce1cca84669af0a4984f93feb007c5f183 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Jun 2023 08:42:14 +0200 Subject: [PATCH 4/5] rename index name --- .../resources/rest-api-spec/test/delete/70_tsdb.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml index d662ff035a591..25cfcd1d5c228 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/delete/70_tsdb.yml @@ -6,7 +6,7 @@ - do: indices.create: - index: weather_sensors-dev + index: weather_sensors body: settings: index: @@ -36,7 +36,7 @@ - do: index: - index: weather_sensors-dev + index: weather_sensors body: "@timestamp": 2023-05-31T08:41:15.000Z sensor_id: SYKENET-000001 @@ -49,7 +49,7 @@ - do: delete: - index: weather_sensors-dev + index: weather_sensors id: crxuhC8WO3aVdhvtAAABiHD35_g - match: { _id: crxuhC8WO3aVdhvtAAABiHD35_g } - match: { result: deleted } @@ -57,11 +57,11 @@ - do: indices.flush: - index: weather_sensors-dev + index: weather_sensors - do: index: - index: weather_sensors-dev + index: weather_sensors body: "@timestamp": 2023-05-31T08:41:15.000Z sensor_id: SYKENET-000001 From 5a8b40d46828d141da39bf1cfa6f7cac035eb5a6 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Jun 2023 08:54:55 +0200 Subject: [PATCH 5/5] added comment --- .../common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java index 7cee391b3a76a..99a2c69985f79 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/uid/PerThreadIDVersionAndSeqNoLookup.java @@ -94,6 +94,8 @@ final class PerThreadIDVersionAndSeqNoLookup { this.readerKey = readerKey; this.loadedTimestampRange = loadTimestampRange; + // Also check for the existence of the timestamp field, because sometimes a segment can only contain tombstone documents, + // which don't have any mapped fields (also not the timestamp field) and just some meta fields like _id, _seq_no etc. if (loadTimestampRange && reader.getFieldInfos().fieldInfo(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD) != null) { PointValues tsPointValues = reader.getPointValues(DataStream.TimestampField.FIXED_TIMESTAMP_FIELD); assert tsPointValues != null : "no timestamp field for reader:" + reader + " and parent:" + reader.getContext().parent.reader();