Skip to content

Commit

Permalink
Fix simulate remove ingest processor throwing illegal_argument_except…
Browse files Browse the repository at this point in the history
…ion (opensearch-project#11607)

* Fix simulate remove ingest processor throwing illegal_argument_exception

Signed-off-by: Gao Binlong <[email protected]>

* modify change log

Signed-off-by: Gao Binlong <[email protected]>

* Create a new test mothod

Signed-off-by: Gao Binlong <[email protected]>

* Use old method to get field value

Signed-off-by: Gao Binlong <[email protected]>

---------

Signed-off-by: Gao Binlong <[email protected]>
  • Loading branch information
gaobinlong authored and rayshrey committed Mar 18, 2024
1 parent 75103c2 commit 7008f20
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 10 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395))
- Performance improvement for Datetime field caching ([#4558](https://github.com/opensearch-project/OpenSearch/issues/4558))
- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273))
- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895))
- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895), [#11607](https://github.com/opensearch-project/OpenSearch/pull/11607))
- Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057))
- Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023))
- Performance improvement for date histogram aggregations without sub-aggregations ([#11083](https://github.com/opensearch-project/OpenSearch/pull/11083))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,18 @@ public IngestDocument execute(IngestDocument document) {
throw new IllegalArgumentException("cannot remove metadata field [" + path + "]");
}
// removing _id is disallowed when there's an external version specified in the request
String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class);
if (path.equals(IngestDocument.Metadata.ID.getFieldName())
&& !Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) {
Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class);
throw new IllegalArgumentException(
"cannot remove metadata field [_id] when specifying external version for the document, version: "
+ version
+ ", version_type: "
+ versionType
);
&& document.hasField(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) {
String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class);
if (!Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) {
Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class, true);
throw new IllegalArgumentException(
"cannot remove metadata field [_id] when specifying external version for the document, version: "
+ version
+ ", version_type: "
+ versionType
);
}
}
document.removeField(path);
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

package org.opensearch.ingest.common;

import org.opensearch.common.lucene.uid.Versions;
import org.opensearch.index.VersionType;
import org.opensearch.ingest.IngestDocument;
import org.opensearch.ingest.Processor;
Expand Down Expand Up @@ -181,4 +182,100 @@ public void testRemoveMetadataField() throws Exception {
}
}
}

public void testRemoveDocumentId() throws Exception {
Map<String, Object> config = new HashMap<>();
config.put("field", IngestDocument.Metadata.ID.getFieldName());
String processorTag = randomAlphaOfLength(10);

// test remove _id when _version_type is external
IngestDocument ingestDocumentWithExternalVersionType = new IngestDocument(
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
1L,
VersionType.EXTERNAL,
RandomDocumentPicks.randomSource(random())
);

Processor processorForExternalVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create(
null,
processorTag,
null,
config
);
assertThrows(
"cannot remove metadata field [_id] when specifying external version for the document, version: "
+ 1
+ ", version_type: "
+ VersionType.EXTERNAL,
IllegalArgumentException.class,
() -> processorForExternalVersionType.execute(ingestDocumentWithExternalVersionType)
);

// test remove _id when _version_type is external_gte
config.put("field", IngestDocument.Metadata.ID.getFieldName());
IngestDocument ingestDocumentWithExternalGTEVersionType = new IngestDocument(
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
1L,
VersionType.EXTERNAL_GTE,
RandomDocumentPicks.randomSource(random())
);

Processor processorForExternalGTEVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create(
null,
processorTag,
null,
config
);
assertThrows(
"cannot remove metadata field [_id] when specifying external version for the document, version: "
+ 1
+ ", version_type: "
+ VersionType.EXTERNAL_GTE,
IllegalArgumentException.class,
() -> processorForExternalGTEVersionType.execute(ingestDocumentWithExternalGTEVersionType)
);

// test remove _id when _version_type is internal
config.put("field", IngestDocument.Metadata.ID.getFieldName());
IngestDocument ingestDocumentWithInternalVersionType = new IngestDocument(
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
Versions.MATCH_ANY,
VersionType.INTERNAL,
RandomDocumentPicks.randomSource(random())
);

Processor processorForInternalVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create(
null,
processorTag,
null,
config
);
processorForInternalVersionType.execute(ingestDocumentWithInternalVersionType);
assertThat(ingestDocumentWithInternalVersionType.hasField(IngestDocument.Metadata.ID.getFieldName()), equalTo(false));

// test remove _id when _version_type is null
config.put("field", IngestDocument.Metadata.ID.getFieldName());
IngestDocument ingestDocumentWithNoVersionType = new IngestDocument(
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
RandomDocumentPicks.randomString(random()),
null,
null,
RandomDocumentPicks.randomSource(random())
);
Processor processorForNullVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create(
null,
processorTag,
null,
config
);
processorForNullVersionType.execute(ingestDocumentWithNoVersionType);
assertThat(ingestDocumentWithNoVersionType.hasField(IngestDocument.Metadata.ID.getFieldName()), equalTo(false));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,69 @@ teardown:
id: "my_pipeline"
ignore: 404

---
"Test simulate API works well with remove processor":
- do:
ingest.put_pipeline:
id: "my_pipeline"
body: >
{
"description": "_description",
"processors": [
{
"remove" : {
"field" : "{{foo}}"
}
}
]
}
- match: { acknowledged: true }

# test simulating existing pipeline works well
- do:
ingest.simulate:
id: "my_pipeline"
body: >
{
"docs": [
{
"_source": {
"foo": "bar",
"bar": "zoo"
}
}
]
}
- length: { docs: 1 }
- match: { docs.0.doc._source: { "foo": "bar" } }

# test simulating inflight pipeline works well
- do:
ingest.simulate:
body: >
{
"pipeline": {
"description": "_description",
"processors": [
{
"remove" : {
"field" : "{{foo}}"
}
}
]
},
"docs": [
{
"_source": {
"foo": "bar",
"bar": "zoo"
}
}
]
}
- length: { docs: 1 }
- match: { docs.0.doc._source: { "foo": "bar" } }

---
"Test remove processor with non-existing field and without ignore_missing":
- do:
Expand Down Expand Up @@ -227,3 +290,32 @@ teardown:
version: 1
version_type: "external"
body: { message: "foo bar baz" }

# test simulating pipeline with removing _id
- do:
ingest.simulate:
body: >
{
"pipeline": {
"description": "_description",
"processors": [
{
"remove" : {
"field" : "_id"
}
}
]
},
"docs": [
{
"_version_type": "external_gte",
"_version": 1,
"_source": {
"foo": "bar",
"bar": "zoo"
}
}
]
}
- match: { docs.0.error.type: "illegal_argument_exception" }
- match: { docs.0.error.reason: "cannot remove metadata field [_id] when specifying external version for the document, version: 1, version_type: external_gte" }

0 comments on commit 7008f20

Please sign in to comment.