From b2a070555b9b78cb7de9ab190f1ee7892296f675 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 14 Jan 2021 17:45:11 +0000 Subject: [PATCH] Support resource_name on AttachmentProcessor (#5205) (#5263) Contributes to #5198 Relates to https://github.com/elastic/elasticsearch/pull/64389 Co-authored-by: Steve Gordon --- .../Processors/Plugins/AttachmentProcessor.cs | 26 ++++++++++++--- tests/Tests/Ingest/ProcessorAssertions.cs | 33 +++++++++++++++++++ 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/src/Nest/Ingest/Processors/Plugins/AttachmentProcessor.cs b/src/Nest/Ingest/Processors/Plugins/AttachmentProcessor.cs index e89cc34ea91..7cbc8c9775a 100644 --- a/src/Nest/Ingest/Processors/Plugins/AttachmentProcessor.cs +++ b/src/Nest/Ingest/Processors/Plugins/AttachmentProcessor.cs @@ -21,12 +21,12 @@ namespace Nest [InterfaceDataContract] public interface IAttachmentProcessor : IProcessor { - /// The field to get the base64 encoded field from + /// The field to get the base64 encoded field from. [DataMember(Name ="field")] Field Field { get; set; } - /// If `true` and `field` does not exist, the processor quietly exits without modifying the document + /// If `true` and `field` does not exist, the processor quietly exits without modifying the document. [DataMember(Name ="ignore_missing")] bool? IgnoreMissing { get; set; } @@ -37,20 +37,26 @@ public interface IAttachmentProcessor : IProcessor [DataMember(Name ="indexed_chars")] long? IndexedCharacters { get; set; } - /// Field name from which you can overwrite the number of chars being used for extraction. + /// Field name from which you can overwrite the number of chars being used for extraction. [DataMember(Name ="indexed_chars_field")] Field IndexedCharactersField { get; set; } /// /// Properties to select to be stored. Can be content, title, name, author, - /// keywords, date, content_type, content_length, language. Defaults to all + /// keywords, date, content_type, content_length, language. Defaults to all. /// [DataMember(Name ="properties")] IEnumerable Properties { get; set; } - /// The field that will hold the attachment information + /// The field that will hold the attachment information. [DataMember(Name ="target_field")] Field TargetField { get; set; } + + /// The field containing the name of the resource to decode. + /// If specified, the processor passes this resource name to the underlying + /// Tika library to enable 'Resource Name Based Detection'. + [DataMember(Name = "resource_name")] + Field ResourceName { get; set; } } /// @@ -75,6 +81,9 @@ public class AttachmentProcessor : ProcessorBase, IAttachmentProcessor /// public Field TargetField { get; set; } + /// + public Field ResourceName { get; set; } + protected override string Name => "attachment"; } @@ -91,6 +100,7 @@ public class AttachmentProcessorDescriptor Field IAttachmentProcessor.IndexedCharactersField { get; set; } IEnumerable IAttachmentProcessor.Properties { get; set; } Field IAttachmentProcessor.TargetField { get; set; } + Field IAttachmentProcessor.ResourceName { get; set; } /// public AttachmentProcessorDescriptor Field(Field field) => Assign(field, (a, v) => a.Field = v); @@ -122,5 +132,11 @@ public AttachmentProcessorDescriptor IndexedCharactersField(Expressio /// public AttachmentProcessorDescriptor Properties(params string[] properties) => Assign(properties, (a, v) => a.Properties = v); + + /// + public AttachmentProcessorDescriptor ResourceName(Field field) => Assign(field, (a, v) => a.ResourceName = v); + + /// + public AttachmentProcessorDescriptor ResourceName(Expression> objectPath) => Assign(objectPath, (a, v) => a.ResourceName = v); } } diff --git a/tests/Tests/Ingest/ProcessorAssertions.cs b/tests/Tests/Ingest/ProcessorAssertions.cs index ac308fb8451..2eddb6cec99 100644 --- a/tests/Tests/Ingest/ProcessorAssertions.cs +++ b/tests/Tests/Ingest/ProcessorAssertions.cs @@ -463,6 +463,39 @@ public class Attachment : ProcessorAssertion public override string Key => "attachment"; } + [SkipVersion("<7.11.0", "Resource name support was added in 7.11")] + public class Attachment_WithResourceName : ProcessorAssertion + { + public override Func>> Fluent => d => d + .Attachment(ud => ud + .Field(p => p.Description) + .IndexedCharacters(100_000) + .Properties("title", "author") + .IgnoreMissing() + .ResourceName(n => n.Name) + ); + + public override IProcessor Initializer => new AttachmentProcessor + { + Field = "description", + Properties = new[] { "title", "author" }, + IndexedCharacters = 100_000, + IgnoreMissing = true, + ResourceName = "name" + }; + + public override object Json => new + { + field = "description", + ignore_missing = true, + properties = new[] { "title", "author" }, + indexed_chars = 100_000, + resource_name = "name" + }; + + public override string Key => "attachment"; + } + [SkipVersion("<7.4.0", "Circle processor added in 7.4.0")] public class Circle : ProcessorAssertion {