elastic · brianmcgue · Nov 16, 2022 · Nov 4, 2022 · Nov 14, 2022 · Nov 15, 2022
@@ -5,7 +5,11 @@
  * 2.0.
  */
 
-import { IngestSetProcessor, MlTrainedModelConfig } from '@elastic/elasticsearch/lib/api/types';
+import {
+  IngestRemoveProcessor,
+  IngestSetProcessor,
+  MlTrainedModelConfig,
+} from '@elastic/elasticsearch/lib/api/types';
 import { BUILT_IN_MODEL_TAG } from '@kbn/ml-plugin/common/constants/data_frame_analytics';
 import { SUPPORTED_PYTORCH_TASKS } from '@kbn/ml-plugin/common/constants/trained_models';
 
@@ -18,6 +22,7 @@ import {
   getSetProcessorForInferenceType,
   SUPPORTED_PYTORCH_TASKS as LOCAL_SUPPORTED_PYTORCH_TASKS,
   parseMlInferenceParametersFromPipeline,
+  getRemoveProcessorForInferenceType,
 } from '.';
 
 const mockModel: MlTrainedModelConfig = {
@@ -63,6 +68,38 @@ describe('getMlModelTypesForModelConfig lib function', () => {
   });
 });
 
+describe('getRemoveProcessorForInferenceType lib function', () => {
+  const destinationField = 'dest';
+
+  it('should return expected value for TEXT_CLASSIFICATION', () => {
+    const inferenceType = SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION;
+
+    const expected: IngestRemoveProcessor = {
+      field: destinationField,
+      ignore_missing: true,
+    };
+
+    expect(getRemoveProcessorForInferenceType(destinationField, inferenceType)).toEqual(expected);
+  });
+
+  it('should return expected value for TEXT_EMBEDDING', () => {
+    const inferenceType = SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING;
+
+    const expected: IngestRemoveProcessor = {
+      field: destinationField,
+      ignore_missing: true,
+    };
+
+    expect(getRemoveProcessorForInferenceType(destinationField, inferenceType)).toEqual(expected);
+  });
+
+  it('should return undefined for unknown inferenceType', () => {
+    const inferenceType = 'wrongInferenceType';
+
+    expect(getRemoveProcessorForInferenceType(destinationField, inferenceType)).toBeUndefined();
+  });
+});
+
 describe('getSetProcessorForInferenceType lib function', () => {
   const destinationField = 'dest';
 
@@ -79,10 +116,28 @@ describe('getSetProcessorForInferenceType lib function', () => {
         "Copy the predicted_value to 'dest' if the prediction_probability is greater than 0.5",
       field: destinationField,
       if: 'ml.inference.dest.prediction_probability > 0.5',
+      on_failure: [
+        {
+          append: {
+            field: '_source._ingest.set_errors',
+            ignore_failure: true,
+            value: [
+              {
+                message:
+                  "Processor 'set' in pipeline 'my-pipeline' failed with message '{{ _ingest.on_failure_message }}'",
+                pipeline: 'my-pipeline',
+                timestamp: '{{{ _ingest.timestamp }}}',
+              },
+            ],
+          },
+        },
+      ],
       value: undefined,
     };
 
-    expect(getSetProcessorForInferenceType(destinationField, inferenceType)).toEqual(expected);
+    expect(getSetProcessorForInferenceType(destinationField, inferenceType, 'my-pipeline')).toEqual(
+      expected
+    );
   });
 
   it('should return expected value for TEXT_EMBEDDING', () => {
@@ -92,16 +147,36 @@ describe('getSetProcessorForInferenceType lib function', () => {
       copy_from: 'ml.inference.dest.predicted_value',
       description: "Copy the predicted_value to 'dest'",
       field: destinationField,
+      on_failure: [
+        {
+          append: {
+            field: '_source._ingest.set_errors',
+            ignore_failure: true,
+            value: [
+              {
+                message:
+                  "Processor 'set' in pipeline 'my-pipeline' failed with message '{{ _ingest.on_failure_message }}'",
+                pipeline: 'my-pipeline',
+                timestamp: '{{{ _ingest.timestamp }}}',
+              },
+            ],
+          },
+        },
+      ],
       value: undefined,
     };
 
-    expect(getSetProcessorForInferenceType(destinationField, inferenceType)).toEqual(expected);
+    expect(getSetProcessorForInferenceType(destinationField, inferenceType, 'my-pipeline')).toEqual(
+      expected
+    );
   });
 
   it('should return undefined for unknown inferenceType', () => {
     const inferenceType = 'wrongInferenceType';
 
-    expect(getSetProcessorForInferenceType(destinationField, inferenceType)).toBeUndefined();
+    expect(
+      getSetProcessorForInferenceType(destinationField, inferenceType, 'my-pipeline')
+    ).toBeUndefined();
   });
 });
 
@@ -185,13 +260,35 @@ describe('generateMlInferencePipelineBody lib function', () => {
       expect.objectContaining({
         description: expect.any(String),
         processors: expect.arrayContaining([
+          expect.objectContaining({
+            remove: {
+              field: 'my-destination-field',
+              ignore_missing: true,
+            },
+          }),
           expect.objectContaining({
             set: {
               copy_from: 'ml.inference.my-destination-field.predicted_value',
               description:
                 "Copy the predicted_value to 'my-destination-field' if the prediction_probability is greater than 0.5",
               field: 'my-destination-field',
               if: 'ml.inference.my-destination-field.prediction_probability > 0.5',
+              on_failure: [
+                {
+                  append: {
+                    field: '_source._ingest.set_errors',
+                    ignore_failure: true,
+                    value: [
+                      {
+                        message:
+                          "Processor 'set' in pipeline 'my-pipeline' failed with message '{{ _ingest.on_failure_message }}'",
+                        pipeline: 'my-pipeline',
+                        timestamp: '{{{ _ingest.timestamp }}}',
+                      },
+                    ],
+                  },
+                },
+              ],
             },
           }),
         ]),

@@ -7,6 +7,8 @@
 
 import {
   IngestPipeline,
+  IngestProcessorContainer,
+  IngestRemoveProcessor,
   IngestSetProcessor,
   MlTrainedModelConfig,
 } from '@elastic/elasticsearch/lib/api/types';
@@ -53,7 +55,8 @@ export const generateMlInferencePipelineBody = ({
     model.input?.field_names?.length > 0 ? model.input.field_names[0] : 'MODEL_INPUT_FIELD';
 
   const inferenceType = Object.keys(model.inference_config)[0];
-  const set = getSetProcessorForInferenceType(destinationField, inferenceType);
+  const remove = getRemoveProcessorForInferenceType(destinationField, inferenceType);
+  const set = getSetProcessorForInferenceType(destinationField, inferenceType, pipelineName);
 
   return {
     description: description ?? '',
@@ -64,6 +67,7 @@ export const generateMlInferencePipelineBody = ({
           ignore_missing: true,
         },
       },
+      ...(remove ? [{ remove }] : []),
       {
         inference: {
           field_map: {
@@ -108,31 +112,64 @@ export const generateMlInferencePipelineBody = ({
 
 export const getSetProcessorForInferenceType = (
   destinationField: string,
-  inferenceType: string
+  inferenceType: string,
+  pipelineName: string
 ): IngestSetProcessor | undefined => {
   let set: IngestSetProcessor | undefined;
   const prefixedDestinationField = `ml.inference.${destinationField}`;
+  const onFailure: IngestProcessorContainer[] = [
+    {
+      append: {
+        field: '_source._ingest.set_errors',
+        ignore_failure: true,
+        value: [
+          {
+            message: `Processor 'set' in pipeline '${pipelineName}' failed with message '{{ _ingest.on_failure_message }}'`,
+            pipeline: pipelineName,
+            timestamp: '{{{ _ingest.timestamp }}}',
+          },
+        ],
+      },
+    },
+  ];
 
   if (inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION) {
     set = {
       copy_from: `${prefixedDestinationField}.predicted_value`,
       description: `Copy the predicted_value to '${destinationField}' if the prediction_probability is greater than 0.5`,
       field: destinationField,
       if: `${prefixedDestinationField}.prediction_probability > 0.5`,
+      on_failure: onFailure,
       value: undefined,
     };
   } else if (inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING) {
     set = {
       copy_from: `${prefixedDestinationField}.predicted_value`,
       description: `Copy the predicted_value to '${destinationField}'`,
       field: destinationField,
+      on_failure: onFailure,
       value: undefined,
     };
   }
 
   return set;
 };
 
+export const getRemoveProcessorForInferenceType = (
+  destinationField: string,
+  inferenceType: string
+): IngestRemoveProcessor | undefined => {
+  if (
+    inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_CLASSIFICATION ||
+    inferenceType === SUPPORTED_PYTORCH_TASKS.TEXT_EMBEDDING
+  ) {
+    return {
+      field: destinationField,
+      ignore_missing: true,
+    };
+  }
+};
+
 /**
  * Parses model types list from the given configuration of a trained machine learning model
  * @param trainedModel configuration for a trained machine learning model