From eee356607b21aecffb297aa56e72ded12a3a6664 Mon Sep 17 00:00:00 2001
From: Ed Savage <ed.savage@elastic.co>
Date: Tue, 18 Jun 2024 15:25:05 +1200
Subject: [PATCH] [ML] Handle the "output memory allocator bytes" field
 (#109653)

Handle the "output memory allocator bytes" field if and only if it is present in the model size stats, as reported by the C++ backend.

This PR _must_ be merged prior to the corresponding `ml-cpp` one, to keep CI tests happy.
---
 docs/changelog/109653.yaml                    |  5 +++
 docs/reference/cat/anomaly-detectors.asciidoc | 27 +++++++++-------
 docs/reference/ml/ml-shared.asciidoc          | 12 ++++---
 .../org/elasticsearch/TransportVersions.java  |  3 +-
 .../autodetect/state/ModelSizeStats.java      | 31 +++++++++++++++++++
 .../autodetect/state/ModelSizeStatsTests.java |  1 +
 .../xpack/ml/rest/cat/RestCatJobsAction.java  | 11 +++++++
 7 files changed, 73 insertions(+), 17 deletions(-)
 create mode 100644 docs/changelog/109653.yaml

diff --git a/docs/changelog/109653.yaml b/docs/changelog/109653.yaml
new file mode 100644
index 0000000000000..665163ec2a91b
--- /dev/null
+++ b/docs/changelog/109653.yaml
@@ -0,0 +1,5 @@
+pr: 109653
+summary: Handle the "output memory allocator bytes" field
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/reference/cat/anomaly-detectors.asciidoc b/docs/reference/cat/anomaly-detectors.asciidoc
index 607a88d1e1a5c..3416c256881af 100644
--- a/docs/reference/cat/anomaly-detectors.asciidoc
+++ b/docs/reference/cat/anomaly-detectors.asciidoc
@@ -7,9 +7,9 @@
 
 [IMPORTANT]
 ====
-cat APIs are only intended for human consumption using the command line or {kib} 
-console. They are _not_ intended for use by applications. For application 
-consumption, use the 
+cat APIs are only intended for human consumption using the command line or {kib}
+console. They are _not_ intended for use by applications. For application
+consumption, use the
 <<ml-get-job-stats,get anomaly detection job statistics API>>.
 ====
 
@@ -137,7 +137,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count]
 
 `forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`:::
 The average memory usage in bytes for forecasts related to the {anomaly-job}.
-  
+
 `forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`:::
 The maximum memory usage in bytes for forecasts related to the {anomaly-job}.
 
@@ -145,8 +145,8 @@ The maximum memory usage in bytes for forecasts related to the {anomaly-job}.
 The minimum memory usage in bytes for forecasts related to the {anomaly-job}.
 
 `forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`:::
-The total memory usage in bytes for forecasts related to the {anomaly-job}.                      
-  
+The total memory usage in bytes for forecasts related to the {anomaly-job}.
+
 `forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`:::
 The average number of `model_forecast` documents written for forecasts related
 to the {anomaly-job}.
@@ -161,8 +161,8 @@ to the {anomaly-job}.
 
 `forecasts.records.total`, `frt`, `forecastsRecordsTotal`:::
 The total number of `model_forecast` documents written for forecasts related to
-the {anomaly-job}.                         
-                                                   
+the {anomaly-job}.
+
 `forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`:::
 The average runtime in milliseconds for forecasts related to the {anomaly-job}.
 
@@ -198,7 +198,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded]
 
 `model.categorization_status`, `mcs`, `modelCategorizationStatus`:::
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorization-status]
-                         
+
 `model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`:::
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorized-doc-count]
 
@@ -221,6 +221,9 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs]
 (Default)
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-status]
 
+`model.output_memory_allocator_bytes`, `momab`, `modelOutputMemoryAllocatorBytes`:::
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=output-memory-allocator-bytes]
+
 `model.over_fields`, `mof`, `modelOverFields`:::
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-over-field-count]
 
@@ -232,10 +235,10 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=rare-category-count]
 
 `model.timestamp`, `mt`, `modelTimestamp`:::
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-timestamp]
-                                                           
+
 `model.total_category_count`, `mtcc`, `modelTotalCategoryCount`:::
 include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-category-count]
-                            
+
 `node.address`, `na`, `nodeAddress`:::
 The network address of the node.
 +
@@ -261,7 +264,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=open-time]
 
 `state`, `s`:::
 (Default)
-include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] 
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job]
 
 include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help]
 
diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc
index 6bbc98db1c2e1..a69fd2f1812e9 100644
--- a/docs/reference/ml/ml-shared.asciidoc
+++ b/docs/reference/ml/ml-shared.asciidoc
@@ -430,16 +430,16 @@ end::daily-model-snapshot-retention-after-days[]
 
 tag::data-description[]
 The data description defines the format of the input data when you send data to
-the job by using the <<ml-post-data,post data>> API. Note that when using a 
-{dfeed}, only the `time_field` needs to be set, the rest of the properties are 
-automatically set. When data is received via the <<ml-post-data,post data>> API, 
+the job by using the <<ml-post-data,post data>> API. Note that when using a
+{dfeed}, only the `time_field` needs to be set, the rest of the properties are
+automatically set. When data is received via the <<ml-post-data,post data>> API,
 it is not stored in {es}. Only the results for {anomaly-detect} are retained.
 +
 .Properties of `data_description`
 [%collapsible%open]
 ====
 `format`:::
-  (string) Only `xcontent` format is supported at this time, and this is the 
+  (string) Only `xcontent` format is supported at this time, and this is the
   default value.
 
 `time_field`:::
@@ -1285,6 +1285,10 @@ tag::job-id-datafeed[]
 The unique identifier for the job to which the {dfeed} sends data.
 end::job-id-datafeed[]
 
+tag::output-memory-allocator-bytes[]
+The amount of memory, in bytes, used to output {anomaly-job} documents.
+end::output-memory-allocator-bytes[]
+
 tag::lambda[]
 Advanced configuration option. Regularization parameter to prevent overfitting
 on the training data set. Multiplies an L2 regularization term which applies to
diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java
index 3c8a403775802..564831475c960 100644
--- a/server/src/main/java/org/elasticsearch/TransportVersions.java
+++ b/server/src/main/java/org/elasticsearch/TransportVersions.java
@@ -176,7 +176,8 @@ static TransportVersion def(int id) {
     public static final TransportVersion ML_INFERENCE_AZURE_OPENAI_EMBEDDINGS = def(8_634_00_0);
     public static final TransportVersion ILM_SHRINK_ENABLE_WRITE = def(8_635_00_0);
     public static final TransportVersion GEOIP_CACHE_STATS = def(8_636_00_0);
-    public static final TransportVersion SHUTDOWN_REQUEST_TIMEOUTS_FIX_8_14 = def(8_636_00_1);
+    public static final TransportVersion SHUTDOWN_REQUEST_TIMEOUTS_FIX_8_14 = def(8_637_00_1);
+    public static final TransportVersion ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD = def(8_638_00_0);
 
     /*
      * STOP! READ THIS FIRST! No, really,
diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java
index 3812c012e2a3d..16eceb1e89a95 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java
@@ -6,6 +6,7 @@
  */
 package org.elasticsearch.xpack.core.ml.job.process.autodetect.state;
 
+import org.elasticsearch.TransportVersions;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.io.stream.Writeable;
@@ -48,6 +49,7 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
     public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new ParseField("bucket_allocation_failures_count");
     public static final ParseField MEMORY_STATUS_FIELD = new ParseField("memory_status");
     public static final ParseField ASSIGNMENT_MEMORY_BASIS_FIELD = new ParseField("assignment_memory_basis");
+    public static final ParseField OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD = new ParseField("output_memory_allocator_bytes");
     public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField("categorized_doc_count");
     public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField("total_category_count");
     public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count");
@@ -85,6 +87,7 @@ private static ConstructingObjectParser<Builder, Void> createParser(boolean igno
             ASSIGNMENT_MEMORY_BASIS_FIELD,
             ValueType.STRING
         );
+        parser.declareLong(Builder::setOutputMemoryAllocatorBytes, OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD);
         parser.declareLong(Builder::setCategorizedDocCount, CATEGORIZED_DOC_COUNT_FIELD);
         parser.declareLong(Builder::setTotalCategoryCount, TOTAL_CATEGORY_COUNT_FIELD);
         parser.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD);
@@ -188,6 +191,7 @@ public String toString() {
     private final long bucketAllocationFailuresCount;
     private final MemoryStatus memoryStatus;
     private final AssignmentMemoryBasis assignmentMemoryBasis;
+    private final Long outputMemoryAllocatorBytes;
     private final long categorizedDocCount;
     private final long totalCategoryCount;
     private final long frequentCategoryCount;
@@ -210,6 +214,7 @@ private ModelSizeStats(
         long bucketAllocationFailuresCount,
         MemoryStatus memoryStatus,
         AssignmentMemoryBasis assignmentMemoryBasis,
+        Long outputMemoryAllocatorBytes,
         long categorizedDocCount,
         long totalCategoryCount,
         long frequentCategoryCount,
@@ -231,6 +236,7 @@ private ModelSizeStats(
         this.bucketAllocationFailuresCount = bucketAllocationFailuresCount;
         this.memoryStatus = memoryStatus;
         this.assignmentMemoryBasis = assignmentMemoryBasis;
+        this.outputMemoryAllocatorBytes = outputMemoryAllocatorBytes;
         this.categorizedDocCount = categorizedDocCount;
         this.totalCategoryCount = totalCategoryCount;
         this.frequentCategoryCount = frequentCategoryCount;
@@ -258,6 +264,11 @@ public ModelSizeStats(StreamInput in) throws IOException {
         } else {
             assignmentMemoryBasis = null;
         }
+        if (in.getTransportVersion().onOrAfter(TransportVersions.ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD)) {
+            outputMemoryAllocatorBytes = in.readOptionalVLong();
+        } else {
+            outputMemoryAllocatorBytes = null;
+        }
         categorizedDocCount = in.readVLong();
         totalCategoryCount = in.readVLong();
         frequentCategoryCount = in.readVLong();
@@ -295,6 +306,9 @@ public void writeTo(StreamOutput out) throws IOException {
         } else {
             out.writeBoolean(false);
         }
+        if (out.getTransportVersion().onOrAfter(TransportVersions.ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD)) {
+            out.writeOptionalVLong(outputMemoryAllocatorBytes);
+        }
         out.writeVLong(categorizedDocCount);
         out.writeVLong(totalCategoryCount);
         out.writeVLong(frequentCategoryCount);
@@ -339,6 +353,9 @@ public XContentBuilder doXContentBody(XContentBuilder builder) throws IOExceptio
         if (assignmentMemoryBasis != null) {
             builder.field(ASSIGNMENT_MEMORY_BASIS_FIELD.getPreferredName(), assignmentMemoryBasis);
         }
+        if (outputMemoryAllocatorBytes != null) {
+            builder.field(OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD.getPreferredName(), outputMemoryAllocatorBytes);
+        }
         builder.field(CATEGORIZED_DOC_COUNT_FIELD.getPreferredName(), categorizedDocCount);
         builder.field(TOTAL_CATEGORY_COUNT_FIELD.getPreferredName(), totalCategoryCount);
         builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount);
@@ -399,6 +416,10 @@ public AssignmentMemoryBasis getAssignmentMemoryBasis() {
         return assignmentMemoryBasis;
     }
 
+    public Long getOutputMemmoryAllocatorBytes() {
+        return outputMemoryAllocatorBytes;
+    }
+
     public long getCategorizedDocCount() {
         return categorizedDocCount;
     }
@@ -458,6 +479,7 @@ public int hashCode() {
             bucketAllocationFailuresCount,
             memoryStatus,
             assignmentMemoryBasis,
+            outputMemoryAllocatorBytes,
             categorizedDocCount,
             totalCategoryCount,
             frequentCategoryCount,
@@ -495,6 +517,7 @@ public boolean equals(Object other) {
             && this.bucketAllocationFailuresCount == that.bucketAllocationFailuresCount
             && Objects.equals(this.memoryStatus, that.memoryStatus)
             && Objects.equals(this.assignmentMemoryBasis, that.assignmentMemoryBasis)
+            && Objects.equals(this.outputMemoryAllocatorBytes, that.outputMemoryAllocatorBytes)
             && Objects.equals(this.categorizedDocCount, that.categorizedDocCount)
             && Objects.equals(this.totalCategoryCount, that.totalCategoryCount)
             && Objects.equals(this.frequentCategoryCount, that.frequentCategoryCount)
@@ -520,6 +543,7 @@ public static class Builder {
         private long bucketAllocationFailuresCount;
         private MemoryStatus memoryStatus;
         private AssignmentMemoryBasis assignmentMemoryBasis;
+        private Long outputMemoryAllocatorBytes;
         private long categorizedDocCount;
         private long totalCategoryCount;
         private long frequentCategoryCount;
@@ -549,6 +573,7 @@ public Builder(ModelSizeStats modelSizeStats) {
             this.bucketAllocationFailuresCount = modelSizeStats.bucketAllocationFailuresCount;
             this.memoryStatus = modelSizeStats.memoryStatus;
             this.assignmentMemoryBasis = modelSizeStats.assignmentMemoryBasis;
+            this.outputMemoryAllocatorBytes = modelSizeStats.outputMemoryAllocatorBytes;
             this.categorizedDocCount = modelSizeStats.categorizedDocCount;
             this.totalCategoryCount = modelSizeStats.totalCategoryCount;
             this.frequentCategoryCount = modelSizeStats.frequentCategoryCount;
@@ -611,6 +636,11 @@ public Builder setAssignmentMemoryBasis(AssignmentMemoryBasis assignmentMemoryBa
             return this;
         }
 
+        public Builder setOutputMemoryAllocatorBytes(long outputMemoryAllocatorBytes) {
+            this.outputMemoryAllocatorBytes = outputMemoryAllocatorBytes;
+            return this;
+        }
+
         public Builder setCategorizedDocCount(long categorizedDocCount) {
             this.categorizedDocCount = categorizedDocCount;
             return this;
@@ -670,6 +700,7 @@ public ModelSizeStats build() {
                 bucketAllocationFailuresCount,
                 memoryStatus,
                 assignmentMemoryBasis,
+                outputMemoryAllocatorBytes,
                 categorizedDocCount,
                 totalCategoryCount,
                 frequentCategoryCount,
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java
index 8aa5088f41deb..e30b706caa0ef 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java
@@ -32,6 +32,7 @@ public void testDefaultConstructor() {
         assertEquals(0, stats.getBucketAllocationFailuresCount());
         assertEquals(MemoryStatus.OK, stats.getMemoryStatus());
         assertNull(stats.getAssignmentMemoryBasis());
+        assertNull(stats.getOutputMemmoryAllocatorBytes());
         assertEquals(0, stats.getCategorizedDocCount());
         assertEquals(0, stats.getTotalCategoryCount());
         assertEquals(0, stats.getFrequentCategoryCount());
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
index b6b050a10c790..cb02990da74c9 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
@@ -213,6 +213,12 @@ protected Table getTableWithHeader(RestRequest request) {
                 .setAliases("mbaf", "modelBucketAllocationFailures")
                 .build()
         );
+        table.addCell(
+            "model.output_memory_allocator_bytes",
+            TableColumnAttributeBuilder.builder("how many bytes have been used to output the model documents", false)
+                .setAliases("momab", "modelOutputMemoryAllocatorBytes")
+                .build()
+        );
         table.addCell(
             "model.categorization_status",
             TableColumnAttributeBuilder.builder("current categorization status", false)
@@ -416,6 +422,11 @@ private Table buildTable(RestRequest request, Response jobStats) {
             table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalPartitionFieldCount());
             table.addCell(modelSizeStats == null ? null : modelSizeStats.getBucketAllocationFailuresCount());
             table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizationStatus().toString());
+            table.addCell(
+                modelSizeStats == null || modelSizeStats.getOutputMemmoryAllocatorBytes() == null
+                    ? null
+                    : ByteSizeValue.ofBytes(modelSizeStats.getOutputMemmoryAllocatorBytes())
+            );
             table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizedDocCount());
             table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalCategoryCount());
             table.addCell(modelSizeStats == null ? null : modelSizeStats.getFrequentCategoryCount());