From eee356607b21aecffb297aa56e72ded12a3a6664 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 18 Jun 2024 15:25:05 +1200 Subject: [PATCH] [ML] Handle the "output memory allocator bytes" field (#109653) Handle the "output memory allocator bytes" field if and only if it is present in the model size stats, as reported by the C++ backend. This PR _must_ be merged prior to the corresponding `ml-cpp` one, to keep CI tests happy. --- docs/changelog/109653.yaml | 5 +++ docs/reference/cat/anomaly-detectors.asciidoc | 27 +++++++++------- docs/reference/ml/ml-shared.asciidoc | 12 ++++--- .../org/elasticsearch/TransportVersions.java | 3 +- .../autodetect/state/ModelSizeStats.java | 31 +++++++++++++++++++ .../autodetect/state/ModelSizeStatsTests.java | 1 + .../xpack/ml/rest/cat/RestCatJobsAction.java | 11 +++++++ 7 files changed, 73 insertions(+), 17 deletions(-) create mode 100644 docs/changelog/109653.yaml diff --git a/docs/changelog/109653.yaml b/docs/changelog/109653.yaml new file mode 100644 index 0000000000000..665163ec2a91b --- /dev/null +++ b/docs/changelog/109653.yaml @@ -0,0 +1,5 @@ +pr: 109653 +summary: Handle the "output memory allocator bytes" field +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/reference/cat/anomaly-detectors.asciidoc b/docs/reference/cat/anomaly-detectors.asciidoc index 607a88d1e1a5c..3416c256881af 100644 --- a/docs/reference/cat/anomaly-detectors.asciidoc +++ b/docs/reference/cat/anomaly-detectors.asciidoc @@ -7,9 +7,9 @@ [IMPORTANT] ==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the +cat APIs are only intended for human consumption using the command line or {kib} +console. They are _not_ intended for use by applications. For application +consumption, use the <>. 
==== @@ -137,7 +137,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count] `forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`::: The average memory usage in bytes for forecasts related to the {anomaly-job}. - + `forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`::: The maximum memory usage in bytes for forecasts related to the {anomaly-job}. @@ -145,8 +145,8 @@ The maximum memory usage in bytes for forecasts related to the {anomaly-job}. The minimum memory usage in bytes for forecasts related to the {anomaly-job}. `forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`::: -The total memory usage in bytes for forecasts related to the {anomaly-job}. - +The total memory usage in bytes for forecasts related to the {anomaly-job}. + `forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`::: The average number of `model_forecast` documents written for forecasts related to the {anomaly-job}. @@ -161,8 +161,8 @@ to the {anomaly-job}. `forecasts.records.total`, `frt`, `forecastsRecordsTotal`::: The total number of `model_forecast` documents written for forecasts related to -the {anomaly-job}. - +the {anomaly-job}. + `forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`::: The average runtime in milliseconds for forecasts related to the {anomaly-job}. 
@@ -198,7 +198,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded] `model.categorization_status`, `mcs`, `modelCategorizationStatus`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorization-status] - + `model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorized-doc-count] @@ -221,6 +221,9 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs] (Default) include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-status] +`model.output_memory_allocator_bytes`, `momab`, `modelOutputMemoryAllocatorBytes`::: +include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=output-memory-allocator-bytes] + `model.over_fields`, `mof`, `modelOverFields`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-over-field-count] @@ -232,10 +235,10 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=rare-category-count] `model.timestamp`, `mt`, `modelTimestamp`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-timestamp] - + `model.total_category_count`, `mtcc`, `modelTotalCategoryCount`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-category-count] - + `node.address`, `na`, `nodeAddress`::: The network address of the node. + @@ -261,7 +264,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=open-time] `state`, `s`::: (Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] +include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index 6bbc98db1c2e1..a69fd2f1812e9 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -430,16 +430,16 @@ end::daily-model-snapshot-retention-after-days[] tag::data-description[] The data description defines the format of the input data when you send data to -the job by using the <> API. 
Note that when using a -{dfeed}, only the `time_field` needs to be set, the rest of the properties are -automatically set. When data is received via the <> API, +the job by using the <> API. Note that when using a +{dfeed}, only the `time_field` needs to be set, the rest of the properties are +automatically set. When data is received via the <> API, it is not stored in {es}. Only the results for {anomaly-detect} are retained. + .Properties of `data_description` [%collapsible%open] ==== `format`::: - (string) Only `xcontent` format is supported at this time, and this is the + (string) Only `xcontent` format is supported at this time, and this is the default value. `time_field`::: @@ -1285,6 +1285,10 @@ tag::job-id-datafeed[] The unique identifier for the job to which the {dfeed} sends data. end::job-id-datafeed[] +tag::output-memory-allocator-bytes[] +The amount of memory, in bytes, used to output {anomaly-job} documents. +end::output-memory-allocator-bytes[] + tag::lambda[] Advanced configuration option. Regularization parameter to prevent overfitting on the training data set. 
Multiplies an L2 regularization term which applies to diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 3c8a403775802..564831475c960 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -176,7 +176,8 @@ static TransportVersion def(int id) { public static final TransportVersion ML_INFERENCE_AZURE_OPENAI_EMBEDDINGS = def(8_634_00_0); public static final TransportVersion ILM_SHRINK_ENABLE_WRITE = def(8_635_00_0); public static final TransportVersion GEOIP_CACHE_STATS = def(8_636_00_0); - public static final TransportVersion SHUTDOWN_REQUEST_TIMEOUTS_FIX_8_14 = def(8_636_00_1); + public static final TransportVersion SHUTDOWN_REQUEST_TIMEOUTS_FIX_8_14 = def(8_637_00_1); + public static final TransportVersion ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD = def(8_638_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java index 3812c012e2a3d..16eceb1e89a95 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.core.ml.job.process.autodetect.state; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -48,6 +49,7 @@ public class ModelSizeStats implements ToXContentObject, Writeable { public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new 
ParseField("bucket_allocation_failures_count"); public static final ParseField MEMORY_STATUS_FIELD = new ParseField("memory_status"); public static final ParseField ASSIGNMENT_MEMORY_BASIS_FIELD = new ParseField("assignment_memory_basis"); + public static final ParseField OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD = new ParseField("output_memory_allocator_bytes"); public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField("categorized_doc_count"); public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField("total_category_count"); public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count"); @@ -85,6 +87,7 @@ private static ConstructingObjectParser createParser(boolean igno ASSIGNMENT_MEMORY_BASIS_FIELD, ValueType.STRING ); + parser.declareLong(Builder::setOutputMemoryAllocatorBytes, OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD); parser.declareLong(Builder::setCategorizedDocCount, CATEGORIZED_DOC_COUNT_FIELD); parser.declareLong(Builder::setTotalCategoryCount, TOTAL_CATEGORY_COUNT_FIELD); parser.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD); @@ -188,6 +191,7 @@ public String toString() { private final long bucketAllocationFailuresCount; private final MemoryStatus memoryStatus; private final AssignmentMemoryBasis assignmentMemoryBasis; + private final Long outputMemoryAllocatorBytes; private final long categorizedDocCount; private final long totalCategoryCount; private final long frequentCategoryCount; @@ -210,6 +214,7 @@ private ModelSizeStats( long bucketAllocationFailuresCount, MemoryStatus memoryStatus, AssignmentMemoryBasis assignmentMemoryBasis, + Long outputMemoryAllocatorBytes, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount, @@ -231,6 +236,7 @@ private ModelSizeStats( this.bucketAllocationFailuresCount = bucketAllocationFailuresCount; this.memoryStatus = memoryStatus; this.assignmentMemoryBasis = assignmentMemoryBasis; + 
this.outputMemoryAllocatorBytes = outputMemoryAllocatorBytes; this.categorizedDocCount = categorizedDocCount; this.totalCategoryCount = totalCategoryCount; this.frequentCategoryCount = frequentCategoryCount; @@ -258,6 +264,11 @@ public ModelSizeStats(StreamInput in) throws IOException { } else { assignmentMemoryBasis = null; } + if (in.getTransportVersion().onOrAfter(TransportVersions.ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD)) { + outputMemoryAllocatorBytes = in.readOptionalVLong(); + } else { + outputMemoryAllocatorBytes = null; + } categorizedDocCount = in.readVLong(); totalCategoryCount = in.readVLong(); frequentCategoryCount = in.readVLong(); @@ -295,6 +306,9 @@ public void writeTo(StreamOutput out) throws IOException { } else { out.writeBoolean(false); } + if (out.getTransportVersion().onOrAfter(TransportVersions.ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD)) { + out.writeOptionalVLong(outputMemoryAllocatorBytes); + } out.writeVLong(categorizedDocCount); out.writeVLong(totalCategoryCount); out.writeVLong(frequentCategoryCount); @@ -339,6 +353,9 @@ public XContentBuilder doXContentBody(XContentBuilder builder) throws IOExceptio if (assignmentMemoryBasis != null) { builder.field(ASSIGNMENT_MEMORY_BASIS_FIELD.getPreferredName(), assignmentMemoryBasis); } + if (outputMemoryAllocatorBytes != null) { + builder.field(OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD.getPreferredName(), outputMemoryAllocatorBytes); + } builder.field(CATEGORIZED_DOC_COUNT_FIELD.getPreferredName(), categorizedDocCount); builder.field(TOTAL_CATEGORY_COUNT_FIELD.getPreferredName(), totalCategoryCount); builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount); @@ -399,6 +416,10 @@ public AssignmentMemoryBasis getAssignmentMemoryBasis() { return assignmentMemoryBasis; } + public Long getOutputMemmoryAllocatorBytes() { + return outputMemoryAllocatorBytes; + } + public long getCategorizedDocCount() { return categorizedDocCount; } @@ -458,6 +479,7 @@ public int hashCode() { 
bucketAllocationFailuresCount, memoryStatus, assignmentMemoryBasis, + outputMemoryAllocatorBytes, categorizedDocCount, totalCategoryCount, frequentCategoryCount, @@ -495,6 +517,7 @@ public boolean equals(Object other) { && this.bucketAllocationFailuresCount == that.bucketAllocationFailuresCount && Objects.equals(this.memoryStatus, that.memoryStatus) && Objects.equals(this.assignmentMemoryBasis, that.assignmentMemoryBasis) + && Objects.equals(this.outputMemoryAllocatorBytes, that.outputMemoryAllocatorBytes) && Objects.equals(this.categorizedDocCount, that.categorizedDocCount) && Objects.equals(this.totalCategoryCount, that.totalCategoryCount) && Objects.equals(this.frequentCategoryCount, that.frequentCategoryCount) @@ -520,6 +543,7 @@ public static class Builder { private long bucketAllocationFailuresCount; private MemoryStatus memoryStatus; private AssignmentMemoryBasis assignmentMemoryBasis; + private Long outputMemoryAllocatorBytes; private long categorizedDocCount; private long totalCategoryCount; private long frequentCategoryCount; @@ -549,6 +573,7 @@ public Builder(ModelSizeStats modelSizeStats) { this.bucketAllocationFailuresCount = modelSizeStats.bucketAllocationFailuresCount; this.memoryStatus = modelSizeStats.memoryStatus; this.assignmentMemoryBasis = modelSizeStats.assignmentMemoryBasis; + this.outputMemoryAllocatorBytes = modelSizeStats.outputMemoryAllocatorBytes; this.categorizedDocCount = modelSizeStats.categorizedDocCount; this.totalCategoryCount = modelSizeStats.totalCategoryCount; this.frequentCategoryCount = modelSizeStats.frequentCategoryCount; @@ -611,6 +636,11 @@ public Builder setAssignmentMemoryBasis(AssignmentMemoryBasis assignmentMemoryBa return this; } + public Builder setOutputMemoryAllocatorBytes(long outputMemoryAllocatorBytes) { + this.outputMemoryAllocatorBytes = outputMemoryAllocatorBytes; + return this; + } + public Builder setCategorizedDocCount(long categorizedDocCount) { this.categorizedDocCount = categorizedDocCount; return this; 
@@ -670,6 +700,7 @@ public ModelSizeStats build() { bucketAllocationFailuresCount, memoryStatus, assignmentMemoryBasis, + outputMemoryAllocatorBytes, categorizedDocCount, totalCategoryCount, frequentCategoryCount, diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java index 8aa5088f41deb..e30b706caa0ef 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java @@ -32,6 +32,7 @@ public void testDefaultConstructor() { assertEquals(0, stats.getBucketAllocationFailuresCount()); assertEquals(MemoryStatus.OK, stats.getMemoryStatus()); assertNull(stats.getAssignmentMemoryBasis()); + assertNull(stats.getOutputMemmoryAllocatorBytes()); assertEquals(0, stats.getCategorizedDocCount()); assertEquals(0, stats.getTotalCategoryCount()); assertEquals(0, stats.getFrequentCategoryCount()); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java index b6b050a10c790..cb02990da74c9 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java @@ -213,6 +213,12 @@ protected Table getTableWithHeader(RestRequest request) { .setAliases("mbaf", "modelBucketAllocationFailures") .build() ); + table.addCell( + "model.output_memory_allocator_bytes", + TableColumnAttributeBuilder.builder("how many bytes have been used to output the model documents", false) + .setAliases("momab", "modelOutputMemoryAllocatorBytes") + .build() + ); 
table.addCell( "model.categorization_status", TableColumnAttributeBuilder.builder("current categorization status", false) @@ -416,6 +422,11 @@ private Table buildTable(RestRequest request, Response jobStats) { table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalPartitionFieldCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getBucketAllocationFailuresCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizationStatus().toString()); + table.addCell( + modelSizeStats == null || modelSizeStats.getOutputMemmoryAllocatorBytes() == null + ? null + : ByteSizeValue.ofBytes(modelSizeStats.getOutputMemmoryAllocatorBytes()) + ); table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizedDocCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalCategoryCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getFrequentCategoryCount());