[ML] Update vCPUs ranges for start model deployment (#195617)

## Summary

#### Different vCPU ranges and enabling support for static allocations based on the serverless project type

- Each serverless config yml, e.g. [serverless.es.yml](https://github.com/darnautov/kibana/blob/84b3b79a1537fd98b18d1f137b16b532f3f1061f/config/serverless.es.yml#L61), now contains the parameters required for start model deployment:

```yml
xpack.ml.nlp:
  enabled: true
  modelDeployment:
    allowStaticAllocations: true
    vCPURange:
      low:
        min: 0
        max: 2
        static: 2
      medium:
        min: 1
        max: 32
        static: 32
      high:
        min: 1
        max: 512
        static: 512
```

Note: _There will be no static allocations option for serverless O11y and serverless Security._

#### The minimum values of vCPUs

- 0 for the Low usage level on both serverless and ESS.
- 1 for the Medium and High usage levels on both serverless and ESS.

#### The default vCPUs usage levels

- Low in serverless.
- Medium in ESS and on-prem.

### Checklist

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
elastic · Oct 14, 2024 · 1389708 · 1389708
1 parent 25d15c9
commit 1389708
Show file tree

Hide file tree

Showing 18 changed files with 493 additions and 75 deletions.
diff --git a/config/serverless.es.yml b/config/serverless.es.yml
@@ -57,7 +57,23 @@ xpack.painless_lab.enabled: false
 
 xpack.ml.ad.enabled: false
 xpack.ml.dfa.enabled: false
-xpack.ml.nlp.enabled: true
+xpack.ml.nlp:
+  enabled: true
+  modelDeployment:
+    allowStaticAllocations: true
+    vCPURange:
+      low:
+        min: 0
+        max: 2
+        static: 2
+      medium:
+        min: 1
+        max: 32
+        static: 32
+      high:
+        min: 1
+        max: 512
+        static: 512
 xpack.ml.compatibleModuleType: 'search'
 
 data_visualizer.resultLinks.fileBeat.enabled: false

diff --git a/config/serverless.oblt.yml b/config/serverless.oblt.yml
@@ -189,7 +189,20 @@ telemetry.labels.serverless: observability
 
 xpack.ml.ad.enabled: true
 xpack.ml.dfa.enabled: false
-xpack.ml.nlp.enabled: true
+xpack.ml.nlp:
+  enabled: true
+  modelDeployment:
+    allowStaticAllocations: false
+    vCPURange:
+      low:
+        min: 0
+        max: 2
+      medium:
+        min: 1
+        max: 32
+      high:
+        min: 1
+        max: 128
 xpack.ml.compatibleModuleType: 'observability'
 
 # Disable the embedded Dev Console

diff --git a/config/serverless.security.yml b/config/serverless.security.yml
@@ -100,7 +100,20 @@ xpack.fleet.packages:
 
 xpack.ml.ad.enabled: true
 xpack.ml.dfa.enabled: true
-xpack.ml.nlp.enabled: true
+xpack.ml.nlp:
+  enabled: true
+  modelDeployment:
+    allowStaticAllocations: false
+    vCPURange:
+      low:
+        min: 0
+        max: 2
+      medium:
+        min: 1
+        max: 32
+      high:
+        min: 1
+        max: 128
 xpack.ml.compatibleModuleType: 'security'
 
 # Disable the embedded Dev Console

diff --git a/test/plugin_functional/test_suites/core_plugins/rendering.ts b/test/plugin_functional/test_suites/core_plugins/rendering.ts
@@ -302,6 +302,16 @@ export default function ({ getService }: PluginFunctionalProviderContext) {
         'xpack.ml.ad.enabled (boolean)',
         'xpack.ml.dfa.enabled (boolean)',
         'xpack.ml.nlp.enabled (boolean)',
+        'xpack.ml.nlp.modelDeployment.allowStaticAllocations (boolean)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.high.max (number)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.high.min (number)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.high.static (number?)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.low.max (number)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.low.min (number)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.low.static (number?)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.medium.max (number)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.medium.min (number)',
+        'xpack.ml.nlp.modelDeployment.vCPURange.medium.static (number?)',
         'xpack.osquery.actionEnabled (boolean?)',
         'xpack.remote_clusters.ui.enabled (boolean?)',
         /**

diff --git a/x-pack/plugins/ml/common/constants/app.ts b/x-pack/plugins/ml/common/constants/app.ts
@@ -20,11 +20,29 @@ export const ML_EXTERNAL_BASE_PATH = '/api/ml';
 export type MlFeatures = Record<'ad' | 'dfa' | 'nlp', boolean>;
 export type CompatibleModule = 'security' | 'observability' | 'search';
 export type ExperimentalFeatures = Record<'ruleFormV2', boolean>;
+export interface ModelDeploymentSettings {
+  allowStaticAllocations: boolean;
+  vCPURange: Record<
+    'low' | 'medium' | 'high',
+    {
+      min: number;
+      max: number;
+      static?: number;
+    }
+  >;
+}
+
+export interface NLPSettings {
+  modelDeployment: ModelDeploymentSettings;
+}
 
 export interface ConfigSchema {
   ad?: { enabled: boolean };
   dfa?: { enabled: boolean };
-  nlp?: { enabled: boolean };
+  nlp?: {
+    enabled: boolean;
+    modelDeployment?: ModelDeploymentSettings;
+  };
   compatibleModuleType?: CompatibleModule;
   experimental?: {
     ruleFormV2?: { enabled: boolean };
@@ -51,3 +69,9 @@ export function initExperimentalFeatures(
     experimentalFeatures.ruleFormV2 = config.experimental.ruleFormV2.enabled;
   }
 }
+
+export function initModelDeploymentSettings(nlpSettings: NLPSettings, config: ConfigSchema) {
+  if (config.nlp?.modelDeployment !== undefined) {
+    nlpSettings.modelDeployment = config.nlp.modelDeployment;
+  }
+}
diff --git a/x-pack/plugins/ml/public/application/app.tsx b/x-pack/plugins/ml/public/application/app.tsx
@@ -19,13 +19,13 @@ import { KibanaRenderContextProvider } from '@kbn/react-kibana-context-render';
 import { StorageContextProvider } from '@kbn/ml-local-storage';
 import useLifecycles from 'react-use/lib/useLifecycles';
 import useObservable from 'react-use/lib/useObservable';
-import type { ExperimentalFeatures, MlFeatures } from '../../common/constants/app';
+import type { ExperimentalFeatures, MlFeatures, NLPSettings } from '../../common/constants/app';
 import { ML_STORAGE_KEYS } from '../../common/types/storage';
 import type { MlSetupDependencies, MlStartDependencies } from '../plugin';
 import { setLicenseCache } from './license';
 import { MlRouter } from './routing';
 import type { PageDependencies } from './routing/router';
-import { EnabledFeaturesContextProvider } from './contexts/ml';
+import { EnabledFeaturesContextProvider, MlServerInfoContextProvider } from './contexts/ml';
 import type { StartServices } from './contexts/kibana';
 import { getMlGlobalServices } from './util/get_services';
 
@@ -42,6 +42,7 @@ interface AppProps {
   isServerless: boolean;
   mlFeatures: MlFeatures;
   experimentalFeatures: ExperimentalFeatures;
+  nlpSettings: NLPSettings;
 }
 
 const localStorage = new Storage(window.localStorage);
@@ -59,6 +60,7 @@ const App: FC<AppProps> = ({
   isServerless,
   mlFeatures,
   experimentalFeatures,
+  nlpSettings,
 }) => {
   const pageDeps: PageDependencies = {
     history: appMountParams.history,
@@ -142,7 +144,9 @@ const App: FC<AppProps> = ({
                 showMLNavMenu={chromeStyle === 'classic'}
                 experimentalFeatures={experimentalFeatures}
               >
-                <MlRouter pageDeps={pageDeps} />
+                <MlServerInfoContextProvider nlpSettings={nlpSettings}>
+                  <MlRouter pageDeps={pageDeps} />
+                </MlServerInfoContextProvider>
               </EnabledFeaturesContextProvider>
             </DatePickerContextProvider>
           </StorageContextProvider>
@@ -158,7 +162,8 @@ export const renderApp = (
   appMountParams: AppMountParameters,
   isServerless: boolean,
   mlFeatures: MlFeatures,
-  experimentalFeatures: ExperimentalFeatures
+  experimentalFeatures: ExperimentalFeatures,
+  nlpSettings: NLPSettings
 ) => {
   appMountParams.onAppLeave((actions) => actions.default());
 
@@ -170,6 +175,7 @@ export const renderApp = (
       isServerless={isServerless}
       mlFeatures={mlFeatures}
       experimentalFeatures={experimentalFeatures}
+      nlpSettings={nlpSettings}
     />,
     appMountParams.element
   );

diff --git a/x-pack/plugins/ml/public/application/contexts/ml/index.ts b/x-pack/plugins/ml/public/application/contexts/ml/index.ts
@@ -7,3 +7,4 @@
 
 export { DataSourceContextProvider, useDataSource } from './data_source_context';
 export { EnabledFeaturesContextProvider, useEnabledFeatures } from './serverless_context';
+export { MlServerInfoContextProvider, useMlServerInfo } from './ml_server_info_context';
diff --git a/x-pack/plugins/ml/public/application/contexts/ml/ml_server_info_context.tsx b/x-pack/plugins/ml/public/application/contexts/ml/ml_server_info_context.tsx
@@ -0,0 +1,39 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import React, { type FC, type PropsWithChildren, createContext, useContext } from 'react';
+import type { NLPSettings } from '../../../../common/constants/app';
+
+export interface MlServerInfoContextValue {
+  // TODO add ML server info
+  nlpSettings: NLPSettings;
+}
+
+export const MlServerInfoContext = createContext<MlServerInfoContextValue | undefined>(undefined);
+
+export const MlServerInfoContextProvider: FC<PropsWithChildren<MlServerInfoContextValue>> = ({
+  children,
+  nlpSettings,
+}) => {
+  return (
+    <MlServerInfoContext.Provider
+      value={{
+        nlpSettings,
+      }}
+    >
+      {children}
+    </MlServerInfoContext.Provider>
+  );
+};
+
+export function useMlServerInfo() {
+  const context = useContext(MlServerInfoContext);
+  if (context === undefined) {
+    throw new Error('useMlServerInfo must be used within a MlServerInfoContextProvider');
+  }
+  return context;
+}
diff --git a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts
@@ -44,23 +44,82 @@ describe('DeploymentParamsMapper', () => {
 
       it('should get correct VCU levels', () => {
         expect(mapper.getVCURange('low')).toEqual({
-          min: 8,
+          min: 0,
           max: 16,
           static: 16,
         });
         expect(mapper.getVCURange('medium')).toEqual({
-          min: 24,
+          min: 8,
           max: 256,
           static: 256,
         });
         expect(mapper.getVCURange('high')).toEqual({
-          min: 264,
-          max: 4000,
-          static: 800,
+          min: 8,
+          max: 4096,
+          static: 4096,
         });
       });
 
-      it('should enforce adaptive allocations', () => {
+      it('maps UI params to API correctly', () => {
+        expect(
+          mapper.mapUiToApiDeploymentParams({
+            deploymentId: 'test-deployment',
+            optimized: 'optimizedForSearch',
+            adaptiveResources: false,
+            vCPUUsage: 'low',
+          })
+        ).toEqual({
+          number_of_allocations: 1,
+          deployment_id: 'test-deployment',
+          model_id: 'test-model',
+          priority: 'normal',
+          threads_per_allocation: 2,
+        });
+
+        expect(
+          mapper.mapUiToApiDeploymentParams({
+            deploymentId: 'test-deployment',
+            optimized: 'optimizedForIngest',
+            adaptiveResources: false,
+            vCPUUsage: 'low',
+          })
+        ).toEqual({
+          deployment_id: 'test-deployment',
+          model_id: 'test-model',
+          priority: 'normal',
+          threads_per_allocation: 1,
+          number_of_allocations: 2,
+        });
+      });
+
+      it('overrides vCPUs levels and enforces adaptive allocations if static support is not configured', () => {
+        mapper = new DeploymentParamsMapper(modelId, mlServerLimits, cloudInfo, false, {
+          modelDeployment: {
+            allowStaticAllocations: false,
+            vCPURange: {
+              low: { min: 0, max: 2, static: 2 },
+              medium: { min: 1, max: 32, static: 32 },
+              high: { min: 1, max: 128, static: 128 },
+            },
+          },
+        });
+
+        expect(mapper.getVCURange('low')).toEqual({
+          min: 0,
+          max: 16,
+          static: 16,
+        });
+        expect(mapper.getVCURange('medium')).toEqual({
+          min: 8,
+          max: 256,
+          static: 256,
+        });
+        expect(mapper.getVCURange('high')).toEqual({
+          min: 8,
+          max: 1024,
+          static: 1024,
+        });
+
         expect(
           mapper.mapUiToApiDeploymentParams({
             deploymentId: 'test-deployment',
@@ -72,7 +131,7 @@ describe('DeploymentParamsMapper', () => {
           adaptive_allocations: {
             enabled: true,
             max_number_of_allocations: 1,
-            min_number_of_allocations: 1,
+            min_number_of_allocations: 0,
           },
           deployment_id: 'test-deployment',
           model_id: 'test-model',
@@ -88,15 +147,15 @@ describe('DeploymentParamsMapper', () => {
             vCPUUsage: 'low',
           })
         ).toEqual({
-          adaptive_allocations: {
-            enabled: true,
-            max_number_of_allocations: 2,
-            min_number_of_allocations: 1,
-          },
           deployment_id: 'test-deployment',
           model_id: 'test-model',
           priority: 'normal',
           threads_per_allocation: 1,
+          adaptive_allocations: {
+            enabled: true,
+            max_number_of_allocations: 2,
+            min_number_of_allocations: 0,
+          },
         });
       });
     });
@@ -468,7 +527,7 @@ describe('DeploymentParamsMapper', () => {
           threads_per_allocation: 2,
           adaptive_allocations: {
             enabled: true,
-            min_number_of_allocations: 1,
+            min_number_of_allocations: 0,
             max_number_of_allocations: 1,
           },
         });
@@ -507,7 +566,7 @@ describe('DeploymentParamsMapper', () => {
           adaptive_allocations: {
             enabled: true,
             max_number_of_allocations: 12499,
-            min_number_of_allocations: 4,
+            min_number_of_allocations: 1,
           },
         });
 
@@ -525,7 +584,7 @@ describe('DeploymentParamsMapper', () => {
           threads_per_allocation: 1,
           adaptive_allocations: {
             enabled: true,
-            min_number_of_allocations: 1,
+            min_number_of_allocations: 0,
             max_number_of_allocations: 2,
           },
         });
@@ -544,7 +603,7 @@ describe('DeploymentParamsMapper', () => {
           threads_per_allocation: 1,
           adaptive_allocations: {
             enabled: true,
-            min_number_of_allocations: 3,
+            min_number_of_allocations: 1,
             max_number_of_allocations: 32,
           },
         });
@@ -563,7 +622,7 @@ describe('DeploymentParamsMapper', () => {
           threads_per_allocation: 1,
           adaptive_allocations: {
             enabled: true,
-            min_number_of_allocations: 33,
+            min_number_of_allocations: 1,
             max_number_of_allocations: 99999,
           },
         });
Original file line number	Diff line number	Diff line change
Expand Up		@@ -7,3 +7,4 @@

		export { DataSourceContextProvider, useDataSource } from './data_source_context';
		export { EnabledFeaturesContextProvider, useEnabledFeatures } from './serverless_context';
		export { MlServerInfoContextProvider, useMlServerInfo } from './ml_server_info_context';