From 13897083dc8c465a16ddb3856d4e4904b9629610 Mon Sep 17 00:00:00 2001 From: Dima Arnautov Date: Mon, 14 Oct 2024 16:38:26 +0200 Subject: [PATCH] [ML] Update vCPUs ranges for start model deployment (#195617) ## Summary #### Different vCPU ranges and support for static allocations based on the serverless project type - Each serverless config yml, e.g. [search.es.yml](https://github.com/darnautov/kibana/blob/84b3b79a1537fd98b18d1f137b16b532f3f1061f/config/serverless.es.yml#L61), now contains the parameters required to start a model deployment: ```yml xpack.ml.nlp: enabled: true modelDeployment: allowStaticAllocations: true vCPURange: low: min: 0 max: 2 static: 2 medium: min: 1 max: 32 static: 32 high: min: 1 max: 512 static: 512 ``` Note: _There will be no static allocations option for serverless O11y and serverless Security._ #### The minimum values of vCPUs - 0 for the Low usage level on both serverless and ESS. - 1 for the Medium and High usage levels on both serverless and ESS. #### The default vCPU usage levels - Low in serverless. - Medium in ESS and on-prem. ### Checklist - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios --- config/serverless.es.yml | 18 +++- config/serverless.oblt.yml | 15 ++- config/serverless.security.yml | 15 ++- .../test_suites/core_plugins/rendering.ts | 10 ++ x-pack/plugins/ml/common/constants/app.ts | 26 +++++- x-pack/plugins/ml/public/application/app.tsx | 14 ++- .../public/application/contexts/ml/index.ts | 1 + .../contexts/ml/ml_server_info_context.tsx | 39 ++++++++ .../deployment_params_mapper.test.ts | 93 +++++++++++++++---- .../deployment_params_mapper.ts | 68 +++++++++----- .../model_management/deployment_setup.tsx | 55 ++++++++--- .../model_management/model_actions.tsx | 15 ++- x-pack/plugins/ml/public/plugin.ts | 25 ++++- x-pack/plugins/ml/server/config_schema.ts | 22 ++++- x-pack/plugins/ml/server/index.ts | 5 +- .../services/ml/trained_models_table.ts | 62 +++++++++++-- .../search/ml/trained_models_list.ts | 47 +++++++++- .../security/ml/trained_models_list.ts | 38 +++++++- 18 files changed, 493 insertions(+), 75 deletions(-) create mode 100644 x-pack/plugins/ml/public/application/contexts/ml/ml_server_info_context.tsx diff --git a/config/serverless.es.yml b/config/serverless.es.yml index ade2b7da90270..326b5f2d403bd 100644 --- a/config/serverless.es.yml +++ b/config/serverless.es.yml @@ -57,7 +57,23 @@ xpack.painless_lab.enabled: false xpack.ml.ad.enabled: false xpack.ml.dfa.enabled: false -xpack.ml.nlp.enabled: true +xpack.ml.nlp: + enabled: true + modelDeployment: + allowStaticAllocations: true + vCPURange: + low: + min: 0 + max: 2 + static: 2 + medium: + min: 1 + max: 32 + static: 32 + high: + min: 1 + max: 512 + static: 512 xpack.ml.compatibleModuleType: 'search' data_visualizer.resultLinks.fileBeat.enabled: false diff --git a/config/serverless.oblt.yml b/config/serverless.oblt.yml index 67b0cfe6ab4d5..f7e5290717cb3 100644 --- a/config/serverless.oblt.yml +++ b/config/serverless.oblt.yml @@ -189,7 +189,20 @@ telemetry.labels.serverless: observability xpack.ml.ad.enabled: true xpack.ml.dfa.enabled: false -xpack.ml.nlp.enabled: true +xpack.ml.nlp: + enabled: true + modelDeployment: + allowStaticAllocations: false + vCPURange: + low: + min: 0 + max: 2 + medium: + min: 1 + max: 32 + high: + min: 1 + max: 128 xpack.ml.compatibleModuleType: 'observability' # Disable the embedded Dev Console diff --git
a/config/serverless.security.yml b/config/serverless.security.yml index ced84714e6eaf..9244b51702f9c 100644 --- a/config/serverless.security.yml +++ b/config/serverless.security.yml @@ -100,7 +100,20 @@ xpack.fleet.packages: xpack.ml.ad.enabled: true xpack.ml.dfa.enabled: true -xpack.ml.nlp.enabled: true +xpack.ml.nlp: + enabled: true + modelDeployment: + allowStaticAllocations: false + vCPURange: + low: + min: 0 + max: 2 + medium: + min: 1 + max: 32 + high: + min: 1 + max: 128 xpack.ml.compatibleModuleType: 'security' # Disable the embedded Dev Console diff --git a/test/plugin_functional/test_suites/core_plugins/rendering.ts b/test/plugin_functional/test_suites/core_plugins/rendering.ts index 72d1f97011274..02355c97823cf 100644 --- a/test/plugin_functional/test_suites/core_plugins/rendering.ts +++ b/test/plugin_functional/test_suites/core_plugins/rendering.ts @@ -302,6 +302,16 @@ export default function ({ getService }: PluginFunctionalProviderContext) { 'xpack.ml.ad.enabled (boolean)', 'xpack.ml.dfa.enabled (boolean)', 'xpack.ml.nlp.enabled (boolean)', + 'xpack.ml.nlp.modelDeployment.allowStaticAllocations (boolean)', + 'xpack.ml.nlp.modelDeployment.vCPURange.high.max (number)', + 'xpack.ml.nlp.modelDeployment.vCPURange.high.min (number)', + 'xpack.ml.nlp.modelDeployment.vCPURange.high.static (number?)', + 'xpack.ml.nlp.modelDeployment.vCPURange.low.max (number)', + 'xpack.ml.nlp.modelDeployment.vCPURange.low.min (number)', + 'xpack.ml.nlp.modelDeployment.vCPURange.low.static (number?)', + 'xpack.ml.nlp.modelDeployment.vCPURange.medium.max (number)', + 'xpack.ml.nlp.modelDeployment.vCPURange.medium.min (number)', + 'xpack.ml.nlp.modelDeployment.vCPURange.medium.static (number?)', 'xpack.osquery.actionEnabled (boolean?)', 'xpack.remote_clusters.ui.enabled (boolean?)', /** diff --git a/x-pack/plugins/ml/common/constants/app.ts b/x-pack/plugins/ml/common/constants/app.ts index dd41353184fd4..276fb10576fc5 100644 --- a/x-pack/plugins/ml/common/constants/app.ts +++ b/x-pack/plugins/ml/common/constants/app.ts @@ -20,11 +20,29 @@ export const ML_EXTERNAL_BASE_PATH = '/api/ml'; export type MlFeatures = Record<'ad' | 'dfa' | 'nlp', boolean>; export type CompatibleModule = 'security' | 'observability' | 'search'; export type ExperimentalFeatures = Record<'ruleFormV2', boolean>; +export interface ModelDeploymentSettings { + allowStaticAllocations: boolean; + vCPURange: Record< + 'low' | 'medium' | 'high', + { + min: number; + max: number; + static?: number; + } + >; +} + +export interface NLPSettings { + modelDeployment: ModelDeploymentSettings; +} export interface ConfigSchema { ad?: { enabled: boolean }; dfa?: { enabled: boolean }; - nlp?: { enabled: boolean }; + nlp?: { + enabled: boolean; + modelDeployment?: ModelDeploymentSettings; + }; compatibleModuleType?: CompatibleModule; experimental?: { ruleFormV2?: { enabled: boolean }; @@ -51,3 +69,9 @@ export function initExperimentalFeatures( experimentalFeatures.ruleFormV2 = config.experimental.ruleFormV2.enabled; } } + +export function initModelDeploymentSettings(nlpSettings: NLPSettings, config: ConfigSchema) { + if (config.nlp?.modelDeployment !== undefined) { + nlpSettings.modelDeployment = config.nlp.modelDeployment; + } +} diff --git a/x-pack/plugins/ml/public/application/app.tsx b/x-pack/plugins/ml/public/application/app.tsx index 6c6402abaee80..d2bc17ab210b9 100644 --- a/x-pack/plugins/ml/public/application/app.tsx +++ b/x-pack/plugins/ml/public/application/app.tsx @@ -19,13 +19,13 @@ import { KibanaRenderContextProvider } from 
'@kbn/react-kibana-context-render'; import { StorageContextProvider } from '@kbn/ml-local-storage'; import useLifecycles from 'react-use/lib/useLifecycles'; import useObservable from 'react-use/lib/useObservable'; -import type { ExperimentalFeatures, MlFeatures } from '../../common/constants/app'; +import type { ExperimentalFeatures, MlFeatures, NLPSettings } from '../../common/constants/app'; import { ML_STORAGE_KEYS } from '../../common/types/storage'; import type { MlSetupDependencies, MlStartDependencies } from '../plugin'; import { setLicenseCache } from './license'; import { MlRouter } from './routing'; import type { PageDependencies } from './routing/router'; -import { EnabledFeaturesContextProvider } from './contexts/ml'; +import { EnabledFeaturesContextProvider, MlServerInfoContextProvider } from './contexts/ml'; import type { StartServices } from './contexts/kibana'; import { getMlGlobalServices } from './util/get_services'; @@ -42,6 +42,7 @@ interface AppProps { isServerless: boolean; mlFeatures: MlFeatures; experimentalFeatures: ExperimentalFeatures; + nlpSettings: NLPSettings; } const localStorage = new Storage(window.localStorage); @@ -59,6 +60,7 @@ const App: FC = ({ isServerless, mlFeatures, experimentalFeatures, + nlpSettings, }) => { const pageDeps: PageDependencies = { history: appMountParams.history, @@ -142,7 +144,9 @@ const App: FC = ({ showMLNavMenu={chromeStyle === 'classic'} experimentalFeatures={experimentalFeatures} > - + + + @@ -158,7 +162,8 @@ export const renderApp = ( appMountParams: AppMountParameters, isServerless: boolean, mlFeatures: MlFeatures, - experimentalFeatures: ExperimentalFeatures + experimentalFeatures: ExperimentalFeatures, + nlpSettings: NLPSettings ) => { appMountParams.onAppLeave((actions) => actions.default()); @@ -170,6 +175,7 @@ export const renderApp = ( isServerless={isServerless} mlFeatures={mlFeatures} experimentalFeatures={experimentalFeatures} + nlpSettings={nlpSettings} />, appMountParams.element ); diff --git a/x-pack/plugins/ml/public/application/contexts/ml/index.ts b/x-pack/plugins/ml/public/application/contexts/ml/index.ts index d5935bdc2ad97..6b6effcb35e9d 100644 --- a/x-pack/plugins/ml/public/application/contexts/ml/index.ts +++ b/x-pack/plugins/ml/public/application/contexts/ml/index.ts @@ -7,3 +7,4 @@ export { DataSourceContextProvider, useDataSource } from './data_source_context'; export { EnabledFeaturesContextProvider, useEnabledFeatures } from './serverless_context'; +export { MlServerInfoContextProvider, useMlServerInfo } from './ml_server_info_context'; diff --git a/x-pack/plugins/ml/public/application/contexts/ml/ml_server_info_context.tsx b/x-pack/plugins/ml/public/application/contexts/ml/ml_server_info_context.tsx new file mode 100644 index 0000000000000..0105ffff16fc0 --- /dev/null +++ b/x-pack/plugins/ml/public/application/contexts/ml/ml_server_info_context.tsx @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import React, { type FC, type PropsWithChildren, createContext, useContext } from 'react'; +import type { NLPSettings } from '../../../../common/constants/app'; + +export interface MlServerInfoContextValue { + // TODO add ML server info + nlpSettings: NLPSettings; +} + +export const MlServerInfoContext = createContext(undefined); + +export const MlServerInfoContextProvider: FC> = ({ + children, + nlpSettings, +}) => { + return ( + + {children} + + ); +}; + +export function useMlServerInfo() { + const context = useContext(MlServerInfoContext); + if (context === undefined) { + throw new Error('useMlServerInfo must be used within a MlServerInfoContextProvider'); + } + return context; +} diff --git a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts index 25e9417dc6e22..34875b893a867 100644 --- a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts +++ b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.test.ts @@ -44,23 +44,82 @@ describe('DeploymentParamsMapper', () => { it('should get correct VCU levels', () => { expect(mapper.getVCURange('low')).toEqual({ - min: 8, + min: 0, max: 16, static: 16, }); expect(mapper.getVCURange('medium')).toEqual({ - min: 24, + min: 8, max: 256, static: 256, }); expect(mapper.getVCURange('high')).toEqual({ - min: 264, - max: 4000, - static: 800, + min: 8, + max: 4096, + static: 4096, }); }); - it('should enforce adaptive allocations', () => { + it('maps UI params to API correctly', () => { + expect( + mapper.mapUiToApiDeploymentParams({ + deploymentId: 'test-deployment', + optimized: 'optimizedForSearch', + adaptiveResources: false, + vCPUUsage: 'low', + }) + ).toEqual({ + number_of_allocations: 1, + deployment_id: 'test-deployment', + model_id: 'test-model', + priority: 'normal', + threads_per_allocation: 2, + }); + + expect( + mapper.mapUiToApiDeploymentParams({ + deploymentId: 'test-deployment', + optimized: 'optimizedForIngest', + adaptiveResources: false, + vCPUUsage: 'low', + }) + ).toEqual({ + deployment_id: 'test-deployment', + model_id: 'test-model', + priority: 'normal', + threads_per_allocation: 1, + number_of_allocations: 2, + }); + }); + + it('overrides vCPUs levels and enforces adaptive allocations if static support is not configured', () => { + mapper = new DeploymentParamsMapper(modelId, mlServerLimits, cloudInfo, false, { + modelDeployment: { + allowStaticAllocations: false, + vCPURange: { + low: { min: 0, max: 2, static: 2 }, + medium: { min: 1, max: 32, static: 32 }, + high: { min: 1, max: 128, static: 128 }, + }, + }, + }); + + expect(mapper.getVCURange('low')).toEqual({ + min: 0, + max: 16, + static: 16, + }); + expect(mapper.getVCURange('medium')).toEqual({ + min: 8, + max: 256, + static: 256, + }); + expect(mapper.getVCURange('high')).toEqual({ + min: 8, + max: 1024, + static: 1024, + }); + expect( mapper.mapUiToApiDeploymentParams({ deploymentId: 'test-deployment', @@ -72,7 +131,7 @@ describe('DeploymentParamsMapper', () => { adaptive_allocations: { enabled: true, max_number_of_allocations: 1, - min_number_of_allocations: 1, + min_number_of_allocations: 0, }, deployment_id: 'test-deployment', model_id: 'test-model', @@ -88,15 +147,15 @@ describe('DeploymentParamsMapper', () => { vCPUUsage: 'low', }) ).toEqual({ - adaptive_allocations: { - enabled: true, - max_number_of_allocations: 2, - min_number_of_allocations: 1, - }, deployment_id: 
'test-deployment', model_id: 'test-model', priority: 'normal', threads_per_allocation: 1, + adaptive_allocations: { + enabled: true, + max_number_of_allocations: 2, + min_number_of_allocations: 0, + }, }); }); }); @@ -468,7 +527,7 @@ describe('DeploymentParamsMapper', () => { threads_per_allocation: 2, adaptive_allocations: { enabled: true, - min_number_of_allocations: 1, + min_number_of_allocations: 0, max_number_of_allocations: 1, }, }); @@ -507,7 +566,7 @@ describe('DeploymentParamsMapper', () => { adaptive_allocations: { enabled: true, max_number_of_allocations: 12499, - min_number_of_allocations: 4, + min_number_of_allocations: 1, }, }); @@ -525,7 +584,7 @@ describe('DeploymentParamsMapper', () => { threads_per_allocation: 1, adaptive_allocations: { enabled: true, - min_number_of_allocations: 1, + min_number_of_allocations: 0, max_number_of_allocations: 2, }, }); @@ -544,7 +603,7 @@ describe('DeploymentParamsMapper', () => { threads_per_allocation: 1, adaptive_allocations: { enabled: true, - min_number_of_allocations: 3, + min_number_of_allocations: 1, max_number_of_allocations: 32, }, }); @@ -563,7 +622,7 @@ describe('DeploymentParamsMapper', () => { threads_per_allocation: 1, adaptive_allocations: { enabled: true, - min_number_of_allocations: 33, + min_number_of_allocations: 1, max_number_of_allocations: 99999, }, }); diff --git a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts index 70252da694a6c..ecb8a06198b1c 100644 --- a/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts +++ b/x-pack/plugins/ml/public/application/model_management/deployment_params_mapper.ts @@ -6,6 +6,7 @@ */ import type { MlStartTrainedModelDeploymentRequest } from '@elastic/elasticsearch/lib/api/typesWithBodyKey'; +import type { NLPSettings } from '../../../common/constants/app'; import type { TrainedModelDeploymentStatsResponse } from '../../../common/types/trained_models'; import type { CloudInfo } from '../services/ml_server_info'; import type { MlServerLimits } from '../../../common/types/ml_server_info'; @@ -17,16 +18,16 @@ export type MlStartTrainedModelDeploymentRequestNew = MlStartTrainedModelDeploym const THREADS_MAX_EXPONENT = 5; -// TODO set to 0 when https://github.com/elastic/elasticsearch/pull/113455 is merged -const MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS = 1; - type VCPUBreakpoints = Record< DeploymentParamsUI['vCPUUsage'], { min: number; max: number; - /** Static value is used for the number of vCPUs when the adaptive resources are disabled */ - static: number; + /** + * Static value is used for the number of vCPUs when the adaptive resources are disabled. + * Not allowed in certain environments. + */ + static?: number; } >; @@ -39,26 +40,28 @@ export class DeploymentParamsMapper { private readonly threadingParamsValues: number[]; /** - * vCPUs level breakpoints for cloud cluster with enabled ML autoscaling + * vCPUs level breakpoints for cloud cluster with enabled ML autoscaling. + * TODO resolve dynamically when Control Pane exposes the vCPUs range. 
*/ private readonly autoscalingVCPUBreakpoints: VCPUBreakpoints = { - low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 }, - medium: { min: 3, max: 32, static: 32 }, - high: { min: 33, max: 99999, static: 100 }, + low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 }, + medium: { min: 1, max: 32, static: 32 }, + high: { min: 1, max: 99999, static: 128 }, }; /** - * vCPUs level breakpoints for serverless projects + * Default vCPUs level breakpoints for serverless projects. + * Can be overridden by the project specific settings. */ private readonly serverlessVCPUBreakpoints: VCPUBreakpoints = { - low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 }, - medium: { min: 3, max: 32, static: 32 }, - high: { min: 33, max: 500, static: 100 }, + low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 }, + medium: { min: 1, max: 32, static: 32 }, + high: { min: 1, max: 512, static: 512 }, }; /** * vCPUs level breakpoints based on the ML server limits. - * Either on-prem or cloud with disabled ML autoscaling + * Either on-prem or cloud with disabled ML autoscaling. */ private readonly hardwareVCPUBreakpoints: VCPUBreakpoints; @@ -67,12 +70,26 @@ export class DeploymentParamsMapper { */ private readonly vCpuBreakpoints: VCPUBreakpoints; + /** + * Gets the min allowed number of allocations. + * - 0 for serverless and ESS with enabled autoscaling. + * - 1 otherwise + * @private + */ + private get minAllowedNumberOfAllocation(): number { + return !this.showNodeInfo || this.cloudInfo.isMlAutoscalingEnabled ? 0 : 1; + } + constructor( private readonly modelId: string, private readonly mlServerLimits: MlServerLimits, private readonly cloudInfo: CloudInfo, - private readonly showNodeInfo: boolean + private readonly showNodeInfo: boolean, + private readonly nlpSettings?: NLPSettings ) { + /** + * Initial value can be different for serverless and ESS with autoscaling. + */ const maxSingleMlNodeProcessors = this.mlServerLimits.max_single_ml_node_processors; this.threadingParamsValues = new Array(THREADS_MAX_EXPONENT) @@ -83,7 +100,7 @@ export class DeploymentParamsMapper { const mediumValue = this.mlServerLimits!.total_ml_processors! / 2; this.hardwareVCPUBreakpoints = { - low: { min: MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS, max: 2, static: 2 }, + low: { min: this.minAllowedNumberOfAllocation, max: 2, static: 2 }, medium: { min: Math.min(3, mediumValue), max: mediumValue, static: mediumValue }, high: { min: mediumValue + 1, @@ -94,6 +111,10 @@ export class DeploymentParamsMapper { if (!this.showNodeInfo) { this.vCpuBreakpoints = this.serverlessVCPUBreakpoints; + if (this.nlpSettings?.modelDeployment) { + // Apply project specific overrides + this.vCpuBreakpoints = this.nlpSettings.modelDeployment.vCPURange; + } } else if (this.cloudInfo.isMlAutoscalingEnabled) { this.vCpuBreakpoints = this.autoscalingVCPUBreakpoints; } else { @@ -108,6 +129,11 @@ export class DeploymentParamsMapper { return input.vCPUUsage === 'low' ? 2 : Math.max(...this.threadingParamsValues); } + /** + * Returns allocation values accounting for the number of threads per allocation. + * @param params + * @private + */ private getAllocationsParams( params: DeploymentParamsUI ): Pick & @@ -126,7 +152,7 @@ export class DeploymentParamsMapper { min_number_of_allocations: Math.floor(levelValues.min / threadsPerAllocation) || // in any env, allow scale down to 0 only for "low" vCPU usage - (params.vCPUUsage === 'low' ? MIN_SUPPORTED_NUMBER_OF_ALLOCATIONS : 1), + (params.vCPUUsage === 'low' ? 
this.minAllowedNumberOfAllocation : 1), max_number_of_allocations: maxValue, }; } @@ -148,7 +174,7 @@ export class DeploymentParamsMapper { public getVCURange(vCPUUsage: DeploymentParamsUI['vCPUUsage']) { // general purpose (c6gd) 1VCU = 1GB RAM / 0.5 vCPU // vector optimized (r6gd) 1VCU = 1GB RAM / 0.125 vCPU - const vCPUBreakpoints = this.serverlessVCPUBreakpoints[vCPUUsage]; + const vCPUBreakpoints = this.vCpuBreakpoints[vCPUUsage]; return Object.entries(vCPUBreakpoints).reduce((acc, [key, val]) => { // as we can't retrieve Search project configuration, we assume that the vector optimized instance is used @@ -165,8 +191,8 @@ export class DeploymentParamsMapper { input: DeploymentParamsUI ): MlStartTrainedModelDeploymentRequestNew { const resultInput: DeploymentParamsUI = Object.create(input); - if (!this.showNodeInfo) { - // Enforce adaptive resources for serverless + if (!this.showNodeInfo && this.nlpSettings?.modelDeployment.allowStaticAllocations === false) { + // Enforce adaptive resources for serverless projects with prohibited static allocations resultInput.adaptiveResources = true; } @@ -177,7 +203,7 @@ export class DeploymentParamsMapper { deployment_id: resultInput.deploymentId, priority: 'normal', threads_per_allocation: this.getNumberOfThreads(resultInput), - ...(resultInput.adaptiveResources || !this.showNodeInfo + ...(resultInput.adaptiveResources ? { adaptive_allocations: { enabled: true, diff --git a/x-pack/plugins/ml/public/application/model_management/deployment_setup.tsx b/x-pack/plugins/ml/public/application/model_management/deployment_setup.tsx index 4ed894854eab5..87fff2bf3eb75 100644 --- a/x-pack/plugins/ml/public/application/model_management/deployment_setup.tsx +++ b/x-pack/plugins/ml/public/application/model_management/deployment_setup.tsx @@ -41,6 +41,7 @@ import type { CoreStart, OverlayStart } from '@kbn/core/public'; import { css } from '@emotion/react'; import { toMountPoint } from '@kbn/react-kibana-mount'; import { dictionaryValidator } from '@kbn/ml-validators'; +import type { NLPSettings } from '../../../common/constants/app'; import type { TrainedModelDeploymentStatsResponse } from '../../../common/types/trained_models'; import { type CloudInfo, getNewJobLimits } from '../services/ml_server_info'; import type { ModelItem } from './models_list'; @@ -220,7 +221,7 @@ export const DeploymentSetup: FC = ({ const helperText = useMemo(() => { const vcpuRange = deploymentParamsMapper.getVCPURange(config.vCPUUsage); - if (cloudInfo.isCloud && cloudInfo.isMlAutoscalingEnabled) { + if (cloudInfo.isCloud && cloudInfo.isMlAutoscalingEnabled && showNodeInfo) { // Running in cloud with ML autoscaling enabled if (config.adaptiveResources) { // With adaptive resources @@ -285,7 +286,7 @@ export const DeploymentSetup: FC = ({ } } } else if ( - (cloudInfo.isCloud && !cloudInfo.isMlAutoscalingEnabled) || + (cloudInfo.isCloud && !cloudInfo.isMlAutoscalingEnabled && showNodeInfo) || (!cloudInfo.isCloud && showNodeInfo) ) { // Running in cloud with autoscaling disabled or on-prem @@ -352,7 +353,7 @@ export const DeploymentSetup: FC = ({ } } } else if (!showNodeInfo) { - // Running a Search project in serverless + // Running in serverless const vcuRange = deploymentParamsMapper.getVCURange(config.vCPUUsage); if (config.adaptiveResources) { @@ -386,6 +387,29 @@ export const DeploymentSetup: FC = ({ } ); } + } else { + // Static allocations are allowed for Search projects + switch (config.vCPUUsage) { + case 'low': + return i18n.translate( + 
'xpack.ml.trainedModels.modelsList.startDeployment.serverless.lowCpuStaticHelp', + { + defaultMessage: + 'This level set resources to {staticVCUs, plural, one {VCU} other {# VCUs}}, which may be suitable for development, testing, and demos depending on your parameters. It is not recommended for production use.', + values: { staticVCUs: vcuRange.static }, + } + ); + case 'medium': + case 'high': + return i18n.translate( + 'xpack.ml.trainedModels.modelsList.startDeployment.serverless.mediumCpuStaticHelp', + { + defaultMessage: + 'Your model will consume {staticVCUs, plural, one {VCU} other {# VCUs}}, even when not in use.', + values: { staticVCUs: vcuRange.static }, + } + ); + } } } }, [ @@ -570,8 +594,8 @@ export const DeploymentSetup: FC = ({ - -

{helperText}

+ + {helperText}
@@ -630,6 +654,7 @@ interface StartDeploymentModalProps { cloudInfo: CloudInfo; deploymentParamsMapper: DeploymentParamsMapper; showNodeInfo: boolean; + nlpSettings: NLPSettings; } /** @@ -645,6 +670,7 @@ export const StartUpdateDeploymentModal: FC = ({ cloudInfo, deploymentParamsMapper, showNodeInfo, + nlpSettings, }) => { const isUpdate = !!initialParams; @@ -653,20 +679,22 @@ export const StartUpdateDeploymentModal: FC = ({ deploymentParamsMapper.mapApiToUiDeploymentParams(v) ); + const defaultVCPUUsage: DeploymentParamsUI['vCPUUsage'] = showNodeInfo ? 'medium' : 'low'; + return uiParams?.some((v) => v.optimized === 'optimizedForIngest') ? { deploymentId: `${model.model_id}_search`, optimized: 'optimizedForSearch', - vCPUUsage: 'medium', + vCPUUsage: defaultVCPUUsage, adaptiveResources: true, } : { deploymentId: `${model.model_id}_ingest`, optimized: 'optimizedForIngest', - vCPUUsage: 'medium', + vCPUUsage: defaultVCPUUsage, adaptiveResources: true, }; - }, [deploymentParamsMapper, model.model_id, model.stats?.deployment_stats]); + }, [deploymentParamsMapper, model.model_id, model.stats?.deployment_stats, showNodeInfo]); const [config, setConfig] = useState(initialParams ?? getDefaultParams()); @@ -721,7 +749,9 @@ export const StartUpdateDeploymentModal: FC = ({ onConfigChange={setConfig} errors={errors} isUpdate={isUpdate} - disableAdaptiveResourcesControl={!showNodeInfo} + disableAdaptiveResourcesControl={ + showNodeInfo ? false : !nlpSettings.modelDeployment.allowStaticAllocations + } deploymentsParams={model.stats?.deployment_stats.reduce< Record >((acc, curr) => { @@ -811,7 +841,8 @@ export const getUserInputModelDeploymentParamsProvider = startServices: Pick, startModelDeploymentDocUrl: string, cloudInfo: CloudInfo, - showNodeInfo: boolean + showNodeInfo: boolean, + nlpSettings: NLPSettings ) => ( model: ModelItem, @@ -822,7 +853,8 @@ export const getUserInputModelDeploymentParamsProvider = model.model_id, getNewJobLimits(), cloudInfo, - showNodeInfo + showNodeInfo, + nlpSettings ); const params = initialParams @@ -834,6 +866,7 @@ export const getUserInputModelDeploymentParamsProvider = const modalSession = overlays.openModal( toMountPoint( { return ( - item.model_type === TRAINED_MODEL_TYPE.PYTORCH && item.state === MODEL_STATE.DOWNLOADED + item.model_type === TRAINED_MODEL_TYPE.PYTORCH && + !!item.state && + item.state !== MODEL_STATE.DOWNLOADING && + item.state !== MODEL_STATE.NOT_DOWNLOADED ); }, onClick: async (item) => { @@ -539,7 +544,7 @@ export function useModelActions({ }, { name: i18n.translate('xpack.ml.inference.modelsList.testModelActionLabel', { - defaultMessage: 'Test model', + defaultMessage: 'Test', }), description: i18n.translate('xpack.ml.inference.modelsList.testModelActionLabel', { defaultMessage: 'Test model', diff --git a/x-pack/plugins/ml/public/plugin.ts b/x-pack/plugins/ml/public/plugin.ts index be6e0c3305230..ca3a7d1408d5a 100644 --- a/x-pack/plugins/ml/public/plugin.ts +++ b/x-pack/plugins/ml/public/plugin.ts @@ -68,6 +68,8 @@ import { type ConfigSchema, type ExperimentalFeatures, initExperimentalFeatures, + initModelDeploymentSettings, + type NLPSettings, } from '../common/constants/app'; import type { ElasticModels } from './application/services/elastic_models_service'; import type { MlApi } from './application/services/ml_api_service'; @@ -135,11 +137,31 @@ export class MlPlugin implements Plugin { private experimentalFeatures: ExperimentalFeatures = { ruleFormV2: false, }; + private nlpSettings: NLPSettings = { + modelDeployment: { + 
allowStaticAllocations: true, + vCPURange: { + low: { + min: 0, + max: 2, + }, + medium: { + min: 1, + max: 16, + }, + high: { + min: 1, + max: 32, + }, + }, + }, + }; constructor(private initializerContext: PluginInitializerContext) { this.isServerless = initializerContext.env.packageInfo.buildFlavor === 'serverless'; initEnabledFeatures(this.enabledFeatures, initializerContext.config.get()); initExperimentalFeatures(this.experimentalFeatures, initializerContext.config.get()); + initModelDeploymentSettings(this.nlpSettings, initializerContext.config.get()); } setup( @@ -194,7 +216,8 @@ export class MlPlugin implements Plugin { params, this.isServerless, this.enabledFeatures, - this.experimentalFeatures + this.experimentalFeatures, + this.nlpSettings ); }, }); diff --git a/x-pack/plugins/ml/server/config_schema.ts b/x-pack/plugins/ml/server/config_schema.ts index 9d5e560443790..951198e0f9b8c 100644 --- a/x-pack/plugins/ml/server/config_schema.ts +++ b/x-pack/plugins/ml/server/config_schema.ts @@ -20,10 +20,30 @@ const compatibleModuleTypeSchema = schema.maybe( ]) ); +const vCPURangeSchema = schema.object({ + min: schema.number(), + max: schema.number(), + static: schema.maybe(schema.number()), +}); + export const configSchema = schema.object({ ad: enabledSchema, dfa: enabledSchema, - nlp: enabledSchema, + nlp: schema.maybe( + schema.object({ + enabled: schema.boolean(), + modelDeployment: schema.maybe( + schema.object({ + allowStaticAllocations: schema.boolean(), + vCPURange: schema.object({ + low: vCPURangeSchema, + medium: vCPURangeSchema, + high: vCPURangeSchema, + }), + }) + ), + }) + ), compatibleModuleType: compatibleModuleTypeSchema, experimental: schema.maybe( schema.object({ diff --git a/x-pack/plugins/ml/server/index.ts b/x-pack/plugins/ml/server/index.ts index 2ea7f1ded668b..ec258628fa283 100644 --- a/x-pack/plugins/ml/server/index.ts +++ b/x-pack/plugins/ml/server/index.ts @@ -33,7 +33,10 @@ export const config: PluginConfigDescriptor = { exposeToBrowser: { ad: true, dfa: true, - nlp: true, + nlp: { + enabled: true, + modelDeployment: true, + }, experimental: true, }, }; diff --git a/x-pack/test/functional/services/ml/trained_models_table.ts b/x-pack/test/functional/services/ml/trained_models_table.ts index 8818df749ccd4..450973c68f4c7 100644 --- a/x-pack/test/functional/services/ml/trained_models_table.ts +++ b/x-pack/test/functional/services/ml/trained_models_table.ts @@ -78,6 +78,15 @@ export function TrainedModelsTableProvider( return rows; } + /** + * Maps the vCPU level to the corresponding value in the slider. + */ + public readonly vCPULevelValueMap = { + low: 0.5, + medium: 1.5, + high: 2.5, + }; + public rowSelector(modelId: string, subSelector?: string) { const row = `~mlModelsTable > ~row-${modelId}`; return !subSelector ? 
row : `${row} > ${subSelector}`; @@ -512,13 +521,25 @@ export function TrainedModelsTableProvider( } public async setVCPULevel(value: 'low' | 'medium' | 'high') { - const valuesMap = { - low: 0.5, - medium: 1.5, - high: 2.5, - }; - await mlCommonUI.setSliderValue('mlModelsStartDeploymentModalVCPULevel', valuesMap[value]); - await mlCommonUI.assertSliderValue('mlModelsStartDeploymentModalVCPULevel', valuesMap[value]); + await mlCommonUI.setSliderValue( + 'mlModelsStartDeploymentModalVCPULevel', + this.vCPULevelValueMap[value] + ); + await this.assertVCPULevel(value); + } + + public async assertVCPULevel(value: 'low' | 'medium' | 'high') { + await mlCommonUI.assertSliderValue( + 'mlModelsStartDeploymentModalVCPULevel', + this.vCPULevelValueMap[value] + ); + } + + public async assertVCPUHelperText(expectedText: string) { + const helperText = await testSubjects.getVisibleText( + 'mlModelsStartDeploymentModalVCPUHelperText' + ); + expect(expectedText).to.eql(helperText); } public async assertAdvancedConfigurationOpen(expectedValue: boolean) { @@ -544,6 +565,33 @@ export function TrainedModelsTableProvider( await this.assertAdvancedConfigurationOpen(open); } + public async assertAdaptiveResourcesSwitchExists(expectExist: boolean) { + if (expectExist) { + await testSubjects.existOrFail('mlModelsStartDeploymentModalAdaptiveResources'); + } else { + await testSubjects.missingOrFail('mlModelsStartDeploymentModalAdaptiveResources'); + } + } + + public async toggleAdaptiveResourcesSwitch(enabled: boolean) { + await mlCommonUI.toggleSwitchIfNeeded( + 'mlModelsStartDeploymentModalAdaptiveResources', + enabled + ); + + await this.assertAdaptiveResourcesSwitchChecked(enabled); + } + + public async assertAdaptiveResourcesSwitchChecked(expectedValue: boolean) { + const isChecked = await testSubjects.isEuiSwitchChecked( + 'mlModelsStartDeploymentModalAdaptiveResources' + ); + expect(isChecked).to.eql( + expectedValue, + `Expected adaptive resources switch to be ${expectedValue ? 'checked' : 'unchecked'}` + ); + } + public async startDeploymentWithParams( modelId: string, params: { diff --git a/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts b/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts index 3bfe887e2a3c1..1a273970bf1bf 100644 --- a/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts +++ b/x-pack/test_serverless/functional/test_suites/search/ml/trained_models_list.ts @@ -4,18 +4,28 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ +import { SUPPORTED_TRAINED_MODELS } from '@kbn/test-suites-xpack/functional/services/ml/api'; import { FtrProviderContext } from '../../../ftr_provider_context'; export default function ({ getService, getPageObjects }: FtrProviderContext) { const ml = getService('ml'); const PageObjects = getPageObjects(['svlCommonPage']); - describe('Trained models list', () => { + describe('Trained models list', function () { + const tinyElser = SUPPORTED_TRAINED_MODELS.TINY_ELSER; + before(async () => { await PageObjects.svlCommonPage.loginWithPrivilegedRole(); + await ml.api.importTrainedModel(tinyElser.name, tinyElser.name); + // Make sure the .ml-stats index is created in advance, see https://github.com/elastic/elasticsearch/issues/65846 + await ml.api.assureMlStatsIndexExists(); await ml.api.syncSavedObjects(); }); + after(async () => { + await ml.api.deleteAllTrainedModelsES(); + }); + describe('page navigation', () => { it('renders trained models list', async () => { await ml.navigation.navigateToMl(); @@ -24,9 +34,42 @@ export default function ({ getService, getPageObjects }: FtrProviderContext) { await ml.testExecution.logTestStep( 'should display the stats bar and the analytics table with 1 installed trained model and built in elser models in the table' ); - await ml.trainedModels.assertStats(1); + await ml.trainedModels.assertStats(2); await ml.trainedModelsTable.assertTableIsPopulated(); }); }); + + describe('trained models table', () => { + it('sets correct VCU ranges for start model deployment', async () => { + await ml.trainedModelsTable.openStartDeploymentModal(tinyElser.name); + await ml.trainedModelsTable.toggleAdvancedConfiguration(true); + + await ml.testExecution.logTestStep('should have correct default VCU level'); + // Assert that the default selected level is Low + await ml.trainedModelsTable.assertVCPULevel('low'); + // Assert VCU levels values + await ml.trainedModelsTable.assertVCPUHelperText( + 'This level limits resources to 16 VCUs, which may be suitable for development, testing, and demos depending on your parameters. It is not recommended for production use.' + ); + + await ml.testExecution.logTestStep( + 'should set control to high VCU level and update helper text' + ); + await ml.trainedModelsTable.setVCPULevel('high'); + await ml.trainedModelsTable.assertVCPUHelperText( + 'Your model will scale up to a maximum of 4,096 VCUs per hour based on your search or ingest load. It will automatically scale down when demand decreases, and you only pay for the resources you use.' + ); + + // Adaptive resources switch should be checked by default + await ml.trainedModelsTable.assertAdaptiveResourcesSwitchChecked(true); + + // Static allocations should be allowed for search projects + await ml.trainedModelsTable.toggleAdaptiveResourcesSwitch(false); + + await ml.trainedModelsTable.assertVCPUHelperText( + 'Your model will consume 4,096 VCUs, even when not in use.' + ); + }); + }); }); } diff --git a/x-pack/test_serverless/functional/test_suites/security/ml/trained_models_list.ts b/x-pack/test_serverless/functional/test_suites/security/ml/trained_models_list.ts index 51edbadf2e6b9..3a6e1fcead8c1 100644 --- a/x-pack/test_serverless/functional/test_suites/security/ml/trained_models_list.ts +++ b/x-pack/test_serverless/functional/test_suites/security/ml/trained_models_list.ts @@ -4,6 +4,7 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. 
*/ +import { SUPPORTED_TRAINED_MODELS } from '@kbn/test-suites-xpack/functional/services/ml/api'; import { ServerlessRoleName } from '../../../../shared/lib'; import { FtrProviderContext } from '../../../ftr_provider_context'; @@ -13,11 +14,20 @@ export default function ({ getService, getPageObjects }: FtrProviderContext) { const PageObjects = getPageObjects(['svlCommonPage']); describe('Trained models list', function () { + const tinyElser = SUPPORTED_TRAINED_MODELS.TINY_ELSER; + before(async () => { await PageObjects.svlCommonPage.loginWithRole(ServerlessRoleName.PLATFORM_ENGINEER); + await ml.api.importTrainedModel(tinyElser.name, tinyElser.name); + // Make sure the .ml-stats index is created in advance, see https://github.com/elastic/elasticsearch/issues/65846 + await ml.api.assureMlStatsIndexExists(); await ml.api.syncSavedObjects(); }); + after(async () => { + await ml.api.deleteAllTrainedModelsES(); + }); + describe('page navigation', () => { it('renders trained models list', async () => { await ml.navigation.navigateToMl(); @@ -27,9 +37,35 @@ export default function ({ getService, getPageObjects }: FtrProviderContext) { await ml.testExecution.logTestStep( 'should display the stats bar and the analytics table with one trained model' ); - await ml.trainedModels.assertStats(1); + await ml.trainedModels.assertStats(2); await ml.trainedModelsTable.assertTableIsPopulated(); }); }); + + describe('trained models table', () => { + it('sets correct VCU ranges for start model deployment', async () => { + await ml.trainedModelsTable.openStartDeploymentModal(tinyElser.name); + await ml.trainedModelsTable.toggleAdvancedConfiguration(true); + + // Adaptive resources switch should be hidden + await ml.trainedModelsTable.assertAdaptiveResourcesSwitchExists(false); + + await ml.testExecution.logTestStep('should have correct default VCU level'); + // Assert that the default selected level is Low + await ml.trainedModelsTable.assertVCPULevel('low'); + // Assert VCU levels values + await ml.trainedModelsTable.assertVCPUHelperText( + 'This level limits resources to 16 VCUs, which may be suitable for development, testing, and demos depending on your parameters. It is not recommended for production use.' + ); + + await ml.testExecution.logTestStep( + 'should set control to high VCU level and update helper text' + ); + await ml.trainedModelsTable.setVCPULevel('high'); + await ml.trainedModelsTable.assertVCPUHelperText( + 'Your model will scale up to a maximum of 1,024 VCUs per hour based on your search or ingest load. It will automatically scale down when demand decreases, and you only pay for the resources you use.' + ); + }); + }); }); }
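
As a sanity check of the numbers asserted in the serverless tests above, here is a minimal standalone sketch (illustrative names, not the plugin implementation) of how a configured `vCPURange` translates into VCUs, assuming the vector-optimized ratio noted in `getVCURange` (1 VCU = 0.125 vCPU, so VCUs = vCPUs * 8):

```ts
// Illustrative sketch only: converts a configured vCPU range into VCUs.
// Assumes the vector-optimized instance ratio used by getVCURange (1 VCU = 0.125 vCPU).

interface VCPURange {
  min: number;
  max: number;
  static?: number;
}

const VCPUS_PER_VCU = 0.125; // r6gd "vector optimized" assumption

function toVCURange(range: VCPURange): VCPURange {
  const toVCU = (vCPUs: number) => Math.round(vCPUs / VCPUS_PER_VCU);
  return {
    min: toVCU(range.min),
    max: toVCU(range.max),
    ...(range.static !== undefined ? { static: toVCU(range.static) } : {}),
  };
}

// Search project (serverless.es.yml), high level: 1-512 vCPUs, static 512
console.log(toVCURange({ min: 1, max: 512, static: 512 })); // { min: 8, max: 4096, static: 4096 }

// Security / Observability projects cap high at 128 vCPUs and allow no static allocations
console.log(toVCURange({ min: 1, max: 128 })); // { min: 8, max: 1024 }
```

With the Search config (`high` 1-512 vCPUs, `static` 512) this gives 8-4,096 VCUs, while the Security and Observability configs (`high` max 128, no static allocations) top out at 1,024 VCUs, matching the helper-text assertions in the functional tests.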
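
The allocation counts exercised by the updated `deployment_params_mapper` tests follow from a floor division of the configured vCPU range by the threads per allocation, with the `low` level allowed to scale down to the environment's minimum (0 on serverless and on ESS with ML autoscaling enabled, 1 otherwise). A small sketch under the same caveat that the helper below is illustrative, not the mapper's actual code:

```ts
// Illustrative sketch only: derives allocation counts from a configured vCPU range.
// Mirrors the behaviour asserted in the updated deployment_params_mapper tests.

type VCPUUsage = 'low' | 'medium' | 'high';

function deriveAllocations(
  range: { min: number; max: number },
  threadsPerAllocation: number, // 2 for a "low" search-optimized deployment, 1 for ingest-optimized
  vCPUUsage: VCPUUsage,
  minAllowedAllocations: number // 0 on serverless / ESS with ML autoscaling, 1 otherwise
) {
  return {
    // used as adaptive_allocations.max_number_of_allocations when adaptive resources are enabled
    maxAllocations: Math.floor(range.max / threadsPerAllocation),
    // only the "low" level may scale down to the environment minimum
    minAllocations:
      Math.floor(range.min / threadsPerAllocation) ||
      (vCPUUsage === 'low' ? minAllowedAllocations : 1),
  };
}

// Serverless "low" level (0-2 vCPUs):
console.log(deriveAllocations({ min: 0, max: 2 }, 2, 'low', 0)); // search: { maxAllocations: 1, minAllocations: 0 }
console.log(deriveAllocations({ min: 0, max: 2 }, 1, 'low', 0)); // ingest: { maxAllocations: 2, minAllocations: 0 }
```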