[Inference] create the @kbn/inference-common package (#193464)
## Summary

At the moment, all inference API-related types and utilities
(`chatComplete`, `output`, and more) live inside the `inference`
plugin's common folder.

This is problematic because it forces any consumer of those types to
explicitly depend on the `inference` plugin (via a plugin dependency or
TS ref), which can lead to cyclic dependency issues and is a poor
design pattern overall.

It also makes it more complicated than it should be to split the
inference logic, task framework, and task implementations into distinct
packages or plugins, because some (concrete) utilities live in the
inference plugin's code.

It's also a poor experience for consumers, as it's difficult to resolve
the imports they need: we're currently mixing internal and public
exports, and not all types are exported from a single entry point,
making it tedious to find the right path for each individual import
needed to consume the inference APIs.

This PR addresses most of those points by introducing a new
`@kbn/inference-common` package, moving all the low-level types and
utilities into it, and exposing them from the package's entry point.
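
For illustration, consuming code can now import everything from that single entry point; a minimal sketch (the consumer code itself is hypothetical, the imported symbols are the ones exported by the package):

```ts
// Hypothetical consumer: no dependency on the inference plugin is required,
// only on the @kbn/inference-common package.
import { MessageRole, type Message } from '@kbn/inference-common';

// Build a conversation using the shared Message type.
const messages: Message[] = [
  { role: MessageRole.User, content: 'First question?' },
  { role: MessageRole.Assistant, content: 'Some answer' },
];
```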

---------

Co-authored-by: kibanamachine <[email protected]>
Co-authored-by: Elastic Machine <[email protected]>
3 people authored Nov 1, 2024
1 parent 2ae6333 commit 631ccb0
Showing 94 changed files with 1,041 additions and 519 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
@@ -525,6 +525,7 @@ x-pack/plugins/index_management @elastic/kibana-management
x-pack/packages/index-management/index_management_shared_types @elastic/kibana-management
test/plugin_functional/plugins/index_patterns @elastic/kibana-data-discovery
x-pack/packages/ml/inference_integration_flyout @elastic/ml-ui
x-pack/packages/ai-infra/inference-common @elastic/appex-ai-infra
x-pack/plugins/inference @elastic/appex-ai-infra
x-pack/packages/kbn-infra-forge @elastic/obs-ux-management-team
x-pack/plugins/observability_solution/infra @elastic/obs-ux-logs-team @elastic/obs-ux-infra_services-team
1 change: 1 addition & 0 deletions package.json
@@ -571,6 +571,7 @@
"@kbn/index-management-plugin": "link:x-pack/plugins/index_management",
"@kbn/index-management-shared-types": "link:x-pack/packages/index-management/index_management_shared_types",
"@kbn/index-patterns-test-plugin": "link:test/plugin_functional/plugins/index_patterns",
"@kbn/inference-common": "link:x-pack/packages/ai-infra/inference-common",
"@kbn/inference-plugin": "link:x-pack/plugins/inference",
"@kbn/inference_integration_flyout": "link:x-pack/packages/ml/inference_integration_flyout",
"@kbn/infra-forge": "link:x-pack/packages/kbn-infra-forge",
2 changes: 2 additions & 0 deletions tsconfig.base.json
@@ -1044,6 +1044,8 @@
"@kbn/index-patterns-test-plugin/*": ["test/plugin_functional/plugins/index_patterns/*"],
"@kbn/inference_integration_flyout": ["x-pack/packages/ml/inference_integration_flyout"],
"@kbn/inference_integration_flyout/*": ["x-pack/packages/ml/inference_integration_flyout/*"],
"@kbn/inference-common": ["x-pack/packages/ai-infra/inference-common"],
"@kbn/inference-common/*": ["x-pack/packages/ai-infra/inference-common/*"],
"@kbn/inference-plugin": ["x-pack/plugins/inference"],
"@kbn/inference-plugin/*": ["x-pack/plugins/inference/*"],
"@kbn/infra-forge": ["x-pack/packages/kbn-infra-forge"],
7 changes: 7 additions & 0 deletions x-pack/packages/ai-infra/inference-common/README.md
@@ -0,0 +1,7 @@
# @kbn/inference-common

Common types and utilities for the inference APIs and features.

The main purpose of the package is to have a clean line between the inference plugin's
implementation and the underlying types, so that other packages or plugins can leverage the
types without directly depending on the plugin.
77 changes: 77 additions & 0 deletions x-pack/packages/ai-infra/inference-common/index.ts
@@ -0,0 +1,77 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

export {
MessageRole,
ChatCompletionEventType,
ToolChoiceType,
type Message,
type AssistantMessage,
type ToolMessage,
type UserMessage,
type ToolSchemaType,
type FromToolSchema,
type ToolSchema,
type UnvalidatedToolCall,
type ToolCallsOf,
type ToolCall,
type ToolDefinition,
type ToolOptions,
type FunctionCallingMode,
type ToolChoice,
type ChatCompleteAPI,
type ChatCompleteOptions,
type ChatCompletionResponse,
type ChatCompletionTokenCountEvent,
type ChatCompletionEvent,
type ChatCompletionChunkEvent,
type ChatCompletionChunkToolCall,
type ChatCompletionMessageEvent,
withoutTokenCountEvents,
withoutChunkEvents,
isChatCompletionMessageEvent,
isChatCompletionEvent,
isChatCompletionChunkEvent,
isChatCompletionTokenCountEvent,
ChatCompletionErrorCode,
type ChatCompletionToolNotFoundError,
type ChatCompletionToolValidationError,
type ChatCompletionTokenLimitReachedError,
isToolValidationError,
isTokenLimitReachedError,
isToolNotFoundError,
} from './src/chat_complete';
export {
OutputEventType,
type OutputAPI,
type OutputResponse,
type OutputCompleteEvent,
type OutputUpdateEvent,
type Output,
type OutputEvent,
isOutputCompleteEvent,
isOutputUpdateEvent,
isOutputEvent,
withoutOutputUpdateEvents,
} from './src/output';
export {
InferenceTaskEventType,
type InferenceTaskEvent,
type InferenceTaskEventBase,
} from './src/inference_task';
export {
InferenceTaskError,
InferenceTaskErrorCode,
type InferenceTaskErrorEvent,
type InferenceTaskInternalError,
type InferenceTaskRequestError,
createInferenceInternalError,
createInferenceRequestError,
isInferenceError,
isInferenceInternalError,
isInferenceRequestError,
} from './src/errors';
@@ -5,10 +5,8 @@
* 2.0.
*/

import { OutputEvent, OutputEventType, OutputUpdateEvent } from '.';

export function isOutputUpdateEvent<TId extends string>(
event: OutputEvent
): event is OutputUpdateEvent<TId> {
return event.type === OutputEventType.OutputComplete;
}
x-pack/packages/ai-infra/inference-common/jest.config.js

module.exports = {
  preset: '@kbn/test',
  rootDir: '../../../..',
  roots: ['<rootDir>/x-pack/packages/ai-infra/inference-common'],
};
5 changes: 5 additions & 0 deletions x-pack/packages/ai-infra/inference-common/kibana.jsonc
@@ -0,0 +1,5 @@
{
"type": "shared-common",
"id": "@kbn/inference-common",
"owner": "@elastic/appex-ai-infra"
}
7 changes: 7 additions & 0 deletions x-pack/packages/ai-infra/inference-common/package.json
@@ -0,0 +1,7 @@
{
"name": "@kbn/inference-common",
"private": true,
"version": "1.0.0",
"license": "Elastic License 2.0",
"sideEffects": false
}
69 changes: 69 additions & 0 deletions x-pack/packages/ai-infra/inference-common/src/chat_complete/api.ts
@@ -0,0 +1,69 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { Observable } from 'rxjs';
import type { ToolOptions } from './tools';
import type { Message } from './messages';
import type { ChatCompletionEvent } from './events';

/**
* Request a completion from the LLM based on a prompt or conversation.
*
* @example using the API to get an event observable.
* ```ts
* const events$ = chatComplete({
* connectorId: 'my-connector',
* system: "You are a helpful assistant",
* messages: [
* { role: MessageRole.User, content: "First question?"},
* { role: MessageRole.Assistant, content: "Some answer"},
* { role: MessageRole.User, content: "Another question?"},
* ]
* });
* ```
*/
export type ChatCompleteAPI = <TToolOptions extends ToolOptions = ToolOptions>(
options: ChatCompleteOptions<TToolOptions>
) => ChatCompletionResponse<TToolOptions>;

/**
* Options used to call the {@link ChatCompleteAPI}
*/
export type ChatCompleteOptions<TToolOptions extends ToolOptions = ToolOptions> = {
/**
* The ID of the connector to use.
* Must be a genAI compatible connector, or an error will be thrown.
*/
connectorId: string;
/**
* Optional system message for the LLM.
*/
system?: string;
/**
* The list of messages for the current conversation
*/
messages: Message[];
/**
* Function calling mode, defaults to "native".
*/
functionCalling?: FunctionCallingMode;
} & TToolOptions;

/**
* Response from the {@link ChatCompleteAPI}.
*
* Observable of {@link ChatCompletionEvent}
*/
export type ChatCompletionResponse<TToolOptions extends ToolOptions = ToolOptions> = Observable<
ChatCompletionEvent<TToolOptions>
>;

/**
* Defines the function calling mode to use when calling the inference APIs.
* - native: uses the LLM's native function calling (requires the LLM to support it)
* - simulated: emulates function calling with function-calling instructions
*/
export type FunctionCallingMode = 'native' | 'simulated';
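
For context, a hedged sketch of how these options compose (not part of the diff): the `get_weather` tool, its schema, and the way `chatComplete` is obtained are assumptions for illustration, while the option names, event helpers, and type guard are the ones exported by the package.

```ts
import {
  MessageRole,
  isChatCompletionMessageEvent,
  withoutChunkEvents,
  withoutTokenCountEvents,
  type ChatCompleteAPI,
} from '@kbn/inference-common';

// Assumption: a concrete implementation is provided elsewhere (e.g. by the
// inference plugin); only its type comes from this package.
declare const chatComplete: ChatCompleteAPI;

const events$ = chatComplete({
  connectorId: 'my-connector',
  functionCalling: 'simulated',
  system: 'You are a helpful assistant',
  messages: [{ role: MessageRole.User, content: 'What is the weather in Paris?' }],
  // Hypothetical tool definition, following the ToolDefinition / ToolSchema shape.
  tools: {
    get_weather: {
      description: 'Get the weather for a given city',
      schema: {
        type: 'object',
        properties: { city: { type: 'string' } },
        required: ['city'],
      },
    },
  },
});

// Drop intermediate chunk and token-count events, then log the final message.
events$.pipe(withoutTokenCountEvents(), withoutChunkEvents()).subscribe((event) => {
  if (isChatCompletionMessageEvent(event)) {
    // event.content and event.toolCalls carry the assistant's final response.
    console.log(event.content, event.toolCalls);
  }
});
```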
@@ -5,16 +5,22 @@
* 2.0.
*/

import { i18n } from '@kbn/i18n';
import { InferenceTaskError } from '../errors';
import type { UnvalidatedToolCall } from './tools';

/**
* Error codes specific to the {@link ChatCompleteAPI}
*/
export enum ChatCompletionErrorCode {
TokenLimitReachedError = 'tokenLimitReachedError',
ToolNotFoundError = 'toolNotFoundError',
ToolValidationError = 'toolValidationError',
}

/**
* Error thrown when the completion call fails because of a token limit,
* e.g. when the conversation exceeds the model's context window.
*/
export type ChatCompletionTokenLimitReachedError = InferenceTaskError<
ChatCompletionErrorCode.TokenLimitReachedError,
{
@@ -23,13 +29,24 @@ export type ChatCompletionTokenLimitReachedError = InferenceTaskError<
}
>;

/**
* Error thrown if the LLM called a tool that was not provided
* in the list of available tools.
*/
export type ChatCompletionToolNotFoundError = InferenceTaskError<
ChatCompletionErrorCode.ToolNotFoundError,
{
/** The name of the tool that got called */
name: string;
}
>;

/**
* Error thrown when the LLM called a tool with parameters that
* don't match the tool's schema.
*
* The level of detail in the error varies depending on the underlying LLM.
*/
export type ChatCompletionToolValidationError = InferenceTaskError<
ChatCompletionErrorCode.ToolValidationError,
{
@@ -40,49 +57,19 @@ export type ChatCompletionToolValidationError = InferenceTaskError<
}
>;

export function createTokenLimitReachedError(
tokenLimit?: number,
tokenCount?: number
): ChatCompletionTokenLimitReachedError {
return new InferenceTaskError(
ChatCompletionErrorCode.TokenLimitReachedError,
i18n.translate('xpack.inference.chatCompletionError.tokenLimitReachedError', {
defaultMessage: `Token limit reached. Token limit is {tokenLimit}, but the current conversation has {tokenCount} tokens.`,
values: { tokenLimit, tokenCount },
}),
{ tokenLimit, tokenCount }
);
}

export function createToolNotFoundError(name: string): ChatCompletionToolNotFoundError {
return new InferenceTaskError(
ChatCompletionErrorCode.ToolNotFoundError,
`Tool ${name} called but was not available`,
{
name,
}
);
}

export function createToolValidationError(
message: string,
meta: {
name?: string;
arguments?: string;
errorsText?: string;
toolCalls?: UnvalidatedToolCall[];
}
): ChatCompletionToolValidationError {
return new InferenceTaskError(ChatCompletionErrorCode.ToolValidationError, message, meta);
}

/**
* Check if an error is a {@link ChatCompletionToolValidationError}
*/
export function isToolValidationError(error?: Error): error is ChatCompletionToolValidationError {
return (
error instanceof InferenceTaskError &&
error.code === ChatCompletionErrorCode.ToolValidationError
);
}

/**
* Check if an error is a {@link ChatCompletionTokenLimitReachedError}
*/
export function isTokenLimitReachedError(
error: Error
): error is ChatCompletionTokenLimitReachedError {
@@ -92,6 +79,9 @@ export function isTokenLimitReachedError(
);
}

/**
* Check if an error is a {@link ChatCompletionToolNotFoundError}
*/
export function isToolNotFoundError(error: Error): error is ChatCompletionToolNotFoundError {
return (
error instanceof InferenceTaskError && error.code === ChatCompletionErrorCode.ToolNotFoundError
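
For context, a hedged sketch (not part of the diff) of how the type guards above are meant to be consumed; the helper function is hypothetical, and the `meta` fields referenced are the ones declared on each error type in this file.

```ts
import {
  isTokenLimitReachedError,
  isToolNotFoundError,
  isToolValidationError,
} from '@kbn/inference-common';

// Hypothetical helper: narrow an error surfaced by a chatComplete call and
// produce a human-readable description based on its typed metadata.
function describeChatCompletionError(error: Error): string {
  if (isToolValidationError(error)) {
    return `Tool call validation failed: ${error.meta.errorsText}`;
  }
  if (isToolNotFoundError(error)) {
    return `Unknown tool called: ${error.meta.name}`;
  }
  if (isTokenLimitReachedError(error)) {
    return `Token limit of ${error.meta.tokenLimit} reached (conversation has ${error.meta.tokenCount} tokens)`;
  }
  return error.message;
}
```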