feat: Add streaming API to orchestration (#352)

SAP · Jan 13, 2025 · d836abf · d836abf
1 parent e35cab5
commit d836abf
Show file tree

Hide file tree

Showing 47 changed files with 1,891 additions and 243 deletions.
diff --git a/.changeset/hip-melons-destroy.md b/.changeset/hip-melons-destroy.md
@@ -0,0 +1,5 @@
+---
+'@sap-ai-sdk/orchestration': minor
+---
+
+[New Functionality] Add support for streaming in the orchestration client.
diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts
@@ -9,3 +9,4 @@ export {
   AwsBedrockChatModel,
   AiCoreOpenSourceChatModel
 } from './model-types.js';
+export { SseStream, LineDecoder, SSEDecoder } from './stream/index.js';
diff --git a/packages/core/src/stream/index.ts b/packages/core/src/stream/index.ts
@@ -0,0 +1,3 @@
+export * from './sse-stream.js';
+export * from './sse-decoder.js';
+export * from './line-decoder.js';
diff --git a/...s/src/azure-openai/stream/line-decoder.ts → packages/core/src/stream/line-decoder.ts b/...s/src/azure-openai/stream/line-decoder.ts → packages/core/src/stream/line-decoder.ts
@@ -5,7 +5,6 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined;
  * reading lines from text.
  *
  * Https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258.
- * @internal
  */
 export class LineDecoder {
   // prettier-ignore

diff --git a/...ls/src/azure-openai/stream/sse-decoder.ts → packages/core/src/stream/sse-decoder.ts b/...ls/src/azure-openai/stream/sse-decoder.ts → packages/core/src/stream/sse-decoder.ts
@@ -19,7 +19,6 @@ export interface ServerSentEvent {
 
 /**
  * Server-Sent Event decoder.
- * @internal
  */
 export class SSEDecoder {
   private data: string[];

diff --git a/...rc/azure-openai/stream/sse-stream.test.ts → packages/core/src/stream/sse-stream.test.ts b/...rc/azure-openai/stream/sse-stream.test.ts → packages/core/src/stream/sse-stream.test.ts
diff --git a/...els/src/azure-openai/stream/sse-stream.ts → packages/core/src/stream/sse-stream.ts b/...els/src/azure-openai/stream/sse-stream.ts → packages/core/src/stream/sse-stream.ts
@@ -13,7 +13,6 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined;
 
 /**
  * Stream implemented as an async iterable.
- * @internal
  */
 export class SseStream<Item> implements AsyncIterable<Item> {
   protected static transformToSseStream<Item>(

diff --git a/packages/foundation-models/README.md b/packages/foundation-models/README.md
@@ -152,7 +152,7 @@ Refer to `AzureOpenAiChatCompletionParameters` interface for other parameters th
 The `AzureOpenAiChatClient` supports streaming response for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard.
 
 Use the `stream()` method to receive a stream of chunk responses from the model.
-After consuming the stream, call the helper methods to get the finish reason and token usage information respectively.
+After consuming the stream, call the helper methods to get the finish reason and token usage information.
 
 ```ts
 const chatClient = new AzureOpenAiChatClient('gpt-4o');
@@ -178,7 +178,7 @@ console.log(`Token usage: ${JSON.stringify(tokenUsage)}\n`);
 
 ##### Streaming the Delta Content
 
-The client provides a helper method to extract delta content and stream string directly.
+The client provides a helper method to extract the text chunks as strings:
 
 ```ts
 for await (const chunk of response.stream.toContentStream()) {
@@ -198,7 +198,7 @@ Additionally, it can be aborted manually by calling the `stream()` method with a
 ```ts
 const chatClient = new AzureOpenAiChatClient('gpt-4o');
 const controller = new AbortController();
-const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream(
+const response = await chatClient.stream(
   {
     messages: [
       {

diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts
@@ -198,7 +198,7 @@ describe('Azure OpenAI chat client', () => {
 
     const response = await client.stream(prompt);
     for await (const chunk of response.stream) {
-      expect(JSON.stringify(chunk.data)).toEqual(initialResponse);
+      expect(chunk.data).toEqual(JSON.parse(initialResponse));
       break;
     }
   });

diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts
@@ -1,9 +1,8 @@
 import { createLogger } from '@sap-cloud-sdk/util';
 import { jest } from '@jest/globals';
+import { LineDecoder, SSEDecoder } from '@sap-ai-sdk/core';
 import { parseFileToString } from '../../../../test-util/mock-http.js';
 import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js';
-import { LineDecoder } from './stream/line-decoder.js';
-import { SSEDecoder } from './stream/sse-decoder.js';
 
 describe('OpenAI chat completion stream', () => {
   let sseChunks: string[];
@@ -39,12 +38,12 @@ describe('OpenAI chat completion stream', () => {
 
   it('should wrap the raw chunk', async () => {
     let output = '';
-    const asnycGenerator = AzureOpenAiChatCompletionStream._processChunk(
+    const asyncGenerator = AzureOpenAiChatCompletionStream._processChunk(
       originalChatCompletionStream
     );
-    for await (const chunk of asnycGenerator) {
+    for await (const chunk of asyncGenerator) {
       expect(chunk).toBeDefined();
-      chunk.getDeltaContent() ? (output += chunk.getDeltaContent()) : null;
+      output += chunk.getDeltaContent() ?? '';
     }
     expect(output).toEqual('The capital of France is Paris.');
   });

diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts
@@ -1,5 +1,5 @@
 import { createLogger } from '@sap-cloud-sdk/util';
-import { SseStream } from './stream/index.js';
+import { SseStream } from '@sap-ai-sdk/core';
 import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js';
 import type { HttpResponse } from '@sap-cloud-sdk/http-client';
 import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js';

diff --git a/packages/foundation-models/src/azure-openai/stream/index.ts b/packages/foundation-models/src/azure-openai/stream/index.ts
diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md
@@ -86,6 +86,125 @@ The client allows you to combine various modules, such as templating and content
 
 In addition to the examples below, you can find more **sample code** [here](https://github.com/SAP/ai-sdk-js/blob/main/sample-code/src/orchestration.ts).
 
+### Streaming
+
+The `OrchestrationClient` supports streaming responses for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard.
+
+Use the `stream()` method to receive a stream of chunk responses from the model.
+After consuming the stream, call the helper methods to get the finish reason and token usage information.
+
+```ts
+const orchestrationClient = new OrchestrationClient({
+  llm: {
+    model_name: 'gpt-4o',
+    model_params: { max_tokens: 50, temperature: 0.1 }
+  },
+  templating: {
+    template: [
+      { role: 'user', content: 'Give a long history of {{?country}}?' }
+    ]
+  }
+});
+
+const response = await orchestrationClient.stream({
+  inputParams: { country: 'France' }
+});
+
+for await (const chunk of response.stream) {
+  console.log(JSON.stringify(chunk));
+}
+
+const finishReason = response.getFinishReason();
+const tokenUsage = response.getTokenUsage();
+
+console.log(`Finish reason: ${finishReason}\n`);
+console.log(`Token usage: ${JSON.stringify(tokenUsage)}\n`);
+```
+
+#### Streaming the Delta Content
+
+The client provides a helper method to extract the text chunks as strings:
+
+```ts
+for await (const chunk of response.stream.toContentStream()) {
+  console.log(chunk); // will log the delta content
+}
+```
+
+Each chunk will be a string containing the delta content.
+
+#### Streaming with Abort Controller
+
+Streaming request can be aborted using the `AbortController` API.
+In case of an error, the SAP Cloud SDK for AI will automatically close the stream.
+Additionally, it can be aborted manually by calling the `stream()` method with an `AbortController` object.
+
+```ts
+const orchestrationClient = new OrchestrationClient({
+  llm: {
+    model_name: 'gpt-4o',
+    model_params: { max_tokens: 50, temperature: 0.1 }
+  },
+  templating: {
+    template: [
+      { role: 'user', content: 'Give a long history of {{?country}}?' }
+    ]
+  }
+});
+
+const controller = new AbortController();
+const response = await orchestrationClient.stream(
+  {
+    inputParams: { country: 'France' }
+  },
+  controller
+);
+
+// Abort the streaming request after one second
+setTimeout(() => {
+  controller.abort();
+}, 1000);
+
+for await (const chunk of response.stream) {
+  console.log(JSON.stringify(chunk));
+}
+```
+
+In this example, streaming request will be aborted after one second.
+Abort controller can be useful, e.g., when end-user wants to stop the stream or refreshes the page.
+
+#### Stream Options
+
+The orchestration service offers multiple streaming options, which you can configure in addition to the LLM's streaming options.
+These include options like definining the maximum number of characters per chunk or modifying the output filter behavior.
+There are two ways to add specific streaming options to your client, either at initialization of orchestration client, or when calling the stream API.
+
+Setting streaming options dynamically could be useful if an initialized orchestration client will also be used for streaming.
+
+You can check the list of available stream options in the [orchestration service's documentation](https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/streaming).
+
+An example for setting the streaming options when calling the stream API looks like the following:
+
+```ts
+const response = orchestrationClient.stream(
+  {
+    inputParams: { country: 'France' }
+  },
+  controller,
+  {
+    llm: { include_usage: false },
+    global: { chunk_size: 10 },
+    outputFiltering: { overlap: 200 }
+  }
+);
+```
+
+Usage metrics are collected by default, if you do not want to receive them, set `include_usage` to `false`.
+If you don't want any streaming options as part of your call to the LLM, set `streamOptions.llm` to `null`.
+
+> [!NOTE]
+> When initalizing a client with a JSON module config, providing streaming options is not possible.
+
 ### Templating
 
 Use the orchestration client with templating to pass a prompt containing placeholders that will be replaced with input parameters during a chat completion request.

diff --git a/packages/orchestration/package.json b/packages/orchestration/package.json
@@ -32,6 +32,9 @@
   "dependencies": {
     "@sap-ai-sdk/core": "workspace:^",
     "@sap-ai-sdk/ai-api": "workspace:^",
+    "@sap-cloud-sdk/util": "^3.25.0"
+  },
+  "devDependencies": {
     "@sap-cloud-sdk/http-client": "^3.25.0",
     "@sap-cloud-sdk/connectivity": "^3.25.0"
   }

diff --git a/packages/orchestration/src/__snapshots__/orchestration-stream.test.ts.snap b/packages/orchestration/src/__snapshots__/orchestration-stream.test.ts.snap
@@ -0,0 +1,37 @@
+// Jest Snapshot v1, https://goo.gl/fbAQLP
+
+exports[`Orchestration chat completion stream should transform the original stream to string stream 1`] = `
+"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the creation of applications that integrate with SAP solutions, particularly those built on the SAP Business Technology Platform (BTP). It provides developers with libraries, tools, and best practices that streamline the process of connecting to SAP systems, such as S/4HANA and other services available on the SAP Cloud Platform.
+
+Key features of the SAP Cloud SDK include:
+
+1. **Simplified Connectivity**: The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for authentication, service consumption, and OData/REST client generation.
+
+2. **Multi-cloud Support**: It supports multiple cloud environments, ensuring that applications remain flexible and can be deployed across various cloud providers.
+
+3. **Best Practices and Guidelines**: The SDK includes best practices for development, ensuring high-quality, scalable, and maintainable code.
+
+4. **Project Scaffolding and Code Samples**: Developers can quickly start their projects using provided templates and samples, accelerating the development process and reducing the learning curve.
+
+5. **Extensive Documentation and Community Support**: Ample documentation, tutorials, and an active community help developers overcome challenges and adopt the SDK efficiently.
+
+Overall, the SAP Cloud SDK is an essential tool for developers looking to build cloud-native applications and extensions that seamlessly integrate with SAP's enterprise solutions."
+`;
+
+exports[`Orchestration chat completion stream should wrap the raw chunk 1`] = `
+"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the creation of applications that integrate with SAP solutions, particularly those built on the SAP Business Technology Platform (BTP). It provides developers with libraries, tools, and best practices that streamline the process of connecting to SAP systems, such as S/4HANA and other services available on the SAP Cloud Platform.
+
+Key features of the SAP Cloud SDK include:
+
+1. **Simplified Connectivity**: The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for authentication, service consumption, and OData/REST client generation.
+
+2. **Multi-cloud Support**: It supports multiple cloud environments, ensuring that applications remain flexible and can be deployed across various cloud providers.
+
+3. **Best Practices and Guidelines**: The SDK includes best practices for development, ensuring high-quality, scalable, and maintainable code.
+
+4. **Project Scaffolding and Code Samples**: Developers can quickly start their projects using provided templates and samples, accelerating the development process and reducing the learning curve.
+
+5. **Extensive Documentation and Community Support**: Ample documentation, tutorials, and an active community help developers overcome challenges and adopt the SDK efficiently.
+
+Overall, the SAP Cloud SDK is an essential tool for developers looking to build cloud-native applications and extensions that seamlessly integrate with SAP's enterprise solutions."
+`;
diff --git a/packages/orchestration/src/index.ts b/packages/orchestration/src/index.ts
@@ -1,52 +1,22 @@
-export type {
-  CompletionPostResponse,
-  ChatMessages,
-  TokenUsage,
-  TemplatingModuleConfig,
-  OrchestrationConfig,
-  ModuleResults,
-  ModuleConfigs,
-  MaskingModuleConfig,
-  MaskingProviderConfig,
-  GroundingModuleConfig,
-  DocumentGroundingFilter,
-  GroundingFilterId,
-  GroundingFilterSearchConfiguration,
-  DataRepositoryType,
-  KeyValueListPair,
-  SearchDocumentKeyValueListPair,
-  SearchSelectOptionEnum,
-  LlmModuleResult,
-  LlmChoice,
-  GenericModuleResult,
-  FilteringModuleConfig,
-  InputFilteringConfig,
-  OutputFilteringConfig,
-  FilterConfig,
-  ErrorResponse,
-  DpiEntities,
-  DpiEntityConfig,
-  DpiConfig,
-  CompletionPostRequest,
-  ChatMessage,
-  AzureThreshold,
-  AzureContentSafety,
-  AzureContentSafetyFilterConfig,
-  ImageContent,
-  TextContent,
-  MultiChatMessageContent,
-  MultiChatMessage
-} from './client/api/schema/index.js';
+export * from './client/api/schema/index.js';
 
 export type {
   OrchestrationModuleConfig,
   LlmModuleConfig,
   Prompt,
+  RequestOptions,
+  StreamOptions,
   DocumentGroundingServiceConfig,
   DocumentGroundingServiceFilter,
   LlmModelParams
 } from './orchestration-types.js';
 
+export { OrchestrationStreamResponse } from './orchestration-stream-response.js';
+
+export { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js';
+
+export { OrchestrationStream } from './orchestration-stream.js';
+
 export { OrchestrationClient } from './orchestration-client.js';
 
 export {