From 7eef0e74ea5a8d4a2274654d48e353bbf9334fe5 Mon Sep 17 00:00:00 2001
From: Rhys <Anemy@users.noreply.github.com>
Date: Thu, 26 Sep 2024 12:09:29 -0400
Subject: [PATCH] fix(chat): update response handling to stream and inline code
 block parsing VSCODE-620 (#835)

---
 src/participant/constants.ts                  |   5 +
 src/participant/participant.ts                | 225 +++++++++++-------
 src/participant/prompts/generic.ts            |   4 +-
 src/participant/prompts/intent.ts             |   2 +-
 src/participant/prompts/query.ts              |  13 +-
 src/participant/streamParsing.ts              |  95 ++++++++
 src/test/ai-accuracy-tests/assertions.ts      |  17 +-
 .../participant/asyncIterableFromArray.ts     |  24 ++
 .../suite/participant/participant.test.ts     |  16 +-
 .../suite/participant/streamParsing.test.ts   | 219 +++++++++++++++++
 10 files changed, 508 insertions(+), 112 deletions(-)
 create mode 100644 src/participant/streamParsing.ts
 create mode 100644 src/test/suite/participant/asyncIterableFromArray.ts
 create mode 100644 src/test/suite/participant/streamParsing.test.ts
diff --git a/src/participant/constants.ts b/src/participant/constants.ts
index b43b1332f..90fd81490 100644
--- a/src/participant/constants.ts
+++ b/src/participant/constants.ts
@@ -14,6 +14,11 @@ export type ParticipantResponseType =
   | 'askToConnect'
   | 'askForNamespace';
 
+export const codeBlockIdentifier = {
+  start: '```javascript',
+  end: '```',
+};
+
 interface Metadata {
   intent: Exclude<ParticipantResponseType, 'askForNamespace' | 'docs'>;
   chatId: string;
diff --git a/src/participant/participant.ts b/src/participant/participant.ts
index 8edf5dc44..8d57f0726 100644
--- a/src/participant/participant.ts
+++ b/src/participant/participant.ts
@@ -21,6 +21,7 @@ import {
   docsRequestChatResult,
   schemaRequestChatResult,
   createCancelledRequestChatResult,
+  codeBlockIdentifier,
 } from './constants';
 import { SchemaFormatter } from './schema';
 import { getSimplifiedSampleDocuments } from './sampleDocuments';
@@ -38,7 +39,8 @@ import {
 } from '../telemetry/telemetryService';
 import { DocsChatbotAIService } from './docsChatbotAIService';
 import type TelemetryService from '../telemetry/telemetryService';
-import { IntentPrompt, type PromptIntent } from './prompts/intent';
+import { processStreamWithIdentifiers } from './streamParsing';
+import type { PromptIntent } from './prompts/intent';
 
 const log = createLogger('participant');
 
@@ -59,16 +61,6 @@ export type ParticipantCommand = '/query' | '/schema' | '/docs';
 
 const MAX_MARKDOWN_LIST_LENGTH = 10;
 
-export function getRunnableContentFromString(text: string): string {
-  const matchedJSresponseContent = text.match(/```javascript((.|\n)*)```/);
-
-  const code =
-    matchedJSresponseContent && matchedJSresponseContent.length > 1
-      ? matchedJSresponseContent[1]
-      : '';
-  return code.trim();
-}
-
 export default class ParticipantController {
   _participant?: vscode.ChatParticipant;
   _connectionController: ConnectionController;
@@ -171,48 +163,113 @@ export default class ParticipantController {
     });
   }
 
-  async getChatResponseContent({
+  async _getChatResponse({
     messages,
     token,
   }: {
     messages: vscode.LanguageModelChatMessage[];
     token: vscode.CancellationToken;
-  }): Promise<string> {
+  }): Promise<vscode.LanguageModelChatResponse> {
     const model = await getCopilotModel();
-    let responseContent = '';
-    if (model) {
-      const chatResponse = await model.sendRequest(messages, {}, token);
-      for await (const fragment of chatResponse.text) {
-        responseContent += fragment;
-      }
+
+    if (!model) {
+      throw new Error('Copilot model not found');
     }
 
-    return responseContent;
+    return await model.sendRequest(messages, {}, token);
   }
 
-  _streamRunnableContentActions({
-    responseContent,
+  async streamChatResponse({
+    messages,
     stream,
+    token,
   }: {
-    responseContent: string;
+    messages: vscode.LanguageModelChatMessage[];
+    stream: vscode.ChatResponseStream;
+    token: vscode.CancellationToken;
+  }): Promise<void> {
+    const chatResponse = await this._getChatResponse({
+      messages,
+      token,
+    });
+    for await (const fragment of chatResponse.text) {
+      stream.markdown(fragment);
+    }
+  }
+
+  _streamCodeBlockActions({
+    runnableContent,
+    stream,
+  }: {
+    runnableContent: string;
     stream: vscode.ChatResponseStream;
   }): void {
-    const runnableContent = getRunnableContentFromString(responseContent);
-    if (runnableContent) {
-      const commandArgs: RunParticipantQueryCommandArgs = {
-        runnableContent,
-      };
-      stream.button({
-        command: EXTENSION_COMMANDS.RUN_PARTICIPANT_QUERY,
-        title: vscode.l10n.t('▶️ Run'),
-        arguments: [commandArgs],
-      });
-      stream.button({
-        command: EXTENSION_COMMANDS.OPEN_PARTICIPANT_QUERY_IN_PLAYGROUND,
-        title: vscode.l10n.t('Open in playground'),
-        arguments: [commandArgs],
-      });
+    runnableContent = runnableContent.trim();
+
+    if (!runnableContent) {
+      return;
     }
+
+    const commandArgs: RunParticipantQueryCommandArgs = {
+      runnableContent,
+    };
+    stream.button({
+      command: EXTENSION_COMMANDS.RUN_PARTICIPANT_QUERY,
+      title: vscode.l10n.t('▶️ Run'),
+      arguments: [commandArgs],
+    });
+    stream.button({
+      command: EXTENSION_COMMANDS.OPEN_PARTICIPANT_QUERY_IN_PLAYGROUND,
+      title: vscode.l10n.t('Open in playground'),
+      arguments: [commandArgs],
+    });
+  }
+
+  async streamChatResponseContentWithCodeActions({
+    messages,
+    stream,
+    token,
+  }: {
+    messages: vscode.LanguageModelChatMessage[];
+    stream: vscode.ChatResponseStream;
+    token: vscode.CancellationToken;
+  }): Promise<void> {
+    const chatResponse = await this._getChatResponse({
+      messages,
+      token,
+    });
+
+    await processStreamWithIdentifiers({
+      processStreamFragment: (fragment: string) => {
+        stream.markdown(fragment);
+      },
+      onStreamIdentifier: (content: string) => {
+        this._streamCodeBlockActions({ runnableContent: content, stream });
+      },
+      inputIterable: chatResponse.text,
+      identifier: codeBlockIdentifier,
+    });
+  }
+
+  // This will stream all of the response content and create a string from it.
+  // It should only be used when the entire response is needed at one time.
+  async getChatResponseContent({
+    messages,
+    token,
+  }: {
+    messages: vscode.LanguageModelChatMessage[];
+    token: vscode.CancellationToken;
+  }): Promise<string> {
+    let responseContent = '';
+    const chatResponse = await this._getChatResponse({
+      messages,
+      token,
+    });
+    for await (const fragment of chatResponse.text) {
+      responseContent += fragment;
+    }
+
+    return responseContent;
   }
 
   async _handleRoutedGenericRequest(
@@ -227,14 +284,9 @@ export default class ParticipantController {
       connectionNames: this._getConnectionNames(),
     });
 
-    const responseContent = await this.getChatResponseContent({
+    await this.streamChatResponseContentWithCodeActions({
       messages,
       token,
-    });
-    stream.markdown(responseContent);
-
-    this._streamRunnableContentActions({
-      responseContent,
       stream,
     });
 
@@ -293,7 +345,7 @@ export default class ParticipantController {
       token,
     });
 
-    return IntentPrompt.getIntentFromModelResponse(responseContent);
+    return Prompts.intent.getIntentFromModelResponse(responseContent);
   }
 
   async handleGenericRequest(
@@ -1001,11 +1053,11 @@ export default class ParticipantController {
       connectionNames: this._getConnectionNames(),
       ...(sampleDocuments ? { sampleDocuments } : {}),
     });
-    const responseContent = await this.getChatResponseContent({
+    await this.streamChatResponse({
       messages,
+      stream,
       token,
     });
-    stream.markdown(responseContent);
 
     stream.button({
       command: EXTENSION_COMMANDS.PARTICIPANT_OPEN_RAW_SCHEMA_OUTPUT,
@@ -1104,16 +1156,11 @@ export default class ParticipantController {
       connectionNames: this._getConnectionNames(),
       ...(sampleDocuments ? { sampleDocuments } : {}),
     });
-    const responseContent = await this.getChatResponseContent({
-      messages,
-      token,
-    });
-
-    stream.markdown(responseContent);
 
-    this._streamRunnableContentActions({
-      responseContent,
+    await this.streamChatResponseContentWithCodeActions({
+      messages,
       stream,
+      token,
     });
 
     return queryRequestChatResult(context.history);
@@ -1181,32 +1228,41 @@ export default class ParticipantController {
       vscode.ChatResponseStream,
       vscode.CancellationToken
     ]
-  ): Promise<{
-    responseContent: string;
-    responseReferences?: Reference[];
-  }> {
-    const [request, context, , token] = args;
+  ): Promise<void> {
+    const [request, context, stream, token] = args;
     const messages = await Prompts.generic.buildMessages({
       request,
       context,
       connectionNames: this._getConnectionNames(),
     });
 
-    const responseContent = await this.getChatResponseContent({
+    await this.streamChatResponseContentWithCodeActions({
       messages,
+      stream,
       token,
     });
-    const responseReferences = [
-      {
+
+    this._streamResponseReference({
+      reference: {
         url: MONGODB_DOCS_LINK,
         title: 'View MongoDB documentation',
       },
-    ];
+      stream,
+    });
+  }
 
-    return {
-      responseContent,
-      responseReferences,
-    };
+  _streamResponseReference({
+    reference,
+    stream,
+  }: {
+    reference: Reference;
+    stream: vscode.ChatResponseStream;
+  }): void {
+    const link = new vscode.MarkdownString(
+      `- [${reference.title}](${reference.url})\n`
+    );
+    link.supportHtml = true;
+    stream.markdown(link);
   }
 
   async handleDocsRequest(
@@ -1235,6 +1291,19 @@ export default class ParticipantController {
         token,
         stream,
       });
+
+      if (docsResult.responseContent) {
+        stream.markdown(docsResult.responseContent);
+      }
+      // List the references below the answer, like the Copilot fallback does.
+      if (docsResult.responseReferences) {
+        for (const reference of docsResult.responseReferences) {
+          this._streamResponseReference({
+            reference,
+            stream,
+          });
+        }
+      }
     } catch (error) {
       // If the docs chatbot API is not available, fall back to Copilot’s LLM and include
       // the MongoDB documentation link for users to go to our documentation site directly.
@@ -1255,25 +1324,7 @@ export default class ParticipantController {
         }
       );
 
-      docsResult = await this._handleDocsRequestWithCopilot(...args);
-    }
-
-    if (docsResult.responseContent) {
-      stream.markdown(docsResult.responseContent);
-      this._streamRunnableContentActions({
-        responseContent: docsResult.responseContent,
-        stream,
-      });
-    }
-
-    if (docsResult.responseReferences) {
-      for (const ref of docsResult.responseReferences) {
-        const link = new vscode.MarkdownString(
-          `- [${ref.title}](${ref.url})\n`
-        );
-        link.supportHtml = true;
-        stream.markdown(link);
-      }
+      await this._handleDocsRequestWithCopilot(...args);
     }
 
     return docsRequestChatResult({
diff --git a/src/participant/prompts/generic.ts b/src/participant/prompts/generic.ts
index 40b531228..2112233da 100644
--- a/src/participant/prompts/generic.ts
+++ b/src/participant/prompts/generic.ts
@@ -3,6 +3,8 @@ import * as vscode from 'vscode';
 import type { PromptArgsBase } from './promptBase';
 import { PromptBase } from './promptBase';
 
+import { codeBlockIdentifier } from '../constants';
+
 export class GenericPrompt extends PromptBase<PromptArgsBase> {
   protected getAssistantPrompt(): string {
     return `You are a MongoDB expert.
@@ -12,7 +14,7 @@ Rules:
 1. Keep your response concise.
 2. You should suggest code that is performant and correct.
 3. Respond with markdown.
-4. When relevant, provide code in a Markdown code block that begins with \`\`\`javascript and ends with \`\`\`.
+4. When relevant, provide code in a Markdown code block that begins with ${codeBlockIdentifier.start} and ends with ${codeBlockIdentifier.end}.
 5. Use MongoDB shell syntax for code unless the user requests a specific language.
 6. If you require additional information to provide a response, ask the user for it.
 7. When specifying a database, use the MongoDB syntax use('databaseName').`;
diff --git a/src/participant/prompts/intent.ts b/src/participant/prompts/intent.ts
index 0726f0fc7..4d6216afa 100644
--- a/src/participant/prompts/intent.ts
+++ b/src/participant/prompts/intent.ts
@@ -34,7 +34,7 @@ Response:
 Docs`;
   }
 
-  static getIntentFromModelResponse(response: string): PromptIntent {
+  getIntentFromModelResponse(response: string): PromptIntent {
     response = response.trim();
     switch (response) {
       case 'Query':
diff --git a/src/participant/prompts/query.ts b/src/participant/prompts/query.ts
index b7ae5cc26..eff4d29ff 100644
--- a/src/participant/prompts/query.ts
+++ b/src/participant/prompts/query.ts
@@ -2,6 +2,7 @@ import * as vscode from 'vscode';
 import type { Document } from 'bson';
 
 import { getStringifiedSampleDocuments } from '../sampleDocuments';
+import { codeBlockIdentifier } from '../constants';
 import type { PromptArgsBase } from './promptBase';
 import { PromptBase } from './promptBase';
 
@@ -19,15 +20,15 @@ export class QueryPrompt extends PromptBase<QueryPromptArgs> {
 Your task is to help the user craft MongoDB shell syntax code to perform their task.
 Keep your response concise.
 You must suggest code that is performant and correct.
-Respond with markdown, write code in a Markdown code block that begins with \`\`\`javascript and ends with \`\`\`.
-Respond in MongoDB shell syntax using the \`\`\`javascript code block syntax.
+Respond with markdown, write code in a Markdown code block that begins with ${codeBlockIdentifier.start} and ends with ${codeBlockIdentifier.end}.
+Respond in MongoDB shell syntax using the ${codeBlockIdentifier.start} code block syntax.
 
 Concisely explain the code snippet you have generated.
 
 Example 1:
 User: Documents in the orders db, sales collection, where the date is in 2014 and group the total sales for each product.
 Response:
-\`\`\`javascript
+${codeBlockIdentifier.start}
 use('orders');
 db.getCollection('sales').aggregate([
   // Find all of the sales that occurred in 2014.
@@ -35,15 +36,15 @@ db.getCollection('sales').aggregate([
   // Group the total sales for each product.
   { $group: { _id: '$item', totalSaleAmount: { $sum: { $multiply: [ '$price', '$quantity' ] } } } }
 ]);
-\`\`\`
+${codeBlockIdentifier.end}
 
 Example 2:
 User: How do I create an index on the name field in my users collection?.
 Response:
-\`\`\`javascript
+${codeBlockIdentifier.start}
 use('test');
 db.getCollection('users').createIndex({ name: 1 });
-\`\`\`
+${codeBlockIdentifier.end}
 
 MongoDB command to specify database:
 use('');
diff --git a/src/participant/streamParsing.ts b/src/participant/streamParsing.ts
new file mode 100644
index 000000000..93bb5dad9
--- /dev/null
+++ b/src/participant/streamParsing.ts
@@ -0,0 +1,110 @@
+/**
+ * Escapes every regular expression metacharacter in `str` so that the result
+ * matches `str` literally when embedded in a `RegExp` pattern.
+ */
+function escapeRegex(str: string): string {
+  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
+
+/**
+ * This function, provided a stream of text fragments, will stream the
+ * content to the provided stream and call the onStreamIdentifier function
+ * when an identifier is streamed. This is useful for inserting code actions
+ * into a chat response, whenever a code block has been written.
+ *
+ * Text is held back while it may still belong to an unfinished section: a
+ * section is forwarded in one piece once its end identifier has arrived, and
+ * a trailing run of characters that could still grow into a start identifier
+ * waits for the next fragment. Whatever is left when the input ends is
+ * forwarded as is, so the forwarded text always adds up to the input text.
+ *
+ * @param processStreamFragment Receives the text to output, in order. It is
+ * never called with an empty string.
+ * @param onStreamIdentifier Called with the text found between the start and
+ * end identifiers, right after that section (identifiers included) has been
+ * passed to processStreamFragment.
+ * @param inputIterable The text fragments to process.
+ * @param identifier The strings that open and close a section.
+ * @throws Error when both identifier strings are empty.
+ */
+export async function processStreamWithIdentifiers({
+  processStreamFragment,
+  onStreamIdentifier,
+  inputIterable,
+  identifier,
+}: {
+  processStreamFragment: (fragment: string) => void;
+  onStreamIdentifier: (content: string) => void;
+  inputIterable: AsyncIterable<string>;
+  identifier: {
+    start: string;
+    end: string;
+  };
+}): Promise<void> {
+  if (identifier.start.length === 0 && identifier.end.length === 0) {
+    // The regex below would match the empty string without ever advancing,
+    // which would hang on the first fragment.
+    throw new Error('The start and end identifiers cannot both be empty');
+  }
+
+  const escapedIdentifierStart = escapeRegex(identifier.start);
+  const escapedIdentifierEnd = escapeRegex(identifier.end);
+  const regex = new RegExp(
+    `${escapedIdentifierStart}([\\s\\S]*?)${escapedIdentifierEnd}`,
+    'g'
+  );
+
+  // Skips empty strings so the consumer is only called when there is text
+  // to output, e.g. not for every fragment received inside an open section.
+  const emit = (text: string): void => {
+    if (text.length > 0) {
+      processStreamFragment(text);
+    }
+  };
+
+  // The text received so far that has not been forwarded yet.
+  let contentSinceLastIdentifier = '';
+  for await (const fragment of inputIterable) {
+    contentSinceLastIdentifier += fragment;
+
+    let lastIndex = 0;
+    let match: RegExpExecArray | null;
+    // The loop ends on a failed exec, which resets regex.lastIndex to 0, so
+    // the next fragment is always scanned from the start of the buffer.
+    while ((match = regex.exec(contentSinceLastIdentifier)) !== null) {
+      const endIndex = regex.lastIndex;
+
+      // Stream content up to the end of the identifier.
+      emit(contentSinceLastIdentifier.slice(lastIndex, endIndex));
+      onStreamIdentifier(match[1]);
+
+      lastIndex = endIndex;
+    }
+
+    if (lastIndex > 0) {
+      // Remove all of the processed content.
+      contentSinceLastIdentifier = contentSinceLastIdentifier.slice(lastIndex);
+      continue;
+    }
+
+    // Clear as much of the content as we can safely.
+    const identifierIndex = contentSinceLastIdentifier.indexOf(
+      identifier.start
+    );
+    // With an open section, everything before its start identifier is safe.
+    // Otherwise all but the last `identifier.start.length - 1` characters
+    // are, since those could be the beginning of a start identifier.
+    const safeLength =
+      identifierIndex > -1
+        ? identifierIndex
+        : Math.max(
+            0,
+            contentSinceLastIdentifier.length - (identifier.start.length - 1)
+          );
+    emit(contentSinceLastIdentifier.slice(0, safeLength));
+    contentSinceLastIdentifier = contentSinceLastIdentifier.slice(safeLength);
+  }
+
+  // Finish up anything not streamed yet.
+  emit(contentSinceLastIdentifier);
+}
diff --git a/src/test/ai-accuracy-tests/assertions.ts b/src/test/ai-accuracy-tests/assertions.ts
index 31460ef20..304cc68b8 100644
--- a/src/test/ai-accuracy-tests/assertions.ts
+++ b/src/test/ai-accuracy-tests/assertions.ts
@@ -3,9 +3,11 @@ import util from 'util';
 import type { Document } from 'mongodb';
 
 import type { Fixtures } from './fixtures/fixture-loader';
-import { getRunnableContentFromString } from '../../participant/participant';
 import { execute } from '../../language/worker';
 import type { ShellEvaluateResult } from '../../types/playgroundType';
+import { asyncIterableFromArray } from '../suite/participant/asyncIterableFromArray';
+import { codeBlockIdentifier } from '../../participant/constants';
+import { processStreamWithIdentifiers } from '../../participant/streamParsing';
 
 export const runCodeInMessage = async (
   message: string,
@@ -15,7 +17,18 @@ export const runCodeInMessage = async (
   data: ShellEvaluateResult;
   error: any;
 }> => {
-  const codeToEvaluate = getRunnableContentFromString(message);
+  // We only run the last code block passed.
+  let codeToEvaluate = '';
+  await processStreamWithIdentifiers({
+    processStreamFragment: () => {
+      /* no-op */
+    },
+    onStreamIdentifier: (codeBlockContent: string): void => {
+      codeToEvaluate = codeBlockContent;
+    },
+    inputIterable: asyncIterableFromArray<string>([message]),
+    identifier: codeBlockIdentifier,
+  });
 
   if (codeToEvaluate.trim().length === 0) {
     throw new Error(`no code found in message: ${message}`);
diff --git a/src/test/suite/participant/asyncIterableFromArray.ts b/src/test/suite/participant/asyncIterableFromArray.ts
new file mode 100644
index 000000000..e3b7c8bde
--- /dev/null
+++ b/src/test/suite/participant/asyncIterableFromArray.ts
@@ -0,0 +1,24 @@
+// Exported here so that the accuracy tests can use it without
+// needing to define all of the testing types the main tests have.
+
+/**
+ * Wraps an array in an async iterable that yields the items of the array in
+ * order, one per `next()` call, and then reports completion. Each call to
+ * `[Symbol.asyncIterator]()` starts over from the first item.
+ */
+export function asyncIterableFromArray<T>(array: T[]): AsyncIterable<T> {
+  return {
+    [Symbol.asyncIterator](): AsyncIterator<T, undefined> {
+      let index = 0;
+      return {
+        next(): Promise<IteratorResult<T, undefined>> {
+          const result: IteratorResult<T, undefined> =
+            index < array.length
+              ? { value: array[index++], done: false }
+              : { value: undefined, done: true };
+          return Promise.resolve(result);
+        },
+      };
+    },
+  };
+}
diff --git a/src/test/suite/participant/participant.test.ts b/src/test/suite/participant/participant.test.ts
index 557fbd320..2610ccddf 100644
--- a/src/test/suite/participant/participant.test.ts
+++ b/src/test/suite/participant/participant.test.ts
@@ -6,9 +6,7 @@ import sinon from 'sinon';
 import type { DataService } from 'mongodb-data-service';
 import { ObjectId, Int32 } from 'bson';
 
-import ParticipantController, {
-  getRunnableContentFromString,
-} from '../../../participant/participant';
+import ParticipantController from '../../../participant/participant';
 import ConnectionController from '../../../connectionController';
 import { StorageController } from '../../../storage';
 import { StatusView } from '../../../views';
@@ -160,18 +158,6 @@ suite('Participant Controller Test Suite', function () {
     expect(collectionName).to.be.equal('cats');
   });
 
-  test('parses a returned by ai text for code blocks', function () {
-    const text =
-      '```javascript\n' +
-      "use('test');\n" +
-      "db.getCollection('test').find({ name: 'Shika' });\n" +
-      '```';
-    const code = getRunnableContentFromString(text);
-    expect(code).to.be.equal(
-      "use('test');\ndb.getCollection('test').find({ name: 'Shika' });"
-    );
-  });
-
   suite('when not connected', function () {
     let connectWithConnectionIdStub;
     let changeActiveConnectionStub;
diff --git a/src/test/suite/participant/streamParsing.test.ts b/src/test/suite/participant/streamParsing.test.ts
new file mode 100644
index 000000000..66208ecdd
--- /dev/null
+++ b/src/test/suite/participant/streamParsing.test.ts
@@ -0,0 +1,219 @@
+import { beforeEach } from 'mocha';
+import { expect } from 'chai';
+
+import { processStreamWithIdentifiers } from '../../../participant/streamParsing';
+import { asyncIterableFromArray } from './asyncIterableFromArray';
+
+const defaultCodeBlockIdentifier = {
+  start: '```',
+  end: '```',
+};
+
+suite('processStreamWithIdentifiers', () => {
+  let fragmentsProcessed: string[] = [];
+  let identifiersStreamed: string[] = [];
+
+  const processStreamFragment = (fragment: string): void => {
+    fragmentsProcessed.push(fragment);
+  };
+
+  const onStreamIdentifier = (content: string): void => {
+    identifiersStreamed.push(content);
+  };
+
+  beforeEach(function () {
+    fragmentsProcessed = [];
+    identifiersStreamed = [];
+  });
+
+  test('empty', async () => {
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable: asyncIterableFromArray<string>([]),
+      identifier: defaultCodeBlockIdentifier,
+    });
+
+    expect(fragmentsProcessed).to.be.empty;
+    expect(identifiersStreamed).to.be.empty;
+  });
+
+  test('input with no code block', async () => {
+    const inputText = 'This is some sample text without code blocks.';
+    const inputFragments = inputText.match(/.{1,5}/g) || [];
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier: defaultCodeBlockIdentifier,
+    });
+
+    expect(fragmentsProcessed.join('')).to.equal(inputText);
+    expect(identifiersStreamed).to.be.empty;
+  });
+
+  test('one code block with fragment sizes 2', async () => {
+    const inputText = '```javascript\npineapple\n```\nMore text.';
+    const fragmentSize = 2;
+    const inputFragments: string[] = [];
+
+    // Split the input into consecutive fragments of `fragmentSize` characters
+    // (the last fragment is shorter when the length is not a multiple).
+    for (let index = 0; index < inputText.length; index += fragmentSize) {
+      inputFragments.push(inputText.slice(index, index + fragmentSize));
+    }
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier: {
+        start: '```javascript',
+        end: '```',
+      },
+    });
+
+    expect(fragmentsProcessed.join('')).to.equal(inputText);
+    expect(identifiersStreamed).to.have.lengthOf(1);
+    expect(identifiersStreamed[0]).to.equal('\npineapple\n');
+  });
+
+  test('multiple code blocks', async () => {
+    const inputText =
+      'Text before code.\n```\ncode1\n```\nText between code.\n```\ncode2\n```\nText after code.';
+    const inputFragments = inputText.split('');
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier: defaultCodeBlockIdentifier,
+    });
+
+    expect(fragmentsProcessed.join('')).to.equal(inputText);
+    expect(identifiersStreamed).to.deep.equal(['\ncode1\n', '\ncode2\n']);
+  });
+
+  test('unfinished code block', async () => {
+    const inputText =
+      'Text before code.\n```\ncode content without end identifier.';
+    const inputFragments = inputText.split('');
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier: defaultCodeBlockIdentifier,
+    });
+
+    expect(fragmentsProcessed.join('')).to.equal(inputText);
+    expect(identifiersStreamed).to.be.empty;
+  });
+
+  test('code block identifier is a fragment', async () => {
+    const inputFragments = [
+      'Text before code.\n',
+      '```js',
+      '\ncode content\n',
+      '```',
+      '```js',
+      '\npineapple\n',
+      '```',
+      '\nText after code.',
+    ];
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+
+    const identifier = { start: '```js', end: '```' };
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier,
+    });
+
+    expect(fragmentsProcessed.join('')).to.deep.equal(inputFragments.join(''));
+
+    expect(identifiersStreamed).to.deep.equal([
+      '\ncode content\n',
+      '\npineapple\n',
+    ]);
+  });
+
+  test('code block identifier split between fragments', async () => {
+    const inputFragments = [
+      'Text before code.\n`',
+      '``j',
+      's\ncode content\n`',
+      '``',
+      '\nText after code.',
+    ];
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+
+    const identifier = { start: '```js', end: '```' };
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier,
+    });
+
+    expect(fragmentsProcessed.join('')).to.deep.equal(inputFragments.join(''));
+
+    expect(identifiersStreamed).to.deep.equal(['\ncode content\n']);
+  });
+
+  test('fragments containing multiple code blocks', async () => {
+    const inputFragments = [
+      'Text before code.\n```',
+      'js\ncode1\n```',
+      '\nText',
+      ' between code.\n``',
+      '`js\ncode2\n``',
+      '`\nText after code.',
+    ];
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+    const identifier = { start: '```js', end: '```' };
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier,
+    });
+
+    expect(fragmentsProcessed.join('')).to.deep.equal(inputFragments.join(''));
+    expect(identifiersStreamed).to.deep.equal(['\ncode1\n', '\ncode2\n']);
+  });
+
+  test('one fragment containing multiple code blocks', async () => {
+    const inputFragments = [
+      'Text before code.\n```js\ncode1\n```\nText between code.\n```js\ncode2\n```\nText after code.',
+    ];
+
+    const inputIterable = asyncIterableFromArray<string>(inputFragments);
+    const identifier = { start: '```js', end: '```' };
+
+    await processStreamWithIdentifiers({
+      processStreamFragment,
+      onStreamIdentifier,
+      inputIterable,
+      identifier,
+    });
+
+    expect(fragmentsProcessed.join('')).to.deep.equal(inputFragments.join(''));
+    expect(identifiersStreamed).to.deep.equal(['\ncode1\n', '\ncode2\n']);
+  });
+});