From e6f9401fa08d53f9677f9350c00563e291de6649 Mon Sep 17 00:00:00 2001 From: Katerina Skroumpelou Date: Thu, 31 Aug 2023 19:46:44 +0300 Subject: [PATCH] feat(nx-dev): improve link text for ai docs --- nx-dev/util-ai/src/lib/chat-utils.ts | 69 ++++++++++++++++--- nx-dev/util-ai/src/lib/utils.ts | 1 + .../create-embeddings/src/main.mts | 33 ++++++++- 3 files changed, 91 insertions(+), 12 deletions(-) diff --git a/nx-dev/util-ai/src/lib/chat-utils.ts b/nx-dev/util-ai/src/lib/chat-utils.ts index 1d1c93e1c127c8..2db9e089f6ae94 100644 --- a/nx-dev/util-ai/src/lib/chat-utils.ts +++ b/nx-dev/util-ai/src/lib/chat-utils.ts @@ -54,7 +54,7 @@ export function getMessageFromResponse( export function getListOfSources( pageSections: PageSection[] -): { heading: string; url: string }[] { +): { heading: string; url: string; longer_heading: string }[] { const uniqueUrlPartials = new Set(); const result = pageSections .filter((section) => { @@ -72,6 +72,7 @@ export function getListOfSources( } return { heading: section.heading, + longer_heading: section.longer_heading, url: url.toString(), }; }); @@ -90,11 +91,40 @@ ${sourcesMarkdown} } export function toMarkdownList( - sections: { heading: string; url: string }[] + sections: { heading: string; url: string; longer_heading: string }[] ): string { - return sections + const sectionsWithLongerHeadings: { + heading: string; + url: string; + longer_heading: string; + }[] = []; + + const headings = new Set(); + const sectionsWithUniqueHeadings = sections.filter((section) => { + if (headings.has(section.heading)) { + sectionsWithLongerHeadings.push(section); + return false; + } else { + headings.add(section.heading); + return true; + } + }); + + const finalSections = sectionsWithUniqueHeadings .map((section) => `- [${section.heading}](${section.url})`) - .join('\n'); + .join('\n') + .concat('\n') + .concat( + sectionsWithLongerHeadings + .map( + (section, index) => + `- [${ + section.longer_heading ?? section.heading + ' ' + (index + 1) + }](${section.url})` + ) + .join('\n') + ); + return finalSections; } export function extractLinksFromSourcesSection(markdown: string): string[] { @@ -123,16 +153,35 @@ export function removeSourcesSection(markdown: string): string { export async function appendToStream( originalStream: ReadableStream, - appendContent: string + appendContent: string, + stopString: string = '### Sources' ): Promise> { - const appendText = new TransformStream({ - flush(ctrl) { - ctrl.enqueue(new TextEncoder().encode(appendContent)); - ctrl.terminate(); + let buffer = ''; + + const transformer = new TransformStream({ + async transform(chunk, controller) { + const decoder = new TextDecoder(); + buffer += decoder.decode(chunk); + + // Attempting to stop it from generating a list of Sources that will be wrong + // TODO(katerina): make sure that this works as expected + if (buffer.includes(stopString)) { + const truncated = buffer.split(stopString)[0]; + controller.enqueue(new TextEncoder().encode(truncated)); + controller.terminate(); + return; + } + + controller.enqueue(chunk); + }, + + flush(controller) { + controller.enqueue(new TextEncoder().encode(appendContent)); + controller.terminate(); }, }); - return originalStream.pipeThrough(appendText); + return originalStream.pipeThrough(transformer); } export function getLastAssistantIndex(messages: ChatItem[]): number { diff --git a/nx-dev/util-ai/src/lib/utils.ts b/nx-dev/util-ai/src/lib/utils.ts index baa745dab7b3b8..4d48792623f298 100644 --- a/nx-dev/util-ai/src/lib/utils.ts +++ b/nx-dev/util-ai/src/lib/utils.ts @@ -65,6 +65,7 @@ export interface PageSection { page_id: number; content: string; heading: string; + longer_heading: string; similarity: number; slug: string; url_partial: string | null; diff --git a/tools/documentation/create-embeddings/src/main.mts b/tools/documentation/create-embeddings/src/main.mts index 19d722fe5b3061..ebb1110dbf6fc6 100644 --- a/tools/documentation/create-embeddings/src/main.mts +++ b/tools/documentation/create-embeddings/src/main.mts @@ -155,7 +155,7 @@ async function generateEmbeddings() { type: 'boolean', }).argv; - const shouldRefresh = argv.refresh; + const shouldRefresh = argv.refresh ?? true; if (!process.env.NX_NEXT_PUBLIC_SUPABASE_URL) { throw new Error( @@ -307,13 +307,19 @@ async function generateEmbeddings() { const [responseData] = embeddingResponse.data; + const longer_heading = createLongerHeading(heading, url_partial); + const { error: insertPageSectionError, data: pageSection } = await supabaseClient .from('nods_page_section') .insert({ page_id: page.id, slug, - heading, + heading: + heading?.length && heading !== null && heading !== 'null' + ? heading + : longer_heading, + longer_heading, content, url_partial, token_count: embeddingResponse.usage.total_tokens, @@ -433,6 +439,29 @@ function getAllFilesWithItemList(data): WalkEntry[] { return files; } +function createLongerHeading( + heading?: string | null, + url_partial?: string +): string | undefined { + if (url_partial?.length) { + if (heading?.length && heading !== null && heading !== 'null') { + return `${heading}${` - ${ + url_partial.split('/')?.[1]?.[0].toUpperCase() + + url_partial.split('/')?.[1]?.slice(1) + }`}`; + } else { + return url_partial + .split('#')[0] + .split('/') + .map((part) => + part?.length ? part[0].toUpperCase() + part.slice(1) + ' - ' : '' + ) + .join('') + .slice(0, -3); + } + } +} + async function main() { await generateEmbeddings(); }