Skip to content

Commit

Permalink
feat(nx-dev): improve link text for ai docs
Browse files Browse the repository at this point in the history
  • Loading branch information
mandarini committed Aug 31, 2023
1 parent 1abe35c commit e6f9401
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 12 deletions.
69 changes: 59 additions & 10 deletions nx-dev/util-ai/src/lib/chat-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ export function getMessageFromResponse(

export function getListOfSources(
pageSections: PageSection[]
): { heading: string; url: string }[] {
): { heading: string; url: string; longer_heading: string }[] {
const uniqueUrlPartials = new Set<string | null>();
const result = pageSections
.filter((section) => {
Expand All @@ -72,6 +72,7 @@ export function getListOfSources(
}
return {
heading: section.heading,
longer_heading: section.longer_heading,
url: url.toString(),
};
});
Expand All @@ -90,11 +91,40 @@ ${sourcesMarkdown}
}

/**
 * Renders the given sections as a Markdown bullet list of links.
 *
 * Sections whose `heading` has not been seen earlier in the input are listed
 * first, labelled with `heading`. Sections whose `heading` repeats an earlier
 * one are listed afterwards, labelled with `longer_heading`, or, when that is
 * nullish, with the heading followed by the section's 1-based position among
 * the repeated sections.
 *
 * NOTE(review): this span is a rendered diff with the +/- markers stripped, so
 * pre-change and post-change lines appear side by side (the two `sections`
 * parameter lines, `return sections`, and the two `.join('\n')` lines). The
 * description above follows what look like the post-change lines — confirm
 * against the real file.
 *
 * @param sections Sections to render; each provides the link text candidates
 *   (`heading`, `longer_heading`) and the link target (`url`).
 * @returns The Markdown list as a single string.
 */
export function toMarkdownList(
sections: { heading: string; url: string }[]
sections: { heading: string; url: string; longer_heading: string }[]
): string {
return sections
// Collects the sections whose heading was already used by an earlier section.
const sectionsWithLongerHeadings: {
heading: string;
url: string;
longer_heading: string;
}[] = [];

// Headings already emitted; used to detect repeats in input order.
const headings = new Set<string>();
const sectionsWithUniqueHeadings = sections.filter((section) => {
if (headings.has(section.heading)) {
sectionsWithLongerHeadings.push(section);
return false;
} else {
headings.add(section.heading);
return true;
}
});

// The '\n' separator is concatenated unconditionally, so the result ends with
// a newline when there are no repeated headings.
const finalSections = sectionsWithUniqueHeadings
.map((section) => `- [${section.heading}](${section.url})`)
.join('\n');
.join('\n')
.concat('\n')
.concat(
sectionsWithLongerHeadings
.map(
(section, index) =>
`- [${
section.longer_heading ?? section.heading + ' ' + (index + 1)
}](${section.url})`
)
.join('\n')
);
return finalSections;
}

export function extractLinksFromSourcesSection(markdown: string): string[] {
Expand Down Expand Up @@ -123,16 +153,35 @@ export function removeSourcesSection(markdown: string): string {

/**
 * Pipes `originalStream` through a transform that forwards its chunks, stops
 * forwarding once `stopString` has appeared in the decoded text, and enqueues
 * `appendContent` (UTF-8 encoded) from the transform's `flush` callback.
 *
 * NOTE(review): this span is a rendered diff with the +/- markers stripped, so
 * pre-change and post-change lines appear side by side (the two
 * `appendContent` parameter lines, the `appendText` transform, and the two
 * `pipeThrough` returns). The description above follows what look like the
 * post-change lines — confirm against the real file.
 *
 * @param originalStream Byte stream to forward.
 * @param appendContent Text enqueued by `flush`.
 * @param stopString Marker after which the streamed text is cut off; defaults
 *   to `'### Sources'`.
 * @returns The stream produced by `originalStream.pipeThrough(...)`.
 */
export async function appendToStream(
originalStream: ReadableStream<Uint8Array>,
appendContent: string
appendContent: string,
stopString: string = '### Sources'
): Promise<ReadableStream<Uint8Array>> {
const appendText = new TransformStream({
flush(ctrl) {
ctrl.enqueue(new TextEncoder().encode(appendContent));
ctrl.terminate();
// Accumulates the decoded text of every chunk seen so far, so that a
// `stopString` split across chunk boundaries can still be detected.
let buffer = '';

const transformer = new TransformStream<Uint8Array, Uint8Array>({
async transform(chunk, controller) {
// NOTE(review): a new decoder is created per chunk and `decode` is called
// without `{ stream: true }`, so a multi-byte character split across two
// chunks would not be reassembled in `buffer` — confirm whether that matters.
const decoder = new TextDecoder();
buffer += decoder.decode(chunk);

// Attempting to stop it from generating a list of Sources that will be wrong
// TODO(katerina): make sure that this works as expected
if (buffer.includes(stopString)) {
// NOTE(review): `truncated` is everything accumulated before `stopString`,
// including text from earlier chunks that were already enqueued unchanged
// below, so that text looks like it is emitted a second time — confirm.
const truncated = buffer.split(stopString)[0];
controller.enqueue(new TextEncoder().encode(truncated));
// NOTE(review): verify against the Streams spec whether `flush` (and thus
// `appendContent`) still runs after `terminate()` is called here.
controller.terminate();
return;
}

// No stop marker seen yet: forward the chunk unchanged.
controller.enqueue(chunk);
},

flush(controller) {
controller.enqueue(new TextEncoder().encode(appendContent));
controller.terminate();
},
});

return originalStream.pipeThrough(appendText);
return originalStream.pipeThrough(transformer);
}

export function getLastAssistantIndex(messages: ChatItem[]): number {
Expand Down
1 change: 1 addition & 0 deletions nx-dev/util-ai/src/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export interface PageSection {
page_id: number;
content: string;
heading: string;
longer_heading: string;
similarity: number;
slug: string;
url_partial: string | null;
Expand Down
33 changes: 31 additions & 2 deletions tools/documentation/create-embeddings/src/main.mts
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ async function generateEmbeddings() {
type: 'boolean',
}).argv;

const shouldRefresh = argv.refresh;
const shouldRefresh = argv.refresh ?? true;

if (!process.env.NX_NEXT_PUBLIC_SUPABASE_URL) {
throw new Error(
Expand Down Expand Up @@ -307,13 +307,19 @@ async function generateEmbeddings() {

const [responseData] = embeddingResponse.data;

const longer_heading = createLongerHeading(heading, url_partial);

const { error: insertPageSectionError, data: pageSection } =
await supabaseClient
.from('nods_page_section')
.insert({
page_id: page.id,
slug,
heading,
heading:
heading?.length && heading !== null && heading !== 'null'
? heading
: longer_heading,
longer_heading,
content,
url_partial,
token_count: embeddingResponse.usage.total_tokens,
Expand Down Expand Up @@ -433,6 +439,29 @@ function getAllFilesWithItemList(data): WalkEntry[] {
return files;
}

/**
 * Builds a more descriptive heading for a page section from its heading and
 * the partial URL it lives under.
 *
 * - With a usable heading (non-empty and not the literal string `'null'`), the
 *   result is `"<heading> - <Section>"`, where `<Section>` is the capitalized
 *   second `/`-separated segment of `url_partial` (e.g. `/recipes/foo` gives
 *   `Recipes`).
 * - Without a usable heading, the result is derived from the URL alone: the
 *   part before `#` is split on `/`, empty segments are dropped, and the
 *   capitalized segments are joined with `' - '`.
 *
 * @param heading Section heading; may be missing, `null`, empty or `'null'`.
 * @param url_partial Partial URL of the section, e.g. `/nx-api/angular#intro`.
 * @returns The longer heading, or `undefined` when `url_partial` is empty or
 *   when a heading is present but the URL has no second segment to append
 *   (previously this produced `"<heading> - NaN"` or threw a `TypeError`).
 */
function createLongerHeading(
  heading?: string | null,
  url_partial?: string
): string | undefined {
  if (!url_partial?.length) {
    return undefined;
  }

  // `charAt` returns '' for an empty string, so this never throws.
  const capitalize = (text: string): string =>
    text.charAt(0).toUpperCase() + text.slice(1);

  // The string 'null' is treated like a missing heading.
  if (heading?.length && heading !== 'null') {
    const section = url_partial.split('/')[1];
    // No second segment (no '/' in the URL, or an empty segment after it):
    // there is nothing meaningful to append, so report "no longer heading".
    return section?.length
      ? `${heading} - ${capitalize(section)}`
      : undefined;
  }

  return url_partial
    .split('#')[0]
    .split('/')
    .filter((part) => part.length > 0)
    .map(capitalize)
    .join(' - ');
}

/**
 * Runs `generateEmbeddings` and resolves once it has completed.
 */
async function main(): Promise<void> {
  await generateEmbeddings();
}
Expand Down

0 comments on commit e6f9401

Please sign in to comment.