Skip to content

Commit

Permalink
[8.x] [Obs AI Assistant] Add uuid to knowledge base entries to avoid overwriting accidentally (#191043) (#199263)
Browse files Browse the repository at this point in the history

# Backport

This will backport the following commits from `main` to `8.x`:
- [[Obs AI Assistant] Add uuid to knowledge base entries to avoid
overwriting accidentally
(#191043)](https://github.com/elastic/kibana/pull/191043)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Søren
Louv-Jansen","email":"[email protected]"},"sourceCommit":{"committedDate":"2024-11-07T08:55:34Z","message":"[Obs
AI Assistant] Add uuid to knowledge base entries to avoid overwriting
accidentally (#191043)\n\nCloses
https://github.com/elastic/kibana/issues/184069\r\n\r\n**The
Problem**\r\nThe LLM decides the identifier (both `_id` and `doc_id`)
for knowledge\r\nbase entries. The `_id` must be globally unique in
Elasticsearch but the\r\nLLM can easily pick the same id for different
users thereby overwriting\r\none users learning with another users
learning.\r\n\r\n**Solution**\r\nThe LLM should not pick the `_id`. With
this PR a UUID is generated for\r\nnew entries. This means the LLM will
only be able to create new KB\r\nentries - it will not be able to update
existing ones.\r\n\r\n`doc_id` has been removed, and replaced with a
`title` property. Title\r\nis simply a human readable string - it is not
used to identify KB\r\nentries.\r\nTo retain backwards compatability, we
will display the `doc_id` if\r\n`title` is not
available\r\n\r\n---------\r\n\r\nCo-authored-by: Sandra G
<[email protected]>\r\nCo-authored-by: kibanamachine
<[email protected]>","sha":"7c92a10b324a8b1e10ae8924e5525b071b5c9797","branchLabelMapping":{"^v9.0.0$":"main","^v8.17.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:fix","v9.0.0","backport:prev-minor","Team:Obs
AI Assistant","ci:project-deploy-observability"],"title":"[Obs AI
Assistant] Add uuid to knowledge base entries to avoid overwriting
accidentally","number":191043,"url":"https://github.com/elastic/kibana/pull/191043","mergeCommit":{"message":"[Obs
AI Assistant] Add uuid to knowledge base entries to avoid overwriting
accidentally (#191043)\n\nCloses
https://github.com/elastic/kibana/issues/184069\r\n\r\n**The
Problem**\r\nThe LLM decides the identifier (both `_id` and `doc_id`)
for knowledge\r\nbase entries. The `_id` must be globally unique in
Elasticsearch but the\r\nLLM can easily pick the same id for different
users thereby overwriting\r\none users learning with another users
learning.\r\n\r\n**Solution**\r\nThe LLM should not pick the `_id`. With
this PR a UUID is generated for\r\nnew entries. This means the LLM will
only be able to create new KB\r\nentries - it will not be able to update
existing ones.\r\n\r\n`doc_id` has been removed, and replaced with a
`title` property. Title\r\nis simply a human readable string - it is not
used to identify KB\r\nentries.\r\nTo retain backwards compatability, we
will display the `doc_id` if\r\n`title` is not
available\r\n\r\n---------\r\n\r\nCo-authored-by: Sandra G
<[email protected]>\r\nCo-authored-by: kibanamachine
<[email protected]>","sha":"7c92a10b324a8b1e10ae8924e5525b071b5c9797"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/191043","number":191043,"mergeCommit":{"message":"[Obs
AI Assistant] Add uuid to knowledge base entries to avoid overwriting
accidentally (#191043)\n\nCloses
https://github.com/elastic/kibana/issues/184069\r\n\r\n**The
Problem**\r\nThe LLM decides the identifier (both `_id` and `doc_id`)
for knowledge\r\nbase entries. The `_id` must be globally unique in
Elasticsearch but the\r\nLLM can easily pick the same id for different
users thereby overwriting\r\none users learning with another users
learning.\r\n\r\n**Solution**\r\nThe LLM should not pick the `_id`. With
this PR a UUID is generated for\r\nnew entries. This means the LLM will
only be able to create new KB\r\nentries - it will not be able to update
existing ones.\r\n\r\n`doc_id` has been removed, and replaced with a
`title` property. Title\r\nis simply a human readable string - it is not
used to identify KB\r\nentries.\r\nTo retain backwards compatability, we
will display the `doc_id` if\r\n`title` is not
available\r\n\r\n---------\r\n\r\nCo-authored-by: Sandra G
<[email protected]>\r\nCo-authored-by: kibanamachine
<[email protected]>","sha":"7c92a10b324a8b1e10ae8924e5525b071b5c9797"}}]}]
BACKPORT-->

Co-authored-by: Søren Louv-Jansen <[email protected]>
  • Loading branch information
kibanamachine and sorenlouv authored Nov 7, 2024
1 parent de6da8a commit 3b2a572
Show file tree
Hide file tree
Showing 51 changed files with 687 additions and 1,438 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ export type ConversationUpdateRequest = ConversationRequestBase & {
export interface KnowledgeBaseEntry {
'@timestamp': string;
id: string;
title?: string;
text: string;
doc_id: string;
confidence: 'low' | 'medium' | 'high';
is_correction: boolean;
type?: 'user_instruction' | 'contextual';
Expand All @@ -96,12 +96,12 @@ export interface KnowledgeBaseEntry {
}

export interface Instruction {
doc_id: string;
id: string;
text: string;
}

export interface AdHocInstruction {
doc_id?: string;
id?: string;
text: string;
instruction_type: 'user_instruction' | 'application_instruction';
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
import { ShortIdTable } from './short_id_table';

describe('shortIdTable', () => {
it('generates a short id from a uuid', () => {
const table = new ShortIdTable();

const uuid = 'd877f65c-4036-42c4-b105-19e2f1a1c045';
const shortId = table.take(uuid);

expect(shortId.length).toBe(4);
expect(table.lookup(shortId)).toBe(uuid);
});

it('generates at least 10k unique ids consistently', () => {
const ids = new Set();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ const schema: RootSchema<RecallRanking> = {
},
};

export const RecallRankingEventType = 'observability_ai_assistant_recall_ranking';
export const recallRankingEventType = 'observability_ai_assistant_recall_ranking';

export const recallRankingEvent: EventTypeOpts<RecallRanking> = {
eventType: RecallRankingEventType,
eventType: recallRankingEventType,
schema,
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* 2.0.
*/

import { KnowledgeBaseType } from '../../common/types';
import { v4 } from 'uuid';
import type { FunctionRegistrationParameters } from '.';
import { KnowledgeBaseEntryRole } from '../../common';

Expand All @@ -14,6 +14,7 @@ export const SUMMARIZE_FUNCTION_NAME = 'summarize';
export function registerSummarizationFunction({
client,
functions,
resources,
}: FunctionRegistrationParameters) {
functions.registerFunction(
{
Expand All @@ -28,10 +29,10 @@ export function registerSummarizationFunction({
parameters: {
type: 'object',
properties: {
id: {
title: {
type: 'string',
description:
'An id for the document. This should be a short human-readable keyword field with only alphabetic characters and underscores, that allow you to update it later.',
'A human readable title that can be used to identify the document later. This should be no longer than 255 characters',
},
text: {
type: 'string',
Expand All @@ -54,29 +55,31 @@ export function registerSummarizationFunction({
},
},
required: [
'id' as const,
'title' as const,
'text' as const,
'is_correction' as const,
'confidence' as const,
'public' as const,
],
},
},
(
{ arguments: { id, text, is_correction: isCorrection, confidence, public: isPublic } },
async (
{ arguments: { title, text, is_correction: isCorrection, confidence, public: isPublic } },
signal
) => {
const id = v4();
resources.logger.debug(`Creating new knowledge base entry with id: ${id}`);

return client
.addKnowledgeBaseEntry({
entry: {
doc_id: id,
role: KnowledgeBaseEntryRole.AssistantSummarization,
id,
title,
text,
is_correction: isCorrection,
type: KnowledgeBaseType.Contextual,
confidence,
public: isPublic,
role: KnowledgeBaseEntryRole.AssistantSummarization,
confidence,
is_correction: isCorrection,
labels: {},
},
// signal,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ const chatCompleteBaseRt = t.type({
]),
instructions: t.array(
t.intersection([
t.partial({ doc_id: t.string }),
t.partial({ id: t.string }),
t.type({
text: t.string,
instruction_type: t.union([
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import { notImplemented } from '@hapi/boom';
import { nonEmptyStringRt, toBooleanRt } from '@kbn/io-ts-utils';
import * as t from 'io-ts';
import { v4 } from 'uuid';
import { FunctionDefinition } from '../../../common/functions/types';
import { KnowledgeBaseEntryRole } from '../../../common/types';
import type { RecalledEntry } from '../../service/knowledge_base_service';
Expand Down Expand Up @@ -114,19 +115,19 @@ const functionRecallRoute = createObservabilityAIAssistantServerRoute({
throw notImplemented();
}

return client.recall({ queries, categories });
const entries = await client.recall({ queries, categories });
return { entries };
},
});

const functionSummariseRoute = createObservabilityAIAssistantServerRoute({
endpoint: 'POST /internal/observability_ai_assistant/functions/summarize',
params: t.type({
body: t.type({
id: t.string,
title: t.string,
text: nonEmptyStringRt,
confidence: t.union([t.literal('low'), t.literal('medium'), t.literal('high')]),
is_correction: toBooleanRt,
type: t.union([t.literal('user_instruction'), t.literal('contextual')]),
public: toBooleanRt,
labels: t.record(t.string, t.string),
}),
Expand All @@ -142,22 +143,20 @@ const functionSummariseRoute = createObservabilityAIAssistantServerRoute({
}

const {
title,
confidence,
id,
is_correction: isCorrection,
type,
text,
public: isPublic,
labels,
} = resources.params.body;

return client.addKnowledgeBaseEntry({
entry: {
title,
confidence,
id,
doc_id: id,
id: v4(),
is_correction: isCorrection,
type,
text,
public: isPublic,
labels,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,12 @@ import type {
MlDeploymentAllocationState,
MlDeploymentState,
} from '@elastic/elasticsearch/lib/api/types';
import pLimit from 'p-limit';
import { notImplemented } from '@hapi/boom';
import { nonEmptyStringRt, toBooleanRt } from '@kbn/io-ts-utils';
import * as t from 'io-ts';
import { createObservabilityAIAssistantServerRoute } from '../create_observability_ai_assistant_server_route';
import {
Instruction,
KnowledgeBaseEntry,
KnowledgeBaseEntryRole,
KnowledgeBaseType,
} from '../../../common/types';
import { Instruction, KnowledgeBaseEntry, KnowledgeBaseEntryRole } from '../../../common/types';

const getKnowledgeBaseStatus = createObservabilityAIAssistantServerRoute({
endpoint: 'GET /internal/observability_ai_assistant/kb/status',
Expand Down Expand Up @@ -108,18 +104,8 @@ const saveKnowledgeBaseUserInstruction = createObservabilityAIAssistantServerRou
}

const { id, text, public: isPublic } = resources.params.body;
return client.addKnowledgeBaseEntry({
entry: {
id,
doc_id: id,
text,
public: isPublic,
confidence: 'high',
is_correction: false,
type: KnowledgeBaseType.UserInstruction,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
},
return client.addUserInstruction({
entry: { id, text, public: isPublic },
});
},
});
Expand Down Expand Up @@ -153,26 +139,29 @@ const getKnowledgeBaseEntries = createObservabilityAIAssistantServerRoute({
},
});

const knowledgeBaseEntryRt = t.intersection([
t.type({
id: t.string,
title: t.string,
text: nonEmptyStringRt,
}),
t.partial({
confidence: t.union([t.literal('low'), t.literal('medium'), t.literal('high')]),
is_correction: toBooleanRt,
public: toBooleanRt,
labels: t.record(t.string, t.string),
role: t.union([
t.literal(KnowledgeBaseEntryRole.AssistantSummarization),
t.literal(KnowledgeBaseEntryRole.UserEntry),
t.literal(KnowledgeBaseEntryRole.Elastic),
]),
}),
]);

const saveKnowledgeBaseEntry = createObservabilityAIAssistantServerRoute({
endpoint: 'POST /internal/observability_ai_assistant/kb/entries/save',
params: t.type({
body: t.intersection([
t.type({
id: t.string,
text: nonEmptyStringRt,
}),
t.partial({
confidence: t.union([t.literal('low'), t.literal('medium'), t.literal('high')]),
is_correction: toBooleanRt,
public: toBooleanRt,
labels: t.record(t.string, t.string),
role: t.union([
t.literal('assistant_summarization'),
t.literal('user_entry'),
t.literal('elastic'),
]),
}),
]),
body: knowledgeBaseEntryRt,
}),
options: {
tags: ['access:ai_assistant'],
Expand All @@ -184,27 +173,15 @@ const saveKnowledgeBaseEntry = createObservabilityAIAssistantServerRoute({
throw notImplemented();
}

const {
id,
text,
public: isPublic,
confidence,
is_correction: isCorrection,
labels,
role,
} = resources.params.body;

const entry = resources.params.body;
return client.addKnowledgeBaseEntry({
entry: {
id,
text,
doc_id: id,
confidence: confidence ?? 'high',
is_correction: isCorrection ?? false,
type: 'contextual',
public: isPublic ?? true,
labels: labels ?? {},
role: (role as KnowledgeBaseEntryRole) ?? KnowledgeBaseEntryRole.UserEntry,
confidence: 'high',
is_correction: false,
public: true,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
...entry,
},
});
},
Expand Down Expand Up @@ -235,12 +212,7 @@ const importKnowledgeBaseEntries = createObservabilityAIAssistantServerRoute({
endpoint: 'POST /internal/observability_ai_assistant/kb/entries/import',
params: t.type({
body: t.type({
entries: t.array(
t.type({
id: t.string,
text: nonEmptyStringRt,
})
),
entries: t.array(knowledgeBaseEntryRt),
}),
}),
options: {
Expand All @@ -253,18 +225,29 @@ const importKnowledgeBaseEntries = createObservabilityAIAssistantServerRoute({
throw notImplemented();
}

const entries = resources.params.body.entries.map((entry) => ({
doc_id: entry.id,
confidence: 'high' as KnowledgeBaseEntry['confidence'],
is_correction: false,
type: 'contextual' as const,
public: true,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
...entry,
}));

return await client.importKnowledgeBaseEntries({ entries });
const status = await client.getKnowledgeBaseStatus();
if (!status.ready) {
throw new Error('Knowledge base is not ready');
}

const limiter = pLimit(5);

const promises = resources.params.body.entries.map(async (entry) => {
return limiter(async () => {
return client.addKnowledgeBaseEntry({
entry: {
confidence: 'high',
is_correction: false,
public: true,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
...entry,
},
});
});
});

await Promise.all(promises);
},
});

Expand All @@ -273,8 +256,8 @@ export const knowledgeBaseRoutes = {
...getKnowledgeBaseStatus,
...getKnowledgeBaseEntries,
...saveKnowledgeBaseUserInstruction,
...getKnowledgeBaseUserInstructions,
...importKnowledgeBaseEntries,
...getKnowledgeBaseUserInstructions,
...saveKnowledgeBaseEntry,
...deleteKnowledgeBaseEntry,
};
Loading

0 comments on commit 3b2a572

Please sign in to comment.