Skip to content

Commit

Permalink
[Obs AI Assistant] Add uuid to knowledge base entries to avoid overwr…
Browse files Browse the repository at this point in the history
…iting accidentally (#191043)

Closes #184069

**The Problem**
The LLM decides the identifier (both `_id` and `doc_id`) for knowledge
base entries. The `_id` must be globally unique in Elasticsearch but the
LLM can easily pick the same id for different users, thereby overwriting
one user's learning with another user's learning.

**Solution**
The LLM should not pick the `_id`. With this PR a UUID is generated for
new entries. This means the LLM will only be able to create new KB
entries - it will not be able to update existing ones.

`doc_id` has been removed, and replaced with a `title` property. Title
is simply a human readable string - it is not used to identify KB
entries.
To retain backwards compatibility, we will display the `doc_id` if
`title` is not available.

---------

Co-authored-by: Sandra G <[email protected]>
Co-authored-by: kibanamachine <[email protected]>
  • Loading branch information
3 people authored Nov 7, 2024
1 parent 669761b commit 7c92a10
Show file tree
Hide file tree
Showing 51 changed files with 687 additions and 1,438 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ export type ConversationUpdateRequest = ConversationRequestBase & {
export interface KnowledgeBaseEntry {
'@timestamp': string;
id: string;
title?: string;
text: string;
doc_id: string;
confidence: 'low' | 'medium' | 'high';
is_correction: boolean;
type?: 'user_instruction' | 'contextual';
Expand All @@ -96,12 +96,12 @@ export interface KnowledgeBaseEntry {
}

export interface Instruction {
doc_id: string;
id: string;
text: string;
}

export interface AdHocInstruction {
doc_id?: string;
id?: string;
text: string;
instruction_type: 'user_instruction' | 'application_instruction';
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
import { ShortIdTable } from './short_id_table';

describe('shortIdTable', () => {
it('generates a short id from a uuid', () => {
const table = new ShortIdTable();

const uuid = 'd877f65c-4036-42c4-b105-19e2f1a1c045';
const shortId = table.take(uuid);

expect(shortId.length).toBe(4);
expect(table.lookup(shortId)).toBe(uuid);
});

it('generates at least 10k unique ids consistently', () => {
const ids = new Set();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ const schema: RootSchema<RecallRanking> = {
},
};

export const RecallRankingEventType = 'observability_ai_assistant_recall_ranking';
export const recallRankingEventType = 'observability_ai_assistant_recall_ranking';

export const recallRankingEvent: EventTypeOpts<RecallRanking> = {
eventType: RecallRankingEventType,
eventType: recallRankingEventType,
schema,
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* 2.0.
*/

import { KnowledgeBaseType } from '../../common/types';
import { v4 } from 'uuid';
import type { FunctionRegistrationParameters } from '.';
import { KnowledgeBaseEntryRole } from '../../common';

Expand All @@ -14,6 +14,7 @@ export const SUMMARIZE_FUNCTION_NAME = 'summarize';
export function registerSummarizationFunction({
client,
functions,
resources,
}: FunctionRegistrationParameters) {
functions.registerFunction(
{
Expand All @@ -28,10 +29,10 @@ export function registerSummarizationFunction({
parameters: {
type: 'object',
properties: {
id: {
title: {
type: 'string',
description:
'An id for the document. This should be a short human-readable keyword field with only alphabetic characters and underscores, that allow you to update it later.',
'A human readable title that can be used to identify the document later. This should be no longer than 255 characters',
},
text: {
type: 'string',
Expand All @@ -54,29 +55,31 @@ export function registerSummarizationFunction({
},
},
required: [
'id' as const,
'title' as const,
'text' as const,
'is_correction' as const,
'confidence' as const,
'public' as const,
],
},
},
(
{ arguments: { id, text, is_correction: isCorrection, confidence, public: isPublic } },
async (
{ arguments: { title, text, is_correction: isCorrection, confidence, public: isPublic } },
signal
) => {
const id = v4();
resources.logger.debug(`Creating new knowledge base entry with id: ${id}`);

return client
.addKnowledgeBaseEntry({
entry: {
doc_id: id,
role: KnowledgeBaseEntryRole.AssistantSummarization,
id,
title,
text,
is_correction: isCorrection,
type: KnowledgeBaseType.Contextual,
confidence,
public: isPublic,
role: KnowledgeBaseEntryRole.AssistantSummarization,
confidence,
is_correction: isCorrection,
labels: {},
},
// signal,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ const chatCompleteBaseRt = t.type({
]),
instructions: t.array(
t.intersection([
t.partial({ doc_id: t.string }),
t.partial({ id: t.string }),
t.type({
text: t.string,
instruction_type: t.union([
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import { notImplemented } from '@hapi/boom';
import { nonEmptyStringRt, toBooleanRt } from '@kbn/io-ts-utils';
import * as t from 'io-ts';
import { v4 } from 'uuid';
import { FunctionDefinition } from '../../../common/functions/types';
import { KnowledgeBaseEntryRole } from '../../../common/types';
import type { RecalledEntry } from '../../service/knowledge_base_service';
Expand Down Expand Up @@ -114,19 +115,19 @@ const functionRecallRoute = createObservabilityAIAssistantServerRoute({
throw notImplemented();
}

return client.recall({ queries, categories });
const entries = await client.recall({ queries, categories });
return { entries };
},
});

const functionSummariseRoute = createObservabilityAIAssistantServerRoute({
endpoint: 'POST /internal/observability_ai_assistant/functions/summarize',
params: t.type({
body: t.type({
id: t.string,
title: t.string,
text: nonEmptyStringRt,
confidence: t.union([t.literal('low'), t.literal('medium'), t.literal('high')]),
is_correction: toBooleanRt,
type: t.union([t.literal('user_instruction'), t.literal('contextual')]),
public: toBooleanRt,
labels: t.record(t.string, t.string),
}),
Expand All @@ -142,22 +143,20 @@ const functionSummariseRoute = createObservabilityAIAssistantServerRoute({
}

const {
title,
confidence,
id,
is_correction: isCorrection,
type,
text,
public: isPublic,
labels,
} = resources.params.body;

return client.addKnowledgeBaseEntry({
entry: {
title,
confidence,
id,
doc_id: id,
id: v4(),
is_correction: isCorrection,
type,
text,
public: isPublic,
labels,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,12 @@ import type {
MlDeploymentAllocationState,
MlDeploymentState,
} from '@elastic/elasticsearch/lib/api/types';
import pLimit from 'p-limit';
import { notImplemented } from '@hapi/boom';
import { nonEmptyStringRt, toBooleanRt } from '@kbn/io-ts-utils';
import * as t from 'io-ts';
import { createObservabilityAIAssistantServerRoute } from '../create_observability_ai_assistant_server_route';
import {
Instruction,
KnowledgeBaseEntry,
KnowledgeBaseEntryRole,
KnowledgeBaseType,
} from '../../../common/types';
import { Instruction, KnowledgeBaseEntry, KnowledgeBaseEntryRole } from '../../../common/types';

const getKnowledgeBaseStatus = createObservabilityAIAssistantServerRoute({
endpoint: 'GET /internal/observability_ai_assistant/kb/status',
Expand Down Expand Up @@ -108,18 +104,8 @@ const saveKnowledgeBaseUserInstruction = createObservabilityAIAssistantServerRou
}

const { id, text, public: isPublic } = resources.params.body;
return client.addKnowledgeBaseEntry({
entry: {
id,
doc_id: id,
text,
public: isPublic,
confidence: 'high',
is_correction: false,
type: KnowledgeBaseType.UserInstruction,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
},
return client.addUserInstruction({
entry: { id, text, public: isPublic },
});
},
});
Expand Down Expand Up @@ -153,26 +139,29 @@ const getKnowledgeBaseEntries = createObservabilityAIAssistantServerRoute({
},
});

const knowledgeBaseEntryRt = t.intersection([
t.type({
id: t.string,
title: t.string,
text: nonEmptyStringRt,
}),
t.partial({
confidence: t.union([t.literal('low'), t.literal('medium'), t.literal('high')]),
is_correction: toBooleanRt,
public: toBooleanRt,
labels: t.record(t.string, t.string),
role: t.union([
t.literal(KnowledgeBaseEntryRole.AssistantSummarization),
t.literal(KnowledgeBaseEntryRole.UserEntry),
t.literal(KnowledgeBaseEntryRole.Elastic),
]),
}),
]);

const saveKnowledgeBaseEntry = createObservabilityAIAssistantServerRoute({
endpoint: 'POST /internal/observability_ai_assistant/kb/entries/save',
params: t.type({
body: t.intersection([
t.type({
id: t.string,
text: nonEmptyStringRt,
}),
t.partial({
confidence: t.union([t.literal('low'), t.literal('medium'), t.literal('high')]),
is_correction: toBooleanRt,
public: toBooleanRt,
labels: t.record(t.string, t.string),
role: t.union([
t.literal('assistant_summarization'),
t.literal('user_entry'),
t.literal('elastic'),
]),
}),
]),
body: knowledgeBaseEntryRt,
}),
options: {
tags: ['access:ai_assistant'],
Expand All @@ -184,27 +173,15 @@ const saveKnowledgeBaseEntry = createObservabilityAIAssistantServerRoute({
throw notImplemented();
}

const {
id,
text,
public: isPublic,
confidence,
is_correction: isCorrection,
labels,
role,
} = resources.params.body;

const entry = resources.params.body;
return client.addKnowledgeBaseEntry({
entry: {
id,
text,
doc_id: id,
confidence: confidence ?? 'high',
is_correction: isCorrection ?? false,
type: 'contextual',
public: isPublic ?? true,
labels: labels ?? {},
role: (role as KnowledgeBaseEntryRole) ?? KnowledgeBaseEntryRole.UserEntry,
confidence: 'high',
is_correction: false,
public: true,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
...entry,
},
});
},
Expand Down Expand Up @@ -235,12 +212,7 @@ const importKnowledgeBaseEntries = createObservabilityAIAssistantServerRoute({
endpoint: 'POST /internal/observability_ai_assistant/kb/entries/import',
params: t.type({
body: t.type({
entries: t.array(
t.type({
id: t.string,
text: nonEmptyStringRt,
})
),
entries: t.array(knowledgeBaseEntryRt),
}),
}),
options: {
Expand All @@ -253,18 +225,29 @@ const importKnowledgeBaseEntries = createObservabilityAIAssistantServerRoute({
throw notImplemented();
}

const entries = resources.params.body.entries.map((entry) => ({
doc_id: entry.id,
confidence: 'high' as KnowledgeBaseEntry['confidence'],
is_correction: false,
type: 'contextual' as const,
public: true,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
...entry,
}));

return await client.importKnowledgeBaseEntries({ entries });
const status = await client.getKnowledgeBaseStatus();
if (!status.ready) {
throw new Error('Knowledge base is not ready');
}

const limiter = pLimit(5);

const promises = resources.params.body.entries.map(async (entry) => {
return limiter(async () => {
return client.addKnowledgeBaseEntry({
entry: {
confidence: 'high',
is_correction: false,
public: true,
labels: {},
role: KnowledgeBaseEntryRole.UserEntry,
...entry,
},
});
});
});

await Promise.all(promises);
},
});

Expand All @@ -273,8 +256,8 @@ export const knowledgeBaseRoutes = {
...getKnowledgeBaseStatus,
...getKnowledgeBaseEntries,
...saveKnowledgeBaseUserInstruction,
...getKnowledgeBaseUserInstructions,
...importKnowledgeBaseEntries,
...getKnowledgeBaseUserInstructions,
...saveKnowledgeBaseEntry,
...deleteKnowledgeBaseEntry,
};
Loading

0 comments on commit 7c92a10

Please sign in to comment.