-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
183 additions
and
130 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
import { UUID } from "crypto"; | ||
|
||
import { AgentRuntime } from "./runtime.ts"; | ||
import { embed } from "./embedding.ts"; | ||
import { Content, ModelClass, type Memory } from "./types.ts"; | ||
import { stringToUuid } from "./uuid.ts"; | ||
import { embeddingZeroVector } from "./memory.ts"; | ||
import { splitChunks } from "./generation.ts"; | ||
import { models } from "./models.ts"; | ||
|
||
/**
 * Looks up the knowledge documents most relevant to a message.
 *
 * The message text is embedded and used to search the knowledge manager for
 * the three closest fragments (match threshold 0.1), using the message's
 * agent id as both the room id and the agent id. Each distinct
 * `content.source` found on those fragments is then resolved to its full
 * document through the documents manager.
 *
 * @param runtime - Runtime providing the knowledge and documents managers.
 * @param message - Message whose `content.text` drives the similarity search.
 * @returns The `content.text` of every source document that could be loaded.
 *          Fragments without a source and documents that no longer exist are
 *          skipped.
 */
async function get(runtime: AgentRuntime, message: Memory): Promise<string[]> {
    const embedding = await embed(runtime, message.content.text);
    const fragments = await runtime.knowledgeManager.searchMemoriesByEmbedding(
        embedding,
        {
            roomId: message.agentId,
            agentId: message.agentId,
            count: 3,
            match_threshold: 0.1,
        }
    );

    // Several fragments may point at the same document; fetch each one once.
    // Fragments that carry no source cannot be resolved, so they are ignored
    // instead of triggering a lookup with an undefined id.
    const uniqueSources = new Set<string>();
    for (const fragment of fragments) {
        const source = fragment.content.source;
        if (source) {
            uniqueSources.add(source);
        }
    }

    const knowledgeDocuments = await Promise.all(
        Array.from(uniqueSources, (source) =>
            runtime.documentsManager.getMemoryById(source as UUID)
        )
    );

    // Drop lookups that found nothing and keep only the document text.
    const knowledge: string[] = [];
    for (const document of knowledgeDocuments) {
        if (document != null) {
            knowledge.push(document.content.text);
        }
    }
    return knowledge;
}
|
||
/**
 * A single knowledge document handed to `set` for storage.
 */
export type KnowledgeItem = {
    /** Identifier of the stored document; fragments reference it as their source. */
    id: UUID;
    /** Document payload; its `text` is what gets preprocessed and chunked. */
    content: Content;
};
|
||
/**
 * Stores a knowledge item as a document plus searchable fragments.
 *
 * The full item is first saved through the documents manager with a zero
 * vector as its embedding. Its text is then preprocessed, split into chunks,
 * and every chunk is embedded and saved through the knowledge manager with
 * `content.source` pointing back at the document id.
 *
 * @param runtime - Runtime providing the managers, agent id and model config.
 * @param item - The document to store.
 */
async function set(runtime: AgentRuntime, item: KnowledgeItem): Promise<void> {
    await runtime.documentsManager.createMemory({
        embedding: embeddingZeroVector,
        id: item.id,
        agentId: runtime.agentId,
        roomId: runtime.agentId,
        userId: runtime.agentId,
        createdAt: Date.now(),
        content: item.content,
    });

    const preprocessed = preprocess(item.content.text);
    // NOTE(review): 10 and 5 are presumably the chunk size and the overlap
    // between chunks; confirm against the signature of splitChunks.
    const fragments = await splitChunks(
        preprocessed,
        10,
        models[runtime.character.modelProvider].model?.[ModelClass.EMBEDDING],
        5
    );

    for (const fragment of fragments) {
        // Pass the runtime explicitly: `this` is undefined inside a plain
        // module-level function, so it cannot stand in for the runtime.
        const embedding = await embed(runtime, fragment);
        await runtime.knowledgeManager.createMemory({
            // We namespace the knowledge base uuid to avoid id
            // collision with the document above.
            id: stringToUuid(item.id + fragment),
            roomId: runtime.agentId,
            agentId: runtime.agentId,
            userId: runtime.agentId,
            createdAt: Date.now(),
            content: {
                source: item.id,
                text: fragment,
            },
            embedding,
        });
    }
}
|
||
/**
 * Reduces markdown-flavoured text to lowercase plain text.
 *
 * Fenced and inline code, HTML tags, horizontal rules and code comments are
 * removed; headers, links and images are reduced to their text; all runs of
 * whitespace (including newlines) collapse to a single space.
 *
 * @param content - Raw text to clean.
 * @returns The cleaned, trimmed, lowercased text on a single line.
 */
export function preprocess(content: string): string {
    return (
        content
            // Remove code blocks and their content
            .replace(/```[\s\S]*?```/g, "")
            // Remove inline code
            .replace(/`.*?`/g, "")
            // Strip header markers, keeping the header text
            .replace(/#{1,6}\s*(.*)/g, "$1")
            // Remove image links but keep alt text
            .replace(/!\[(.*?)\]\(.*?\)/g, "$1")
            // Remove links but keep text
            .replace(/\[(.*?)\]\(.*?\)/g, "$1")
            // Remove HTML tags
            .replace(/<[^>]*>/g, "")
            // Remove horizontal rules
            .replace(/^\s*[-*_]{3,}\s*$/gm, "")
            // Remove block comments
            .replace(/\/\*[\s\S]*?\*\//g, "")
            // Remove line comments. The marker must sit at the start of a
            // line or follow whitespace, so the "//" inside a URL such as
            // "http://example.com" is left intact.
            .replace(/(^|\s)\/\/.*$/gm, "$1")
            // Collapse every run of whitespace, newlines included, into one
            // space (this also makes any separate newline squeezing moot).
            .replace(/\s+/g, " ")
            .trim()
            .toLowerCase()
    );
}
|
||
// Default export bundling the module's public helpers. `preprocess` is the
// function defined in this file; the bare name `process` would instead
// resolve to the Node.js global process object.
export default {
    get,
    set,
    preprocess,
};
Oops, something went wrong.