diff --git a/packages/client-discord/src/actions/summarize_conversation.ts b/packages/client-discord/src/actions/summarize_conversation.ts index bca4adc8a2..f4f1020ce2 100644 --- a/packages/client-discord/src/actions/summarize_conversation.ts +++ b/packages/client-discord/src/actions/summarize_conversation.ts @@ -252,7 +252,7 @@ const summarizeAction = { const model = models[runtime.character.settings.model]; const chunkSize = model.settings.maxContextLength - 1000; - const chunks = await splitChunks(formattedMemories, chunkSize, 0); + const chunks = await splitChunks(formattedMemories, chunkSize, "gpt-4o-mini", 0); const datestr = new Date().toUTCString().replace(/:/g, "-"); diff --git a/packages/client-github/src/index.ts b/packages/client-github/src/index.ts index 59260baea7..6e81732385 100644 --- a/packages/client-github/src/index.ts +++ b/packages/client-github/src/index.ts @@ -10,12 +10,8 @@ import { AgentRuntime, Client, IAgentRuntime, - Content, - Memory, + knowledge, stringToUuid, - embeddingZeroVector, - splitChunks, - embed, } from "@ai16z/eliza"; export interface GitHubConfig { @@ -111,11 +107,8 @@ export class GitHubClient { relativePath ); - const memory: Memory = { + await knowledge.set(this.runtime, { /* knowledge.set takes the runtime first, then the item to store */ id: knowledgeId, - agentId: this.runtime.agentId, - userId: this.runtime.agentId, - roomId: this.runtime.agentId, content: { text: content, hash: contentHash, @@ -127,39 +120,7 @@ export class GitHubClient { owner: this.config.owner, }, }, - embedding: embeddingZeroVector, - }; - - await this.runtime.documentsManager.createMemory(memory); - - // Only split if content exceeds 4000 characters - const fragments = - content.length > 4000 - ? 
await splitChunks(content, 2000, 200) - : [content]; - - for (const fragment of fragments) { - // Skip empty fragments - if (!fragment.trim()) continue; - - // Add file path context to the fragment before embedding - const fragmentWithPath = `File: ${relativePath}\n\n${fragment}`; - const embedding = await embed(this.runtime, fragmentWithPath); - - await this.runtime.knowledgeManager.createMemory({ - // We namespace the knowledge base uuid to avoid id - // collision with the document above. - id: stringToUuid(knowledgeId + fragment), - roomId: this.runtime.agentId, - agentId: this.runtime.agentId, - userId: this.runtime.agentId, - content: { - source: knowledgeId, - text: fragment, - }, - embedding, - }); - } + }); } } diff --git a/packages/core/src/generation.ts b/packages/core/src/generation.ts index 448b70518d..0b1feb3dd5 100644 --- a/packages/core/src/generation.ts +++ b/packages/core/src/generation.ts @@ -468,27 +468,32 @@ export async function generateShouldRespond({ export async function splitChunks( content: string, chunkSize: number, + model: string, bleed: number = 100 ): Promise<string[]> { - const encoding = encoding_for_model("gpt-4o-mini"); + const encoding = encoding_for_model(model as TiktokenModel); const tokens = encoding.encode(content); const chunks: string[] = []; const textDecoder = new TextDecoder(); for (let i = 0; i < tokens.length; i += chunkSize) { - const chunk = tokens.slice(i, i + chunkSize); - const decodedChunk = textDecoder.decode(encoding.decode(chunk)); + let chunk = tokens.slice(i, i + chunkSize); // Append bleed characters from the previous chunk - const startBleed = i > 0 ? content.slice(i - bleed, i) : ""; + if (i > 0) { + chunk = new Uint32Array([...tokens.slice(Math.max(0, i - bleed), i), ...chunk]); /* clamp the start: a negative index would count from the end of the token array when bleed exceeds i */ + } + // Append bleed characters from the next chunk - const endBleed = - i + chunkSize < tokens.length - ? 
content.slice(i + chunkSize, i + chunkSize + bleed) - : ""; + if (i + chunkSize < tokens.length) { + chunk = new Uint32Array([ + ...chunk, + ...tokens.slice(i + chunkSize, i + chunkSize + bleed), + ]); + } - chunks.push(startBleed + decodedChunk + endBleed); + chunks.push(textDecoder.decode(encoding.decode(chunk))); } return chunks; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 94084acc07..a4efc41e06 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -18,3 +18,4 @@ export * from "./types.ts"; export * from "./logger.ts"; export * from "./parsing.ts"; export * from "./uuid.ts"; +export { default as knowledge } from "./knowledge.ts"; diff --git a/packages/core/src/knowledge.ts b/packages/core/src/knowledge.ts new file mode 100644 index 0000000000..6347943730 --- /dev/null +++ b/packages/core/src/knowledge.ts @@ -0,0 +1,120 @@ +import { UUID } from "crypto"; + +import { AgentRuntime } from "./runtime.ts"; +import { embed } from "./embedding.ts"; +import { Content, ModelClass, type Memory } from "./types.ts"; +import { stringToUuid } from "./uuid.ts"; +import { embeddingZeroVector } from "./memory.ts"; +import { splitChunks } from "./generation.ts"; +import { models } from "./models.ts"; + +async function get(runtime: AgentRuntime, message: Memory): Promise<string[]> { /* resolves to the text of each source document whose fragments matched the message */ + const embedding = await embed(runtime, message.content.text); + const fragments = await runtime.knowledgeManager.searchMemoriesByEmbedding( + embedding, + { + roomId: message.agentId, + agentId: message.agentId, + count: 3, + match_threshold: 0.1, + } + ); + + const uniqueSources = [ + ...new Set( + fragments.map((memory) => { + console.log((memory as any).similarity); + return memory.content.source; + }) + ), + ]; + + const knowledgeDocuments = await Promise.all( + uniqueSources.map((source) => + runtime.documentsManager.getMemoryById(source as UUID) + ) + ); + + const knowledge = knowledgeDocuments + .filter((memory) => memory !== null) + .map((memory) => 
memory.content.text); + return knowledge; +} + +export type KnowledgeItem = { + id: UUID; + content: Content; +}; + +async function set(runtime: AgentRuntime, item: KnowledgeItem) { + await runtime.documentsManager.createMemory({ + embedding: embeddingZeroVector, + id: item.id, + agentId: runtime.agentId, + roomId: runtime.agentId, + userId: runtime.agentId, + createdAt: Date.now(), + content: item.content, + }); + + const preprocessed = preprocess(item.content.text); + const fragments = await splitChunks( + preprocessed, + 10, + models[runtime.character.modelProvider].model?.[ModelClass.EMBEDDING], + 5 + ); + + for (const fragment of fragments) { + const embedding = await embed(runtime, fragment); /* module-level function: the runtime is the explicit first parameter, not `this` */ + await runtime.knowledgeManager.createMemory({ + // We namespace the knowledge base uuid to avoid id + // collision with the document above. + id: stringToUuid(item.id + fragment), + roomId: runtime.agentId, + agentId: runtime.agentId, + userId: runtime.agentId, + createdAt: Date.now(), + content: { + source: item.id, + text: fragment, + }, + embedding, + }); + } +} + +export function preprocess(content: string): string { + return ( + content + // Remove code blocks and their content + .replace(/```[\s\S]*?```/g, "") + // Remove inline code + .replace(/`.*?`/g, "") + // Convert headers to plain text with emphasis + .replace(/#{1,6}\s*(.*)/g, "$1") + // Remove image links but keep alt text + .replace(/!\[(.*?)\]\(.*?\)/g, "$1") + // Remove links but keep text + .replace(/\[(.*?)\]\(.*?\)/g, "$1") + // Remove HTML tags + .replace(/<[^>]*>/g, "") + // Remove horizontal rules + .replace(/^\s*[-*_]{3,}\s*$/gm, "") + // Remove comments + .replace(/\/\*[\s\S]*?\*\//g, "") + .replace(/\/\/.*/g, "") + // Normalize whitespace + .replace(/\s+/g, " ") + // Remove multiple newlines + .replace(/\n{3,}/g, "\n\n") + .trim() + .toLowerCase() + ); +} + +export default { + get, + set, + preprocess, +}; diff --git a/packages/core/src/runtime.ts b/packages/core/src/runtime.ts index 
b1fecd31a3..0b935171c0 100644 --- a/packages/core/src/runtime.ts +++ b/packages/core/src/runtime.ts @@ -14,8 +14,8 @@ import { } from "./evaluators.ts"; import { generateText } from "./generation.ts"; import { formatGoalsAsString, getGoals } from "./goals.ts"; -import { elizaLogger, embed, splitChunks } from "./index.ts"; -import { embeddingZeroVector, MemoryManager } from "./memory.ts"; +import { elizaLogger } from "./index.ts"; +import { MemoryManager } from "./memory.ts"; import { formatActors, formatMessages, getActorDetails } from "./messages.ts"; import { parseJsonArrayFromText } from "./parsing.ts"; import { formatPosts } from "./posts.ts"; @@ -43,6 +43,7 @@ import { } from "./types.ts"; import { stringToUuid } from "./uuid.ts"; import { v4 as uuidv4 } from "uuid"; +import knowledge from "./knowledge.ts"; /** * Represents the runtime environment for an agent, handling message processing, @@ -231,11 +232,21 @@ export class AgentRuntime implements IAgentRuntime { opts.character?.id ?? opts?.agentId ?? stringToUuid(opts.character?.name ?? uuidv4()); + this.character = opts.character || defaultCharacter; + + // By convention, we create a user and room using the agent id. + // Memories related to it are considered global context for the agent. + this.ensureRoomExists(this.agentId); + this.ensureUserExists( + this.agentId, + this.character.name, + this.character.name + ); + this.ensureParticipantExists(this.agentId, this.agentId); elizaLogger.success("Agent ID", this.agentId); this.fetch = (opts.fetch as typeof fetch) ?? this.fetch; - this.character = opts.character || defaultCharacter; if (!opts.databaseAdapter) { throw new Error("No database adapter provided"); } @@ -331,58 +342,28 @@ export class AgentRuntime implements IAgentRuntime { * then chunks the content into fragments, embeds each fragment, and creates fragment memories. * @param knowledge An array of knowledge items containing id, path, and content. 
*/ - private async processCharacterKnowledge(knowledge: string[]) { - // ensure the room exists and the agent exists in the room - this.ensureRoomExists(this.agentId); - this.ensureUserExists( - this.agentId, - this.character.name, - this.character.name - ); - this.ensureParticipantExists(this.agentId, this.agentId); - - for (const knowledgeItem of knowledge) { - const knowledgeId = stringToUuid(knowledgeItem); + private async processCharacterKnowledge(items: string[]) { + for (const item of items) { + const knowledgeId = stringToUuid(item); const existingDocument = await this.documentsManager.getMemoryById(knowledgeId); - if (!existingDocument) { - console.log( - "Processing knowledge for ", - this.character.name, - " - ", - knowledgeItem.slice(0, 100) - ); - await this.documentsManager.createMemory({ - embedding: embeddingZeroVector, - id: knowledgeId, - agentId: this.agentId, - roomId: this.agentId, - userId: this.agentId, - createdAt: Date.now(), - content: { - text: knowledgeItem, - }, - }); - - const fragments = await splitChunks(knowledgeItem, 1200, 200); - for (const fragment of fragments) { - const embedding = await embed(this, fragment); - await this.knowledgeManager.createMemory({ - // We namespace the knowledge base uuid to avoid id - // collision with the document above. 
- id: stringToUuid(knowledgeId + fragment), - roomId: this.agentId, - agentId: this.agentId, - userId: this.agentId, - createdAt: Date.now(), - content: { - source: knowledgeId, - text: fragment, - }, - embedding, - }); - } + if (existingDocument) { + continue; /* skip only this item; later items in the list may still need processing */ } + + console.log( + "Processing knowledge for ", + this.character.name, + " - ", + item.slice(0, 100) + ); + + await knowledge.set(this, { + id: knowledgeId, + content: { + text: item, + }, + }); } } @@ -685,6 +666,7 @@ export class AgentRuntime implements IAgentRuntime { message: Memory, additionalKeys: { [key: string]: unknown } = {} ) { + console.log("compose state"); const { userId, roomId } = message; const conversationLength = this.getConversationLength(); @@ -910,32 +892,8 @@ Text: ${attachment.text} .join(" "); } - async function getKnowledge( - runtime: AgentRuntime, - message: Memory - ): Promise { - const embedding = await embed(runtime, message.content.text); - - const memories = - await runtime.knowledgeManager.searchMemoriesByEmbedding( - embedding, - { - roomId: message.agentId, - agentId: message.agentId, - count: 3, - } - ); - - const knowledge = memories.map((memory) => memory.content.text); - return knowledge; - } - - const formatKnowledge = (knowledge: string[]) => { - return knowledge.map((knowledge) => `- ${knowledge}`).join("\n"); - }; - const formattedKnowledge = formatKnowledge( - await getKnowledge(this, message) + await knowledge.get(this, message) ); const initialState = { @@ -1217,3 +1175,7 @@ Text: ${attachment.text} } as State; } } + +const formatKnowledge = (knowledge: string[]) => { + return knowledge.map((knowledge) => `- ${knowledge}`).join("\n"); +}; diff --git a/packages/plugin-bootstrap/src/evaluators/fact.ts b/packages/plugin-bootstrap/src/evaluators/fact.ts index 97c22b6e1a..15857f3d11 100644 --- a/packages/plugin-bootstrap/src/evaluators/fact.ts +++ b/packages/plugin-bootstrap/src/evaluators/fact.ts @@ -13,7 +13,7 @@ import { export const formatFacts = (facts: 
Memory[]) => { const messageStrings = facts .reverse() - .map((fact: Memory) => `${(fact.content as Content)?.content}`); + .map((fact: Memory) => fact.content.text); const finalMessageStrings = messageStrings.join("\n"); return finalMessageStrings; }; diff --git a/packages/plugin-bootstrap/src/index.ts b/packages/plugin-bootstrap/src/index.ts index ea004debce..22de71d068 100644 --- a/packages/plugin-bootstrap/src/index.ts +++ b/packages/plugin-bootstrap/src/index.ts @@ -12,6 +12,10 @@ import { boredomProvider } from "./providers/boredom.ts"; import { factsProvider } from "./providers/facts.ts"; import { timeProvider } from "./providers/time.ts"; +export * as actions from "./actions"; +export * as evaluators from "./evaluators"; +export * as providers from "./providers"; + export const bootstrapPlugin: Plugin = { name: "bootstrap", description: "Agent bootstrap with basic actions and evaluators",