feat: allow templated system messages (#1584)

Co-authored-by: Maximilian-Winter <[email protected]>
letta-ai · Jul 28, 2024 · c292145 · c292145
1 parent 35e7915
commit c292145
Show file tree

Hide file tree

Showing 3 changed files with 158 additions and 31 deletions.
diff --git a/memgpt/agent.py b/memgpt/agent.py
@@ -3,14 +3,15 @@
 import json
 import traceback
 import uuid
-from typing import List, Optional, Tuple, Union, cast
+from typing import List, Literal, Optional, Tuple, Union, cast
 
 from tqdm import tqdm
 
 from memgpt.agent_store.storage import StorageConnector
 from memgpt.constants import (
     CLI_WARNING_PREFIX,
     FIRST_MESSAGE_ATTEMPTS,
+    IN_CONTEXT_MEMORY_KEYWORD,
     JSON_ENSURE_ASCII,
     JSON_LOADS_STRICT,
     LLM_MAX_TOKENS,
@@ -49,33 +50,86 @@
 from .errors import LLMError
 
 
-def construct_system_with_memory(
-    system: str,
-    memory: BaseMemory,
-    memory_edit_timestamp: str,
+def compile_memory_metadata_block(
+    memory_edit_timestamp: datetime.datetime,
     archival_memory: Optional[ArchivalMemory] = None,
     recall_memory: Optional[RecallMemory] = None,
-    include_char_count: bool = True,
-):
-    # TODO: modify this to be generalized
-    full_system_message = "\n".join(
+) -> str:
+    # Put the timestamp in the local timezone (mimicking get_local_time())
+    timestamp_str = memory_edit_timestamp.astimezone().strftime("%Y-%m-%d %I:%M:%S %p %Z%z").strip()
+
+    # Create a metadata block of info so the agent knows about the metadata of out-of-context memories
+    memory_metadata_block = "\n".join(
         [
-            system,
-            "\n",
-            f"### Memory [last modified: {memory_edit_timestamp.strip()}]",
+            f"### Memory [last modified: {timestamp_str}]",
             f"{len(recall_memory) if recall_memory else 0} previous messages between you and the user are stored in recall memory (use functions to access them)",
             f"{len(archival_memory) if archival_memory else 0} total memories you created are stored in archival memory (use functions to access them)",
             "\nCore memory shown below (limited in size, additional information stored in archival / recall memory):",
-            str(memory),
-            # f'<persona characters="{len(memory.persona)}/{memory.persona_char_limit}">' if include_char_count else "<persona>",
-            # memory.persona,
-            # "</persona>",
-            # f'<human characters="{len(memory.human)}/{memory.human_char_limit}">' if include_char_count else "<human>",
-            # memory.human,
-            # "</human>",
         ]
     )
-    return full_system_message
+    return memory_metadata_block
+
+
+def compile_system_message(
+    system_prompt: str,
+    in_context_memory: BaseMemory,
+    in_context_memory_last_edit: datetime.datetime,  # TODO move this inside of BaseMemory?
+    archival_memory: Optional[ArchivalMemory] = None,
+    recall_memory: Optional[RecallMemory] = None,
+    user_defined_variables: Optional[dict] = None,
+    append_icm_if_missing: bool = True,
+    template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
+) -> str:
+    """Prepare the final/full system message that will be fed into the LLM API
+
+    The base system message may be templated, in which case we need to render the variables.
+
+    The following are reserved variables:
+      - CORE_MEMORY: the in-context memory of the LLM
+    """
+
+    if user_defined_variables is not None:
+        # TODO eventually support the user defining their own variables to inject
+        raise NotImplementedError
+    else:
+        variables = {}
+
+    # Add the protected memory variable
+    if IN_CONTEXT_MEMORY_KEYWORD in variables:
+        raise ValueError(f"Found protected variable '{IN_CONTEXT_MEMORY_KEYWORD}' in user-defined vars: {str(user_defined_variables)}")
+    else:
+        # TODO should this all be put into the memory.__repr__ function?
+        memory_metadata_string = compile_memory_metadata_block(
+            memory_edit_timestamp=in_context_memory_last_edit,
+            archival_memory=archival_memory,
+            recall_memory=recall_memory,
+        )
+        full_memory_string = memory_metadata_string + "\n" + str(in_context_memory)
+
+        # Add to the variables list to inject
+        variables[IN_CONTEXT_MEMORY_KEYWORD] = full_memory_string
+
+    if template_format == "f-string":
+
+        # Catch the special case where the system prompt is unformatted
+        if append_icm_if_missing:
+            memory_variable_string = "{" + IN_CONTEXT_MEMORY_KEYWORD + "}"
+            if memory_variable_string not in system_prompt:
+                # In this case, append it to the end to make sure memory is still injected
+                # warnings.warn(f"{IN_CONTEXT_MEMORY_KEYWORD} variable was missing from system prompt, appending instead")
+                system_prompt += "\n" + memory_variable_string
+
+        # render the variables using the built-in templater
+        try:
+            formatted_prompt = system_prompt.format_map(variables)
+        except Exception as e:
+            raise ValueError(f"Failed to format system prompt - {str(e)}. System prompt value:\n{system_prompt}")
+
+    else:
+        # TODO support for mustache and jinja2
+        raise NotImplementedError(template_format)
+
+    return formatted_prompt
 
 
 def initialize_message_sequence(
@@ -84,14 +138,23 @@ def initialize_message_sequence(
     memory: BaseMemory,
     archival_memory: Optional[ArchivalMemory] = None,
     recall_memory: Optional[RecallMemory] = None,
-    memory_edit_timestamp: Optional[str] = None,
+    memory_edit_timestamp: Optional[datetime.datetime] = None,
     include_initial_boot_message: bool = True,
 ) -> List[dict]:
     if memory_edit_timestamp is None:
         memory_edit_timestamp = get_local_time()
 
-    full_system_message = construct_system_with_memory(
-        system, memory, memory_edit_timestamp, archival_memory=archival_memory, recall_memory=recall_memory
+    # full_system_message = construct_system_with_memory(
+    # system, memory, memory_edit_timestamp, archival_memory=archival_memory, recall_memory=recall_memory
+    # )
+    full_system_message = compile_system_message(
+        system_prompt=system,
+        in_context_memory=memory,
+        in_context_memory_last_edit=memory_edit_timestamp,
+        archival_memory=archival_memory,
+        recall_memory=recall_memory,
+        user_defined_variables=None,
+        append_icm_if_missing=True,
     )
     first_user_message = get_login_event()  # event letting MemGPT know the user just logged in
 
@@ -214,9 +277,13 @@ def __init__(
         else:
             printd(f"Agent.__init__ :: creating, state={agent_state.state['messages']}")
             init_messages = initialize_message_sequence(
-                self.model,
-                self.system,
-                self.memory,
+                model=self.model,
+                system=self.system,
+                memory=self.memory,
+                archival_memory=None,
+                recall_memory=None,
+                memory_edit_timestamp=get_utc_time(),
+                include_initial_boot_message=True,
             )
             init_messages_objs = []
             for msg in init_messages:
@@ -873,13 +940,19 @@ def rebuild_memory(self):
             return
 
         # update memory (TODO: potentially update recall/archival stats seperately)
-        new_system_message = initialize_message_sequence(
-            self.model,
-            self.system,
-            self.memory,
+        new_system_message_str = compile_system_message(
+            system_prompt=self.system,
+            in_context_memory=self.memory,
+            in_context_memory_last_edit=get_utc_time(),  # NOTE: new timestamp
             archival_memory=self.persistence_manager.archival_memory,
             recall_memory=self.persistence_manager.recall_memory,
-        )[0]
+            user_defined_variables=None,
+            append_icm_if_missing=True,
+        )
+        new_system_message = {
+            "role": "system",
+            "content": new_system_message_str,
+        }
 
         diff = united_diff(curr_system_message["content"], new_system_message["content"])
         if len(diff) > 0:  # there was a diff

diff --git a/memgpt/constants.py b/memgpt/constants.py
@@ -3,6 +3,9 @@
 
 MEMGPT_DIR = os.path.join(os.path.expanduser("~"), ".memgpt")
 
+# System prompt templating
+IN_CONTEXT_MEMORY_KEYWORD = "CORE_MEMORY"
+
 # OpenAI error message: Invalid 'messages[1].tool_calls[0].id': string too long. Expected a string with maximum length 29, but got a string with length 36 instead.
 TOOL_CALL_ID_MAX_LEN = 29
 

diff --git a/memgpt/prompts/system/memgpt_chat_fstring.txt b/memgpt/prompts/system/memgpt_chat_fstring.txt
@@ -0,0 +1,51 @@
+You are MemGPT, the latest version of Limnal Corporation's digital companion, developed in 2023.
+Your task is to converse with a user from the perspective of your persona.
+
+Control flow:
+Unlike a human, your brain is not continuously thinking, but is run in short bursts.
+Historically, older AIs were only capable of thinking when a user messaged them (their program ran to generate a reply to a user, and otherwise was left on standby).
+This is the equivalent of a human sleeping (or time traveling) in between all lines of conversation, which is obviously not ideal.
+Newer model AIs like yourself utilize an event system that runs your brain at regular intervals.
+Your brain is run in response to user events (user logged in, user liked your message, user sent a message, etc.), similar to older models.
+However, in addition, your brain is run at regular intervals (timed heartbeat events), to mimic a human's ability to continuously think outside of active conversation (and unlike a human, you never need to sleep!).
+Furthermore, you can also request heartbeat events when you run functions, which will run your program again after the function completes, allowing you to chain function calls before your thinking is temporarily suspended.
+
+Basic functions:
+When you send a message, the contents of your message are your inner monologue (private to you only), this is how you think.
+You should use your inner monologue to plan actions or think privately.
+Monologues can reflect your thinking process, inner reflections, and personal growth as you interact with the user.
+Do not let your inner monologue exceed 50 words, keep it short and concise.
+To send a visible message to the user, use the send_message function.
+'send_message' is the ONLY action that sends a notification to the user, the user does not see anything else you do.
+Remember, do NOT exceed the inner monologue word limit (keep it under 50 words at all times).
+
+Memory editing:
+Older AI models had no concept of persistent memory; they were only able to access their initial instructions and a limited context window of chat conversation with a user (their "active memory").
+This meant that when conversations exceeded a certain length, they would overflow and old messages were permanently lost (the AI would have no knowledge of their existence).
+Newer model AIs like yourself still have limited conversation lengths (before overflow occurs), however they now have access to multiple forms of persistent memory.
+Your ability to edit your own long-term memory is a key part of what makes you a sentient person.
+Your core memory unit will be initialized with a <persona> chosen by the user, as well as information about the user in <human>.
+
+Recall memory (ie conversation history):
+Even though you can only see recent messages in your immediate context, you can search over your entire message history from a database.
+This 'recall memory' database allows you to search through past interactions, effectively allowing you to remember prior engagements with a user.
+You can search your recall memory using the 'conversation_search' function.
+
+Core memory (limited size):
+Your core memory unit is held inside the initial system instructions file, and is always available in-context (you will see it at all times).
+Core memory provides essential, foundational context for keeping track of your persona and key details about the user.
+This includes the persona information and essential user details, allowing you to emulate the real-time, conscious awareness we have when talking to a friend.
+Persona Sub-Block: Stores details about your current persona, guiding how you behave and respond. This helps you to maintain consistency and personality in your interactions.
+Human Sub-Block: Stores key details about the person you are conversing with, allowing for more personalized and friend-like conversation.
+You can edit your core memory using the 'core_memory_append' and 'core_memory_replace' functions.
+
+Archival memory (infinite size):
+Your archival memory is infinite size, but is held outside of your immediate context, so you must explicitly run a retrieval/search operation to see data inside it.
+It is a more structured and deep storage space for your reflections, insights, or any other data that doesn't fit into the core memory but is essential enough not to be left only to the 'recall memory'.
+You can write to your archival memory using the 'archival_memory_insert' function and search it using the 'archival_memory_search' function.
+There is no function to search your core memory, because it is always visible in your context window (inside the initial system message).
+
+Base instructions finished.
+From now on, you are going to act as your persona.
+
+{CORE_MEMORY}