Commit

WIP - Condense configurations into conventions for Database (Metadata store) Adapters (#1460)
sarahwooders authored Jul 10, 2024
2 parents 0ddf283 + 7f4711e commit efd4b8b
Showing 75 changed files with 2,434 additions and 1,297 deletions.
8 changes: 8 additions & 0 deletions CONTRIBUTING.md
@@ -145,3 +145,11 @@ If you prefer to keep your resources isolated by developing purely in containers
docker compose -f compose.yaml -f development.compose.yml up
```
This will volume mount your local codebase and reload the server on file changes.

+MemGPT supports two alternative storage backends: Postgres (with pgvector) and SQLite + Chroma. Whenever your unit or integration tests touch the application data model (which is almost always), the test suite runs against _both_ database backends to ensure compatibility.
+After each run you can find the test artifacts in the `.persist` folder; inspect the `SQLite`, `Chroma`, and `pgdata` directories to examine individual test artifacts.

+- For Postgres, each individual test gets a schema named after that test, containing the full data model (see the sketch below).
+- For SQLite, each individual test gets a unique database file, also named after the test.
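
Below is a minimal sketch of inspecting these per-test artifacts, assuming the `.persist` layout described above and a locally reachable Postgres; the paths, host, and credentials are illustrative assumptions, not a documented interface:

```python
# Sketch: inspect the per-test artifacts left in .persist after a test run.
# Paths, host, and credentials are assumptions; adjust them to your setup.
from pathlib import Path

import sqlalchemy as sa

# SQLite backend: one database file per test, named after the test.
for db_file in sorted(Path(".persist/sqlite").glob("*")):
    print("sqlite artifact:", db_file.name)

# Postgres backend: one schema per test, holding the full data model.
engine = sa.create_engine("postgresql+pg8000://memgpt:memgpt@localhost:5432/test_memgpt")
with engine.connect() as conn:
    rows = conn.execute(sa.text(
        "SELECT schema_name FROM information_schema.schemata "
        "WHERE schema_name NOT IN ('pg_catalog', 'information_schema', 'pg_toast')"
    ))
    for (schema,) in rows:
        print("postgres test schema:", schema)
```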


1 change: 1 addition & 0 deletions compose.yaml
@@ -6,6 +6,7 @@ services:
aliases:
- pgvector_db
- memgpt-db
+- memgpt
environment:
- POSTGRES_USER=${MEMGPT_PG_USER}
- POSTGRES_PASSWORD=${MEMGPT_PG_PASSWORD}
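
The new `memgpt` network alias lets other containers on the compose network reach the Postgres service under that hostname as well. A minimal connectivity sketch, assuming it runs inside a container attached to this network and uses the credentials from this file:

```python
# Sketch: reach the pgvector service via the newly added "memgpt" alias.
# Assumes execution inside a container on the same compose network.
import sqlalchemy as sa

engine = sa.create_engine("postgresql+pg8000://memgpt:memgpt@memgpt:5432/memgpt")
with engine.connect() as conn:
    print(conn.execute(sa.text("SELECT 1")).scalar())  # prints 1 if reachable
```
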
25 changes: 14 additions & 11 deletions configs/server_config.yaml
@@ -4,33 +4,36 @@ persona = sam_pov
human = basic

[model]
-model = gpt-4
-model_endpoint = https://api.openai.com/v1
-model_endpoint_type = openai
-model_wrapper = null
+model = ehartford/dolphin-2.5-mixtral-8x7b
+model_endpoint = https://api.memgpt.ai
+model_endpoint_type = vllm
+context_window = 8192

[embedding]
-embedding_endpoint_type = openai
-embedding_endpoint = https://api.openai.com/v1
-embedding_model = text-embedding-ada-002
-embedding_dim = 1536
+embedding_endpoint_type = hugging-face
+embedding_endpoint = https://embeddings.memgpt.ai
+embedding_model = BAAI/bge-large-en-v1.5
+embedding_dim = 1024
embedding_chunk_size = 300

[archival_storage]
type = postgres
path = /root/.memgpt/chroma
-uri = postgresql+pg8000://memgpt:memgpt@pgvector_db:5432/memgpt
+uri = postgresql+pg8000://memgpt:memgpt@memgpt-db:5432/memgpt

[recall_storage]
type = postgres
path = /root/.memgpt
-uri = postgresql+pg8000://memgpt:memgpt@pgvector_db:5432/memgpt
+uri = postgresql+pg8000://memgpt:memgpt@memgpt-db:5432/memgpt

[metadata_storage]
type = postgres
path = /root/.memgpt
-uri = postgresql+pg8000://memgpt:memgpt@pgvector_db:5432/memgpt
+uri = postgresql+pg8000://memgpt:memgpt@memgpt-db:5432/memgpt

[version]
memgpt_version = 0.3.14

[client]
anon_clientid = 00000000-0000-0000-0000-000000000000
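
All three storage sections now follow a single convention: the same `postgresql+pg8000` URL pointing at the `memgpt-db` hostname. A sketch of composing that URL in code; the user and password variable names match compose.yaml, while the host and database variable names are illustrative assumptions:

```python
# Sketch: build the shared storage URI the same way the config above does.
# MEMGPT_PG_USER / MEMGPT_PG_PASSWORD match compose.yaml; the host and
# database variable names are illustrative assumptions.
import os

def storage_uri() -> str:
    user = os.getenv("MEMGPT_PG_USER", "memgpt")
    password = os.getenv("MEMGPT_PG_PASSWORD", "memgpt")
    host = os.getenv("MEMGPT_PG_HOST", "memgpt-db")
    database = os.getenv("MEMGPT_PG_DB", "memgpt")
    return f"postgresql+pg8000://{user}:{password}@{host}:5432/{database}"

print(storage_uri())  # postgresql+pg8000://memgpt:memgpt@memgpt-db:5432/memgpt
```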

3 changes: 3 additions & 0 deletions development.compose.yml
@@ -12,8 +12,10 @@ services:
- memgpt_db
env_file:
- .env
+# the no-value syntax leaves a variable unset entirely when it is missing from .env
environment:
- MEMGPT_SERVER_PASS=test_server_token
+- MEMGPT_PG_URI=postgresql://memgpt:memgpt@memgpt-db:5432/memgpt
- WATCHFILES_FORCE_POLLING=true

volumes:
@@ -25,6 +27,7 @@ services:
- ./tests/pytest.ini:/memgpt/pytest.ini
- ./pyproject.toml:/pyproject.toml
- ./tests:/tests
+- ./.persist/sqlite:/sqlite
ports:
- "8083:8083"
- "8283:8283"
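
`MEMGPT_PG_URI` points the development server at the compose Postgres service, and the new `./.persist/sqlite` mount is where the SQLite test artifacts land. A sketch of the convention this enables, choosing the backend from that single variable; it mirrors the idea of the change and is not the actual `memgpt.settings` code:

```python
# Sketch: derive the database URL from one env var, falling back to the
# mounted SQLite path when MEMGPT_PG_URI is unset. Illustrative only.
import os

def database_url() -> str:
    pg_uri = os.getenv("MEMGPT_PG_URI")
    if pg_uri:
        # Normalize to the pg8000 driver used throughout this change.
        return pg_uri.replace("postgresql://", "postgresql+pg8000://", 1)
    return "sqlite:////sqlite/memgpt.db"  # inside the ./.persist/sqlite mount

print(database_url())
```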
8 changes: 4 additions & 4 deletions docs/autogen.md
@@ -156,7 +156,7 @@ config_list = [
# MemGPT-powered agents will also use local LLMs, but they need additional setup (also they use the Completions endpoint)
config_list_memgpt = [
{
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model": None, # not required for web UI, only required for Ollama, see: https://memgpt.readme.io/docs/ollama
"model_wrapper": "airoboros-l2-70b-2.1", # airoboros is the default wrapper and should work for most models
"model_endpoint_type": "webui",
@@ -183,7 +183,7 @@ config_list = [
# MemGPT-powered agents will also use local LLMs, but they need additional setup (also they use the Completions endpoint)
config_list_memgpt = [
{
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model": None,
"model_wrapper": "airoboros-l2-70b-2.1",
"model_endpoint_type": "lmstudio",
@@ -209,7 +209,7 @@ config_list = [
# This config is for autogen agents that powered by MemGPT
config_list_memgpt = [
{
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model": "gpt-4",
"context_window": 8192, # gpt-4 context window
"model_wrapper": None,
@@ -240,7 +240,7 @@ config_list = [
# This config is for autogen agents that powered by MemGPT
config_list_memgpt = [
{
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model": "gpt-4", # make sure you choose a model that you have access to deploy on your Azure account
"model_wrapper": None,
"context_window": 8192, # gpt-4 context window
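
Every hunk in this file makes the same substitution: the module-level constant `DEFAULT_PRESET` becomes `settings.preset`. A minimal sketch of the shape such a settings object could take; this is an assumed illustration, not the actual `memgpt.settings` implementation:

```python
# Sketch of a conventions-over-configuration settings object.
# An assumed shape for illustration; the real memgpt.settings may differ.
import os
from dataclasses import dataclass, field

@dataclass
class Settings:
    preset: str = field(default_factory=lambda: os.getenv("MEMGPT_PRESET", "memgpt_chat"))
    persona: str = field(default_factory=lambda: os.getenv("MEMGPT_PERSONA", "sam_pov"))
    human: str = field(default_factory=lambda: os.getenv("MEMGPT_HUMAN", "basic"))

settings = Settings()
print(settings.preset)  # overridable via the environment instead of a constant
```
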
8 changes: 4 additions & 4 deletions examples/memgpt_client.py
@@ -1,7 +1,7 @@
import json

from memgpt import Admin, create_client
-from memgpt.constants import DEFAULT_HUMAN, DEFAULT_PERSONA, DEFAULT_PRESET
+from memgpt.settings import settings
from memgpt.utils import get_human_text, get_persona_text

"""
@@ -33,9 +33,9 @@ def main():
# Create an agent
agent_info = client.create_agent(
name="my_agent",
-preset=DEFAULT_PRESET,
-persona=get_persona_text(DEFAULT_PERSONA),
-human=get_human_text(DEFAULT_HUMAN),
+preset=settings.preset,
+persona=get_persona_text(settings.persona),
+human=get_human_text(settings.human),
)
print(f"Created agent: {agent_info.name} with ID {str(agent_info.id)}")

4 changes: 4 additions & 0 deletions init.sql
@@ -34,3 +34,7 @@ ALTER DATABASE :"db_name"
CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA :"db_name";

DROP SCHEMA IF EXISTS public CASCADE;

+CREATE DATABASE test_memgpt;
+GRANT ALL PRIVILEGES ON DATABASE test_memgpt to "memgpt";

116 changes: 116 additions & 0 deletions memgpt/alembic.ini
@@ -0,0 +1,116 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
# Use forward slashes (/) also on windows to provide an os agnostic path
script_location = migrations

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory.
prepend_sys_path = .

# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to migrations/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "version_path_separator" below.
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions

# version path separator; As mentioned above, this is the character used to split
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
# Valid values for version_path_separator are:
#
# version_path_separator = :
# version_path_separator = ;
# version_path_separator = space
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

sqlalchemy.url = %(MEMGPT_DATABASE_URL)s


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
# hooks = ruff
# ruff.type = exec
# ruff.executable = %(here)s/.venv/bin/ruff
# ruff.options = --fix REVISION_SCRIPT_FILENAME

# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
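
The `sqlalchemy.url = %(MEMGPT_DATABASE_URL)s` line defers the database URL to an ini interpolation variable instead of hard-coding it. One way to satisfy that placeholder is to supply the variable at invocation time through Alembic's config API, as sketched below; how this change actually wires it (for example in `migrations/env.py`) is not shown in the diff:

```python
# Sketch: run migrations with the URL supplied at runtime, satisfying the
# %(MEMGPT_DATABASE_URL)s placeholder above. The fallback URL is an
# illustrative assumption.
import os

from alembic import command
from alembic.config import Config

cfg = Config("memgpt/alembic.ini")
cfg.set_section_option(
    "alembic",
    "MEMGPT_DATABASE_URL",
    os.environ.get(
        "MEMGPT_DATABASE_URL",
        "postgresql+pg8000://memgpt:memgpt@localhost:5432/memgpt",
    ),
)
command.upgrade(cfg, "head")
```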
9 changes: 5 additions & 4 deletions memgpt/autogen/examples/agent_autoreply.py
@@ -13,8 +13,9 @@

import autogen

+from memgpt.settings import settings
from memgpt.autogen.memgpt_agent import create_memgpt_autogen_agent_from_config
-from memgpt.constants import DEFAULT_PRESET, LLM_MAX_TOKENS
+from memgpt.constants import LLM_MAX_TOKENS

LLM_BACKEND = "openai"
# LLM_BACKEND = "azure"
@@ -40,7 +41,7 @@
{
"model": model,
"context_window": LLM_MAX_TOKENS[model],
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model_wrapper": None,
# OpenAI specific
"model_endpoint_type": "openai",
@@ -79,7 +80,7 @@
{
"model": model,
"context_window": LLM_MAX_TOKENS[model],
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model_wrapper": None,
# Azure specific
"model_endpoint_type": "azure",
@@ -108,7 +109,7 @@
# MemGPT-powered agents will also use local LLMs, but they need additional setup (also they use the Completions endpoint)
config_list_memgpt = [
{
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model": None, # only required for Ollama, see: https://memgpt.readme.io/docs/ollama
"context_window": 8192, # the context window of your model (for Mistral 7B-based models, it's likely 8192)
"model_wrapper": "chatml", # chatml is the default wrapper
9 changes: 5 additions & 4 deletions memgpt/autogen/examples/agent_docs.py
@@ -15,8 +15,9 @@

import autogen

+from memgpt.settings import settings
from memgpt.autogen.memgpt_agent import create_memgpt_autogen_agent_from_config
-from memgpt.constants import DEFAULT_PRESET, LLM_MAX_TOKENS
+from memgpt.constants import LLM_MAX_TOKENS

LLM_BACKEND = "openai"
# LLM_BACKEND = "azure"
@@ -42,7 +43,7 @@
{
"model": model,
"context_window": LLM_MAX_TOKENS[model],
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model_wrapper": None,
# OpenAI specific
"model_endpoint_type": "openai",
@@ -81,7 +82,7 @@
{
"model": model,
"context_window": LLM_MAX_TOKENS[model],
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model_wrapper": None,
# Azure specific
"model_endpoint_type": "azure",
@@ -110,7 +111,7 @@
# MemGPT-powered agents will also use local LLMs, but they need additional setup (also they use the Completions endpoint)
config_list_memgpt = [
{
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model": None, # only required for Ollama, see: https://memgpt.readme.io/docs/ollama
"context_window": 8192, # the context window of your model (for Mistral 7B-based models, it's likely 8192)
"model_wrapper": "chatml", # chatml is the default wrapper
9 changes: 5 additions & 4 deletions memgpt/autogen/examples/agent_groupchat.py
@@ -13,8 +13,9 @@

import autogen

+from memgpt.settings import settings
from memgpt.autogen.memgpt_agent import create_memgpt_autogen_agent_from_config
-from memgpt.constants import DEFAULT_PRESET, LLM_MAX_TOKENS
+from memgpt.constants import LLM_MAX_TOKENS

LLM_BACKEND = "openai"
# LLM_BACKEND = "azure"
@@ -40,7 +41,7 @@
{
"model": model,
"context_window": LLM_MAX_TOKENS[model],
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model_wrapper": None,
# OpenAI specific
"model_endpoint_type": "openai",
@@ -79,7 +80,7 @@
{
"model": model,
"context_window": LLM_MAX_TOKENS[model],
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model_wrapper": None,
# Azure specific
"model_endpoint_type": "azure",
@@ -108,7 +109,7 @@
# MemGPT-powered agents will also use local LLMs, but they need additional setup (also they use the Completions endpoint)
config_list_memgpt = [
{
-"preset": DEFAULT_PRESET,
+"preset": settings.preset,
"model": None, # only required for Ollama, see: https://memgpt.readme.io/docs/ollama
"context_window": 8192, # the context window of your model (for Mistral 7B-based models, it's likely 8192)
"model_wrapper": "chatml", # chatml is the default wrapper
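
The three example scripts above all build `config_list_memgpt` the same way and hand it to `create_memgpt_autogen_agent_from_config`. A hedged usage sketch follows; the `openai_key` field and the exact keyword arguments are assumptions based on the usual pattern in these examples, since the diff does not show the call site:

```python
# Sketch: hand a MemGPT config list to an autogen-compatible agent.
# Field names and keyword arguments are assumptions; the call site is
# not shown in this diff.
import os

from memgpt.settings import settings
from memgpt.autogen.memgpt_agent import create_memgpt_autogen_agent_from_config

config_list_memgpt = [
    {
        "preset": settings.preset,
        "model": "gpt-4",
        "context_window": 8192,
        "model_wrapper": None,
        "model_endpoint_type": "openai",
        "model_endpoint": "https://api.openai.com/v1",
        "openai_key": os.environ["OPENAI_API_KEY"],  # assumed field name
    }
]

memgpt_agent = create_memgpt_autogen_agent_from_config(
    "MemGPT_agent",
    llm_config={"config_list": config_list_memgpt},
    system_message="You are a helpful assistant.",
)
```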