Skip to content

Commit

Permalink
fix: moved len_tokens and msgs2dicts from util.py to message.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ErikBjare committed Aug 13, 2024
1 parent a740194 commit 94bade3
Show file tree
Hide file tree
Showing 9 changed files with 28 additions and 31 deletions.
3 changes: 2 additions & 1 deletion gptme/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .logmanager import LogManager
from .message import (
Message,
len_tokens,
msgs_to_toml,
print_msg,
toml_to_msgs,
Expand All @@ -25,7 +26,7 @@
from .tools.context import gen_context_msg
from .tools.summarize import summarize
from .tools.useredit import edit_text_with_editor
from .util import ask_execute, len_tokens
from .util import ask_execute

logger = logging.getLogger(__name__)

Expand Down
4 changes: 2 additions & 2 deletions gptme/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

from .config import get_config
from .constants import PROMPT_ASSISTANT
from .message import Message
from .message import Message, len_tokens, msgs2dicts
from .models import MODELS, get_summary_model
from .util import extract_codeblocks, len_tokens, msgs2dicts
from .util import extract_codeblocks

# Optimized for code
# Discussion here: https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api-a-few-tips-and-tricks-on-controlling-the-creativity-deterministic-output-of-prompt-responses/172683
Expand Down
3 changes: 1 addition & 2 deletions gptme/logmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@

from .constants import CMDFIX
from .dirs import get_logs_dir
from .message import Message, print_msg
from .message import Message, print_msg, len_tokens
from .prompts import get_prompt
from .tools.reduce import limit_log, reduce_log
from .util import len_tokens

PathLike: TypeAlias = str | Path

Expand Down
21 changes: 19 additions & 2 deletions gptme/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from typing_extensions import Self

from .constants import ROLE_COLOR
from .util import extract_codeblocks, get_tokenizer

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -201,8 +202,6 @@ def get_codeblocks(self) -> list[tuple[str, str]]:
if backtick_count < 2:
return []

from .util import extract_codeblocks # noreorder

return extract_codeblocks(content_str)


Expand Down Expand Up @@ -301,3 +300,21 @@ def toml_to_msgs(toml: str) -> list[Message]:
)
for msg in msgs
]


def msgs2dicts(msgs: list[Message], openai=False, anthropic=False) -> list[dict]:
    """Convert a list of Message objects to a list of dicts ready to pass to an LLM."""
    # Only role and content are serialized; provider flags let to_dict()
    # shape the dict for the OpenAI or Anthropic API formats.
    keys = ["role", "content"]
    return [m.to_dict(keys=keys, openai=openai, anthropic=anthropic) for m in msgs]


# TODO: remove model assumption
def len_tokens(content: str | Message | list[Message], model: str = "gpt-4") -> int:
    """Get the number of tokens in a string, message, or list of messages."""
    # Unwrap a single Message to its text, then handle the remaining cases.
    if isinstance(content, Message):
        content = content.content
    if isinstance(content, list):
        # Total the per-message token counts.
        return sum(len_tokens(m.content, model) for m in content)
    return len(get_tokenizer(model).encode(content))
3 changes: 1 addition & 2 deletions gptme/tools/reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
from collections.abc import Generator
from copy import copy

from ..message import Message
from ..message import Message, len_tokens
from ..models import get_model
from ..util import len_tokens

logger = logging.getLogger(__name__)

Expand Down
3 changes: 1 addition & 2 deletions gptme/tools/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from functools import lru_cache

from ..llm import summarize as _summarize
from ..message import Message, format_msgs
from ..util import len_tokens
from ..message import Message, format_msgs, len_tokens

logger = logging.getLogger(__name__)

Expand Down
17 changes: 0 additions & 17 deletions gptme/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,11 @@
from rich.console import Console
from rich.syntax import Syntax

from .message import Message

EMOJI_WARN = "⚠️"

logger = logging.getLogger(__name__)


# FIXME: model assumption
def len_tokens(content: str | Message | list[Message], model: str = "gpt-4") -> int:
    """Get the number of tokens in a string, message, or list of messages.

    Recurses into lists and Message objects; a plain string is encoded
    with the tokenizer returned by get_tokenizer(model).
    """
    if isinstance(content, list):
        # Sum the token counts of each message's text content.
        return sum(len_tokens(msg.content, model) for msg in content)
    if isinstance(content, Message):
        return len_tokens(content.content, model)
    return len(get_tokenizer(model).encode(content))


def get_tokenizer(model: str):
if "gpt-4" in model or "gpt-3.5" in model:
return tiktoken.encoding_for_model(model)
Expand All @@ -38,11 +26,6 @@ def get_tokenizer(model: str):
return tiktoken.get_encoding("cl100k_base")


def msgs2dicts(msgs: list[Message], openai=False, anthropic=False) -> list[dict]:
    """Convert a list of Message objects to a list of dicts ready to pass to an LLM."""
    # Serializes only role/content; the openai/anthropic flags are forwarded
    # so Message.to_dict() can shape the dict for that provider's API.
    return [msg.to_dict(keys=["role", "content"], openai=openai, anthropic=anthropic) for msg in msgs]


actions = [
"running",
"jumping",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_prompts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from gptme.prompts import get_prompt
from gptme.util import len_tokens
from gptme.message import len_tokens


def test_get_prompt():
Expand Down
3 changes: 1 addition & 2 deletions tests/test_tools_reduce.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from pathlib import Path

import pytest
from gptme.message import Message
from gptme.message import Message, len_tokens
from gptme.tools.reduce import reduce_log, truncate_msg
from gptme.util import len_tokens

# Project root
root = Path(__file__).parent.parent
Expand Down

0 comments on commit 94bade3

Please sign in to comment.