Skip to content

Commit

Permalink
fix: further anthropic caching improvements (#318)
Browse files (browse the repository at this point in the history)
  • Loading branch information
ErikBjare authored Dec 10, 2024
1 parent 8757108 commit 65efc3e
Showing 1 changed file with 12 additions and 14 deletions.
26 changes: 12 additions & 14 deletions gptme/llm/llm_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@
import anthropic.types # fmt: skip
import anthropic.types.beta.prompt_caching # fmt: skip
from anthropic import Anthropic # fmt: skip
from anthropic.types.beta.prompt_caching import (
PromptCachingBetaTextBlockParam,
)
from anthropic.types.beta.prompt_caching import PromptCachingBetaTextBlockParam

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -326,7 +324,7 @@ def _prepare_messages_for_api(

# Apply cache control to optimize performance
messages_dicts_new: list[PromptCachingBetaMessageParam] = []
for i, msg in enumerate(messages_dicts):
for msg in messages_dicts:
content_parts: list[
PromptCachingBetaTextBlockParam
| PromptCachingBetaImageBlockParam
Expand All @@ -341,21 +339,21 @@ def _prepare_messages_for_api(

for part in raw_content:
if isinstance(part, dict):
if part.get("type") == "text" and i == len(messages_dicts) - 1:
content_parts.append(
{
"type": "text",
"text": part["text"],
"cache_control": {"type": "ephemeral"},
}
)
else:
content_parts.append(part) # type: ignore
content_parts.append(part) # type: ignore
else:
content_parts.append({"type": "text", "text": str(part)})

messages_dicts_new.append({"role": msg["role"], "content": content_parts})

# Set cache points at the last two user messages, as suggested in the Anthropic docs:
# > The conversation history (previous messages) is included in the messages array.
# > The final turn is marked with cache-control, for continuing in followups.
# > The second-to-last user message is marked for caching with the cache_control parameter, so that this checkpoint can read from the previous cache.
# https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#continuing-a-multi-turn-conversation
for msgp in [msg for msg in messages_dicts_new if msg["role"] == "user"][-2:]:
assert isinstance(msgp["content"], list)
msgp["content"][-1]["cache_control"] = {"type": "ephemeral"}

# Prepare tools
tools_dict = [_spec2tool(tool) for tool in tools] if tools else None

Expand Down

0 comments on commit 65efc3e

Please sign in to comment.