-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfunction.weave_client.py
142 lines (126 loc) · 4.98 KB
/
function.weave_client.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
"""
title: Weave (Wandb) Context and Completion Logging
description: Weave (by Weights and Biases) context and completion logging for OpenWebUI
author: m-rgba (@martinmark)
author_url: https://github.com/m-rgba/openwebui-weave
version: 0.1
requirements: wandb, weave
"""
# Note: This is an alternate implementation that uses the Weave client SDK directly.
# For most use cases, you'll want to use function.py instead,
# which has no external dependencies and implements the same functionality using the Weave service API.
try:
import wandb
except ImportError:
import subprocess
import sys
subprocess.check_call([sys.executable, "-m", "pip", "install", "wandb"])
import wandb
try:
import weave
except ImportError:
import subprocess
import sys
subprocess.check_call([sys.executable, "-m", "pip", "install", "weave"])
import weave
import tiktoken
from pydantic import BaseModel, Field
from typing import List, Optional
class Filter:
    """Open WebUI filter that records each chat completion as a Weave call.

    ``inlet`` opens a Weave call containing the request, and ``outlet``
    closes it with the last assistant message and its token usage.

    NOTE(review): the open call is stored on the instance, so overlapping
    requests routed through the same ``Filter`` object would overwrite each
    other's call -- confirm how the host schedules ``inlet``/``outlet``.
    """

    # Flat token allowances added to the tiktoken estimates, approximating the
    # chat-format overhead described in the OpenAI cookbook linked below.
    # NOTE(review): assumed to be applied once per request, not per message --
    # confirm against the upstream file.
    _PROMPT_TOKEN_BUFFER = 4
    _COMPLETION_TOKEN_BUFFER = 3

    class Valves(BaseModel):
        """User-configurable settings for the filter."""

        priority: int = Field(
            default=0, description="Priority level for the filter operations."
        )
        wandb_project_name: Optional[str] = Field(
            default=None,
            description="Weights & Biases project name for initialization in the format of `username/project_name`.",
        )
        wandb_api_key: Optional[str] = Field(
            default=None, description="Weights & Biases API key for login."
        )

    def __init__(self):
        self.valves = self.Valves()
        # Populated by inlet(); initialised here so outlet() can tell that no
        # call is open instead of failing with AttributeError.
        self.weave_client = None
        self.weave_call = None

    def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
        """Log in to W&B, initialise Weave and open a call for this request.

        Args:
            body: Request payload; its ``messages``, ``model`` and ``metadata``
                entries are recorded as the call inputs.
            __user__: Optional user record, stored alongside the inputs.

        Returns:
            ``body`` unchanged.
        """
        wandb.login(key=self.valves.wandb_api_key)
        self.weave_client = weave.init(self.valves.wandb_project_name)
        self.weave_call = self.weave_client.create_call(
            op=__name__,
            inputs={
                "messages": body.get("messages", []),
                "model": body.get("model", ""),
                "metadata": body.get("metadata", {}),
                "user": __user__,
            },
        )
        return body

    def outlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
        """Close the call opened by ``inlet`` with the reply and token usage.

        Usage reported in the last assistant message (``info.usage``) is used
        when present; otherwise token counts are estimated with tiktoken.

        Args:
            body: Response payload containing ``messages`` and ``model``.
            __user__: Optional user record (unused here).

        Returns:
            ``body`` unchanged.
        """
        # Take ownership of the open call so it can never be finished twice.
        open_call, self.weave_call = self.weave_call, None
        if open_call is None or self.weave_client is None:
            # inlet() never opened a call (or it was already closed):
            # there is nothing to log, so pass the response through untouched.
            return body

        messages = body.get("messages", [])
        reply = next(
            (msg for msg in reversed(messages) if msg.get("role") == "assistant"),
            None,
        )
        reply_content = reply.get("content") if reply else None
        # "info" may be present but null, so fall back to an empty dict.
        usage_data = (reply.get("info") or {}).get("usage") if reply else None
        model = body.get("model", "gpt-4o")

        if usage_data:
            usage_source = "API"
            # A reported count may be missing or null; treat both as zero so
            # the total below is always computable.
            input_tokens = usage_data.get("prompt_tokens") or 0
            output_tokens = usage_data.get("completion_tokens") or 0
        else:
            usage_source = "Calculated"
            input_tokens, output_tokens = self._estimate_usage(
                model, messages, reply
            )

        self.weave_client.finish_call(
            open_call,
            output={
                "choices": [
                    {"message": {"content": reply_content}},
                ],
                "model": model,
                "usage_source": usage_source,
                "usage": {
                    "completion_tokens": output_tokens,
                    "prompt_tokens": input_tokens,
                    "total_tokens": input_tokens + output_tokens,
                },
            },
        )
        return body

    def _estimate_usage(
        self, model: str, messages: List[dict], reply: Optional[dict]
    ) -> tuple:
        """Estimate ``(prompt_tokens, completion_tokens)`` with tiktoken.

        Every message except ``reply`` counts towards the prompt; ``reply``
        alone counts towards the completion. Approximate buffers follow
        https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken#6-counting-tokens-for-chat-completions-api-calls
        """
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            # Model not known to tiktoken: fall back to a general encoding.
            encoding = tiktoken.get_encoding("cl100k_base")

        def count(content) -> int:
            if content is None:
                return 0
            # disallowed_special=() makes text such as "<|endoftext|>" encode
            # as ordinary text instead of raising ValueError; str() covers
            # non-string (e.g. structured) content.
            return len(encoding.encode(str(content), disallowed_special=()))

        input_tokens = sum(
            count(message.get("content"))
            for message in messages
            if message is not reply
        )
        output_tokens = count(reply.get("content")) if reply else 0
        return (
            input_tokens + self._PROMPT_TOKEN_BUFFER,
            output_tokens + self._COMPLETION_TOKEN_BUFFER,
        )