[Executor] Refine openai metrics calculator to adapt openai v1 (#1338)
# Description

Refine openai metrics calculator to adapt openai v1.
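
For context: openai v1 replaced the module-level API with client objects, so the fully qualified names recorded in traces change from patterns like openai.ChatCompletion.create to openai.resources.chat.completions.Completions.create, and the Azure deployment moves from the engine argument to model. A minimal sketch of the two call shapes gated by the same version check this PR uses (connection values are placeholders, not from this PR; the legacy branch assumes credentials are already configured):

from importlib.metadata import version

IS_LEGACY_OPENAI = version("openai").startswith("0.")

if IS_LEGACY_OPENAI:
    import openai
    # openai<1.0: module-level call; Azure deployments are selected via `engine`.
    response = openai.ChatCompletion.create(
        engine="gpt-35-turbo",
        messages=[{"role": "user", "content": "Please tell me a joke."}],
    )
else:
    from openai import AzureOpenAI
    # openai>=1.0: client-object call; the deployment is passed as `model`.
    client = AzureOpenAI(api_key="<key>", api_version="<version>", azure_endpoint="<endpoint>")
    response = client.chat.completions.create(
        model="gpt-35-turbo",
        messages=[{"role": "user", "content": "Please tell me a joke."}],
    )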

# All Promptflow Contribution checklist:
- [ ] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [ ] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [ ] Title of the pull request is clear and informative.
- [ ] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [ ] Pull request includes test coverage for the included changes.

Co-authored-by: Lina Tang <[email protected]>
(cherry picked from commit a14f55a)
lumoslnt authored and elliotzh committed Dec 1, 2023
1 parent 17ba53b commit b599428
Showing 12 changed files with 96 additions and 43 deletions.
2 changes: 1 addition & 1 deletion src/promptflow/promptflow/_core/run_tracker.py
@@ -337,7 +337,7 @@ def _collect_traces_from_nodes(self, run_id):
             traces.extend(node_run_info.api_calls or [])
         return traces

-    OPENAI_AGGREGATE_METRICS = ["total_tokens"]
+    OPENAI_AGGREGATE_METRICS = ["prompt_tokens", "completion_tokens", "total_tokens"]

     def collect_metrics(self, run_infos: List[RunInfo], aggregate_metrics: List[str] = []):
         if not aggregate_metrics:
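
With prompt_tokens and completion_tokens added to OPENAI_AGGREGATE_METRICS, all three counters now survive aggregation instead of only total_tokens. merge_metrics_dict itself is not shown in this diff; a rough sketch of the kind of additive merge it implies:

def merge_token_metrics(total: dict, metrics: dict) -> None:
    # Accumulate each token counter into the running totals in place.
    for key in ("prompt_tokens", "completion_tokens", "total_tokens"):
        if key in metrics:
            total[key] = total.get(key, 0) + metrics[key]

totals = {}
merge_token_metrics(totals, {"prompt_tokens": 12, "completion_tokens": 8, "total_tokens": 20})
merge_token_metrics(totals, {"prompt_tokens": 5, "completion_tokens": 3, "total_tokens": 8})
assert totals == {"prompt_tokens": 17, "completion_tokens": 11, "total_tokens": 28}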
30 changes: 21 additions & 9 deletions src/promptflow/promptflow/_utils/openai_metrics_calculator.py
@@ -1,7 +1,10 @@
 import tiktoken
+from importlib.metadata import version

 from promptflow.exceptions import UserErrorException

+IS_LEGACY_OPENAI = version("openai").startswith("0.")
+

 class OpenAIMetricsCalculator:
     def __init__(self, logger=None) -> None:
@@ -47,19 +50,22 @@ def _get_openai_metrics_for_signal_api(self, api_call: dict):
         )

         name = api_call.get("name")
-        if name.split(".")[-2] == "ChatCompletion":
+        if name.split(".")[-2] == "ChatCompletion" or name == "openai.resources.chat.completions.Completions.create":
             return self._get_openai_metrics_for_chat_api(api_call)
-        elif name.split(".")[-2] == "Completion":
+        elif name.split(".")[-2] == "Completion" or name == "openai.resources.completions.Completions.create":
             return self._get_openai_metrics_for_completion_api(api_call)
         else:
             raise CalculatingMetricsError(f"Calculating metrics for api {name} is not supported.")

     def _try_get_model(self, inputs):
-        api_type = inputs.get("api_type")
-        if not api_type:
-            raise CalculatingMetricsError("Cannot calculate metrics for none or empty api_type.")
-        if api_type == "azure":
-            model = inputs.get("engine")
+        if IS_LEGACY_OPENAI:
+            api_type = inputs.get("api_type")
+            if not api_type:
+                raise CalculatingMetricsError("Cannot calculate metrics for none or empty api_type.")
+            if api_type == "azure":
+                model = inputs.get("engine")
+            else:
+                model = inputs.get("model")
         else:
             model = inputs.get("model")
         if not model:
@@ -81,7 +87,10 @@ def _get_openai_metrics_for_chat_api(self, api_call):
                 tokens_per_name
             )
         if isinstance(output, list):
-            metrics["completion_tokens"] = len(output)
+            if IS_LEGACY_OPENAI:
+                metrics["completion_tokens"] = len(output)
+            else:
+                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].delta.content])
         else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_chat_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
@@ -139,7 +148,10 @@ def _get_openai_metrics_for_completion_api(self, api_call: dict):
         for pro in prompt:
             metrics["prompt_tokens"] += len(enc.encode(pro))
         if isinstance(output, list):
-            metrics["completion_tokens"] = len(output)
+            if IS_LEGACY_OPENAI:
+                metrics["completion_tokens"] = len(output)
+            else:
+                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].text])
         else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_completion_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
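
When output is a list, the run was streamed and each chunk carries at most one sampled token, so the calculator counts chunks; under openai v1 it must additionally skip chunks whose delta/text is empty (for example the leading role-only chunk of a chat stream). A small illustration built on the same openai.types models the updated tests use (all field values are dummies):

from openai.types.completion import Completion, CompletionChoice

def make_chunk(text):
    # Build a dummy streamed completion chunk in the openai>=1.0 shape.
    return Completion(
        choices=[CompletionChoice(text=text, finish_reason="stop", index=0, logprobs=None)],
        id="id", created=0, model="model", object="text_completion",
    )

output = [make_chunk("Hello"), make_chunk(""), make_chunk("!")]
# Mirrors the calculator's v1 rule: one streamed chunk with content ~ one completion token.
assert len([chunk for chunk in output if chunk.choices[0].text]) == 2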
19 changes: 16 additions & 3 deletions src/promptflow/promptflow/batch/_result.py
@@ -96,12 +96,25 @@ def _get_openai_metrics(line_results: List[LineResult], aggr_results: Aggregatio
         total_metrics = {}
         calculator = OpenAIMetricsCalculator()
         for run_info in node_run_infos:
-            api_calls = run_info.api_calls or []
-            for call in api_calls:
-                metrics = calculator.get_openai_metrics_from_api_call(call)
+            metrics = SystemMetrics._try_get_openai_metrics(run_info)
+            if metrics:
                 calculator.merge_metrics_dict(total_metrics, metrics)
+            else:
+                api_calls = run_info.api_calls or []
+                for call in api_calls:
+                    metrics = calculator.get_openai_metrics_from_api_call(call)
+                    calculator.merge_metrics_dict(total_metrics, metrics)
         return total_metrics

+    def _try_get_openai_metrics(run_info):
+        openai_metrics = {}
+        if run_info.system_metrics:
+            for metric in ["total_tokens", "prompt_tokens", "completion_tokens"]:
+                if metric not in run_info.system_metrics:
+                    return False
+                openai_metrics[metric] = run_info.system_metrics[metric]
+        return openai_metrics
+
     def to_dict(self):
         return {
             "total_tokens": self.total_tokens,
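
Note the helper's contract: it returns False as soon as one of the three counters is missing, and an empty dict when no system metrics were recorded at all; both are falsy, so _get_openai_metrics falls back to replaying api_calls through the calculator in either case. A quick illustration (FakeRunInfo is a hypothetical stand-in for the real RunInfo):

from promptflow.batch._result import SystemMetrics

class FakeRunInfo:
    def __init__(self, system_metrics):
        self.system_metrics = system_metrics

full = FakeRunInfo({"total_tokens": 20, "prompt_tokens": 12, "completion_tokens": 8})
partial = FakeRunInfo({"total_tokens": 20})   # a counter is missing
empty = FakeRunInfo(None)                     # no system metrics recorded

assert SystemMetrics._try_get_openai_metrics(full) == full.system_metrics
assert SystemMetrics._try_get_openai_metrics(partial) is False
assert not SystemMetrics._try_get_openai_metrics(empty)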
5 changes: 0 additions & 5 deletions src/promptflow/tests/executor/e2etests/test_langchain.py
@@ -1,4 +1,3 @@
-from importlib.metadata import version
 from pathlib import Path
 from tempfile import mkdtemp

@@ -10,10 +9,6 @@
 from ..utils import get_flow_folder, get_flow_inputs_file, get_yaml_file


-@pytest.mark.skipif(
-    version("openai").startswith("1."),
-    reason="test needs to be upgraded to adapt to openai>=1.0.0",
-)
 @pytest.mark.usefixtures("use_secrets_config_file", "dev_connections")
 @pytest.mark.e2etest
 class TestLangchain:
27 changes: 18 additions & 9 deletions src/promptflow/tests/executor/unittests/batch/test_result.py
@@ -10,7 +10,7 @@
 def get_api_call(type, name, inputs={}, output={}, children=None):
-    return {"type": type, "name": f"_._.{name}._", "inputs": inputs, "output": output, "children": children}
+    return {"type": type, "name": name, "inputs": inputs, "output": output, "children": children}


 @pytest.mark.unittest
@@ -98,32 +98,41 @@ def test_node_status(self):
         }

     def test_system_metrics(self):
+        from openai.types.completion import Completion, CompletionChoice
+
         line_dict = {0: {"node_0": Status.Completed}}
         aggr_dict = {"aggr_0": Status.Completed}

         api_call_1 = get_api_call(
             "LLM",
-            "Completion",
-            inputs={"prompt": "Please tell me a joke.", "api_type": "azure", "engine": "text-davinci-003"},
+            "openai.resources.completions.Completions.create",
+            inputs={"prompt": "Please tell me a joke.", "model": "text-davinci-003"},
             output={"choices": [{"text": "text"}]},
         )
         api_call_2 = get_api_call(
             "LLM",
-            "Completion",
+            "openai.resources.completions.Completions.create",
             inputs={
                 "prompt": ["Please tell me a joke.", "Please tell me a joke about fruit."],
-                "api_type": "azure",
-                "engine": "text-davinci-003",
+                "model": "text-davinci-003",
             },
-            output=[{"choices": [{"text": "text"}]}, {"choices": [{"text": "text"}]}],
+            output=[
+                Completion(
+                    choices=[CompletionChoice(text="text", finish_reason="stop", index=0, logprobs=None)],
+                    id="id", created=0, model="model", object="text_completion"
+                ),
+                Completion(
+                    choices=[CompletionChoice(text="text", finish_reason="stop", index=0, logprobs=None)],
+                    id="id", created=0, model="model", object="text_completion"
+                ),
+            ],
         )
         line_api_calls = get_api_call("Chain", "Chain", children=[api_call_1, api_call_2])
         aggr_api_call = get_api_call(
             "LLM",
-            "ChatCompletion",
+            "openai.resources.chat.completions.Completions.create",
             inputs={
                 "messages": [{"system": "You are a helpful assistant.", "user": "Please tell me a joke."}],
-                "api_type": "openai",
                 "model": "gpt-35-turbo",
             },
             output={"choices": [{"message": {"content": "content"}}]},
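
The helper no longer wraps names in _._.{name}._ because dispatch in the calculator now matches the full v1 resource path verbatim, while the legacy rule keyed off the second-to-last dotted segment. Both rules side by side:

# Legacy (openai<1.0) dispatch: the second-to-last dotted segment names the API class.
assert "_._.ChatCompletion._".split(".")[-2] == "ChatCompletion"

name = "openai.resources.chat.completions.Completions.create"
# v1 names don't fit the legacy rule ("Completions" matches neither branch)...
assert name.split(".")[-2] == "Completions"
# ...so the calculator compares the fully qualified resource method name exactly.
assert name == "openai.resources.chat.completions.Completions.create"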
(Diff for another changed file; the filename is not shown in this view.)

@@ -12,10 +12,9 @@
 @tool
 def test_langchain_traces(question: str, conn: AzureOpenAIConnection):
-    os.environ["OPENAI_API_KEY"] = conn.api_key
+    os.environ["AZURE_OPENAI_API_KEY"] = conn.api_key
     os.environ["OPENAI_API_VERSION"] = conn.api_version
-    os.environ["OPENAI_API_BASE"] = conn.api_base
-    os.environ["OPENAI_API_TYPE"] = conn.api_type
+    os.environ["AZURE_OPENAI_ENDPOINT"] = conn.api_base

     llm = AzureOpenAI(
         temperature=0.7,
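
The renames track the openai v1 Azure convention: AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT replace OPENAI_API_KEY and OPENAI_API_BASE, and OPENAI_API_TYPE disappears because choosing the AzureOpenAI client class already encodes the API type. A sketch of the equivalent explicit-client construction under openai>=1.0 (placeholder values, not from this PR):

from openai import AzureOpenAI

# Equivalent to exporting AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT / OPENAI_API_VERSION.
client = AzureOpenAI(
    api_key="<api-key>",
    azure_endpoint="https://<resource>.openai.azure.com",
    api_version="2023-07-01-preview",  # any supported Azure OpenAI API version
)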
(Diff for another changed file; the filename is not shown in this view.)

@@ -5,6 +5,9 @@
 from promptflow import tool
 from promptflow.connections import AzureOpenAIConnection

+IS_LEGACY_OPENAI = OPENAI_VERSION.startswith("0.")
+
+
 def get_client(connection: AzureOpenAIConnection):
     api_key = connection.api_key
     conn = dict(
@@ -20,6 +23,7 @@ def get_client(connection: AzureOpenAIConnection):
     )
     return Client(**conn)

+
 def create_messages(question, chat_history):
     yield {"role": "system", "content": "You are a helpful assistant."}
     for chat in chat_history:
@@ -29,9 +33,8 @@ def create_messages(question, chat_history):


 @tool
-def chat(connection: AzureOpenAIConnection, question: str, chat_history: List) -> str:
-    stream = True
-    if OPENAI_VERSION.startswith("0."):
+def chat(connection: AzureOpenAIConnection, question: str, chat_history: List, stream: bool) -> str:
+    if IS_LEGACY_OPENAI:
         completion = openai.ChatCompletion.create(
             engine="gpt-35-turbo",
             messages=list(create_messages(question, chat_history)),
@@ -59,12 +62,18 @@ def chat(connection: AzureOpenAIConnection, question: str, chat_history: List) -
         def generator():
             for chunk in completion:
                 if chunk.choices:
-                    yield getattr(chunk.choices[0]["delta"], "content", "")
+                    if IS_LEGACY_OPENAI:
+                        yield getattr(chunk.choices[0]["delta"], "content", "")
+                    else:
+                        yield chunk.choices[0].delta.content or ""

         # We must return the generator object, not using yield directly here.
         # Otherwise, the function itself will become a generator, despite whether stream is True or False.
         # return generator()
         return "".join(generator())
     else:
         # chat api may return message with no content.
-        return getattr(completion.choices[0].message, "content", "")
+        if IS_LEGACY_OPENAI:
+            return getattr(completion.choices[0].message, "content", "")
+        else:
+            return completion.choices[0].message.content or ""
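
In the v1 streaming branch, delta.content is None on role-only or terminal chunks, so the or "" keeps the join safe. A sketch of the same consumption pattern against a raw openai>=1.0 client (assumes a configured AzureOpenAI client and a deployed gpt-35-turbo model):

# Sketch: consume an openai>=1.0 chat stream, tolerating chunks with no content.
stream = client.chat.completions.create(
    model="gpt-35-turbo",
    messages=[{"role": "user", "content": "Please tell me a joke."}],
    stream=True,
)
answer = "".join(
    chunk.choices[0].delta.content or ""
    for chunk in stream
    if chunk.choices
)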
(Diff for another changed file; the filename is not shown in this view.)

@@ -3,6 +3,8 @@ inputs:
     type: string
   chat_history:
     type: list
+  stream:
+    type: bool
 outputs:
   answer:
     type: string
@@ -17,3 +19,4 @@ nodes:
     question: ${inputs.question}
     chat_history: ${inputs.chat_history}
     connection: azure_open_ai_connection
+    stream: ${inputs.stream}
(Diff for another changed file; the filename is not shown in this view.)

@@ -1,2 +1,2 @@
-{"question": "What is the capital of the United States of America?", "chat_history": []}
-{"question": "What is the capital of the United States of America?", "chat_history": []}
+{"question": "What is the capital of the United States of America?", "chat_history": [], "stream": true}
+{"question": "What is the capital of the United States of America?", "chat_history": [], "stream": false}
(Diff for another changed file; the filename is not shown in this view.)

@@ -4,6 +4,9 @@
 from promptflow import tool
 from promptflow.connections import AzureOpenAIConnection

+IS_LEGACY_OPENAI = OPENAI_VERSION.startswith("0.")
+
+
 def get_client(connection: AzureOpenAIConnection):
     api_key = connection.api_key
     conn = dict(
@@ -19,10 +22,10 @@ def get_client(connection: AzureOpenAIConnection):
     )
     return Client(**conn)

+
 @tool
-def completion(connection: AzureOpenAIConnection, prompt: str) -> str:
-    stream = True
-    if OPENAI_VERSION.startswith("0."):
+def completion(connection: AzureOpenAIConnection, prompt: str, stream: bool) -> str:
+    if IS_LEGACY_OPENAI:
         completion = openai.Completion.create(
             prompt=prompt,
             engine="text-davinci-003",
@@ -50,8 +53,14 @@ def completion(connection: AzureOpenAIConnection, prompt: str) -> str:
         def generator():
             for chunk in completion:
                 if chunk.choices:
-                    yield getattr(chunk.choices[0], "text", "")
+                    if IS_LEGACY_OPENAI:
+                        yield getattr(chunk.choices[0], "text", "")
+                    else:
+                        yield chunk.choices[0].text or ""

         return "".join(generator())
     else:
-        return getattr(completion.choices[0], "text", "")
+        if IS_LEGACY_OPENAI:
+            return getattr(completion.choices[0], "text", "")
+        else:
+            return completion.choices[0].text or ""
(Diff for another changed file; the filename is not shown in this view.)

@@ -1,6 +1,8 @@
 inputs:
   prompt:
     type: string
+  stream:
+    type: bool
 outputs:
   output:
     type: string
@@ -14,3 +16,4 @@ nodes:
   inputs:
     prompt: ${inputs.prompt}
     connection: azure_open_ai_connection
+    stream: ${inputs.stream}
(Diff for another changed file; the filename is not shown in this view.)

@@ -1 +1,2 @@
-{"prompt": "What is the capital of the United States of America?"}
+{"prompt": "What is the capital of the United States of America?", "stream": true}
+{"prompt": "What is the capital of the United States of America?", "stream": false}
