diff --git a/src/promptflow/promptflow/_core/run_tracker.py b/src/promptflow/promptflow/_core/run_tracker.py
index 782fb15de12..a9d40bdeb7c 100644
--- a/src/promptflow/promptflow/_core/run_tracker.py
+++ b/src/promptflow/promptflow/_core/run_tracker.py
@@ -337,7 +337,7 @@ def _collect_traces_from_nodes(self, run_id):
             traces.extend(node_run_info.api_calls or [])
         return traces
 
-    OPENAI_AGGREGATE_METRICS = ["total_tokens"]
+    OPENAI_AGGREGATE_METRICS = ["prompt_tokens", "completion_tokens", "total_tokens"]
 
     def collect_metrics(self, run_infos: List[RunInfo], aggregate_metrics: List[str] = []):
         if not aggregate_metrics:
diff --git a/src/promptflow/promptflow/_utils/openai_metrics_calculator.py b/src/promptflow/promptflow/_utils/openai_metrics_calculator.py
index 94be6299f06..e8d3251b173 100644
--- a/src/promptflow/promptflow/_utils/openai_metrics_calculator.py
+++ b/src/promptflow/promptflow/_utils/openai_metrics_calculator.py
@@ -1,7 +1,10 @@
 import tiktoken
+from importlib.metadata import version
 
 from promptflow.exceptions import UserErrorException
 
+IS_LEGACY_OPENAI = version("openai").startswith("0.")
+
 
 class OpenAIMetricsCalculator:
     def __init__(self, logger=None) -> None:
@@ -47,19 +50,22 @@ def _get_openai_metrics_for_signal_api(self, api_call: dict):
             )
 
         name = api_call.get("name")
-        if name.split(".")[-2] == "ChatCompletion":
+        if name.split(".")[-2] == "ChatCompletion" or name == "openai.resources.chat.completions.Completions.create":
             return self._get_openai_metrics_for_chat_api(api_call)
-        elif name.split(".")[-2] == "Completion":
+        elif name.split(".")[-2] == "Completion" or name == "openai.resources.completions.Completions.create":
             return self._get_openai_metrics_for_completion_api(api_call)
         else:
             raise CalculatingMetricsError(f"Calculating metrics for api {name} is not supported.")
 
     def _try_get_model(self, inputs):
-        api_type = inputs.get("api_type")
-        if not api_type:
-            raise CalculatingMetricsError("Cannot calculate metrics for none or empty api_type.")
-        if api_type == "azure":
-            model = inputs.get("engine")
+        if IS_LEGACY_OPENAI:
+            api_type = inputs.get("api_type")
+            if not api_type:
+                raise CalculatingMetricsError("Cannot calculate metrics for none or empty api_type.")
+            if api_type == "azure":
+                model = inputs.get("engine")
+            else:
+                model = inputs.get("model")
         else:
             model = inputs.get("model")
         if not model:
@@ -81,7 +87,10 @@ def _get_openai_metrics_for_chat_api(self, api_call):
                 tokens_per_name
             )
         if isinstance(output, list):
-            metrics["completion_tokens"] = len(output)
+            if IS_LEGACY_OPENAI:
+                metrics["completion_tokens"] = len(output)
+            else:
+                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].delta.content])
         else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_chat_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
@@ -139,7 +148,10 @@ def _get_openai_metrics_for_completion_api(self, api_call: dict):
             for pro in prompt:
                 metrics["prompt_tokens"] += len(enc.encode(pro))
         if isinstance(output, list):
-            metrics["completion_tokens"] = len(output)
+            if IS_LEGACY_OPENAI:
+                metrics["completion_tokens"] = len(output)
+            else:
+                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].text])
        else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_completion_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
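Note on the new streaming branches in `openai_metrics_calculator.py`: under openai>=1.0 a streamed response is a list of typed chunk objects, and some chunks carry no text at all (the leading role delta and the trailing finish chunk), so the old `len(output)` would overcount completion tokens. A minimal sketch of the invariant the filter relies on, built from hand-made openai>=1.0 chunk objects (illustrative only, not promptflow code):

```python
from openai.types.chat import ChatCompletionChunk
from openai.types.chat.chat_completion_chunk import Choice, ChoiceDelta

def make_chunk(delta, finish_reason=None):
    # Helper for this sketch only: a one-choice streaming chunk.
    return ChatCompletionChunk(
        id="id", created=0, model="gpt-35-turbo", object="chat.completion.chunk",
        choices=[Choice(index=0, delta=delta, finish_reason=finish_reason)],
    )

output = [
    make_chunk(ChoiceDelta(role="assistant")),        # role primer: no content
    make_chunk(ChoiceDelta(content="Hello")),         # an actual text token
    make_chunk(ChoiceDelta(), finish_reason="stop"),  # finish chunk: no content
]
# Count only chunks that actually carry text, as the calculator now does:
assert len([chunk for chunk in output if chunk.choices[0].delta.content]) == 1
```

This is still an estimate: it assumes one token per content-bearing chunk, which is how the streaming API usually emits them.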
diff --git a/src/promptflow/promptflow/batch/_result.py b/src/promptflow/promptflow/batch/_result.py
index db8b4d985ed..fbcd5ec73a3 100644
--- a/src/promptflow/promptflow/batch/_result.py
+++ b/src/promptflow/promptflow/batch/_result.py
@@ -96,12 +96,25 @@ def _get_openai_metrics(line_results: List[LineResult], aggr_results: Aggregatio
         total_metrics = {}
         calculator = OpenAIMetricsCalculator()
         for run_info in node_run_infos:
-            api_calls = run_info.api_calls or []
-            for call in api_calls:
-                metrics = calculator.get_openai_metrics_from_api_call(call)
+            metrics = SystemMetrics._try_get_openai_metrics(run_info)
+            if metrics:
                 calculator.merge_metrics_dict(total_metrics, metrics)
+            else:
+                api_calls = run_info.api_calls or []
+                for call in api_calls:
+                    metrics = calculator.get_openai_metrics_from_api_call(call)
+                    calculator.merge_metrics_dict(total_metrics, metrics)
         return total_metrics
 
+    def _try_get_openai_metrics(run_info):
+        openai_metrics = {}
+        if run_info.system_metrics:
+            for metric in ["total_tokens", "prompt_tokens", "completion_tokens"]:
+                if metric not in run_info.system_metrics:
+                    return False
+                openai_metrics[metric] = run_info.system_metrics[metric]
+        return openai_metrics
+
     def to_dict(self):
         return {
             "total_tokens": self.total_tokens,
diff --git a/src/promptflow/tests/executor/e2etests/test_langchain.py b/src/promptflow/tests/executor/e2etests/test_langchain.py
index 972d934e4eb..0e7d47d3fb2 100644
--- a/src/promptflow/tests/executor/e2etests/test_langchain.py
+++ b/src/promptflow/tests/executor/e2etests/test_langchain.py
@@ -1,4 +1,3 @@
-from importlib.metadata import version
 from pathlib import Path
 from tempfile import mkdtemp
 
@@ -10,10 +9,6 @@
 from ..utils import get_flow_folder, get_flow_inputs_file, get_yaml_file
 
 
-@pytest.mark.skipif(
-    version("openai").startswith("1."),
-    reason="test needs to be upgraded to adapt to openai>=1.0.0",
-)
 @pytest.mark.usefixtures("use_secrets_config_file", "dev_connections")
 @pytest.mark.e2etest
 class TestLangchain:
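Note on `_try_get_openai_metrics` in `_result.py`: it returns `False` when system metrics exist but miss one of the three token counts, and an empty dict when there are no system metrics at all; both values are falsy, so the caller's `if metrics:` drops into the per-api-call fallback in either case. A standalone sketch of that contract (just the logic, without the `SystemMetrics`/`RunInfo` wrappers):

```python
def try_get_openai_metrics(system_metrics):
    # Mirrors the method above, minus the RunInfo wrapper.
    openai_metrics = {}
    if system_metrics:
        for metric in ["total_tokens", "prompt_tokens", "completion_tokens"]:
            if metric not in system_metrics:
                return False  # incomplete -> caller recomputes from api_calls
            openai_metrics[metric] = system_metrics[metric]
    return openai_metrics

assert try_get_openai_metrics(None) == {}                    # falsy -> fallback
assert try_get_openai_metrics({"total_tokens": 5}) is False  # falsy -> fallback
assert try_get_openai_metrics(
    {"total_tokens": 5, "prompt_tokens": 3, "completion_tokens": 2}
) == {"total_tokens": 5, "prompt_tokens": 3, "completion_tokens": 2}
```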
diff --git a/src/promptflow/tests/executor/unittests/batch/test_result.py b/src/promptflow/tests/executor/unittests/batch/test_result.py
index c5422e15935..fb81764ea38 100644
--- a/src/promptflow/tests/executor/unittests/batch/test_result.py
+++ b/src/promptflow/tests/executor/unittests/batch/test_result.py
@@ -10,7 +10,7 @@
 
 
 def get_api_call(type, name, inputs={}, output={}, children=None):
-    return {"type": type, "name": f"_._.{name}._", "inputs": inputs, "output": output, "children": children}
+    return {"type": type, "name": name, "inputs": inputs, "output": output, "children": children}
 
 
 @pytest.mark.unittest
@@ -98,32 +98,41 @@ def test_node_status(self):
         }
 
     def test_system_metrics(self):
+        from openai.types.completion import Completion, CompletionChoice
+
         line_dict = {0: {"node_0": Status.Completed}}
         aggr_dict = {"aggr_0": Status.Completed}
         api_call_1 = get_api_call(
             "LLM",
-            "Completion",
-            inputs={"prompt": "Please tell me a joke.", "api_type": "azure", "engine": "text-davinci-003"},
+            "openai.resources.completions.Completions.create",
+            inputs={"prompt": "Please tell me a joke.", "model": "text-davinci-003"},
             output={"choices": [{"text": "text"}]},
         )
         api_call_2 = get_api_call(
             "LLM",
-            "Completion",
+            "openai.resources.completions.Completions.create",
             inputs={
                 "prompt": ["Please tell me a joke.", "Please tell me a joke about fruit."],
-                "api_type": "azure",
-                "engine": "text-davinci-003",
+                "model": "text-davinci-003",
             },
-            output=[{"choices": [{"text": "text"}]}, {"choices": [{"text": "text"}]}],
+            output=[
+                Completion(
+                    choices=[CompletionChoice(text="text", finish_reason="stop", index=0, logprobs=None)],
+                    id="id", created=0, model="model", object="text_completion"
+                ),
+                Completion(
+                    choices=[CompletionChoice(text="text", finish_reason="stop", index=0, logprobs=None)],
+                    id="id", created=0, model="model", object="text_completion"
+                ),
+            ],
         )
         line_api_calls = get_api_call("Chain", "Chain", children=[api_call_1, api_call_2])
         aggr_api_call = get_api_call(
             "LLM",
-            "ChatCompletion",
+            "openai.resources.chat.completions.Completions.create",
             inputs={
                 "messages": [{"system": "You are a helpful assistant.", "user": "Please tell me a joke."}],
-                "api_type": "openai",
                 "model": "gpt-35-turbo",
             },
             output={"choices": [{"message": {"content": "content"}}]},
         )
diff --git a/src/promptflow/tests/test_configs/flows/flow_with_langchain_traces/test_langchain_traces.py b/src/promptflow/tests/test_configs/flows/flow_with_langchain_traces/test_langchain_traces.py
index d6dc76e1f81..49ca690a28e 100644
--- a/src/promptflow/tests/test_configs/flows/flow_with_langchain_traces/test_langchain_traces.py
+++ b/src/promptflow/tests/test_configs/flows/flow_with_langchain_traces/test_langchain_traces.py
@@ -12,10 +12,9 @@
 
 @tool
 def test_langchain_traces(question: str, conn: AzureOpenAIConnection):
-    os.environ["OPENAI_API_KEY"] = conn.api_key
+    os.environ["AZURE_OPENAI_API_KEY"] = conn.api_key
     os.environ["OPENAI_API_VERSION"] = conn.api_version
-    os.environ["OPENAI_API_BASE"] = conn.api_base
-    os.environ["OPENAI_API_TYPE"] = conn.api_type
+    os.environ["AZURE_OPENAI_ENDPOINT"] = conn.api_base
 
     llm = AzureOpenAI(
         temperature=0.7,
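Note on the environment-variable rename in `test_langchain_traces.py`: the openai>=1.0 Azure client reads `AZURE_OPENAI_API_KEY` and `AZURE_OPENAI_ENDPOINT` (plus the unchanged `OPENAI_API_VERSION`), and `OPENAI_API_TYPE` is no longer consulted because using the Azure client class already implies the API type. A hypothetical helper summarizing the mapping (the function name is ours, not promptflow's):

```python
import os

def set_azure_openai_env(api_key: str, api_base: str, api_version: str) -> None:
    os.environ["AZURE_OPENAI_API_KEY"] = api_key     # was OPENAI_API_KEY
    os.environ["AZURE_OPENAI_ENDPOINT"] = api_base   # was OPENAI_API_BASE
    os.environ["OPENAI_API_VERSION"] = api_version   # unchanged
    # OPENAI_API_TYPE="azure" is obsolete: the AzureOpenAI client implies it.
```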
diff --git a/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/chat.py b/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/chat.py
index 64fafd2c381..e2b32d4e5a5 100644
--- a/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/chat.py
+++ b/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/chat.py
@@ -5,6 +5,9 @@
 from promptflow import tool
 from promptflow.connections import AzureOpenAIConnection
 
+IS_LEGACY_OPENAI = OPENAI_VERSION.startswith("0.")
+
+
 def get_client(connection: AzureOpenAIConnection):
     api_key = connection.api_key
     conn = dict(
@@ -20,6 +23,7 @@ def get_client(connection: AzureOpenAIConnection):
     )
     return Client(**conn)
 
+
 def create_messages(question, chat_history):
     yield {"role": "system", "content": "You are a helpful assistant."}
     for chat in chat_history:
@@ -29,9 +33,8 @@
 
 
 @tool
-def chat(connection: AzureOpenAIConnection, question: str, chat_history: List) -> str:
-    stream = True
-    if OPENAI_VERSION.startswith("0."):
+def chat(connection: AzureOpenAIConnection, question: str, chat_history: List, stream: bool) -> str:
+    if IS_LEGACY_OPENAI:
         completion = openai.ChatCompletion.create(
             engine="gpt-35-turbo",
             messages=list(create_messages(question, chat_history)),
@@ -59,7 +62,10 @@ def chat(connection: AzureOpenAIConnection, question: str, chat_history: List) -
         def generator():
             for chunk in completion:
                 if chunk.choices:
-                    yield getattr(chunk.choices[0]["delta"], "content", "")
+                    if IS_LEGACY_OPENAI:
+                        yield getattr(chunk.choices[0]["delta"], "content", "")
+                    else:
+                        yield chunk.choices[0].delta.content or ""
 
         # We must return the generator object, not using yield directly here.
         # Otherwise, the function itself will become a generator, despite whether stream is True or False.
@@ -67,4 +73,7 @@ def generator():
         return "".join(generator())
     else:
         # chat api may return message with no content.
-        return getattr(completion.choices[0].message, "content", "")
+        if IS_LEGACY_OPENAI:
+            return getattr(completion.choices[0].message, "content", "")
+        else:
+            return completion.choices[0].message.content or ""
diff --git a/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/flow.dag.yaml b/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/flow.dag.yaml
index 5374b3f7d44..d85a7620937 100644
--- a/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/flow.dag.yaml
+++ b/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/flow.dag.yaml
@@ -3,6 +3,8 @@ inputs:
     type: string
   chat_history:
     type: list
+  stream:
+    type: bool
 outputs:
   answer:
     type: string
@@ -17,3 +19,4 @@ nodes:
     question: ${inputs.question}
     chat_history: ${inputs.chat_history}
     connection: azure_open_ai_connection
+    stream: ${inputs.stream}
diff --git a/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/inputs.jsonl b/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/inputs.jsonl
index 94fad1bb38d..be05ef1ab77 100644
--- a/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/inputs.jsonl
+++ b/src/promptflow/tests/test_configs/flows/openai_chat_api_flow/inputs.jsonl
@@ -1,2 +1,2 @@
-{"question": "What is the capital of the United States of America?", "chat_history": []}
-{"question": "What is the capital of the United States of America?", "chat_history": []}
+{"question": "What is the capital of the United States of America?", "chat_history": [], "stream": true}
+{"question": "What is the capital of the United States of America?", "chat_history": [], "stream": false}
diff --git a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py
index b1624e28e11..0c00d11596a 100644
--- a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py
+++ b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/completion.py
@@ -4,6 +4,9 @@
 from promptflow import tool
 from promptflow.connections import AzureOpenAIConnection
 
+IS_LEGACY_OPENAI = OPENAI_VERSION.startswith("0.")
+
+
 def get_client(connection: AzureOpenAIConnection):
     api_key = connection.api_key
     conn = dict(
@@ -19,10 +22,10 @@ def get_client(connection: AzureOpenAIConnection):
     )
     return Client(**conn)
 
+
 @tool
-def completion(connection: AzureOpenAIConnection, prompt: str) -> str:
-    stream = True
-    if OPENAI_VERSION.startswith("0."):
+def completion(connection: AzureOpenAIConnection, prompt: str, stream: bool) -> str:
+    if IS_LEGACY_OPENAI:
         completion = openai.Completion.create(
             prompt=prompt,
             engine="text-davinci-003",
@@ -50,8 +53,14 @@ def completion(connection: AzureOpenAIConnection, prompt: str) -> str:
         def generator():
             for chunk in completion:
                 if chunk.choices:
-                    yield getattr(chunk.choices[0], "text", "")
+                    if IS_LEGACY_OPENAI:
+                        yield getattr(chunk.choices[0], "text", "")
+                    else:
+                        yield chunk.choices[0].text or ""
 
         return "".join(generator())
     else:
-        return getattr(completion.choices[0], "text", "")
+        if IS_LEGACY_OPENAI:
+            return getattr(completion.choices[0], "text", "")
+        else:
+            return completion.choices[0].text or ""
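Note on the `return "".join(generator())` pattern kept in both tools: as the preserved comment says, a bare `yield` anywhere in the tool body would turn the entire function into a generator regardless of `stream`, so the streamed branch must consume a nested generator instead. A minimal standalone demonstration of the pitfall (illustrative only, not promptflow code):

```python
def broken(stream: bool):
    if stream:
        yield "a"   # one yield makes the *whole* function a generator
    else:
        return "a"  # becomes StopIteration.value, never a normal return

def fixed(stream: bool):
    if stream:
        def generator():
            yield "a"
        return "".join(generator())  # consume the nested generator here
    return "a"

print(type(broken(False)).__name__)  # 'generator' -- the else branch is moot
print(fixed(False), fixed(True))     # 'a a' -- both paths return a str
```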
diff --git a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/flow.dag.yaml b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/flow.dag.yaml
index b57d6558a98..f9ff8d4159a 100644
--- a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/flow.dag.yaml
+++ b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/flow.dag.yaml
@@ -1,6 +1,8 @@
 inputs:
   prompt:
     type: string
+  stream:
+    type: bool
 outputs:
   output:
     type: string
@@ -14,3 +16,4 @@
   inputs:
     prompt: ${inputs.prompt}
     connection: azure_open_ai_connection
+    stream: ${inputs.stream}
diff --git a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/inputs.jsonl b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/inputs.jsonl
index e8330d94ad7..318c3dad8e4 100644
--- a/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/inputs.jsonl
+++ b/src/promptflow/tests/test_configs/flows/openai_completion_api_flow/inputs.jsonl
@@ -1 +1,2 @@
-{"prompt": "What is the capital of the United States of America?"}
+{"prompt": "What is the capital of the United States of America?", "stream": true}
+{"prompt": "What is the capital of the United States of America?", "stream": false}
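With `stream` threaded through both flow definitions, each batch line now selects its own code path: the first jsonl row exercises the streaming branch and the second the non-streaming one. A small illustrative check of the new rows (standalone; the actual tool invocation is elided):

```python
import json

rows = [
    '{"prompt": "What is the capital of the United States of America?", "stream": true}',
    '{"prompt": "What is the capital of the United States of America?", "stream": false}',
]
parsed = [json.loads(row) for row in rows]
# flow.dag.yaml declares `stream` as bool, and both branches get covered:
assert all(isinstance(row["stream"], bool) for row in parsed)
assert {row["stream"] for row in parsed} == {True, False}
```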