Upgrade tests to adapt to openai v1 (#1352)
# Description

Upgrade tests to adapt to openai v1 (openai>=1.0.0), while keeping the legacy 0.x code path working.
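
For reviewers unfamiliar with the SDK change, here is a minimal sketch of why version-aware code paths are needed: the legacy 0.x SDK exposes module-level resources and takes telemetry headers via `headers`, while 1.x uses a client object and `extra_headers`. The model name, credentials, and header value below are placeholders, not values from this PR.

```python
from importlib.metadata import version

IS_LEGACY_OPENAI = version("openai").startswith("0.")

messages = [{"role": "user", "content": "What's your name?"}]

if IS_LEGACY_OPENAI:
    # openai<1.0: module-level class methods; telemetry headers go in `headers`.
    import openai

    response = openai.ChatCompletion.create(
        model="gpt-35-turbo",  # placeholder model/deployment name
        messages=messages,
        headers={"ms-azure-ai-promptflow": "example"},  # hypothetical header value
    )
else:
    # openai>=1.0: client object with resource methods; headers go in `extra_headers`.
    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    response = client.chat.completions.create(
        model="gpt-35-turbo",  # placeholder model/deployment name
        messages=messages,
        extra_headers={"ms-azure-ai-promptflow": "example"},  # hypothetical header value
    )
```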

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.

---------

Co-authored-by: Lina Tang <[email protected]>
lumoslnt and Lina Tang authored Dec 4, 2023
1 parent a8fa708 commit 8b85f60
Showing 11 changed files with 172 additions and 210 deletions.
25 changes: 21 additions & 4 deletions src/promptflow/promptflow/_utils/openai_metrics_calculator.py
@@ -50,9 +50,22 @@ def _get_openai_metrics_for_signal_api(self, api_call: dict):
             )
 
         name = api_call.get("name")
-        if name.split(".")[-2] == "ChatCompletion" or name == "openai.resources.chat.completions.Completions.create":
+        # Support both legacy api and OpenAI v1 api
+        # Legacy api:
+        # https://github.com/openai/openai-python/blob/v0.28.1/openai/api_resources/chat_completion.py
+        # https://github.com/openai/openai-python/blob/v0.28.1/openai/api_resources/completion.py
+        # OpenAI v1 api:
+        # https://github.com/openai/openai-python/blob/main/src/openai/resources/chat/completions.py
+        # https://github.com/openai/openai-python/blob/main/src/openai/resources/completions.py
+        if (
+            name == "openai.api_resources.chat_completion.ChatCompletion.create"
+            or name == "openai.resources.chat.completions.Completions.create"  # openai v1
+        ):
             return self._get_openai_metrics_for_chat_api(api_call)
-        elif name.split(".")[-2] == "Completion" or name == "openai.resources.completions.Completions.create":
+        elif (
+            name == "openai.api_resources.completion.Completion.create"
+            or name == "openai.resources.completions.Completions.create"  # openai v1
+        ):
             return self._get_openai_metrics_for_completion_api(api_call)
         else:
             raise CalculatingMetricsError(f"Calculating metrics for api {name} is not supported.")
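
Note: the `IS_LEGACY_OPENAI` flag used in the hunks below is defined outside the visible part of this diff; presumably it is derived from the installed package version, the same way the telemetry test further down defines it:

```python
from importlib.metadata import version

# True when the legacy 0.x SDK is installed; the calculator then handles the
# response shapes returned by that SDK instead of the v1 models.
IS_LEGACY_OPENAI = version("openai").startswith("0.")
```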
@@ -90,7 +103,9 @@ def _get_openai_metrics_for_chat_api(self, api_call):
             if IS_LEGACY_OPENAI:
                 metrics["completion_tokens"] = len(output)
             else:
-                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].delta.content])
+                metrics["completion_tokens"] = len(
+                    [chunk for chunk in output if chunk.choices and chunk.choices[0].delta.content]
+                )
         else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_chat_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
@@ -151,7 +166,9 @@ def _get_openai_metrics_for_completion_api(self, api_call: dict):
             if IS_LEGACY_OPENAI:
                 metrics["completion_tokens"] = len(output)
             else:
-                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].text])
+                metrics["completion_tokens"] = len(
+                    [chunk for chunk in output if chunk.choices and chunk.choices[0].text]
+                )
         else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_completion_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
33 changes: 20 additions & 13 deletions src/promptflow/tests/executor/e2etests/test_telemetry.py
@@ -1,4 +1,5 @@
 import json
+import sys
 import uuid
 from collections import namedtuple
 from importlib.metadata import version
@@ -15,34 +16,40 @@
 
 from ..utils import get_flow_folder, get_flow_inputs_file, get_yaml_file, load_jsonl
 
+IS_LEGACY_OPENAI = version("openai").startswith("0.")
+
 Completion = namedtuple("Completion", ["choices"])
+Choice = namedtuple("Choice", ["delta"])
 Delta = namedtuple("Delta", ["content"])
 
 
 def stream_response(kwargs):
-    delta = Delta(content=json.dumps(kwargs.get("headers", {})))
-    yield Completion(choices=[{"delta": delta}])
+    if IS_LEGACY_OPENAI:
+        delta = Delta(content=json.dumps(kwargs.get("headers", {})))
+        yield Completion(choices=[{"delta": delta}])
+    else:
+        delta = Delta(content=json.dumps(kwargs.get("extra_headers", {})))
+        yield Completion(choices=[Choice(delta=delta)])
 
 
-def mock_stream_chat(**kwargs):
+def mock_stream_chat(*args, **kwargs):
     return stream_response(kwargs)
 
 
-# @pytest.mark.skipif(sys.platform == "darwin" or sys.platform == "win32", reason="Skip on Mac and Windows")
+@pytest.mark.skipif(sys.platform == "darwin" or sys.platform == "win32", reason="Skip on Mac and Windows")
 @pytest.mark.usefixtures("dev_connections")
 @pytest.mark.e2etest
 class TestExecutorTelemetry:
-    @pytest.mark.skipif(
-        version("openai").startswith("1."),
-        reason="test needs to be upgraded to adapt to openai>=1.0.0",
-    )
     def test_executor_openai_telemetry(self, dev_connections):
         """This test validates telemetry info header is correctly injected to OpenAI API
-        by mocking openai.ChatCompletion.create method. The mock method will return a generator
-        that yields a namedtuple with a json string of the headers passed to the method.
+        by mocking chat api method. The mock method will return a generator that yields a
+        namedtuple with a json string of the headers passed to the method.
         """
-
-        with patch("openai.ChatCompletion.create", new=mock_stream_chat):
+        if IS_LEGACY_OPENAI:
+            api = "openai.ChatCompletion.create"
+        else:
+            api = "openai.resources.chat.Completions.create"
+        with patch(api, new=mock_stream_chat):
             operation_context = OperationContext.get_instance()
             operation_context.clear()
 
@@ -52,7 +59,7 @@ def test_executor_openai_telemetry(self, dev_connections):
             executor = FlowExecutor.create(get_yaml_file(flow_folder), dev_connections)
 
             # flow run case
-            inputs = {"question": "What's your name?", "chat_history": []}
+            inputs = {"question": "What's your name?", "chat_history": [], "stream": True}
             flow_result = executor.exec_line(inputs)
 
             assert isinstance(flow_result.output, dict)
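
A plausible reading of the mock changes above: under openai v1 the patched target is a method on the `Completions` resource class, so the mock receives the resource instance as a positional argument (hence `*args`), the injected telemetry arrives in `extra_headers` rather than `headers`, and the yielded chunk must expose `choices[0].delta` as attributes (hence the new `Choice` namedtuple). A simplified, hypothetical sketch of that mechanism, assuming openai>=1.0.0 is installed; the header name and the yielded dict are illustrative stand-ins for what the real test does:

```python
from unittest.mock import patch


def mock_stream_chat(*args, **kwargs):
    # args[0] is the Completions resource instance (the bound ``self``);
    # everything the caller passed, including extra_headers, is in kwargs.
    headers = kwargs.get("extra_headers") or kwargs.get("headers") or {}
    yield {"injected_headers": headers}  # stand-in for a real stream chunk


with patch("openai.resources.chat.Completions.create", new=mock_stream_chat):
    from openai import OpenAI

    client = OpenAI(api_key="not-a-real-key")  # no request is made; create() is patched
    chunks = list(
        client.chat.completions.create(
            model="gpt-35-turbo",  # placeholder model name
            messages=[{"role": "user", "content": "hi"}],
            stream=True,
            extra_headers={"x-trace-id": "demo"},  # illustrative header
        )
    )
    assert chunks[0]["injected_headers"]["x-trace-id"] == "demo"
```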
51 changes: 20 additions & 31 deletions src/promptflow/tests/executor/e2etests/test_traces.py
@@ -1,4 +1,3 @@
-from importlib.metadata import version
 from types import GeneratorType
 
 import pytest
@@ -7,7 +6,7 @@
 from promptflow.contracts.run_info import Status
 from promptflow.executor import FlowExecutor
 
-from ..utils import get_flow_sample_inputs, get_yaml_file
+from ..utils import get_yaml_file
 
 
 @pytest.mark.usefixtures("dev_connections")
@@ -47,37 +46,27 @@ def validate_openai_apicall(self, apicall: dict):
 
         return get_trace
 
-    @pytest.mark.skipif(
-        version("openai").startswith("1."),
-        reason="test needs to be upgraded to adapt to openai>=1.0.0",
+    def get_chat_input(stream):
+        return {
+            "question": "What is the capital of the United States of America?", "chat_history": [], "stream": stream
+        }
+
+    def get_comletion_input(stream):
+        return {"prompt": "What is the capital of the United States of America?", "stream": stream}
+
+    @pytest.mark.parametrize(
+        "flow_folder, inputs",
+        [
+            ("openai_chat_api_flow", get_chat_input(False)),
+            ("openai_chat_api_flow", get_chat_input(True)),
+            ("openai_completion_api_flow", get_comletion_input(False)),
+            ("openai_completion_api_flow", get_comletion_input(True)),
+            ("llm_tool", {"topic": "Hello", "stream": False}),
+            ("llm_tool", {"topic": "Hello", "stream": True}),
+        ]
     )
-    @pytest.mark.parametrize("flow_folder", ["openai_chat_api_flow", "openai_completion_api_flow"])
-    def test_executor_openai_api_flow(self, flow_folder, dev_connections):
+    def test_executor_openai_api_flow(self, flow_folder, inputs, dev_connections):
         executor = FlowExecutor.create(get_yaml_file(flow_folder), dev_connections)
-        inputs = get_flow_sample_inputs(flow_folder)
         flow_result = executor.exec_line(inputs)
 
         assert isinstance(flow_result.output, dict)
-        assert flow_result.run_info.status == Status.Completed
-        assert flow_result.run_info.api_calls is not None
-
-        assert "total_tokens" in flow_result.run_info.system_metrics
-        assert flow_result.run_info.system_metrics["total_tokens"] > 0
-
-        get_traced = False
-        for api_call in flow_result.run_info.api_calls:
-            get_traced = get_traced or self.validate_openai_apicall(serialize(api_call))
-
-        assert get_traced is True
-
-    @pytest.mark.skipif(
-        version("openai").startswith("0."),
-        reason="Run tests for openai>=1.0.0",
-    )
-    @pytest.mark.parametrize("flow_folder", ["openai_v1_chat_api_flow", "openai_v1_completion_api_flow"])
-    def test_executor_openai_v1_api_flow(self, flow_folder, dev_connections):
-        executor = FlowExecutor.create(get_yaml_file(flow_folder), dev_connections)
-        inputs = get_flow_sample_inputs(flow_folder)
-        flow_result = executor.exec_line(inputs)
-
-        assert isinstance(flow_result.output, dict)