Upgrade tests to adapt to openai v1 (#1352)
# Description

Upgrade tests to adapt to openai v1 (openai>=1.0.0), while keeping the legacy 0.x code path working.
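
For reviewers unfamiliar with the SDK change, here is a minimal sketch of why version-aware code paths are needed: the legacy 0.x SDK exposes module-level resources and takes telemetry headers via `headers`, while 1.x uses a client object and `extra_headers`. The model name, credentials, and header value below are placeholders, not values from this PR.

```python
from importlib.metadata import version

IS_LEGACY_OPENAI = version("openai").startswith("0.")

messages = [{"role": "user", "content": "What's your name?"}]

if IS_LEGACY_OPENAI:
    # openai<1.0: module-level class methods; telemetry headers go in `headers`.
    import openai

    response = openai.ChatCompletion.create(
        model="gpt-35-turbo",  # placeholder model/deployment name
        messages=messages,
        headers={"ms-azure-ai-promptflow": "example"},  # hypothetical header value
    )
else:
    # openai>=1.0: client object with resource methods; headers go in `extra_headers`.
    from openai import OpenAI

    client = OpenAI()  # reads OPENAI_API_KEY from the environment
    response = client.chat.completions.create(
        model="gpt-35-turbo",  # placeholder model/deployment name
        messages=messages,
        extra_headers={"ms-azure-ai-promptflow": "example"},  # hypothetical header value
    )
```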

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [ ] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [ ] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.

---------

Co-authored-by: Lina Tang <[email protected]>
lumoslnt and Lina Tang authored Dec 4, 2023
1 parent a8fa708 commit 8b85f60
Showing 11 changed files with 172 additions and 210 deletions.
25 changes: 21 additions & 4 deletions src/promptflow/promptflow/_utils/openai_metrics_calculator.py
@@ -50,9 +50,22 @@ def _get_openai_metrics_for_signal_api(self, api_call: dict):
             )
 
         name = api_call.get("name")
-        if name.split(".")[-2] == "ChatCompletion" or name == "openai.resources.chat.completions.Completions.create":
+        # Support both legacy api and OpenAI v1 api
+        # Legacy api:
+        # https://github.com/openai/openai-python/blob/v0.28.1/openai/api_resources/chat_completion.py
+        # https://github.com/openai/openai-python/blob/v0.28.1/openai/api_resources/completion.py
+        # OpenAI v1 api:
+        # https://github.com/openai/openai-python/blob/main/src/openai/resources/chat/completions.py
+        # https://github.com/openai/openai-python/blob/main/src/openai/resources/completions.py
+        if (
+            name == "openai.api_resources.chat_completion.ChatCompletion.create"
+            or name == "openai.resources.chat.completions.Completions.create"  # openai v1
+        ):
             return self._get_openai_metrics_for_chat_api(api_call)
-        elif name.split(".")[-2] == "Completion" or name == "openai.resources.completions.Completions.create":
+        elif (
+            name == "openai.api_resources.completion.Completion.create"
+            or name == "openai.resources.completions.Completions.create"  # openai v1
+        ):
             return self._get_openai_metrics_for_completion_api(api_call)
         else:
             raise CalculatingMetricsError(f"Calculating metrics for api {name} is not supported.")
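
Note: the `IS_LEGACY_OPENAI` flag used in the hunks below is defined outside the visible part of this diff; presumably it is derived from the installed package version, the same way the telemetry test further down defines it:

```python
from importlib.metadata import version

# True when the legacy 0.x SDK is installed; the calculator then handles the
# response shapes returned by that SDK instead of the v1 models.
IS_LEGACY_OPENAI = version("openai").startswith("0.")
```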
@@ -90,7 +103,9 @@ def _get_openai_metrics_for_chat_api(self, api_call):
             if IS_LEGACY_OPENAI:
                 metrics["completion_tokens"] = len(output)
             else:
-                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].delta.content])
+                metrics["completion_tokens"] = len(
+                    [chunk for chunk in output if chunk.choices and chunk.choices[0].delta.content]
+                )
         else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_chat_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
@@ -151,7 +166,9 @@ def _get_openai_metrics_for_completion_api(self, api_call: dict):
             if IS_LEGACY_OPENAI:
                 metrics["completion_tokens"] = len(output)
             else:
-                metrics["completion_tokens"] = len([chunk for chunk in output if chunk.choices[0].text])
+                metrics["completion_tokens"] = len(
+                    [chunk for chunk in output if chunk.choices and chunk.choices[0].text]
+                )
         else:
             metrics["completion_tokens"] = self._get_completion_tokens_for_completion_api(output, enc)
         metrics["total_tokens"] = metrics["prompt_tokens"] + metrics["completion_tokens"]
33 changes: 20 additions & 13 deletions src/promptflow/tests/executor/e2etests/test_telemetry.py
@@ -1,4 +1,5 @@
 import json
+import sys
 import uuid
 from collections import namedtuple
 from importlib.metadata import version
@@ -15,34 +16,40 @@
 
 from ..utils import get_flow_folder, get_flow_inputs_file, get_yaml_file, load_jsonl
 
+IS_LEGACY_OPENAI = version("openai").startswith("0.")
+
 Completion = namedtuple("Completion", ["choices"])
+Choice = namedtuple("Choice", ["delta"])
 Delta = namedtuple("Delta", ["content"])
 
 
 def stream_response(kwargs):
-    delta = Delta(content=json.dumps(kwargs.get("headers", {})))
-    yield Completion(choices=[{"delta": delta}])
+    if IS_LEGACY_OPENAI:
+        delta = Delta(content=json.dumps(kwargs.get("headers", {})))
+        yield Completion(choices=[{"delta": delta}])
+    else:
+        delta = Delta(content=json.dumps(kwargs.get("extra_headers", {})))
+        yield Completion(choices=[Choice(delta=delta)])
 
 
-def mock_stream_chat(**kwargs):
+def mock_stream_chat(*args, **kwargs):
     return stream_response(kwargs)
 
 
-# @pytest.mark.skipif(sys.platform == "darwin" or sys.platform == "win32", reason="Skip on Mac and Windows")
+@pytest.mark.skipif(sys.platform == "darwin" or sys.platform == "win32", reason="Skip on Mac and Windows")
 @pytest.mark.usefixtures("dev_connections")
 @pytest.mark.e2etest
 class TestExecutorTelemetry:
-    @pytest.mark.skipif(
-        version("openai").startswith("1."),
-        reason="test needs to be upgraded to adapt to openai>=1.0.0",
-    )
     def test_executor_openai_telemetry(self, dev_connections):
         """This test validates telemetry info header is correctly injected to OpenAI API
-        by mocking openai.ChatCompletion.create method. The mock method will return a generator
-        that yields a namedtuple with a json string of the headers passed to the method.
+        by mocking chat api method. The mock method will return a generator that yields a
+        namedtuple with a json string of the headers passed to the method.
         """
-
-        with patch("openai.ChatCompletion.create", new=mock_stream_chat):
+        if IS_LEGACY_OPENAI:
+            api = "openai.ChatCompletion.create"
+        else:
+            api = "openai.resources.chat.Completions.create"
+        with patch(api, new=mock_stream_chat):
             operation_context = OperationContext.get_instance()
             operation_context.clear()
 
@@ -52,7 +59,7 @@ def test_executor_openai_telemetry(self, dev_connections):
             executor = FlowExecutor.create(get_yaml_file(flow_folder), dev_connections)
 
             # flow run case
-            inputs = {"question": "What's your name?", "chat_history": []}
+            inputs = {"question": "What's your name?", "chat_history": [], "stream": True}
             flow_result = executor.exec_line(inputs)
 
             assert isinstance(flow_result.output, dict)
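
A plausible reading of the mock changes above: under openai v1 the patched target is a method on the `Completions` resource class, so the mock receives the resource instance as a positional argument (hence `*args`), the injected telemetry arrives in `extra_headers` rather than `headers`, and the yielded chunk must expose `choices[0].delta` as attributes (hence the new `Choice` namedtuple). A simplified, hypothetical sketch of that mechanism, assuming openai>=1.0.0 is installed; the header name and the yielded dict are illustrative stand-ins for what the real test does:

```python
from unittest.mock import patch


def mock_stream_chat(*args, **kwargs):
    # args[0] is the Completions resource instance (the bound ``self``);
    # everything the caller passed, including extra_headers, is in kwargs.
    headers = kwargs.get("extra_headers") or kwargs.get("headers") or {}
    yield {"injected_headers": headers}  # stand-in for a real stream chunk


with patch("openai.resources.chat.Completions.create", new=mock_stream_chat):
    from openai import OpenAI

    client = OpenAI(api_key="not-a-real-key")  # no request is made; create() is patched
    chunks = list(
        client.chat.completions.create(
            model="gpt-35-turbo",  # placeholder model name
            messages=[{"role": "user", "content": "hi"}],
            stream=True,
            extra_headers={"x-trace-id": "demo"},  # illustrative header
        )
    )
    assert chunks[0]["injected_headers"]["x-trace-id"] == "demo"
```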
51 changes: 20 additions & 31 deletions src/promptflow/tests/executor/e2etests/test_traces.py
@@ -1,4 +1,3 @@
-from importlib.metadata import version
 from types import GeneratorType
 
 import pytest
@@ -7,7 +6,7 @@
 from promptflow.contracts.run_info import Status
 from promptflow.executor import FlowExecutor
 
-from ..utils import get_flow_sample_inputs, get_yaml_file
+from ..utils import get_yaml_file
 
 
 @pytest.mark.usefixtures("dev_connections")
@@ -47,37 +46,27 @@ def validate_openai_apicall(self, apicall: dict):
 
         return get_trace
 
-    @pytest.mark.skipif(
-        version("openai").startswith("1."),
-        reason="test needs to be upgraded to adapt to openai>=1.0.0",
+    def get_chat_input(stream):
+        return {
+            "question": "What is the capital of the United States of America?", "chat_history": [], "stream": stream
+        }
+
+    def get_comletion_input(stream):
+        return {"prompt": "What is the capital of the United States of America?", "stream": stream}
+
+    @pytest.mark.parametrize(
+        "flow_folder, inputs",
+        [
+            ("openai_chat_api_flow", get_chat_input(False)),
+            ("openai_chat_api_flow", get_chat_input(True)),
+            ("openai_completion_api_flow", get_comletion_input(False)),
+            ("openai_completion_api_flow", get_comletion_input(True)),
+            ("llm_tool", {"topic": "Hello", "stream": False}),
+            ("llm_tool", {"topic": "Hello", "stream": True}),
+        ]
     )
-    @pytest.mark.parametrize("flow_folder", ["openai_chat_api_flow", "openai_completion_api_flow"])
-    def test_executor_openai_api_flow(self, flow_folder, dev_connections):
+    def test_executor_openai_api_flow(self, flow_folder, inputs, dev_connections):
         executor = FlowExecutor.create(get_yaml_file(flow_folder), dev_connections)
-        inputs = get_flow_sample_inputs(flow_folder)
         flow_result = executor.exec_line(inputs)
 
         assert isinstance(flow_result.output, dict)
-        assert flow_result.run_info.status == Status.Completed
-        assert flow_result.run_info.api_calls is not None
-
-        assert "total_tokens" in flow_result.run_info.system_metrics
-        assert flow_result.run_info.system_metrics["total_tokens"] > 0
-
-        get_traced = False
-        for api_call in flow_result.run_info.api_calls:
-            get_traced = get_traced or self.validate_openai_apicall(serialize(api_call))
-
-        assert get_traced is True
-
-    @pytest.mark.skipif(
-        version("openai").startswith("0."),
-        reason="Run tests for openai>=1.0.0",
-    )
-    @pytest.mark.parametrize("flow_folder", ["openai_v1_chat_api_flow", "openai_v1_completion_api_flow"])
-    def test_executor_openai_v1_api_flow(self, flow_folder, dev_connections):
-        executor = FlowExecutor.create(get_yaml_file(flow_folder), dev_connections)
-        inputs = get_flow_sample_inputs(flow_folder)
-        flow_result = executor.exec_line(inputs)
-
-        assert isinstance(flow_result.output, dict)