Merge branch 'main' into clwan/sample_v1_11
wangchao1230 authored May 11, 2024
2 parents c21a982 + 7117995 commit 2d67e67
Showing 67 changed files with 3,851 additions and 688 deletions.
9 changes: 8 additions & 1 deletion .cspell.json
@@ -49,7 +49,8 @@
".github/workflows/**",
".github/actions/**",
".github/pipelines/**",
".github/CODEOWNERS"
".github/CODEOWNERS",
"src/promptflow-evals/tests/**"
],
"words": [
"aoai",
@@ -221,10 +222,16 @@
"mpnet",
"wargs",
"dcid",
"aiohttp",
"endofprompt",
"tkey",
"tparam",
"ncols",
"piezo",
"Piezo",
"cmpop",
"omap",
"Machinal",
"azureopenaimodelconfiguration",
"openaimodelconfiguration"
],
5 changes: 4 additions & 1 deletion .github/workflows/promptflow-evals-e2e-test.yml
@@ -32,7 +32,10 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-13]
- python-version: ['3.8', '3.9', '3.10', '3.11']
+ # TODO: A hash mismatch is encountered for the ubuntu-latest + Python 3.9 combination while installing the promptflow-evals package
+ # https://github.com/microsoft/promptflow/actions/runs/9009397933/job/24753518853?pr=3158
+ # Re-add 3.9 once the issue is resolved
+ python-version: ['3.8', '3.10', '3.11']
fail-fast: false
# snok/install-poetry need this to support Windows
defaults:
2 changes: 1 addition & 1 deletion .github/workflows/promptflow-evals-unit-test.yml
@@ -104,4 +104,4 @@ jobs:
format: markdown
hide_complexity: true
output: both
- thresholds: 40 60
\ No newline at end of file
+ thresholds: 40 60
2 changes: 1 addition & 1 deletion examples/flex-flows/chat-stream/flow.flex.yaml
@@ -2,4 +2,4 @@ $schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
entry: flow:ChatFlow
environment:
# image: mcr.microsoft.com/azureml/promptflow/promptflow-python
- python_requirements_txt: requirements.txt
\ No newline at end of file
+ python_requirements_txt: requirements.txt
2 changes: 1 addition & 1 deletion scripts/dev-setup/main.py
@@ -104,7 +104,7 @@ def install_pkg_editable(pkg: str, verbose: bool, is_vscode: bool = False) -> No
# we should be able to remove this after we fully deprecate promptflow in local development
if is_vscode:
with open(pkg_working_dir / "promptflow" / "__init__.py", mode="w", encoding="utf-8") as f:
f.write("")
f.write("__path__ = __import__('pkgutil').extend_path(__path__, __name__)\n")


@dataclass
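The write above turns the legacy promptflow package into a pkgutil-style namespace package. A minimal sketch of the effect, assuming an illustrative editable-install layout:

# Illustrative layout (hypothetical paths); each tree is installed editable
# and each promptflow/__init__.py contains the pkgutil line written above:
#   src/promptflow-core/promptflow/__init__.py
#   src/promptflow-devkit/promptflow/__init__.py
# extend_path appends every promptflow/ directory found on sys.path to
# promptflow.__path__, so submodules from both trees resolve under one package.
import promptflow
print(promptflow.__path__)  # lists each contributing promptflow/ directory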
(another changed file; path not shown)
@@ -37,7 +37,7 @@
from promptflow._sdk._telemetry import ActivityType, WorkspaceTelemetryMixin, monitor_operation
from promptflow._sdk._utilities.general_utils import PromptflowIgnoreFile
from promptflow._sdk._vendor._asset_utils import traverse_directory
- from promptflow._utils.flow_utils import resolve_flow_path
+ from promptflow._utils.flow_utils import get_flow_type, resolve_flow_path
from promptflow._utils.logger_utils import get_cli_sdk_logger
from promptflow.azure._constants._flow import DEFAULT_STORAGE
from promptflow.azure._entities._flow import Flow
@@ -604,3 +604,17 @@ def _try_resolve_code_for_flow_to_file_share(cls, flow: Flow, ops: OperationOrch
flow._code_uploaded = True

# endregion

def _get_telemetry_values(self, *args, **kwargs):
activity_name = kwargs.get("activity_name", None)
telemetry_values = super()._get_telemetry_values(*args, **kwargs)
try:
if activity_name == "pfazure.flows.create_or_update":
flow = kwargs.get("flow", None) or args[0]
if isinstance(flow, Flow):
flow = flow.path
telemetry_values["flow_type"] = get_flow_type(flow)
except Exception as e:
logger.error(f"Failed to get telemetry values: {str(e)}")

return telemetry_values
(another changed file; path not shown)
@@ -466,3 +466,59 @@ def check_evelope():
)
logger.handlers[0].flush()
check_evelope()

@pytest.mark.skipif(
condition=not pytest.is_live,
reason="Live mode can run successfully, but an error will be reported when recording.",
)
def test_flow_type_with_pfazure_flows(self, pf, randstr: Callable[[str], str]):
from promptflow._constants import FlowType
from promptflow._sdk._configuration import Configuration
from promptflow._sdk._telemetry.logging_handler import PromptFlowSDKExporter

envelope = None
flow_type = None
config = Configuration.get_instance()
custom_dimensions = {
"python_version": platform.python_version(),
"installation_id": config.get_or_set_installation_id(),
}
log_to_envelope = PromptFlowSDKExporter(
connection_string="InstrumentationKey=00000000-0000-0000-0000-000000000000",
custom_dimensions=custom_dimensions,
)._log_to_envelope

def log_event(log_data):
nonlocal envelope
envelope = log_to_envelope(log_data)

def check_envelope():
assert envelope.data.base_data.name.startswith("pfazure.flows.create_or_update")
custom_dimensions = pydash.get(envelope, "data.base_data.properties")
assert isinstance(custom_dimensions, dict)
assert "flow_type" in custom_dimensions
assert custom_dimensions["flow_type"] == flow_type

with patch.object(PromptFlowSDKExporter, "_log_to_envelope", side_effect=log_event), patch(
"promptflow._sdk._telemetry.telemetry.get_telemetry_logger", side_effect=get_telemetry_logger
):
flow_type = FlowType.DAG_FLOW
try:
pf.flows.create_or_update(
flow=FLOWS_DIR / "print_input_flow",
)
except Exception:
pass
logger = get_telemetry_logger()
logger.handlers[0].flush()
check_envelope()

flow_type = FlowType.FLEX_FLOW
try:
pf.flows.create_or_update(
flow=EAGER_FLOWS_DIR / "simple_with_req",
)
except Exception:
pass
logger.handlers[0].flush()
check_envelope()
11 changes: 11 additions & 0 deletions src/promptflow-core/promptflow/_utils/flow_utils.py
@@ -18,6 +18,7 @@
FLOW_FLEX_YAML,
PROMPT_FLOW_DIR_NAME,
PROMPTY_EXTENSION,
FlowType,
)
from promptflow._core._errors import MetaFileNotFound, MetaFileReadError
from promptflow._utils.logger_utils import LoggerFactory
@@ -335,3 +336,13 @@ def parse_variant(variant: str) -> Tuple[str, str]:
message=str(error),
error=error,
)


def get_flow_type(flow_path: Union[str, Path, PathLike]) -> str:
if not isinstance(flow_path, (str, Path, PathLike)):
raise UserErrorException(f"flow_path type is {type(flow_path)}, but only support: str, Path, PathLike.")
if is_prompty_flow(file_path=flow_path):
return FlowType.PROMPTY
if is_flex_flow(flow_path=flow_path):
return FlowType.FLEX_FLOW
return FlowType.DAG_FLOW
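A usage sketch for the new helper (the paths are hypothetical; classification falls through from prompty to flex to DAG):

from pathlib import Path
from promptflow._constants import FlowType
from promptflow._utils.flow_utils import get_flow_type

# With real flow assets on disk:
print(get_flow_type("chat.prompty"))     # FlowType.PROMPTY for *.prompty files
print(get_flow_type(Path("flex_flow")))  # FlowType.FLEX_FLOW when flow.flex.yaml is present
print(get_flow_type("dag_flow"))         # FlowType.DAG_FLOW otherwise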
31 changes: 31 additions & 0 deletions src/promptflow-core/promptflow/core/_flow.py
@@ -10,6 +10,7 @@

from promptflow._constants import DEFAULT_ENCODING, LANGUAGE_KEY, PROMPTY_EXTENSION, FlowLanguage
from promptflow._utils.flow_utils import is_flex_flow, is_prompty_flow, resolve_flow_path
from promptflow._utils.logger_utils import LoggerFactory
from promptflow._utils.yaml_utils import load_yaml_string
from promptflow.contracts.tool import ValueType
from promptflow.core._errors import MissingRequiredInputError
@@ -20,6 +21,7 @@
format_llm_response,
get_open_ai_client_by_connection,
handle_openai_error,
num_tokens_from_messages,
prepare_open_ai_request_params,
resolve_references,
send_request_to_llm,
@@ -31,6 +33,8 @@
from promptflow.tracing._experimental import enrich_prompt_template
from promptflow.tracing._trace import _traced

logger = LoggerFactory.get_logger(name=__name__)


class AbstractFlowBase(abc.ABC):
"""Abstract class for all Flow entities in both core and devkit."""
@@ -469,6 +473,33 @@ def render(self, *args, **kwargs):
# For chat mode, the message generated is list type. Convert to string type and return to user.
return str(prompt)

def estimate_token_count(self, *args, **kwargs):
"""Estimate the token count.
LLM will reject the request when prompt token + response token is greater than the maximum number of
tokens supported by the model. It is used to estimate the number of total tokens in this round of chat.
:param args: positional arguments are not supported.
:param kwargs: prompty inputs with key word arguments.
:return: Estimate total token count
:rtype: int
"""
if args:
raise UserErrorException("Prompty can only be rendered with keyword arguments.")
inputs = self._resolve_inputs(kwargs)
prompt = convert_prompt_template(self._template, inputs, self._model.api)
response_max_token = self._model.parameters.get("max_tokens", None)
if response_max_token is None:
logger.warning(
"The maximum number of tokens that can be generated in the chat completion is not configured. "
"Only the prompt token count will be returned."
)
elif not isinstance(response_max_token, int):
raise UserErrorException("max_tokens must be an integer.")
elif response_max_token <= 1:
raise UserErrorException(f"max_tokens must be greater than 1, but got {response_max_token}.")
total_token = num_tokens_from_messages(prompt, self._model._model) + (response_max_token or 0)
return total_token
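A usage sketch for the new method, assuming a hypothetical chat.prompty whose model parameters configure max_tokens:

from promptflow.core import Prompty

prompty = Prompty.load(source="chat.prompty")  # hypothetical prompty file
# Estimated prompt tokens plus the configured max_tokens for the response.
print(prompty.estimate_token_count(question="What is the capital of France?"))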


class AsyncPrompty(Prompty):
"""Async prompty is based on Prompty, which is used to invoke prompty in async mode.
Expand Down
45 changes: 45 additions & 0 deletions src/promptflow-core/promptflow/core/_prompty_utils.py
@@ -8,6 +8,7 @@
from pathlib import Path
from typing import List, Mapping

import tiktoken
from openai import APIConnectionError, APIStatusError, APITimeoutError, BadRequestError, OpenAIError, RateLimitError

from promptflow._utils.logger_utils import LoggerFactory
@@ -280,6 +281,50 @@ def format_stream(llm_response):
return result


def num_tokens_from_messages(messages, model):
"""Return the number of tokens used by a list of messages."""
# Ref: https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken#6-counting-tokens-for-chat-completions-api-calls # noqa: E501
try:
encoding = tiktoken.encoding_for_model(model)
except KeyError:
logger.warning("Model not found. Using cl100k_base encoding.")
encoding = tiktoken.get_encoding("cl100k_base")
if model in {
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
"gpt-4-0314",
"gpt-4-32k-0314",
"gpt-4-0613",
"gpt-4-32k-0613",
}:
tokens_per_message = 3
tokens_per_name = 1
elif model == "gpt-3.5-turbo-0301":
tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
tokens_per_name = -1 # if there's a name, the role is omitted
elif "gpt-3.5-turbo" in model or "gpt-35-turbo":
logger.warning("gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
elif "gpt-4" in model:
logger.warning("gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
return num_tokens_from_messages(messages, model="gpt-4-0613")
else:
raise NotImplementedError(
f"num_tokens_from_messages() is not implemented for model {model}. "
"See https://github.com/openai/openai-python/blob/main/chatml.md for information on "
"how messages are converted to tokens."
)
num_tokens = 0
for message in messages:
num_tokens += tokens_per_message
for key, value in message.items():
num_tokens += len(encoding.encode(value))
if key == "name":
num_tokens += tokens_per_name
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>
return num_tokens
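A quick illustration of the counting rules above (the messages are illustrative; tiktoken must be installed):

from promptflow.core._prompty_utils import num_tokens_from_messages

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]
# Each message costs tokens_per_message overhead plus its encoded content,
# and every reply is primed with 3 extra tokens.
print(num_tokens_from_messages(messages, model="gpt-4-0613"))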


def resolve_references(origin, base_path=None):
"""Resolve all reference in the object."""
if isinstance(origin, str):
(another changed file; path not shown)
@@ -7,4 +7,4 @@

# flake8: noqa
from promptflow._sdk._constants import LINE_NUMBER, Local2Cloud
- from promptflow._sdk._configuration import Configuration
+ from promptflow._sdk._utilities.general_utils import get_trace_destination
(another changed file; path not shown)
@@ -237,7 +237,7 @@ def _submit_bulk_run(
trace_destination = self._config.get_trace_destination(path=run._get_flow_dir().resolve())
if trace_destination and trace_destination.startswith(REMOTE_URI_PREFIX):
logger.debug(f"Trace destination set to {trace_destination!r}, uploading run to cloud...")
- self._upload_run_to_cloud(run=run)
+ self._upload_run_to_cloud(run=run, config=self._config)

def _resolve_input_dirs(self, run: Run):
result = {"data": run.data if run.data else None}
@@ -269,13 +269,13 @@ def _validate_column_mapping(cls, column_mapping: dict):
)

@classmethod
- def _upload_run_to_cloud(cls, run: Run):
+ def _upload_run_to_cloud(cls, run: Run, config=None):
error_msg_prefix = f"Failed to upload run {run.name!r} to cloud."
try:
from promptflow._sdk._tracing import _get_ws_triad_from_pf_config
from promptflow.azure._cli._utils import _get_azure_pf_client

- ws_triad = _get_ws_triad_from_pf_config(path=run._get_flow_dir().resolve(), config=run._config)
+ ws_triad = _get_ws_triad_from_pf_config(path=run._get_flow_dir().resolve(), config=config or run._config)
pf = _get_azure_pf_client(
subscription_id=ws_triad.subscription_id,
resource_group=ws_triad.resource_group_name,
(another changed file; path not shown)
@@ -1105,7 +1105,9 @@ def get_flow_path(flow) -> Path:
if isinstance(flow, DAGFlow):
return flow.flow_dag_path.parent.resolve()
if isinstance(flow, (FlexFlow, Prompty)):
- return flow.path.parent.resolve()
+ # Use the code path as the flow path: it equals the flow directory in the YAML case and
+ # points to the original code location in the non-YAML case.
+ return flow.code.resolve()
raise ValueError(f"Unsupported flow type {type(flow)!r}")


@@ -1148,3 +1150,18 @@ def resolve_flow_language(
f"Invalid flow path {file_path.as_posix()}, must exist and of suffix yaml, yml or prompty."
)
return yaml_dict.get(LANGUAGE_KEY, FlowLanguage.Python)


def get_trace_destination(pf_client=None):
"""get trace.destination
:param pf_client: pf_client object
:type pf_client: promptflow._sdk._pf_client.PFClient
:return:
"""
from promptflow._sdk._configuration import Configuration

config = pf_client._config if pf_client else Configuration.get_instance()
trace_destination = config.get_trace_destination()

return trace_destination
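A minimal sketch of the two lookup paths:

from promptflow._sdk._pf_client import PFClient
from promptflow._sdk._utilities.general_utils import get_trace_destination

print(get_trace_destination())                      # falls back to Configuration.get_instance()
print(get_trace_destination(pf_client=PFClient()))  # reads the client's own config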
20 changes: 9 additions & 11 deletions src/promptflow-devkit/promptflow/_sdk/entities/_run.py
@@ -13,7 +13,7 @@

from dateutil import parser as date_parser

- from promptflow._constants import FlowType, OutputsFolderName, TokenKeys
+ from promptflow._constants import OutputsFolderName, TokenKeys
from promptflow._sdk._configuration import Configuration
from promptflow._sdk._constants import (
BASE_PATH_CONTEXT_KEY,
@@ -52,7 +52,13 @@
)
from promptflow._sdk.entities._yaml_translatable import YAMLTranslatableMixin
from promptflow._sdk.schemas._run import RunSchema
- from promptflow._utils.flow_utils import get_flow_lineage_id, is_flex_flow, is_prompty_flow, parse_variant
+ from promptflow._utils.flow_utils import (
+     get_flow_lineage_id,
+     get_flow_type,
+     is_flex_flow,
+     is_prompty_flow,
+     parse_variant,
+ )
from promptflow._utils.logger_utils import get_cli_sdk_logger
from promptflow.exceptions import UserErrorException

@@ -874,12 +880,4 @@ def _load_from_source(cls, source: Union[str, Path], params_override: Optional[D
def _flow_type(self) -> str:
"""Get flow type of run."""

- from promptflow._sdk._load_functions import load_flow
- from promptflow._sdk.entities._flows import FlexFlow
-
- if is_prompty_flow(self.flow):
-     return FlowType.PROMPTY
- flow_obj = load_flow(source=self.flow)
- if isinstance(flow_obj, FlexFlow):
-     return FlowType.FLEX_FLOW
- return FlowType.DAG_FLOW
+ return get_flow_type(self.flow)
(diff truncated; remaining changed files not shown)
