Skip to content

Commit

Permalink
Merge branch 'main' into clwan/sample_v1_11
Browse files Browse the repository at this point in the history
  • Loading branch information
wangchao1230 authored May 14, 2024
2 parents 7b1e954 + f949fea commit f444a38
Show file tree
Hide file tree
Showing 28 changed files with 294 additions and 30 deletions.
8 changes: 4 additions & 4 deletions src/promptflow-core/promptflow/_core/token_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from promptflow._utils.credential_utils import get_default_azure_credential


# to access azure ai services, we need to get the token with this audience
COGNITIVE_AUDIENCE = "https://cognitiveservices.azure.com/"
# to access azure ai services, we need to get the token with this scope
COGNITIVE_SCOPE = "https://cognitiveservices.azure.com/.default"


class TokenProviderABC(ABC):
Expand Down Expand Up @@ -48,5 +48,5 @@ def _init_instance(self):
)

def get_token(self):
audience = COGNITIVE_AUDIENCE
return self.credential.get_token(audience).token
scope = COGNITIVE_SCOPE
return self.credential.get_token(scope).token
4 changes: 4 additions & 0 deletions src/promptflow-core/promptflow/contracts/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,13 +988,16 @@ class FlexFlow(FlowBase):
:type environment_variables: Dict[str, object]
:param message_format: The message format type of the flow to represent different multimedia contracts.
:type message_format: str
:param sample: Sample data for the flow. Will become default inputs & init kwargs if not provided.
:type sample: Dict[str, object]
"""

init: Dict[str, FlowInputDefinition] = None
program_language: str = FlowLanguage.Python
environment_variables: Dict[str, object] = None
# eager flow does not support multimedia contract currently, it is set to basic by default.
message_format: str = MessageFormatType.BASIC
sample: Dict[str, dict] = None

@staticmethod
def deserialize(data: dict) -> "FlexFlow":
Expand All @@ -1017,6 +1020,7 @@ def deserialize(data: dict) -> "FlexFlow":
init={name: FlowInitDefinition.deserialize(i) for name, i in init.items()},
program_language=data.get(LANGUAGE_KEY, FlowLanguage.Python),
environment_variables=data.get("environment_variables") or {},
sample=data.get("sample") or {},
)

@classmethod
Expand Down
4 changes: 4 additions & 0 deletions src/promptflow-core/promptflow/core/_serving/_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ class MissingRequiredFlowInput(BadRequest):
pass


class InvalidFlowInitConfig(BadRequest):
pass


class FlowConnectionError(UserErrorException):
pass

Expand Down
15 changes: 12 additions & 3 deletions src/promptflow-core/promptflow/core/_serving/app_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@
from promptflow._utils.logger_utils import LoggerFactory
from promptflow._utils.user_agent_utils import setup_user_agent_to_operation_context
from promptflow.core import Flow
from promptflow.core._serving._errors import InvalidFlowInitConfig
from promptflow.core._serving.extension.extension_factory import ExtensionFactory
from promptflow.core._serving.flow_invoker import AsyncFlowInvoker
from promptflow.core._serving.utils import get_output_fields_to_remove, get_sample_json, load_feedback_swagger
from promptflow.core._utils import init_executable
from promptflow.exceptions import ErrorTarget
from promptflow.storage._run_storage import DummyRunStorage

from ..._constants import PF_FLOW_INIT_CONFIG
Expand Down Expand Up @@ -58,9 +60,16 @@ def init_app(self, **kwargs):

self.init = kwargs.get("init", {})
if not self.init:
init_params = os.environ.get(PF_FLOW_INIT_CONFIG, "{}")
init_dict: dict = json.loads(init_params)
self.init = init_dict
init_params = os.environ.get(PF_FLOW_INIT_CONFIG, None)
if init_params:
try:
self.init = json.loads(init_params)
except Exception as e:
self.logger.error(f"PF_FLOW_INIT_CONFIG can't be deserialized to json: {e}")
raise InvalidFlowInitConfig(
"PF_FLOW_INIT_CONFIG can't be deserialized to json",
target=ErrorTarget.SERVING_APP,
)

logger.debug("Init params: " + str(self.init))

Expand Down
48 changes: 40 additions & 8 deletions src/promptflow-core/promptflow/executor/_script_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import dataclasses
import importlib
import inspect
import os.path
import uuid
from dataclasses import is_dataclass
from functools import partial
Expand Down Expand Up @@ -58,17 +59,30 @@ def __init__(
logger.debug(f"Start initializing the executor with {flow_file}.")
logger.debug(f"Init params for script executor: {init_kwargs}")

self._flow_file = flow_file
if connections and isinstance(connections, dict):
connections = DictConnectionProvider(connections)
self._connections = connections

self._flow_file = flow_file
entry = flow_file # Entry could be both a path or a callable
self._entry = entry
self._init_kwargs = init_kwargs or {}
if isinstance(entry, (str, Path)):
self._working_dir = Flow._resolve_working_dir(entry, working_dir)
else:
self._working_dir = working_dir or Path.cwd()

# load flow if possible
try:
flow_file = os.path.join(self._working_dir, self._flow_file)
with open(flow_file, "r", encoding="utf-8") as fin:
flow_data = load_yaml(fin)
flow = FlexFlow.deserialize(flow_data)
except Exception as e:
logger.debug(f"Failed to load flow from file {self._flow_file} with error: {e}")
flow = None
self._flow = flow

self._init_kwargs = self._apply_sample_init(init_kwargs=init_kwargs)
self._init_input_sign()
self._initialize_function()
self._storage = storage or DefaultRunStorage()
Expand Down Expand Up @@ -108,6 +122,7 @@ def exec_line(
**kwargs,
) -> LineResult:
run_id = run_id or str(uuid.uuid4())
inputs = self._apply_sample_inputs(inputs=inputs)
inputs = apply_default_value_for_input(self._inputs_sign, inputs)
with self._exec_line_context(run_id, index):
return self._exec_line(inputs, index, run_id, allow_generator_output=allow_generator_output)
Expand Down Expand Up @@ -273,6 +288,7 @@ async def exec_line_async(
**kwargs,
) -> LineResult:
run_id = run_id or str(uuid.uuid4())
inputs = self._apply_sample_inputs(inputs=inputs)
inputs = apply_default_value_for_input(self._inputs_sign, inputs)
with self._exec_line_context(run_id, index):
return await self._exec_line_async(inputs, index, run_id, allow_generator_output=allow_generator_output)
Expand Down Expand Up @@ -508,18 +524,34 @@ def _parse_flow_file(self):
return module_name, func_name

def _init_input_sign(self):
if not self.is_function_entry:
with open(self._working_dir / self._flow_file, "r", encoding="utf-8") as fin:
flow_dag = load_yaml(fin)
flow = FlexFlow.deserialize(flow_dag)
if not self.is_function_entry and self._flow is not None:
# In the yaml file, user can define the inputs and init signature for the flow, also SDK may create
# the signature and add them to the yaml file. We need to get the signature from the yaml file and
# used for applying default value and ensuring input type.
# If the default value is an empty string, we will assume user has defined the default value as None
# in python script. We will exclude it from signature.
self._inputs_sign = {k: v for k, v in flow.inputs.items() if v.default != ""}
self._init_sign = {k: v for k, v in flow.init.items() if v.default != ""}
self._inputs_sign = {k: v for k, v in self._flow.inputs.items() if v.default != ""}
self._init_sign = {k: v for k, v in self._flow.init.items() if v.default != ""}
else:
# TODO(3194196): support input signature for function entry.
# Since there is no yaml file for function entry, we set the inputs and init signature to empty dict.
self._inputs_sign = {}
self._init_sign = {}

def _apply_sample_init(self, init_kwargs: Mapping[str, Any]):
"""Apply sample init if init_kwargs not provided."""
if not init_kwargs and self._flow:
sample_init = self._flow.sample.get("init")
if sample_init:
logger.debug(f"Init kwargs are not provided, applying sample init: {sample_init}.")
return sample_init
return init_kwargs or {}

def _apply_sample_inputs(self, inputs: Mapping[str, Any]):
"""Apply sample inputs if inputs not provided."""
if not inputs and self._flow:
sample_inputs = self._flow.sample.get("inputs")
if sample_inputs:
logger.debug(f"Inputs are not provided, applying sample inputs: {sample_inputs}.")
return sample_inputs
return inputs or {}
24 changes: 24 additions & 0 deletions src/promptflow-core/tests/core/e2etests/test_eager_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,3 +277,27 @@ def test_get_function_name(self):
for (entry, _, _), expected_name in zip(function_entries, expected_names):
executor = FlowExecutor.create(entry, {})
assert executor._func_name == expected_name

@pytest.mark.parametrize(
"flow_folder, expected_output",
[
(
"flow_with_sample",
{
"func_input1": "val1",
"func_input2": "val2",
"line_number": 0,
"obj_input1": "val1",
"obj_input2": "val2",
},
),
("function_flow_with_sample", {"func_input1": "val1", "func_input2": "val2", "line_number": 0}),
],
)
def test_flow_with_sample(self, flow_folder, expected_output):
# when inputs & init not provided, will use sample field in flow file
flow_file = get_yaml_file(flow_folder, root=EAGER_FLOW_ROOT)
executor = FlowExecutor.create(flow_file=flow_file, connections={})
line_result = executor.exec_line(inputs={}, index=0)
assert line_result.run_info.error is None
assert line_result.output == expected_output
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def print_pf_version(with_azure: bool = False, ignore_none: bool = False):

class PromptflowIgnoreFile(IgnoreFile):
# TODO add more files to this list.
IGNORE_FILE = [".runs", "__pycache__"]
IGNORE_FILE = [".git", ".runs", "__pycache__"]

def __init__(self, prompt_flow_path: Union[Path, str]):
super(PromptflowIgnoreFile, self).__init__(prompt_flow_path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,9 @@ def _test(
init_kwargs=init,
collection=collection,
) as submitter:
inputs = inputs or load_inputs_from_sample(submitter.flow.sample)
# Only override sample inputs for prompty, flex flow and prompty has different sample format
if isinstance(flow, Prompty) and not inputs:
inputs = load_inputs_from_sample(submitter.flow.sample)
if isinstance(flow, FlexFlow) or isinstance(flow, Prompty):
# TODO(2897153): support chat eager flow
# set is chat flow to True to allow generator output
Expand Down Expand Up @@ -1138,17 +1140,18 @@ def _save(

if sample:
inputs = data.get("inputs", {})
if not isinstance(sample, dict):
sample_inputs = sample.get("inputs", {})
if not isinstance(sample_inputs, dict):
raise UserErrorException("Sample must be a dict.")
if not set(sample.keys()) == set(inputs.keys()):
if not set(sample_inputs.keys()) == set(inputs.keys()):
raise UserErrorException(
message_format="Sample keys {actual} do not match the inputs {expected}.",
actual=", ".join(sample.keys()),
actual=", ".join(sample_inputs.keys()),
expected=", ".join(inputs.keys()),
)
with open(target_flow_directory / SERVE_SAMPLE_JSON_PATH, "w", encoding=DEFAULT_ENCODING) as f:
json.dump(sample, f, indent=4)
data["sample"] = SERVE_SAMPLE_JSON_PATH
data["sample"] = f"${{file:{SERVE_SAMPLE_JSON_PATH}}}"
with open(target_flow_file, "w", encoding=DEFAULT_ENCODING):
dump_yaml(data, target_flow_file)

Expand Down
17 changes: 14 additions & 3 deletions src/promptflow-devkit/promptflow/_sdk/schemas/_flow.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from pathlib import Path

from marshmallow import ValidationError, fields, validate, validates_schema
from marshmallow import ValidationError, fields, pre_load, validate, validates_schema

from promptflow._constants import LANGUAGE_KEY, ConnectionType, FlowLanguage
from promptflow._proxy import ProxyFactory
from promptflow._sdk._constants import FlowType
from promptflow._sdk._constants import BASE_PATH_CONTEXT_KEY, FlowType
from promptflow._sdk.schemas._base import PatchedSchemaMeta, YamlFileSchema
from promptflow._sdk.schemas._fields import LocalPathField, NestedField
from promptflow.contracts.tool import ValueType
from promptflow.core._model_configuration import MODEL_CONFIG_NAME_2_CLASS
from promptflow.core._prompty_utils import resolve_references


class FlowInputSchema(metaclass=PatchedSchemaMeta):
Expand Down Expand Up @@ -108,7 +110,7 @@ class FlexFlowSchema(BaseFlowSchema):
inputs = fields.Dict(keys=fields.Str(), values=NestedField(FlexFlowInputSchema), required=False)
outputs = fields.Dict(keys=fields.Str(), values=NestedField(FlexFlowOutputSchema), required=False)
init = fields.Dict(keys=fields.Str(), values=NestedField(FlexFlowInitSchema), required=False)
sample = fields.Str()
sample = fields.Dict(keys=fields.Str(validate=validate.OneOf(["init", "inputs"])), required=False)
code = LocalPathField()

@validates_schema(skip_on_field_errors=False)
Expand All @@ -120,6 +122,15 @@ def validate_entry(self, data, **kwargs):
if not inspector_proxy.is_flex_flow_entry(data.get("entry", None)):
raise ValidationError(field_name="entry", message=f"Entry function {data['entry']} is not valid.")

@pre_load
def resolve_sample(self, data, **kwargs):
sample_dict = data.get("sample", {})
if sample_dict:
base_path = Path(self.context[BASE_PATH_CONTEXT_KEY])
sample_dict = resolve_references(origin=sample_dict, base_path=base_path)
data["sample"] = sample_dict
return data


class PromptySchema(BaseFlowSchema):
"""Schema for prompty."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class TestFlowSave:
}
},
},
"sample": {"text": "promptflow"},
"sample": {"inputs": {"text": "promptflow"}},
},
{
"inputs": {
Expand All @@ -114,7 +114,7 @@ class TestFlowSave:
"image": "python:3.8-slim",
"python_requirements_txt": "requirements",
},
"sample": "sample.json",
"sample": {"inputs": {"text": "promptflow"}},
},
id="hello_world.main",
),
Expand Down Expand Up @@ -317,7 +317,7 @@ def test_pf_save_succeed(self, save_args_overrides, request, expected_signature:
pytest.param(
{
"entry": "hello:hello_world",
"sample": {"non-exist": "promptflow"},
"sample": {"inputs": {"non-exist": "promptflow"}},
},
UserErrorException,
r"Sample keys non-exist do not match the inputs text.",
Expand Down Expand Up @@ -562,7 +562,9 @@ def test_public_save_with_path_sample(self):
with open(f"{tempdir}/sample.json", "w") as f:
json.dump(
{
"text": "promptflow",
"inputs": {
"text": "promptflow",
}
},
f,
)
Expand All @@ -574,7 +576,11 @@ def test_public_save_with_path_sample(self):
"type": "string",
}
},
"sample": "sample.json",
"sample": {
"inputs": {
"text": "promptflow",
}
},
}

def test_flow_save_file_code(self):
Expand Down
Loading

0 comments on commit f444a38

Please sign in to comment.