Merge branch 'main' into clwan/sample_v1_11

microsoft · May 14, 2024 · f444a38 · f444a38
2 parents 7b1e954 + f949fea
commit f444a38
Show file tree

Hide file tree

Showing 28 changed files with 294 additions and 30 deletions.
diff --git a/src/promptflow-core/promptflow/_core/token_provider.py b/src/promptflow-core/promptflow/_core/token_provider.py
@@ -4,8 +4,8 @@
 from promptflow._utils.credential_utils import get_default_azure_credential
 
 
-# to access azure ai services, we need to get the token with this audience
-COGNITIVE_AUDIENCE = "https://cognitiveservices.azure.com/"
+# to access azure ai services, we need to get the token with this scope
+COGNITIVE_SCOPE = "https://cognitiveservices.azure.com/.default"
 
 
 class TokenProviderABC(ABC):
@@ -48,5 +48,5 @@ def _init_instance(self):
             )
 
     def get_token(self):
-        audience = COGNITIVE_AUDIENCE
-        return self.credential.get_token(audience).token
+        scope = COGNITIVE_SCOPE
+        return self.credential.get_token(scope).token
diff --git a/src/promptflow-core/promptflow/contracts/flow.py b/src/promptflow-core/promptflow/contracts/flow.py
@@ -988,13 +988,16 @@ class FlexFlow(FlowBase):
     :type environment_variables: Dict[str, object]
     :param message_format: The message format type of the flow to represent different multimedia contracts.
     :type message_format: str
+    :param sample: Sample data for the flow. Will become default inputs & init kwargs if not provided.
+    :type sample: Dict[str, object]
     """
 
     init: Dict[str, FlowInputDefinition] = None
     program_language: str = FlowLanguage.Python
     environment_variables: Dict[str, object] = None
     # eager flow does not support multimedia contract currently, it is set to basic by default.
     message_format: str = MessageFormatType.BASIC
+    sample: Dict[str, dict] = None
 
     @staticmethod
     def deserialize(data: dict) -> "FlexFlow":
@@ -1017,6 +1020,7 @@ def deserialize(data: dict) -> "FlexFlow":
             init={name: FlowInitDefinition.deserialize(i) for name, i in init.items()},
             program_language=data.get(LANGUAGE_KEY, FlowLanguage.Python),
             environment_variables=data.get("environment_variables") or {},
+            sample=data.get("sample") or {},
         )
 
     @classmethod

diff --git a/src/promptflow-core/promptflow/core/_serving/_errors.py b/src/promptflow-core/promptflow/core/_serving/_errors.py
@@ -17,6 +17,10 @@ class MissingRequiredFlowInput(BadRequest):
     pass
 
 
+class InvalidFlowInitConfig(BadRequest):
+    pass
+
+
 class FlowConnectionError(UserErrorException):
     pass
 

diff --git a/src/promptflow-core/promptflow/core/_serving/app_base.py b/src/promptflow-core/promptflow/core/_serving/app_base.py
@@ -11,10 +11,12 @@
 from promptflow._utils.logger_utils import LoggerFactory
 from promptflow._utils.user_agent_utils import setup_user_agent_to_operation_context
 from promptflow.core import Flow
+from promptflow.core._serving._errors import InvalidFlowInitConfig
 from promptflow.core._serving.extension.extension_factory import ExtensionFactory
 from promptflow.core._serving.flow_invoker import AsyncFlowInvoker
 from promptflow.core._serving.utils import get_output_fields_to_remove, get_sample_json, load_feedback_swagger
 from promptflow.core._utils import init_executable
+from promptflow.exceptions import ErrorTarget
 from promptflow.storage._run_storage import DummyRunStorage
 
 from ..._constants import PF_FLOW_INIT_CONFIG
@@ -58,9 +60,16 @@ def init_app(self, **kwargs):
 
         self.init = kwargs.get("init", {})
         if not self.init:
-            init_params = os.environ.get(PF_FLOW_INIT_CONFIG, "{}")
-            init_dict: dict = json.loads(init_params)
-            self.init = init_dict
+            init_params = os.environ.get(PF_FLOW_INIT_CONFIG, None)
+            if init_params:
+                try:
+                    self.init = json.loads(init_params)
+                except Exception as e:
+                    self.logger.error(f"PF_FLOW_INIT_CONFIG can't be deserialized to json: {e}")
+                    raise InvalidFlowInitConfig(
+                        "PF_FLOW_INIT_CONFIG can't be deserialized to json",
+                        target=ErrorTarget.SERVING_APP,
+                    )
 
         logger.debug("Init params: " + str(self.init))
 

diff --git a/src/promptflow-core/promptflow/executor/_script_executor.py b/src/promptflow-core/promptflow/executor/_script_executor.py
@@ -2,6 +2,7 @@
 import dataclasses
 import importlib
 import inspect
+import os.path
 import uuid
 from dataclasses import is_dataclass
 from functools import partial
@@ -58,17 +59,30 @@ def __init__(
         logger.debug(f"Start initializing the executor with {flow_file}.")
         logger.debug(f"Init params for script executor: {init_kwargs}")
 
-        self._flow_file = flow_file
         if connections and isinstance(connections, dict):
             connections = DictConnectionProvider(connections)
         self._connections = connections
+
+        self._flow_file = flow_file
         entry = flow_file  # Entry could be both a path or a callable
         self._entry = entry
-        self._init_kwargs = init_kwargs or {}
         if isinstance(entry, (str, Path)):
             self._working_dir = Flow._resolve_working_dir(entry, working_dir)
         else:
             self._working_dir = working_dir or Path.cwd()
+
+        # load flow if possible
+        try:
+            flow_file = os.path.join(self._working_dir, self._flow_file)
+            with open(flow_file, "r", encoding="utf-8") as fin:
+                flow_data = load_yaml(fin)
+            flow = FlexFlow.deserialize(flow_data)
+        except Exception as e:
+            logger.debug(f"Failed to load flow from file {self._flow_file} with error: {e}")
+            flow = None
+        self._flow = flow
+
+        self._init_kwargs = self._apply_sample_init(init_kwargs=init_kwargs)
         self._init_input_sign()
         self._initialize_function()
         self._storage = storage or DefaultRunStorage()
@@ -108,6 +122,7 @@ def exec_line(
         **kwargs,
     ) -> LineResult:
         run_id = run_id or str(uuid.uuid4())
+        inputs = self._apply_sample_inputs(inputs=inputs)
         inputs = apply_default_value_for_input(self._inputs_sign, inputs)
         with self._exec_line_context(run_id, index):
             return self._exec_line(inputs, index, run_id, allow_generator_output=allow_generator_output)
@@ -273,6 +288,7 @@ async def exec_line_async(
         **kwargs,
     ) -> LineResult:
         run_id = run_id or str(uuid.uuid4())
+        inputs = self._apply_sample_inputs(inputs=inputs)
         inputs = apply_default_value_for_input(self._inputs_sign, inputs)
         with self._exec_line_context(run_id, index):
             return await self._exec_line_async(inputs, index, run_id, allow_generator_output=allow_generator_output)
@@ -508,18 +524,34 @@ def _parse_flow_file(self):
         return module_name, func_name
 
     def _init_input_sign(self):
-        if not self.is_function_entry:
-            with open(self._working_dir / self._flow_file, "r", encoding="utf-8") as fin:
-                flow_dag = load_yaml(fin)
-            flow = FlexFlow.deserialize(flow_dag)
+        if not self.is_function_entry and self._flow is not None:
             # In the yaml file, user can define the inputs and init signature for the flow, also SDK may create
             # the signature and add them to the yaml file. We need to get the signature from the yaml file and
             # used for applying default value and ensuring input type.
             # If the default value is an empty string, we will assume user has defined the default value as None
             # in python script. We will exclude it from signature.
-            self._inputs_sign = {k: v for k, v in flow.inputs.items() if v.default != ""}
-            self._init_sign = {k: v for k, v in flow.init.items() if v.default != ""}
+            self._inputs_sign = {k: v for k, v in self._flow.inputs.items() if v.default != ""}
+            self._init_sign = {k: v for k, v in self._flow.init.items() if v.default != ""}
         else:
+            # TODO(3194196): support input signature for function entry.
             # Since there is no yaml file for function entry, we set the inputs and init signature to empty dict.
             self._inputs_sign = {}
             self._init_sign = {}
+
+    def _apply_sample_init(self, init_kwargs: Mapping[str, Any]):
+        """Apply sample init if init_kwargs not provided."""
+        if not init_kwargs and self._flow:
+            sample_init = self._flow.sample.get("init")
+            if sample_init:
+                logger.debug(f"Init kwargs are not provided, applying sample init: {sample_init}.")
+                return sample_init
+        return init_kwargs or {}
+
+    def _apply_sample_inputs(self, inputs: Mapping[str, Any]):
+        """Apply sample inputs if inputs not provided."""
+        if not inputs and self._flow:
+            sample_inputs = self._flow.sample.get("inputs")
+            if sample_inputs:
+                logger.debug(f"Inputs are not provided, applying sample inputs: {sample_inputs}.")
+                return sample_inputs
+        return inputs or {}
diff --git a/src/promptflow-core/tests/core/e2etests/test_eager_flow.py b/src/promptflow-core/tests/core/e2etests/test_eager_flow.py
@@ -277,3 +277,27 @@ def test_get_function_name(self):
         for (entry, _, _), expected_name in zip(function_entries, expected_names):
             executor = FlowExecutor.create(entry, {})
             assert executor._func_name == expected_name
+
+    @pytest.mark.parametrize(
+        "flow_folder, expected_output",
+        [
+            (
+                "flow_with_sample",
+                {
+                    "func_input1": "val1",
+                    "func_input2": "val2",
+                    "line_number": 0,
+                    "obj_input1": "val1",
+                    "obj_input2": "val2",
+                },
+            ),
+            ("function_flow_with_sample", {"func_input1": "val1", "func_input2": "val2", "line_number": 0}),
+        ],
+    )
+    def test_flow_with_sample(self, flow_folder, expected_output):
+        # when inputs & init not provided, will use sample field in flow file
+        flow_file = get_yaml_file(flow_folder, root=EAGER_FLOW_ROOT)
+        executor = FlowExecutor.create(flow_file=flow_file, connections={})
+        line_result = executor.exec_line(inputs={}, index=0)
+        assert line_result.run_info.error is None
+        assert line_result.output == expected_output
diff --git a/src/promptflow-devkit/promptflow/_sdk/_utilities/general_utils.py b/src/promptflow-devkit/promptflow/_sdk/_utilities/general_utils.py
@@ -425,7 +425,7 @@ def print_pf_version(with_azure: bool = False, ignore_none: bool = False):
 
 class PromptflowIgnoreFile(IgnoreFile):
     # TODO add more files to this list.
-    IGNORE_FILE = [".runs", "__pycache__"]
+    IGNORE_FILE = [".git", ".runs", "__pycache__"]
 
     def __init__(self, prompt_flow_path: Union[Path, str]):
         super(PromptflowIgnoreFile, self).__init__(prompt_flow_path)

diff --git a/src/promptflow-devkit/promptflow/_sdk/operations/_flow_operations.py b/src/promptflow-devkit/promptflow/_sdk/operations/_flow_operations.py
@@ -292,7 +292,9 @@ def _test(
             init_kwargs=init,
             collection=collection,
         ) as submitter:
-            inputs = inputs or load_inputs_from_sample(submitter.flow.sample)
+            # Only override sample inputs for prompty, flex flow and prompty has different sample format
+            if isinstance(flow, Prompty) and not inputs:
+                inputs = load_inputs_from_sample(submitter.flow.sample)
             if isinstance(flow, FlexFlow) or isinstance(flow, Prompty):
                 # TODO(2897153): support chat eager flow
                 # set is chat flow to True to allow generator output
@@ -1138,17 +1140,18 @@ def _save(
 
         if sample:
             inputs = data.get("inputs", {})
-            if not isinstance(sample, dict):
+            sample_inputs = sample.get("inputs", {})
+            if not isinstance(sample_inputs, dict):
                 raise UserErrorException("Sample must be a dict.")
-            if not set(sample.keys()) == set(inputs.keys()):
+            if not set(sample_inputs.keys()) == set(inputs.keys()):
                 raise UserErrorException(
                     message_format="Sample keys {actual} do not match the inputs {expected}.",
-                    actual=", ".join(sample.keys()),
+                    actual=", ".join(sample_inputs.keys()),
                     expected=", ".join(inputs.keys()),
                 )
             with open(target_flow_directory / SERVE_SAMPLE_JSON_PATH, "w", encoding=DEFAULT_ENCODING) as f:
                 json.dump(sample, f, indent=4)
-            data["sample"] = SERVE_SAMPLE_JSON_PATH
+            data["sample"] = f"${{file:{SERVE_SAMPLE_JSON_PATH}}}"
         with open(target_flow_file, "w", encoding=DEFAULT_ENCODING):
             dump_yaml(data, target_flow_file)
 

diff --git a/src/promptflow-devkit/promptflow/_sdk/schemas/_flow.py b/src/promptflow-devkit/promptflow/_sdk/schemas/_flow.py
@@ -1,16 +1,18 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
+from pathlib import Path
 
-from marshmallow import ValidationError, fields, validate, validates_schema
+from marshmallow import ValidationError, fields, pre_load, validate, validates_schema
 
 from promptflow._constants import LANGUAGE_KEY, ConnectionType, FlowLanguage
 from promptflow._proxy import ProxyFactory
-from promptflow._sdk._constants import FlowType
+from promptflow._sdk._constants import BASE_PATH_CONTEXT_KEY, FlowType
 from promptflow._sdk.schemas._base import PatchedSchemaMeta, YamlFileSchema
 from promptflow._sdk.schemas._fields import LocalPathField, NestedField
 from promptflow.contracts.tool import ValueType
 from promptflow.core._model_configuration import MODEL_CONFIG_NAME_2_CLASS
+from promptflow.core._prompty_utils import resolve_references
 
 
 class FlowInputSchema(metaclass=PatchedSchemaMeta):
@@ -108,7 +110,7 @@ class FlexFlowSchema(BaseFlowSchema):
     inputs = fields.Dict(keys=fields.Str(), values=NestedField(FlexFlowInputSchema), required=False)
     outputs = fields.Dict(keys=fields.Str(), values=NestedField(FlexFlowOutputSchema), required=False)
     init = fields.Dict(keys=fields.Str(), values=NestedField(FlexFlowInitSchema), required=False)
-    sample = fields.Str()
+    sample = fields.Dict(keys=fields.Str(validate=validate.OneOf(["init", "inputs"])), required=False)
     code = LocalPathField()
 
     @validates_schema(skip_on_field_errors=False)
@@ -120,6 +122,15 @@ def validate_entry(self, data, **kwargs):
         if not inspector_proxy.is_flex_flow_entry(data.get("entry", None)):
             raise ValidationError(field_name="entry", message=f"Entry function {data['entry']} is not valid.")
 
+    @pre_load
+    def resolve_sample(self, data, **kwargs):
+        sample_dict = data.get("sample", {})
+        if sample_dict:
+            base_path = Path(self.context[BASE_PATH_CONTEXT_KEY])
+            sample_dict = resolve_references(origin=sample_dict, base_path=base_path)
+            data["sample"] = sample_dict
+        return data
+
 
 class PromptySchema(BaseFlowSchema):
     """Schema for prompty."""

diff --git a/src/promptflow-devkit/tests/sdk_cli_test/e2etests/test_flow_save.py b/src/promptflow-devkit/tests/sdk_cli_test/e2etests/test_flow_save.py
@@ -101,7 +101,7 @@ class TestFlowSave:
                             }
                         },
                     },
-                    "sample": {"text": "promptflow"},
+                    "sample": {"inputs": {"text": "promptflow"}},
                 },
                 {
                     "inputs": {
@@ -114,7 +114,7 @@ class TestFlowSave:
                         "image": "python:3.8-slim",
                         "python_requirements_txt": "requirements",
                     },
-                    "sample": "sample.json",
+                    "sample": {"inputs": {"text": "promptflow"}},
                 },
                 id="hello_world.main",
             ),
@@ -317,7 +317,7 @@ def test_pf_save_succeed(self, save_args_overrides, request, expected_signature:
             pytest.param(
                 {
                     "entry": "hello:hello_world",
-                    "sample": {"non-exist": "promptflow"},
+                    "sample": {"inputs": {"non-exist": "promptflow"}},
                 },
                 UserErrorException,
                 r"Sample keys non-exist do not match the inputs text.",
@@ -562,7 +562,9 @@ def test_public_save_with_path_sample(self):
             with open(f"{tempdir}/sample.json", "w") as f:
                 json.dump(
                     {
-                        "text": "promptflow",
+                        "inputs": {
+                            "text": "promptflow",
+                        }
                     },
                     f,
                 )
@@ -574,7 +576,11 @@ def test_public_save_with_path_sample(self):
                         "type": "string",
                     }
                 },
-                "sample": "sample.json",
+                "sample": {
+                    "inputs": {
+                        "text": "promptflow",
+                    }
+                },
             }
 
     def test_flow_save_file_code(self):
-Original file line number
+Diff line change
@@ Expand Up / @@ -17,6 +17,10 @@ class MissingRequiredFlowInput(BadRequest): @@
         pass
+    class InvalidFlowInitConfig(BadRequest):
+        pass
     class FlowConnectionError(UserErrorException):
         pass
@@ Expand Down @@