From 6a57ceac4ccb0731e7abebce155a1d36f088aefc Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 6 Mar 2024 18:06:00 -0800 Subject: [PATCH 01/11] Reverting models to make sure calls to the simulator work --- .../simulator/_model_tools/models.py | 510 ++++++++---------- 1 file changed, 225 insertions(+), 285 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index 104f8af11a3b..931830aa7a80 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -1,8 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- - -from ast import literal_eval +# pylint: skip-file import copy import time import asyncio @@ -10,12 +9,12 @@ import logging from urllib.parse import urlparse from abc import ABC, abstractmethod -from typing import Deque, Dict, List, Optional, Union +from typing import Deque, Dict, List, Optional, Union, Sized from collections import deque -from aiohttp import TraceConfig # pylint: disable=networking-import-outside-azure-core-transport -from aiohttp.web import HTTPException # pylint: disable=networking-import-outside-azure-core-transport -from aiohttp_retry import RetryClient, RandomRetry # pylint: disable=networking-import-outside-azure-core-transport +from aiohttp import TraceConfig +from aiohttp.web import HTTPException +from aiohttp_retry import RetryClient, RandomRetry from .identity_manager import APITokenManager from .images import replace_prompt_captions, format_multimodal_prompt @@ -25,24 +24,18 @@ MAX_TIME_TAKEN_RECORDS = 20_000 -def get_model_class_from_url(endpoint_url: str) -> type: - """ - Convert an endpoint URL to the appropriate model class. - - :param endpoint_url: The URL of the endpoint. - :type endpoint_url: str - :return: The model class corresponding to the endpoint URL. 
- :rtype: type - """ +def get_model_class_from_url(endpoint_url: str): + '''Convert an endpoint URL to the appropriate model class.''' endpoint_path = urlparse(endpoint_url).path # remove query params if endpoint_path.endswith("chat/completions"): return OpenAIChatCompletionsModel - if "/rainbow" in endpoint_path: + elif "/rainbow" in endpoint_path: return OpenAIMultiModalCompletionsModel - if endpoint_path.endswith("completions"): + elif endpoint_path.endswith("completions"): return OpenAICompletionsModel - raise ValueError(f"Unknown API type for endpoint {endpoint_url}") + else: + raise ValueError(f"Unknown API type for endpoint {endpoint_url}") # ===================== HTTP Retry ====================== @@ -58,44 +51,43 @@ def __init__(self, n_retry, retry_timeout, logger, retry_options=None): trace_config.on_request_end.append(self.on_request_end) if retry_options is None: retry_options = RandomRetry( # set up retry configuration - statuses=[104, 408, 409, 424, 429, 500, 502, 503, 504], # on which statuses to retry + statuses=[104, 408, 409, 424, 429, 500, 502, + 503, 504], # on which statuses to retry attempts=n_retry, min_timeout=retry_timeout, max_timeout=retry_timeout, ) - self.client = RetryClient(trace_configs=[trace_config], retry_options=retry_options) + self.client = RetryClient( + trace_configs=[trace_config], retry_options=retry_options) - async def on_request_start(self, trace_config_ctx, params): + async def on_request_start(self, session, trace_config_ctx, params): current_attempt = trace_config_ctx.trace_request_ctx["current_attempt"] - self.logger.info("[ATTEMPT %s] Sending %s request to %s" % (current_attempt, params.method, params.url)) + self.logger.info("[ATTEMPT %s] Sending %s request to %s" % ( + current_attempt, params.method, params.url + )) - async def on_request_end(self, trace_config_ctx, params): + async def on_request_end(self, session, trace_config_ctx, params): current_attempt = trace_config_ctx.trace_request_ctx["current_attempt"] request_headers = dict(params.response.request_info.headers) if "Authorization" in request_headers: del request_headers["Authorization"] # hide auth token from logs if "api-key" in request_headers: del request_headers["api-key"] - self.logger.info( - "[ATTEMPT %s] For %s request to %s, received response with status %s and request headers: %s" - % (current_attempt, params.method, params.url, params.response.status, request_headers) - ) - + self.logger.info("[ATTEMPT %s] For %s request to %s, received response with status %s and request headers: %s" % ( + current_attempt, params.method, params.url, params.response.status, request_headers + )) # =========================================================== # ===================== LLMBase Class ======================= # =========================================================== - class LLMBase(ABC): - """ + ''' Base class for all LLM models. 
- """ + ''' - def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[dict] = None): - if additional_headers is None: - additional_headers = {} + def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[dict] = {}): self.endpoint_url = endpoint_url self.name = name self.additional_headers = additional_headers @@ -103,7 +95,7 @@ def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: # Metric tracking self.lock = asyncio.Lock() - self.response_times: Deque[Union[int, float]] = deque(maxlen=MAX_TIME_TAKEN_RECORDS) + self.response_times: Deque[Union[int, float]] = deque(maxlen=MAX_TIME_TAKEN_RECORDS) self.step = 0 self.error_count = 0 @@ -121,17 +113,15 @@ async def get_completion( session: RetryClient, **request_params, ) -> dict: - """ + ''' Query the model a single time with a prompt. - :param prompt: Prompt str to query model with. - :type prompt: str - :param session: aiohttp RetryClient object to use for the request. - :type session: RetryClient - :keyword **request_params: Additional parameters to pass to the request. - :return: Dictionary containing the completion response from the model. - :rtype: dict - """ + Parameters + ---------- + prompt: Prompt str to query model with. + session: aiohttp RetryClient object to use for the request. + **request_params: Additional parameters to pass to the request. + ''' request_data = self.format_request_data(prompt, **request_params) return await self.request_api( session=session, @@ -180,7 +170,7 @@ async def request_api_parallel( pass def _log_request(self, request: dict) -> None: - self.logger.info("Request: %s", request) + self.logger.info(f"Request: {request}") async def _add_successful_response(self, time_taken: Union[int, float]) -> None: async with self.lock: @@ -220,37 +210,28 @@ def __repr__(self): # ================== OpenAICompletions ====================== # =========================================================== - -class OpenAICompletionsModel(LLMBase): # pylint: disable=too-many-instance-attributes - """ +class OpenAICompletionsModel(LLMBase): + ''' Object for calling a Completions-style API for OpenAI models. 
- """ - + ''' prompt_idx_key = "__prompt_idx__" max_stop_tokens = 4 stop_tokens = ["<|im_end|>", "<|endoftext|>"] model_param_names = [ - "model", - "temperature", - "max_tokens", - "top_p", - "n", - "frequency_penalty", - "presence_penalty", - "stop", + "model", "temperature", "max_tokens", "top_p", "n", + "frequency_penalty", "presence_penalty", "stop" ] CHAT_START_TOKEN = "<|im_start|>" CHAT_END_TOKEN = "<|im_end|>" def __init__( - self, - *, + self, *, endpoint_url: str, - name: str = "OpenAICompletionsModel", - additional_headers: Optional[dict] = None, + name: str = 'OpenAICompletionsModel', + additional_headers: Optional[dict] = {}, api_version: Optional[str] = "2023-03-15-preview", token_manager: APITokenManager, azureml_model_deployment: Optional[str] = None, @@ -262,12 +243,9 @@ def __init__( frequency_penalty: Optional[float] = 0, presence_penalty: Optional[float] = 0, stop: Optional[Union[List[str], str]] = None, - image_captions: Optional[Dict[str, str]] = None, - # pylint: disable=unused-argument + image_captions: Dict[str, str] = {}, images_dir: Optional[str] = None, # Note: unused, kept for class compatibility ): - if additional_headers is None: - additional_headers = {} super().__init__(endpoint_url=endpoint_url, name=name, additional_headers=additional_headers) self.api_version = api_version self.token_manager = token_manager @@ -279,15 +257,15 @@ def __init__( self.n = n self.frequency_penalty = frequency_penalty self.presence_penalty = presence_penalty - self.image_captions = image_captions if image_captions is not None else {} + self.image_captions = image_captions # Default stop to end token if not provided if not stop: stop = [] # Else if stop sequence is given as a string (Ex: "["\n", ""]"), convert - elif isinstance(stop, str) and stop.startswith("[") and stop.endswith("]"): - stop = literal_eval(stop) - elif isinstance(stop, str): + elif type(stop) is str and stop.startswith('[') and stop.endswith(']'): + stop = eval(stop) + elif type(stop) is str: stop = [stop] self.stop: List = stop # type: ignore[assignment] @@ -299,25 +277,19 @@ def __init__( self.stop.append(token) if top_p not in [None, 1.0] and temperature is not None: - self.logger.warning( - "Both top_p and temperature are set. OpenAI advises against using both at the same time." - ) + self.logger.warning("Both top_p and temperature are set. OpenAI advises against using both at the same time.") + + self.logger.info(f"Default model settings: {self.get_model_params()}") - self.logger.info("Default model settings: %s", self.get_model_params()) def get_model_params(self): return {param: getattr(self, param) for param in self.model_param_names if getattr(self, param) is not None} + def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]: - """ + ''' Format the request data for the OpenAI API. - - :param prompt: The prompt string. - :type prompt: str - :keyword request_params: Additional parameters to pass to the model. - :return: The formatted request data. - :rtype: Dict[str, str] - """ + ''' # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -329,6 +301,7 @@ def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]: request_data.update(request_params) return request_data + async def get_conversation_completion( self, messages: List[dict], @@ -336,20 +309,16 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - """ + ''' Query the model a single time with a message. 
- :param messages: List of messages to query the model with. - Expected format: [{"role": "user", "content": "Hello!"}, ...] - :type messages: List[dict] - :param session: aiohttp RetryClient object to query the model with. - :type session: RetryClient - :param role: Role of the user sending the message. - :type role: str - :keyword request_params: Additional parameters to pass to the model. - :return: Dictionary containing the completion response from the model. - :rtype: dict - """ + Parameters + ---------- + messages: List of messages to query the model with. Expected format: [{"role": "user", "content": "Hello!"}, ...] + session: aiohttp RetryClient object to query the model with. + role: Role of the user sending the message. + request_params: Additional parameters to pass to the model. + ''' prompt = [] for message in messages: prompt.append(f"{self.CHAT_START_TOKEN}{message['role']}\n{message['content']}\n{self.CHAT_END_TOKEN}\n") @@ -362,6 +331,7 @@ async def get_conversation_completion( **request_params, ) + async def get_all_completions( # type: ignore[override] self, prompts: List[Dict[str, str]], @@ -371,32 +341,25 @@ async def get_all_completions( # type: ignore[override] request_error_rate_threshold: float = 0.5, **request_params, ) -> List[dict]: - """ + ''' Run a batch of prompts through the model and return the results in the order given. - :param prompts: List of prompts to query the model with. - :type prompts: List[Dict[str, str]] - :param session: aiohttp RetryClient to use for the request. - :type session: RetryClient - :param api_call_max_parallel_count: Number of parallel requests to make to the API. - :type api_call_max_parallel_count: int - :param api_call_delay_seconds: Number of seconds to wait between API requests. - :type api_call_delay_seconds: float - :param request_error_rate_threshold: Maximum error rate allowed before raising an error. - :type request_error_rate_threshold: float - :keyword request_params: Additional parameters to pass to the API. - :return: List of completion results. - :rtype: List[dict] - """ + Parameters + ---------- + prompts: List of prompts to query the model with. + session: aiohttp RetryClient to use for the request. + api_call_max_parallel_count: Number of parallel requests to make to the API. + api_call_delay_seconds: Number of seconds to wait between API requests. + request_error_rate_threshold: Maximum error rate allowed before raising an error. + request_params: Additional parameters to pass to the API. 
+ ''' if api_call_max_parallel_count > 1: - self.logger.info("Using %s parallel workers to query the API..", api_call_max_parallel_count) + self.logger.info(f"Using {api_call_max_parallel_count} parallel workers to query the API..") # Format prompts and tag with index request_datas: List[Dict] = [] for idx, prompt in enumerate(prompts): - prompt: Dict[str, str] = self.format_request_data( # type: ignore[no-redef] - prompt, **request_params # type: ignore[arg-type] - ) + prompt: Dict[str, str] = self.format_request_data(prompt, **request_params) # type: ignore[no-redef,arg-type] prompt[self.prompt_idx_key] = idx # type: ignore[assignment] request_datas.append(prompt) @@ -406,22 +369,21 @@ async def get_all_completions( # type: ignore[override] output_collector: List = [] tasks = [ # create a set of worker-tasks to query inference endpoint in parallel - asyncio.create_task( - self.request_api_parallel( - request_datas=request_datas, - output_collector=output_collector, - session=session, - api_call_delay_seconds=api_call_delay_seconds, - request_error_rate_threshold=request_error_rate_threshold, - ) - ) + asyncio.create_task(self.request_api_parallel( + request_datas=request_datas, + output_collector=output_collector, + session=session, + api_call_delay_seconds=api_call_delay_seconds, + request_error_rate_threshold=request_error_rate_threshold, + )) for _ in range(api_call_max_parallel_count) ] # Await the completion of all tasks, and propagate any exceptions await asyncio.gather(*tasks, return_exceptions=False) - if request_datas: - raise RuntimeError("All inference tasks were finished, but the queue is not empty") + if len(request_datas): + raise RuntimeError( + "All inference tasks were finished, but the queue is not empty") # Output results back to the caller output_collector.sort(key=lambda x: x[self.prompt_idx_key]) @@ -429,6 +391,7 @@ async def get_all_completions( # type: ignore[override] output.pop(self.prompt_idx_key) return output_collector + async def request_api_parallel( self, request_datas: List[dict], @@ -439,17 +402,7 @@ async def request_api_parallel( ) -> None: """ Query the model for all prompts given as a list and append the output to output_collector. - - :param request_datas: List of request data dictionaries. - :type request_datas: List[dict] - :param output_collector: List to store the output. - :type output_collector: List - :param session: RetryClient session. - :type session: RetryClient - :param api_call_delay_seconds: Delay between consecutive API calls in seconds. - :type api_call_delay_seconds: float, optional - :param request_error_rate_threshold: Threshold for request error rate. - :type request_error_rate_threshold: float, optional + No return value, output_collector is modified in place. 
""" logger_tasks: List = [] # to await for logging to finish @@ -463,27 +416,25 @@ async def request_api_parallel( session=session, request_data=request_data, ) - await self._add_successful_response(response["time_taken"]) - except HTTPException as e: + await self._add_successful_response(response['time_taken']) + except Exception as e: response = { "request": request_data, "response": { "finish_reason": "error", "error": str(e), - }, + } } await self._add_error() - self.logger.exception("Errored on prompt #%s", str(prompt_idx)) + self.logger.exception(f"Errored on prompt #{prompt_idx}") # if we count too many errors, we stop and raise an exception response_count = await self.get_response_count() error_rate = await self.get_error_rate() if response_count >= MIN_ERRORS_TO_FAIL and error_rate >= request_error_rate_threshold: - error_msg = ( - f"Error rate is more than {request_error_rate_threshold:.0%} -- something is broken!" - ) - raise Exception(error_msg) from e + error_msg = f"Error rate is more than {request_error_rate_threshold:.0%} -- something is broken!" + raise Exception(error_msg) response[self.prompt_idx_key] = prompt_idx output_collector.append(response) @@ -496,6 +447,7 @@ async def request_api_parallel( await asyncio.gather(*logger_tasks) return + async def request_api( self, session: RetryClient, @@ -504,22 +456,20 @@ async def request_api( """ Request the model with a body of data. - :param session: HTTPS Session for invoking the endpoint. - :type session: RetryClient - :param request_data: Prompt dictionary to query the model with. (Pass {"prompt": prompt} instead of prompt.) - :type request_data: dict - :return: Response from the model. - :rtype: dict + Parameters + ---------- + session: HTTPS Session for invoking the endpoint. + request_data: Prompt dictionary to query the model with. (Pass {"prompt": prompt} instead of prompt.) """ self._log_request(request_data) token = await self.token_manager.get_token() - + headers = { "Content-Type": "application/json", "X-CV": f"{uuid.uuid4()}", - "X-ModelType": self.model or "", + "X-ModelType": self.model or '', } if self.token_manager.auth_header == "Bearer": @@ -542,21 +492,24 @@ async def request_api( time_start = time.time() full_response = None - async with session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params) as response: + async with session.post( + url=self.endpoint_url, + headers=headers, + json=request_data, + params=params + ) as response: if response.status == 200: response_data = await response.json() - self.logger.info("Response: %s", response_data) + self.logger.info(f"Response: {response_data}") # Copy the full response and return it to be saved in jsonl. 
full_response = copy.copy(response_data) time_taken = time.time() - time_start - parsed_response = self._parse_response(response_data) + parsed_response = self._parse_response(response_data, request_data=request_data) else: - raise HTTPException( - reason="Received unexpected HTTP status: {} {}".format(response.status, await response.text()) - ) + raise HTTPException(reason=f"Received unexpected HTTP status: {response.status} {await response.text()}") return { "request": request_data, @@ -565,7 +518,7 @@ async def request_api( "full_response": full_response, } - def _parse_response(self, response_data: dict) -> dict: + def _parse_response(self, response_data: dict, request_data: Optional[dict] = None) -> dict: # https://platform.openai.com/docs/api-reference/completions samples = [] finish_reason = [] @@ -575,36 +528,40 @@ def _parse_response(self, response_data: dict) -> dict: if "finish_reason" in choice: finish_reason.append(choice["finish_reason"]) - return {"samples": samples, "finish_reason": finish_reason, "id": response_data["id"]} - + return { + "samples": samples, + "finish_reason": finish_reason, + "id": response_data["id"] + } # =========================================================== # ============== OpenAIChatCompletionsModel ================= # =========================================================== - class OpenAIChatCompletionsModel(OpenAICompletionsModel): - """ + ''' OpenAIChatCompletionsModel is a wrapper around OpenAICompletionsModel that formats the prompt for chat completion. - """ - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name="OpenAIChatCompletionsModel", *args, **kwargs): + ''' + + def __init__(self, name='OpenAIChatCompletionsModel', *args, **kwargs): super().__init__(name=name, *args, **kwargs) - def format_request_data(self, prompt: List[dict], **request_params): # type: ignore[override] + + def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override] # Caption images if available if len(self.image_captions.keys()): - for message in prompt: - message["content"] = replace_prompt_captions( - message["content"], + for message in messages: + message['content'] = replace_prompt_captions( + message['content'], captions=self.image_captions, ) - request_data = {"messages": prompt, **self.get_model_params()} + request_data = {"messages": messages, **self.get_model_params()} request_data.update(request_params) return request_data + async def get_conversation_completion( self, messages: List[dict], @@ -612,20 +569,16 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - """ + ''' Query the model a single time with a message. - :param messages: List of messages to query the model with. - Expected format: [{"role": "user", "content": "Hello!"}, ...] - :type messages: List[dict] - :param session: aiohttp RetryClient object to query the model with. - :type session: RetryClient - :param role: Not used for this model, since it is a chat model. - :type role: str - :keyword **request_params: Additional parameters to pass to the model. - :return: Dictionary containing the completion response. - :rtype: dict - """ + Parameters + ---------- + messages: List of messages to query the model with. Expected format: [{"role": "user", "content": "Hello!"}, ...] + session: aiohttp RetryClient object to query the model with. + role: Not used for this model, since it is a chat model. + request_params: Additional parameters to pass to the model. 
+ ''' request_data = self.format_request_data( messages=messages, **request_params, @@ -635,31 +588,34 @@ async def get_conversation_completion( request_data=request_data, ) + async def get_completion( self, prompt: str, session: RetryClient, **request_params, ) -> dict: - """ - Query a ChatCompletions model with a single prompt. - - :param prompt: Prompt str to query model with. - :type prompt: str - :param session: aiohttp RetryClient object to use for the request. - :type session: RetryClient - :keyword **request_params: Additional parameters to pass to the request. - :return: Dictionary containing the completion response. - :rtype: dict - """ + ''' + Query a ChatCompletions model with a single prompt. Note: entire message will be inserted into a "system" call. + + Parameters + ---------- + prompt: Prompt str to query model with. + session: aiohttp RetryClient object to use for the request. + **request_params: Additional parameters to pass to the request. + ''' messages = [{"role": "system", "content": prompt}] - request_data = self.format_request_data(messages=messages, **request_params) + request_data = self.format_request_data( + messages=messages, + **request_params + ) return await self.request_api( session=session, request_data=request_data, ) + async def get_all_completions( self, prompts: List[str], # type: ignore[override] @@ -680,34 +636,36 @@ async def get_all_completions( **request_params, ) - def _parse_response(self, response_data: dict) -> dict: + + def _parse_response(self, response_data: dict, request_data: Optional[dict] = None) -> dict: # https://platform.openai.com/docs/api-reference/chat samples = [] finish_reason = [] for choice in response_data["choices"]: - if "message" in choice and "content" in choice["message"]: - samples.append(choice["message"]["content"]) - if "message" in choice and "finish_reason" in choice["message"]: - finish_reason.append(choice["message"]["finish_reason"]) - - return {"samples": samples, "finish_reason": finish_reason, "id": response_data["id"]} + if 'message' in choice and 'content' in choice['message']: + samples.append(choice['message']['content']) + if 'message' in choice and 'finish_reason' in choice['message']: + finish_reason.append(choice['message']['finish_reason']) + return { + "samples": samples, + "finish_reason": finish_reason, + "id": response_data["id"] + } # =========================================================== # =========== OpenAIMultiModalCompletionsModel ============== # =========================================================== - class OpenAIMultiModalCompletionsModel(OpenAICompletionsModel): - """ + ''' Wrapper around OpenAICompletionsModel that formats the prompt for multimodal completions containing images. - """ - + ''' model_param_names = ["temperature", "max_tokens", "top_p", "n", "stop"] - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name="OpenAIMultiModalCompletionsModel", images_dir: Optional[str] = None, *args, **kwargs): + + def __init__(self, name='OpenAIMultiModalCompletionsModel', images_dir: Optional[str] = None, *args, **kwargs): self.images_dir = images_dir super().__init__(name=name, *args, **kwargs) @@ -723,18 +681,15 @@ def format_request_data(self, prompt: str, **request_params) -> dict: request.update(request_params) return request - def _log_request(self, request: dict) -> None: - """ - Log prompt, ignoring image data if multimodal. - :param request: The request dictionary. 
- :type request: dict - """ + def _log_request(self, request: dict) -> None: + '''Log prompt, ignoring image data if multimodal.''' loggable_prompt_transcript = { - "transcript": [ - (c if c["type"] != "image" else {"type": "image", "data": "..."}) for c in request["transcript"] + 'transcript': [ + (c if c['type'] != 'image' else {'type': 'image', 'data': '...'}) + for c in request['transcript'] ], - **{k: v for k, v in request.items() if k != "transcript"}, + **{k: v for k, v in request.items() if k != 'transcript'} } super()._log_request(loggable_prompt_transcript) @@ -743,27 +698,21 @@ def _log_request(self, request: dict) -> None: # ============== LLAMA CompletionsModel ===================== # =========================================================== - class LLAMACompletionsModel(OpenAICompletionsModel): - """ + ''' Object for calling a Completions-style API for LLAMA models. - """ - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name: str = "LLAMACompletionsModel", *args, **kwargs): + ''' + + def __init__( + self, name: str = 'LLAMACompletionsModel', *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" def format_request_data(self, prompt: str, **request_params): - """ + ''' Format the request data for the OpenAI API. - - :param prompt: The prompt string. - :type prompt: str - :keyword request_params: Additional request parameters. - :return: The formatted request data. - :rtype: dict - """ + ''' # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -774,21 +723,20 @@ def format_request_data(self, prompt: str, **request_params): request_data = { "input_data": { "input_string": [prompt], - "parameters": {"temperature": self.temperature, "max_gen_len": self.max_tokens}, + "parameters": {"temperature": self.temperature, "max_gen_len": self.max_tokens} } } request_data.update(request_params) return request_data - # pylint: disable=arguments-differ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # type: ignore[override] - prompt = request_data["input_data"]["input_string"][0] + prompt = request_data['input_data']['input_string'][0] # remove prompt text from each response as llama model returns prompt + completion instead of only completion # remove any text after the stop tokens, since llama doesn't support stop token - for idx, _ in enumerate(response_data["samples"]): - response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, "").strip() + for idx, response in enumerate(response_data["samples"]): + response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, '').strip() for stop_token in self.stop: if stop_token in response_data["samples"][idx]: response_data["samples"][idx] = response_data["samples"][idx].split(stop_token)[0].strip() @@ -798,7 +746,7 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t for choice in response_data: if "0" in choice: samples.append(choice["0"]) - finish_reason.append("Stop") + finish_reason.append('Stop') return { "samples": samples, @@ -810,75 +758,68 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t # ============== LLAMA ChatCompletionsModel ================= # =========================================================== class LLAMAChatCompletionsModel(LLAMACompletionsModel): - """ + ''' LLaMa ChatCompletionsModel is a 
wrapper around LLaMaCompletionsModel that formats the prompt for chat completion. - This chat completion model should be only used as assistant, - and shouldn't be used to simulate user. It is not possible - to pass a system prompt do describe how the model would behave, - So we only use the model as assistant to reply for questions made by GPT simulated users. - """ - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name="LLAMAChatCompletionsModel", *args, **kwargs): + This chat completion model should be only used as assistant, and shouldn't be used to simulate user. It is not possible + to pass a system prompt do describe how the model would behave, So we only use the model as assistant to reply for questions + made by GPT simulated users. + ''' + + def __init__(self, name='LLAMAChatCompletionsModel', *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" - def format_request_data(self, prompt: List[dict], **request_params): # type: ignore[override] + def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override] # Caption images if available if len(self.image_captions.keys()): - for message in prompt: - message["content"] = replace_prompt_captions( - message["content"], + for message in messages: + message['content'] = replace_prompt_captions( + message['content'], captions=self.image_captions, ) - # For LLaMa we don't pass the prompt (user persona) as a system message - # since LLama doesn't support system message - # LLama only supports user, and assistant messages. - # The messages sequence has to start with User message/ It can't have two user or + # For LLaMa we don't pass the prompt (user persona) as a system message since LLama doesn't support system message + # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can't have two user or # two assistant consecutive messages. - # so if we set the system meta prompt as a user message, - # and if we have the first two messages made by user then we + # so if we set the system meta prompt as a user message, and if we have the first two messages made by user then we # combine the two messages in one message. 
- for _, x in enumerate(prompt): - if x["role"] == "system": - x["role"] = "user" - if len(prompt) > 1 and prompt[0]["role"] == "user" and prompt[1]["role"] == "user": - prompt[0] = {"role": "user", "content": prompt[0]["content"] + "\n" + prompt[1]["content"]} - del prompt[1] + for idx, x in enumerate(messages): + if x['role'] == 'system': + x['role'] = 'user' + if len(messages) > 1 and messages[0]['role'] == 'user' and messages[1]['role'] == 'user': + messages[0] = {'role': 'user', 'content': messages[0]['content'] + '\n' + messages[1]['content']} + del messages[1] # request_data = {"messages": messages, **self.get_model_params()} request_data = { - "input_data": { - "input_string": prompt, - "parameters": {"temperature": self.temperature, "max_new_tokens": self.max_tokens}, - }, + "input_data": + { + "input_string": messages, + "parameters": {"temperature": self.temperature, "max_new_tokens": self.max_tokens} + }, } request_data.update(request_params) return request_data async def get_conversation_completion( - self, - messages: List[dict], - session: RetryClient, - role: str = "assistant", - **request_params, + self, + messages: List[dict], + session: RetryClient, + role: str = "assistant", + **request_params, ) -> dict: - """ + ''' Query the model a single time with a message. - :param messages: List of messages to query the model with. - Expected format: [{"role": "user", "content": "Hello!"}, ...] - :type messages: List[dict] - :param session: aiohttp RetryClient object to query the model with. - :type session: RetryClient - :param role: Not used for this model, since it is a chat model. - :type role: str - :keyword request_params: Additional parameters to pass to the model. - :return: Dictionary containing the response from the model. - :rtype: dict - """ + Parameters + ---------- + messages: List of messages to query the model with. Expected format: [{"role": "user", "content": "Hello!"}, ...] + session: aiohttp RetryClient object to query the model with. + role: Not used for this model, since it is a chat model. + request_params: Additional parameters to pass to the model. 
+ ''' request_data = self.format_request_data( messages=messages, @@ -889,18 +830,17 @@ async def get_conversation_completion( request_data=request_data, ) - # pylint: disable=arguments-differ def _parse_response(self, response_data: dict) -> dict: # type: ignore[override] # https://platform.openai.com/docs/api-reference/chat samples = [] finish_reason = [] # for choice in response_data: - if "output" in response_data: - samples.append(response_data["output"]) - finish_reason.append("Stop") + if 'output' in response_data: + samples.append(response_data['output']) + finish_reason.append('Stop') return { "samples": samples, "finish_reason": finish_reason, # "id": response_data["id"] - } + } \ No newline at end of file From 8cea9c33a47a35a9b8f74a68a019b052495f3945 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 6 Mar 2024 18:34:21 -0800 Subject: [PATCH 02/11] quotes --- .../simulator/_model_tools/models.py | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index 931830aa7a80..404d76e387ed 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -25,7 +25,7 @@ def get_model_class_from_url(endpoint_url: str): - '''Convert an endpoint URL to the appropriate model class.''' + """Convert an endpoint URL to the appropriate model class.""" endpoint_path = urlparse(endpoint_url).path # remove query params if endpoint_path.endswith("chat/completions"): @@ -83,9 +83,9 @@ async def on_request_end(self, session, trace_config_ctx, params): # =========================================================== class LLMBase(ABC): - ''' + """ Base class for all LLM models. - ''' + """ def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[dict] = {}): self.endpoint_url = endpoint_url @@ -113,7 +113,7 @@ async def get_completion( session: RetryClient, **request_params, ) -> dict: - ''' + """ Query the model a single time with a prompt. Parameters @@ -121,7 +121,7 @@ async def get_completion( prompt: Prompt str to query model with. session: aiohttp RetryClient object to use for the request. **request_params: Additional parameters to pass to the request. - ''' + """ request_data = self.format_request_data(prompt, **request_params) return await self.request_api( session=session, @@ -211,9 +211,9 @@ def __repr__(self): # =========================================================== class OpenAICompletionsModel(LLMBase): - ''' + """ Object for calling a Completions-style API for OpenAI models. 
- ''' + """ prompt_idx_key = "__prompt_idx__" max_stop_tokens = 4 @@ -230,7 +230,7 @@ class OpenAICompletionsModel(LLMBase): def __init__( self, *, endpoint_url: str, - name: str = 'OpenAICompletionsModel', + name: str = "OpenAICompletionsModel", additional_headers: Optional[dict] = {}, api_version: Optional[str] = "2023-03-15-preview", token_manager: APITokenManager, @@ -263,7 +263,7 @@ def __init__( if not stop: stop = [] # Else if stop sequence is given as a string (Ex: "["\n", ""]"), convert - elif type(stop) is str and stop.startswith('[') and stop.endswith(']'): + elif type(stop) is str and stop.startswith("[") and stop.endswith("]"): stop = eval(stop) elif type(stop) is str: stop = [stop] @@ -287,9 +287,9 @@ def get_model_params(self): def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]: - ''' + """ Format the request data for the OpenAI API. - ''' + """ # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -309,7 +309,7 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - ''' + """ Query the model a single time with a message. Parameters @@ -318,10 +318,10 @@ async def get_conversation_completion( session: aiohttp RetryClient object to query the model with. role: Role of the user sending the message. request_params: Additional parameters to pass to the model. - ''' + """ prompt = [] for message in messages: - prompt.append(f"{self.CHAT_START_TOKEN}{message['role']}\n{message['content']}\n{self.CHAT_END_TOKEN}\n") + prompt.append(f"{self.CHAT_START_TOKEN}{message["role"]}\n{message["content"]}\n{self.CHAT_END_TOKEN}\n") prompt_string: str = "".join(prompt) prompt_string += f"{self.CHAT_START_TOKEN}{role}\n" @@ -341,7 +341,7 @@ async def get_all_completions( # type: ignore[override] request_error_rate_threshold: float = 0.5, **request_params, ) -> List[dict]: - ''' + """ Run a batch of prompts through the model and return the results in the order given. Parameters @@ -352,7 +352,7 @@ async def get_all_completions( # type: ignore[override] api_call_delay_seconds: Number of seconds to wait between API requests. request_error_rate_threshold: Maximum error rate allowed before raising an error. request_params: Additional parameters to pass to the API. 
- ''' + """ if api_call_max_parallel_count > 1: self.logger.info(f"Using {api_call_max_parallel_count} parallel workers to query the API..") @@ -406,7 +406,7 @@ async def request_api_parallel( """ logger_tasks: List = [] # to await for logging to finish - while True: # process data from queue until it's empty + while True: # process data from queue until it"s empty try: request_data = request_datas.pop() prompt_idx = request_data.pop(self.prompt_idx_key) @@ -416,7 +416,7 @@ async def request_api_parallel( session=session, request_data=request_data, ) - await self._add_successful_response(response['time_taken']) + await self._add_successful_response(response["time_taken"]) except Exception as e: response = { "request": request_data, @@ -469,7 +469,7 @@ async def request_api( headers = { "Content-Type": "application/json", "X-CV": f"{uuid.uuid4()}", - "X-ModelType": self.model or '', + "X-ModelType": self.model or "", } if self.token_manager.auth_header == "Bearer": @@ -539,12 +539,12 @@ def _parse_response(self, response_data: dict, request_data: Optional[dict] = No # =========================================================== class OpenAIChatCompletionsModel(OpenAICompletionsModel): - ''' + """ OpenAIChatCompletionsModel is a wrapper around OpenAICompletionsModel that formats the prompt for chat completion. - ''' + """ - def __init__(self, name='OpenAIChatCompletionsModel', *args, **kwargs): + def __init__(self, name="OpenAIChatCompletionsModel", *args, **kwargs): super().__init__(name=name, *args, **kwargs) @@ -552,8 +552,8 @@ def format_request_data(self, messages: List[dict], **request_params): # type: # Caption images if available if len(self.image_captions.keys()): for message in messages: - message['content'] = replace_prompt_captions( - message['content'], + message["content"] = replace_prompt_captions( + message["content"], captions=self.image_captions, ) @@ -569,7 +569,7 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - ''' + """ Query the model a single time with a message. Parameters @@ -578,7 +578,7 @@ async def get_conversation_completion( session: aiohttp RetryClient object to query the model with. role: Not used for this model, since it is a chat model. request_params: Additional parameters to pass to the model. - ''' + """ request_data = self.format_request_data( messages=messages, **request_params, @@ -595,7 +595,7 @@ async def get_completion( session: RetryClient, **request_params, ) -> dict: - ''' + """ Query a ChatCompletions model with a single prompt. Note: entire message will be inserted into a "system" call. Parameters @@ -603,7 +603,7 @@ async def get_completion( prompt: Prompt str to query model with. session: aiohttp RetryClient object to use for the request. **request_params: Additional parameters to pass to the request. 
- ''' + """ messages = [{"role": "system", "content": prompt}] request_data = self.format_request_data( @@ -643,10 +643,10 @@ def _parse_response(self, response_data: dict, request_data: Optional[dict] = No finish_reason = [] for choice in response_data["choices"]: - if 'message' in choice and 'content' in choice['message']: - samples.append(choice['message']['content']) - if 'message' in choice and 'finish_reason' in choice['message']: - finish_reason.append(choice['message']['finish_reason']) + if "message" in choice and "content" in choice["message"]: + samples.append(choice["message"]["content"]) + if "message" in choice and "finish_reason" in choice["message"]: + finish_reason.append(choice["message"]["finish_reason"]) return { "samples": samples, @@ -659,13 +659,13 @@ def _parse_response(self, response_data: dict, request_data: Optional[dict] = No # =========================================================== class OpenAIMultiModalCompletionsModel(OpenAICompletionsModel): - ''' + """ Wrapper around OpenAICompletionsModel that formats the prompt for multimodal completions containing images. - ''' + """ model_param_names = ["temperature", "max_tokens", "top_p", "n", "stop"] - def __init__(self, name='OpenAIMultiModalCompletionsModel', images_dir: Optional[str] = None, *args, **kwargs): + def __init__(self, name="OpenAIMultiModalCompletionsModel", images_dir: Optional[str] = None, *args, **kwargs): self.images_dir = images_dir super().__init__(name=name, *args, **kwargs) @@ -683,13 +683,13 @@ def format_request_data(self, prompt: str, **request_params) -> dict: def _log_request(self, request: dict) -> None: - '''Log prompt, ignoring image data if multimodal.''' + """Log prompt, ignoring image data if multimodal.""" loggable_prompt_transcript = { - 'transcript': [ - (c if c['type'] != 'image' else {'type': 'image', 'data': '...'}) - for c in request['transcript'] + "transcript": [ + (c if c["type"] != "image" else {"type": "image", "data": "..."}) + for c in request["transcript"] ], - **{k: v for k, v in request.items() if k != 'transcript'} + **{k: v for k, v in request.items() if k != "transcript"} } super()._log_request(loggable_prompt_transcript) @@ -699,20 +699,20 @@ def _log_request(self, request: dict) -> None: # =========================================================== class LLAMACompletionsModel(OpenAICompletionsModel): - ''' + """ Object for calling a Completions-style API for LLAMA models. - ''' + """ def __init__( - self, name: str = 'LLAMACompletionsModel', *args, **kwargs): + self, name: str = "LLAMACompletionsModel", *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" def format_request_data(self, prompt: str, **request_params): - ''' + """ Format the request data for the OpenAI API. 
- ''' + """ # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -731,12 +731,12 @@ def format_request_data(self, prompt: str, **request_params): return request_data def _parse_response(self, response_data: dict, request_data: dict) -> dict: # type: ignore[override] - prompt = request_data['input_data']['input_string'][0] + prompt = request_data["input_data"]["input_string"][0] # remove prompt text from each response as llama model returns prompt + completion instead of only completion - # remove any text after the stop tokens, since llama doesn't support stop token + # remove any text after the stop tokens, since llama doesn"t support stop token for idx, response in enumerate(response_data["samples"]): - response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, '').strip() + response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, "").strip() for stop_token in self.stop: if stop_token in response_data["samples"][idx]: response_data["samples"][idx] = response_data["samples"][idx].split(stop_token)[0].strip() @@ -746,7 +746,7 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t for choice in response_data: if "0" in choice: samples.append(choice["0"]) - finish_reason.append('Stop') + finish_reason.append("Stop") return { "samples": samples, @@ -758,15 +758,15 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t # ============== LLAMA ChatCompletionsModel ================= # =========================================================== class LLAMAChatCompletionsModel(LLAMACompletionsModel): - ''' + """ LLaMa ChatCompletionsModel is a wrapper around LLaMaCompletionsModel that formats the prompt for chat completion. - This chat completion model should be only used as assistant, and shouldn't be used to simulate user. It is not possible + This chat completion model should be only used as assistant, and shouldn"t be used to simulate user. It is not possible to pass a system prompt do describe how the model would behave, So we only use the model as assistant to reply for questions made by GPT simulated users. - ''' + """ - def __init__(self, name='LLAMAChatCompletionsModel', *args, **kwargs): + def __init__(self, name="LLAMAChatCompletionsModel", *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" @@ -775,21 +775,21 @@ def format_request_data(self, messages: List[dict], **request_params): # type: # Caption images if available if len(self.image_captions.keys()): for message in messages: - message['content'] = replace_prompt_captions( - message['content'], + message["content"] = replace_prompt_captions( + message["content"], captions=self.image_captions, ) - # For LLaMa we don't pass the prompt (user persona) as a system message since LLama doesn't support system message - # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can't have two user or + # For LLaMa we don"t pass the prompt (user persona) as a system message since LLama doesn"t support system message + # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can"t have two user or # two assistant consecutive messages. 
# so if we set the system meta prompt as a user message, and if we have the first two messages made by user then we # combine the two messages in one message. for idx, x in enumerate(messages): - if x['role'] == 'system': - x['role'] = 'user' - if len(messages) > 1 and messages[0]['role'] == 'user' and messages[1]['role'] == 'user': - messages[0] = {'role': 'user', 'content': messages[0]['content'] + '\n' + messages[1]['content']} + if x["role"] == "system": + x["role"] = "user" + if len(messages) > 1 and messages[0]["role"] == "user" and messages[1]["role"] == "user": + messages[0] = {"role": "user", "content": messages[0]["content"] + "\n" + messages[1]["content"]} del messages[1] # request_data = {"messages": messages, **self.get_model_params()} @@ -810,7 +810,7 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - ''' + """ Query the model a single time with a message. Parameters @@ -819,7 +819,7 @@ async def get_conversation_completion( session: aiohttp RetryClient object to query the model with. role: Not used for this model, since it is a chat model. request_params: Additional parameters to pass to the model. - ''' + """ request_data = self.format_request_data( messages=messages, @@ -835,9 +835,9 @@ def _parse_response(self, response_data: dict) -> dict: # type: ignore[override samples = [] finish_reason = [] # for choice in response_data: - if 'output' in response_data: - samples.append(response_data['output']) - finish_reason.append('Stop') + if "output" in response_data: + samples.append(response_data["output"]) + finish_reason.append("Stop") return { "samples": samples, From bea237e466286888f05d0ec685eaa2082fd88332 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 07:45:10 -0800 Subject: [PATCH 03/11] Spellcheck fixes --- .../synthetic/simulator/_model_tools/models.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index 404d76e387ed..ead2dc644ee1 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -734,7 +734,7 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t prompt = request_data["input_data"]["input_string"][0] # remove prompt text from each response as llama model returns prompt + completion instead of only completion - # remove any text after the stop tokens, since llama doesn"t support stop token + # remove any text after the stop tokens, since llama does not support stop token for idx, response in enumerate(response_data["samples"]): response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, "").strip() for stop_token in self.stop: @@ -761,7 +761,7 @@ class LLAMAChatCompletionsModel(LLAMACompletionsModel): """ LLaMa ChatCompletionsModel is a wrapper around LLaMaCompletionsModel that formats the prompt for chat completion. - This chat completion model should be only used as assistant, and shouldn"t be used to simulate user. It is not possible + This chat completion model should be only used as assistant, and should not be used to simulate user. 
It is not possible to pass a system prompt do describe how the model would behave, So we only use the model as assistant to reply for questions made by GPT simulated users. """ @@ -780,8 +780,9 @@ def format_request_data(self, messages: List[dict], **request_params): # type: captions=self.image_captions, ) - # For LLaMa we don"t pass the prompt (user persona) as a system message since LLama doesn"t support system message - # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can"t have two user or + # For LLaMa we do not pass the prompt (user persona) as a system message since LLama does not support system message + # LLama only supports user, and assistant messages. + # The messages sequence has to start with User message/ It can not have two user or # two assistant consecutive messages. # so if we set the system meta prompt as a user message, and if we have the first two messages made by user then we # combine the two messages in one message. From 45073cc695f56f1f7aa766445e2f07c619e4081a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 08:13:43 -0800 Subject: [PATCH 04/11] ignore the models for doc generation --- doc/sphinx/individual_build_conf.py | 2 +- .../ai/generative/synthetic/simulator/simulator/simulator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx/individual_build_conf.py b/doc/sphinx/individual_build_conf.py index 83950ae4cb25..a289c668e3ed 100644 --- a/doc/sphinx/individual_build_conf.py +++ b/doc/sphinx/individual_build_conf.py @@ -107,7 +107,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ['_build', '*/synthetic/simulator/_model_tools/models.py'] # The reST default role (used for this markup: `text`) to use for all # documents. diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py index 5e668943c1b5..f6a9162c30aa 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py @@ -1,7 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
# --------------------------------------------------------- - +# pylint: disable=E0401 # needed for 'list' type annotations on 3.8 from __future__ import annotations From 08af5b31b34ebd75e71a65902a3e28588f982ef6 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 08:32:24 -0800 Subject: [PATCH 05/11] Fixed the quotes on f strings --- .../ai/generative/synthetic/simulator/_model_tools/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index ead2dc644ee1..246eccb89f35 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -321,7 +321,7 @@ async def get_conversation_completion( """ prompt = [] for message in messages: - prompt.append(f"{self.CHAT_START_TOKEN}{message["role"]}\n{message["content"]}\n{self.CHAT_END_TOKEN}\n") + prompt.append(f"{self.CHAT_START_TOKEN}{message['role']}\n{message['content']}\n{self.CHAT_END_TOKEN}\n") prompt_string: str = "".join(prompt) prompt_string += f"{self.CHAT_START_TOKEN}{role}\n" From 7584cc9eea23c271d01f9a572e07502fb75fa90f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 09:27:33 -0800 Subject: [PATCH 06/11] pylint skip file --- .../synthetic/simulator/simulator/_proxy_completion_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py index c33b2ba22c7f..0000c858380d 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py @@ -1,6 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
# --------------------------------------------------------- +#pylint: skip-file from typing import List import uuid import time @@ -30,8 +31,8 @@ def __init__(self, name, template_key, template_parameters, *args, **kwargs): super().__init__(name=name, *args, **kwargs) - def format_request_data(self, prompt: List[dict], **request_params): # type: ignore[override] - request_data = {"messages": prompt, **self.get_model_params()} + def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override] + request_data = {"messages": messages, **self.get_model_params()} request_data.update(request_params) return request_data From d727177f72d2e5b2091204397ba8ae490bdedc1f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 11:34:59 -0700 Subject: [PATCH 07/11] Support for summarization --- .../simulator/_callback_conversation_bot.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py index 007201fe9baf..8cdf2f45ec8c 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py @@ -1,6 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- +#pylint: skip-file import copy from typing import List, Tuple @@ -29,8 +30,22 @@ async def generate_response( self.user_template, conversation_history, self.user_template_parameters ) msg_copy = copy.deepcopy(chat_protocol_message) - result = await self.callback(msg_copy) - + result = {} + try: + result = await self.callback(msg_copy) + except Exception as exc: + if "status_code" in dir(exc) and 400 <= exc.status_code < 500 and "response was filtered" in exc.message: + result = { + "messages": [{ + "content": ("Error: The response was filtered due to the prompt " + "triggering Azure OpenAI's content management policy. 
" + "Please modify your prompt and retry."), + "role": "assistant" + }], + "finish_reason": ["stop"], + "id": None, + "template_parameters": {} + } self.logger.info("Using user provided callback returning response.") time_taken = 0 @@ -54,6 +69,9 @@ def _to_chat_protocol(self, template, conversation_history, template_parameters) for _, m in enumerate(conversation_history): messages.append({"content": m.message, "role": m.role.value}) + if template_parameters.get("file_content", None) and any('File contents:' not in message['content'] for message in messages): + messages.append({"content": f"File contents: {template_parameters['file_content']}", "role": "user"}) + return { "template_parameters": template_parameters, "messages": messages, From 8b895ee8f3637e05325391ee85798a29212ddc9a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 15:08:49 -0700 Subject: [PATCH 08/11] Adding a limit of 2 conversation turns for all but conversation simulators --- .../generative/synthetic/simulator/simulator/simulator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py index f6a9162c30aa..2554ad75e489 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py @@ -194,7 +194,7 @@ def _join_conversation_starter(self, parameters, to_join): async def simulate_async( self, template: "Template", - max_conversation_turns: int, + max_conversation_turns: int = 2, parameters: Optional[List[dict]] = None, jailbreak: bool = False, api_call_retry_limit: int = 3, @@ -208,6 +208,7 @@ async def simulate_async( :keyword template: An instance of the Template class defining the conversation structure. :paramtype template: Template :keyword max_conversation_turns: The maximum number of conversation turns to simulate. + Defaults to 2, change only applies to chat templates. :paramtype max_conversation_turns: int :keyword parameters: A list of dictionaries containing the parameter values to be used in the simulations. Defaults to an empty list. 
@@ -239,7 +240,8 @@ async def simulate_async( if not isinstance(parameters, list): raise ValueError(f"Expect parameters to be a list of dictionary, but found {type(parameters)}") - + if "conversation" not in template.template_name: + max_conversation_turns = 2 if template.content_harm: self._ensure_service_dependencies() self.adversarial = True From 92d6d8ec26ca9a702a3e9acb5c902478d9b80537 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 15:10:34 -0700 Subject: [PATCH 09/11] exclude synthetic from mypy --- sdk/ai/azure-ai-generative/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-generative/pyproject.toml b/sdk/ai/azure-ai-generative/pyproject.toml index 9e37165cc0f0..c12b3e784e21 100644 --- a/sdk/ai/azure-ai-generative/pyproject.toml +++ b/sdk/ai/azure-ai-generative/pyproject.toml @@ -13,7 +13,7 @@ strict_sphinx = true [tool.mypy] python_version = "3.10" -exclude = ["azure/ai/generative/index/_langchain/vendor", "tests", "setup.py", "samples", "azure/ai/generative/evaluate/pf_templates/built_in_metrics"] +exclude = ["azure/ai/generative/index/_langchain/vendor", "tests", "setup.py", "samples", "azure/ai/generative/evaluate/pf_templates/built_in_metrics", "azure/ai/generative/synthetic"] warn_unused_configs = true follow_imports = "skip" ignore_missing_imports = true From 4742b0418dfe61f741a0e6f63e1b7c9b7c25e83d Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 15:59:55 -0700 Subject: [PATCH 10/11] Another lint fix --- .../azure/ai/generative/index/_utils/logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py index 7f6a47408fea..b6c56c305b7d 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py @@ -1,6 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- +# pylint: disable=W0125 """Logging utilities.""" import inspect import logging From 975b0b37a37d0da3c77bbf2ff704f5e550a6ca4f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 12 Mar 2024 09:36:48 -0700 Subject: [PATCH 11/11] Skip the file causing linting issues --- .../azure/ai/generative/index/_embeddings/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py index c2938372b55d..6aa5846c8063 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py @@ -2,6 +2,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- +# pylint: skip-file """Embeddings generation and management tools.""" import contextlib import copy