From 6a57ceac4ccb0731e7abebce155a1d36f088aefc Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 6 Mar 2024 18:06:00 -0800 Subject: [PATCH 01/11] Reverting models to make sure calls to the simulator work --- .../simulator/_model_tools/models.py | 510 ++++++++---------- 1 file changed, 225 insertions(+), 285 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index 104f8af11a3b..931830aa7a80 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -1,8 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- - -from ast import literal_eval +# pylint: skip-file import copy import time import asyncio @@ -10,12 +9,12 @@ import logging from urllib.parse import urlparse from abc import ABC, abstractmethod -from typing import Deque, Dict, List, Optional, Union +from typing import Deque, Dict, List, Optional, Union, Sized from collections import deque -from aiohttp import TraceConfig # pylint: disable=networking-import-outside-azure-core-transport -from aiohttp.web import HTTPException # pylint: disable=networking-import-outside-azure-core-transport -from aiohttp_retry import RetryClient, RandomRetry # pylint: disable=networking-import-outside-azure-core-transport +from aiohttp import TraceConfig +from aiohttp.web import HTTPException +from aiohttp_retry import RetryClient, RandomRetry from .identity_manager import APITokenManager from .images import replace_prompt_captions, format_multimodal_prompt @@ -25,24 +24,18 @@ MAX_TIME_TAKEN_RECORDS = 20_000 -def get_model_class_from_url(endpoint_url: str) -> type: - """ - Convert an endpoint URL to the appropriate model class. - - :param endpoint_url: The URL of the endpoint. - :type endpoint_url: str - :return: The model class corresponding to the endpoint URL. 
- :rtype: type - """ +def get_model_class_from_url(endpoint_url: str): + '''Convert an endpoint URL to the appropriate model class.''' endpoint_path = urlparse(endpoint_url).path # remove query params if endpoint_path.endswith("chat/completions"): return OpenAIChatCompletionsModel - if "/rainbow" in endpoint_path: + elif "/rainbow" in endpoint_path: return OpenAIMultiModalCompletionsModel - if endpoint_path.endswith("completions"): + elif endpoint_path.endswith("completions"): return OpenAICompletionsModel - raise ValueError(f"Unknown API type for endpoint {endpoint_url}") + else: + raise ValueError(f"Unknown API type for endpoint {endpoint_url}") # ===================== HTTP Retry ====================== @@ -58,44 +51,43 @@ def __init__(self, n_retry, retry_timeout, logger, retry_options=None): trace_config.on_request_end.append(self.on_request_end) if retry_options is None: retry_options = RandomRetry( # set up retry configuration - statuses=[104, 408, 409, 424, 429, 500, 502, 503, 504], # on which statuses to retry + statuses=[104, 408, 409, 424, 429, 500, 502, + 503, 504], # on which statuses to retry attempts=n_retry, min_timeout=retry_timeout, max_timeout=retry_timeout, ) - self.client = RetryClient(trace_configs=[trace_config], retry_options=retry_options) + self.client = RetryClient( + trace_configs=[trace_config], retry_options=retry_options) - async def on_request_start(self, trace_config_ctx, params): + async def on_request_start(self, session, trace_config_ctx, params): current_attempt = trace_config_ctx.trace_request_ctx["current_attempt"] - self.logger.info("[ATTEMPT %s] Sending %s request to %s" % (current_attempt, params.method, params.url)) + self.logger.info("[ATTEMPT %s] Sending %s request to %s" % ( + current_attempt, params.method, params.url + )) - async def on_request_end(self, trace_config_ctx, params): + async def on_request_end(self, session, trace_config_ctx, params): current_attempt = trace_config_ctx.trace_request_ctx["current_attempt"] request_headers = dict(params.response.request_info.headers) if "Authorization" in request_headers: del request_headers["Authorization"] # hide auth token from logs if "api-key" in request_headers: del request_headers["api-key"] - self.logger.info( - "[ATTEMPT %s] For %s request to %s, received response with status %s and request headers: %s" - % (current_attempt, params.method, params.url, params.response.status, request_headers) - ) - + self.logger.info("[ATTEMPT %s] For %s request to %s, received response with status %s and request headers: %s" % ( + current_attempt, params.method, params.url, params.response.status, request_headers + )) # =========================================================== # ===================== LLMBase Class ======================= # =========================================================== - class LLMBase(ABC): - """ + ''' Base class for all LLM models. 
- """ + ''' - def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[dict] = None): - if additional_headers is None: - additional_headers = {} + def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[dict] = {}): self.endpoint_url = endpoint_url self.name = name self.additional_headers = additional_headers @@ -103,7 +95,7 @@ def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: # Metric tracking self.lock = asyncio.Lock() - self.response_times: Deque[Union[int, float]] = deque(maxlen=MAX_TIME_TAKEN_RECORDS) + self.response_times: Deque[Union[int, float]] = deque(maxlen=MAX_TIME_TAKEN_RECORDS) self.step = 0 self.error_count = 0 @@ -121,17 +113,15 @@ async def get_completion( session: RetryClient, **request_params, ) -> dict: - """ + ''' Query the model a single time with a prompt. - :param prompt: Prompt str to query model with. - :type prompt: str - :param session: aiohttp RetryClient object to use for the request. - :type session: RetryClient - :keyword **request_params: Additional parameters to pass to the request. - :return: Dictionary containing the completion response from the model. - :rtype: dict - """ + Parameters + ---------- + prompt: Prompt str to query model with. + session: aiohttp RetryClient object to use for the request. + **request_params: Additional parameters to pass to the request. + ''' request_data = self.format_request_data(prompt, **request_params) return await self.request_api( session=session, @@ -180,7 +170,7 @@ async def request_api_parallel( pass def _log_request(self, request: dict) -> None: - self.logger.info("Request: %s", request) + self.logger.info(f"Request: {request}") async def _add_successful_response(self, time_taken: Union[int, float]) -> None: async with self.lock: @@ -220,37 +210,28 @@ def __repr__(self): # ================== OpenAICompletions ====================== # =========================================================== - -class OpenAICompletionsModel(LLMBase): # pylint: disable=too-many-instance-attributes - """ +class OpenAICompletionsModel(LLMBase): + ''' Object for calling a Completions-style API for OpenAI models. 
- """ - + ''' prompt_idx_key = "__prompt_idx__" max_stop_tokens = 4 stop_tokens = ["<|im_end|>", "<|endoftext|>"] model_param_names = [ - "model", - "temperature", - "max_tokens", - "top_p", - "n", - "frequency_penalty", - "presence_penalty", - "stop", + "model", "temperature", "max_tokens", "top_p", "n", + "frequency_penalty", "presence_penalty", "stop" ] CHAT_START_TOKEN = "<|im_start|>" CHAT_END_TOKEN = "<|im_end|>" def __init__( - self, - *, + self, *, endpoint_url: str, - name: str = "OpenAICompletionsModel", - additional_headers: Optional[dict] = None, + name: str = 'OpenAICompletionsModel', + additional_headers: Optional[dict] = {}, api_version: Optional[str] = "2023-03-15-preview", token_manager: APITokenManager, azureml_model_deployment: Optional[str] = None, @@ -262,12 +243,9 @@ def __init__( frequency_penalty: Optional[float] = 0, presence_penalty: Optional[float] = 0, stop: Optional[Union[List[str], str]] = None, - image_captions: Optional[Dict[str, str]] = None, - # pylint: disable=unused-argument + image_captions: Dict[str, str] = {}, images_dir: Optional[str] = None, # Note: unused, kept for class compatibility ): - if additional_headers is None: - additional_headers = {} super().__init__(endpoint_url=endpoint_url, name=name, additional_headers=additional_headers) self.api_version = api_version self.token_manager = token_manager @@ -279,15 +257,15 @@ def __init__( self.n = n self.frequency_penalty = frequency_penalty self.presence_penalty = presence_penalty - self.image_captions = image_captions if image_captions is not None else {} + self.image_captions = image_captions # Default stop to end token if not provided if not stop: stop = [] # Else if stop sequence is given as a string (Ex: "["\n", ""]"), convert - elif isinstance(stop, str) and stop.startswith("[") and stop.endswith("]"): - stop = literal_eval(stop) - elif isinstance(stop, str): + elif type(stop) is str and stop.startswith('[') and stop.endswith(']'): + stop = eval(stop) + elif type(stop) is str: stop = [stop] self.stop: List = stop # type: ignore[assignment] @@ -299,25 +277,19 @@ def __init__( self.stop.append(token) if top_p not in [None, 1.0] and temperature is not None: - self.logger.warning( - "Both top_p and temperature are set. OpenAI advises against using both at the same time." - ) + self.logger.warning("Both top_p and temperature are set. OpenAI advises against using both at the same time.") + + self.logger.info(f"Default model settings: {self.get_model_params()}") - self.logger.info("Default model settings: %s", self.get_model_params()) def get_model_params(self): return {param: getattr(self, param) for param in self.model_param_names if getattr(self, param) is not None} + def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]: - """ + ''' Format the request data for the OpenAI API. - - :param prompt: The prompt string. - :type prompt: str - :keyword request_params: Additional parameters to pass to the model. - :return: The formatted request data. - :rtype: Dict[str, str] - """ + ''' # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -329,6 +301,7 @@ def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]: request_data.update(request_params) return request_data + async def get_conversation_completion( self, messages: List[dict], @@ -336,20 +309,16 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - """ + ''' Query the model a single time with a message. 
- :param messages: List of messages to query the model with. - Expected format: [{"role": "user", "content": "Hello!"}, ...] - :type messages: List[dict] - :param session: aiohttp RetryClient object to query the model with. - :type session: RetryClient - :param role: Role of the user sending the message. - :type role: str - :keyword request_params: Additional parameters to pass to the model. - :return: Dictionary containing the completion response from the model. - :rtype: dict - """ + Parameters + ---------- + messages: List of messages to query the model with. Expected format: [{"role": "user", "content": "Hello!"}, ...] + session: aiohttp RetryClient object to query the model with. + role: Role of the user sending the message. + request_params: Additional parameters to pass to the model. + ''' prompt = [] for message in messages: prompt.append(f"{self.CHAT_START_TOKEN}{message['role']}\n{message['content']}\n{self.CHAT_END_TOKEN}\n") @@ -362,6 +331,7 @@ async def get_conversation_completion( **request_params, ) + async def get_all_completions( # type: ignore[override] self, prompts: List[Dict[str, str]], @@ -371,32 +341,25 @@ async def get_all_completions( # type: ignore[override] request_error_rate_threshold: float = 0.5, **request_params, ) -> List[dict]: - """ + ''' Run a batch of prompts through the model and return the results in the order given. - :param prompts: List of prompts to query the model with. - :type prompts: List[Dict[str, str]] - :param session: aiohttp RetryClient to use for the request. - :type session: RetryClient - :param api_call_max_parallel_count: Number of parallel requests to make to the API. - :type api_call_max_parallel_count: int - :param api_call_delay_seconds: Number of seconds to wait between API requests. - :type api_call_delay_seconds: float - :param request_error_rate_threshold: Maximum error rate allowed before raising an error. - :type request_error_rate_threshold: float - :keyword request_params: Additional parameters to pass to the API. - :return: List of completion results. - :rtype: List[dict] - """ + Parameters + ---------- + prompts: List of prompts to query the model with. + session: aiohttp RetryClient to use for the request. + api_call_max_parallel_count: Number of parallel requests to make to the API. + api_call_delay_seconds: Number of seconds to wait between API requests. + request_error_rate_threshold: Maximum error rate allowed before raising an error. + request_params: Additional parameters to pass to the API. 
+ ''' if api_call_max_parallel_count > 1: - self.logger.info("Using %s parallel workers to query the API..", api_call_max_parallel_count) + self.logger.info(f"Using {api_call_max_parallel_count} parallel workers to query the API..") # Format prompts and tag with index request_datas: List[Dict] = [] for idx, prompt in enumerate(prompts): - prompt: Dict[str, str] = self.format_request_data( # type: ignore[no-redef] - prompt, **request_params # type: ignore[arg-type] - ) + prompt: Dict[str, str] = self.format_request_data(prompt, **request_params) # type: ignore[no-redef,arg-type] prompt[self.prompt_idx_key] = idx # type: ignore[assignment] request_datas.append(prompt) @@ -406,22 +369,21 @@ async def get_all_completions( # type: ignore[override] output_collector: List = [] tasks = [ # create a set of worker-tasks to query inference endpoint in parallel - asyncio.create_task( - self.request_api_parallel( - request_datas=request_datas, - output_collector=output_collector, - session=session, - api_call_delay_seconds=api_call_delay_seconds, - request_error_rate_threshold=request_error_rate_threshold, - ) - ) + asyncio.create_task(self.request_api_parallel( + request_datas=request_datas, + output_collector=output_collector, + session=session, + api_call_delay_seconds=api_call_delay_seconds, + request_error_rate_threshold=request_error_rate_threshold, + )) for _ in range(api_call_max_parallel_count) ] # Await the completion of all tasks, and propagate any exceptions await asyncio.gather(*tasks, return_exceptions=False) - if request_datas: - raise RuntimeError("All inference tasks were finished, but the queue is not empty") + if len(request_datas): + raise RuntimeError( + "All inference tasks were finished, but the queue is not empty") # Output results back to the caller output_collector.sort(key=lambda x: x[self.prompt_idx_key]) @@ -429,6 +391,7 @@ async def get_all_completions( # type: ignore[override] output.pop(self.prompt_idx_key) return output_collector + async def request_api_parallel( self, request_datas: List[dict], @@ -439,17 +402,7 @@ async def request_api_parallel( ) -> None: """ Query the model for all prompts given as a list and append the output to output_collector. - - :param request_datas: List of request data dictionaries. - :type request_datas: List[dict] - :param output_collector: List to store the output. - :type output_collector: List - :param session: RetryClient session. - :type session: RetryClient - :param api_call_delay_seconds: Delay between consecutive API calls in seconds. - :type api_call_delay_seconds: float, optional - :param request_error_rate_threshold: Threshold for request error rate. - :type request_error_rate_threshold: float, optional + No return value, output_collector is modified in place. 
""" logger_tasks: List = [] # to await for logging to finish @@ -463,27 +416,25 @@ async def request_api_parallel( session=session, request_data=request_data, ) - await self._add_successful_response(response["time_taken"]) - except HTTPException as e: + await self._add_successful_response(response['time_taken']) + except Exception as e: response = { "request": request_data, "response": { "finish_reason": "error", "error": str(e), - }, + } } await self._add_error() - self.logger.exception("Errored on prompt #%s", str(prompt_idx)) + self.logger.exception(f"Errored on prompt #{prompt_idx}") # if we count too many errors, we stop and raise an exception response_count = await self.get_response_count() error_rate = await self.get_error_rate() if response_count >= MIN_ERRORS_TO_FAIL and error_rate >= request_error_rate_threshold: - error_msg = ( - f"Error rate is more than {request_error_rate_threshold:.0%} -- something is broken!" - ) - raise Exception(error_msg) from e + error_msg = f"Error rate is more than {request_error_rate_threshold:.0%} -- something is broken!" + raise Exception(error_msg) response[self.prompt_idx_key] = prompt_idx output_collector.append(response) @@ -496,6 +447,7 @@ async def request_api_parallel( await asyncio.gather(*logger_tasks) return + async def request_api( self, session: RetryClient, @@ -504,22 +456,20 @@ async def request_api( """ Request the model with a body of data. - :param session: HTTPS Session for invoking the endpoint. - :type session: RetryClient - :param request_data: Prompt dictionary to query the model with. (Pass {"prompt": prompt} instead of prompt.) - :type request_data: dict - :return: Response from the model. - :rtype: dict + Parameters + ---------- + session: HTTPS Session for invoking the endpoint. + request_data: Prompt dictionary to query the model with. (Pass {"prompt": prompt} instead of prompt.) """ self._log_request(request_data) token = await self.token_manager.get_token() - + headers = { "Content-Type": "application/json", "X-CV": f"{uuid.uuid4()}", - "X-ModelType": self.model or "", + "X-ModelType": self.model or '', } if self.token_manager.auth_header == "Bearer": @@ -542,21 +492,24 @@ async def request_api( time_start = time.time() full_response = None - async with session.post(url=self.endpoint_url, headers=headers, json=request_data, params=params) as response: + async with session.post( + url=self.endpoint_url, + headers=headers, + json=request_data, + params=params + ) as response: if response.status == 200: response_data = await response.json() - self.logger.info("Response: %s", response_data) + self.logger.info(f"Response: {response_data}") # Copy the full response and return it to be saved in jsonl. 
full_response = copy.copy(response_data) time_taken = time.time() - time_start - parsed_response = self._parse_response(response_data) + parsed_response = self._parse_response(response_data, request_data=request_data) else: - raise HTTPException( - reason="Received unexpected HTTP status: {} {}".format(response.status, await response.text()) - ) + raise HTTPException(reason=f"Received unexpected HTTP status: {response.status} {await response.text()}") return { "request": request_data, @@ -565,7 +518,7 @@ async def request_api( "full_response": full_response, } - def _parse_response(self, response_data: dict) -> dict: + def _parse_response(self, response_data: dict, request_data: Optional[dict] = None) -> dict: # https://platform.openai.com/docs/api-reference/completions samples = [] finish_reason = [] @@ -575,36 +528,40 @@ def _parse_response(self, response_data: dict) -> dict: if "finish_reason" in choice: finish_reason.append(choice["finish_reason"]) - return {"samples": samples, "finish_reason": finish_reason, "id": response_data["id"]} - + return { + "samples": samples, + "finish_reason": finish_reason, + "id": response_data["id"] + } # =========================================================== # ============== OpenAIChatCompletionsModel ================= # =========================================================== - class OpenAIChatCompletionsModel(OpenAICompletionsModel): - """ + ''' OpenAIChatCompletionsModel is a wrapper around OpenAICompletionsModel that formats the prompt for chat completion. - """ - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name="OpenAIChatCompletionsModel", *args, **kwargs): + ''' + + def __init__(self, name='OpenAIChatCompletionsModel', *args, **kwargs): super().__init__(name=name, *args, **kwargs) - def format_request_data(self, prompt: List[dict], **request_params): # type: ignore[override] + + def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override] # Caption images if available if len(self.image_captions.keys()): - for message in prompt: - message["content"] = replace_prompt_captions( - message["content"], + for message in messages: + message['content'] = replace_prompt_captions( + message['content'], captions=self.image_captions, ) - request_data = {"messages": prompt, **self.get_model_params()} + request_data = {"messages": messages, **self.get_model_params()} request_data.update(request_params) return request_data + async def get_conversation_completion( self, messages: List[dict], @@ -612,20 +569,16 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - """ + ''' Query the model a single time with a message. - :param messages: List of messages to query the model with. - Expected format: [{"role": "user", "content": "Hello!"}, ...] - :type messages: List[dict] - :param session: aiohttp RetryClient object to query the model with. - :type session: RetryClient - :param role: Not used for this model, since it is a chat model. - :type role: str - :keyword **request_params: Additional parameters to pass to the model. - :return: Dictionary containing the completion response. - :rtype: dict - """ + Parameters + ---------- + messages: List of messages to query the model with. Expected format: [{"role": "user", "content": "Hello!"}, ...] + session: aiohttp RetryClient object to query the model with. + role: Not used for this model, since it is a chat model. + request_params: Additional parameters to pass to the model. 
+ ''' request_data = self.format_request_data( messages=messages, **request_params, @@ -635,31 +588,34 @@ async def get_conversation_completion( request_data=request_data, ) + async def get_completion( self, prompt: str, session: RetryClient, **request_params, ) -> dict: - """ - Query a ChatCompletions model with a single prompt. - - :param prompt: Prompt str to query model with. - :type prompt: str - :param session: aiohttp RetryClient object to use for the request. - :type session: RetryClient - :keyword **request_params: Additional parameters to pass to the request. - :return: Dictionary containing the completion response. - :rtype: dict - """ + ''' + Query a ChatCompletions model with a single prompt. Note: entire message will be inserted into a "system" call. + + Parameters + ---------- + prompt: Prompt str to query model with. + session: aiohttp RetryClient object to use for the request. + **request_params: Additional parameters to pass to the request. + ''' messages = [{"role": "system", "content": prompt}] - request_data = self.format_request_data(messages=messages, **request_params) + request_data = self.format_request_data( + messages=messages, + **request_params + ) return await self.request_api( session=session, request_data=request_data, ) + async def get_all_completions( self, prompts: List[str], # type: ignore[override] @@ -680,34 +636,36 @@ async def get_all_completions( **request_params, ) - def _parse_response(self, response_data: dict) -> dict: + + def _parse_response(self, response_data: dict, request_data: Optional[dict] = None) -> dict: # https://platform.openai.com/docs/api-reference/chat samples = [] finish_reason = [] for choice in response_data["choices"]: - if "message" in choice and "content" in choice["message"]: - samples.append(choice["message"]["content"]) - if "message" in choice and "finish_reason" in choice["message"]: - finish_reason.append(choice["message"]["finish_reason"]) - - return {"samples": samples, "finish_reason": finish_reason, "id": response_data["id"]} + if 'message' in choice and 'content' in choice['message']: + samples.append(choice['message']['content']) + if 'message' in choice and 'finish_reason' in choice['message']: + finish_reason.append(choice['message']['finish_reason']) + return { + "samples": samples, + "finish_reason": finish_reason, + "id": response_data["id"] + } # =========================================================== # =========== OpenAIMultiModalCompletionsModel ============== # =========================================================== - class OpenAIMultiModalCompletionsModel(OpenAICompletionsModel): - """ + ''' Wrapper around OpenAICompletionsModel that formats the prompt for multimodal completions containing images. - """ - + ''' model_param_names = ["temperature", "max_tokens", "top_p", "n", "stop"] - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name="OpenAIMultiModalCompletionsModel", images_dir: Optional[str] = None, *args, **kwargs): + + def __init__(self, name='OpenAIMultiModalCompletionsModel', images_dir: Optional[str] = None, *args, **kwargs): self.images_dir = images_dir super().__init__(name=name, *args, **kwargs) @@ -723,18 +681,15 @@ def format_request_data(self, prompt: str, **request_params) -> dict: request.update(request_params) return request - def _log_request(self, request: dict) -> None: - """ - Log prompt, ignoring image data if multimodal. - :param request: The request dictionary. 
- :type request: dict - """ + def _log_request(self, request: dict) -> None: + '''Log prompt, ignoring image data if multimodal.''' loggable_prompt_transcript = { - "transcript": [ - (c if c["type"] != "image" else {"type": "image", "data": "..."}) for c in request["transcript"] + 'transcript': [ + (c if c['type'] != 'image' else {'type': 'image', 'data': '...'}) + for c in request['transcript'] ], - **{k: v for k, v in request.items() if k != "transcript"}, + **{k: v for k, v in request.items() if k != 'transcript'} } super()._log_request(loggable_prompt_transcript) @@ -743,27 +698,21 @@ def _log_request(self, request: dict) -> None: # ============== LLAMA CompletionsModel ===================== # =========================================================== - class LLAMACompletionsModel(OpenAICompletionsModel): - """ + ''' Object for calling a Completions-style API for LLAMA models. - """ - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name: str = "LLAMACompletionsModel", *args, **kwargs): + ''' + + def __init__( + self, name: str = 'LLAMACompletionsModel', *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" def format_request_data(self, prompt: str, **request_params): - """ + ''' Format the request data for the OpenAI API. - - :param prompt: The prompt string. - :type prompt: str - :keyword request_params: Additional request parameters. - :return: The formatted request data. - :rtype: dict - """ + ''' # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -774,21 +723,20 @@ def format_request_data(self, prompt: str, **request_params): request_data = { "input_data": { "input_string": [prompt], - "parameters": {"temperature": self.temperature, "max_gen_len": self.max_tokens}, + "parameters": {"temperature": self.temperature, "max_gen_len": self.max_tokens} } } request_data.update(request_params) return request_data - # pylint: disable=arguments-differ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # type: ignore[override] - prompt = request_data["input_data"]["input_string"][0] + prompt = request_data['input_data']['input_string'][0] # remove prompt text from each response as llama model returns prompt + completion instead of only completion # remove any text after the stop tokens, since llama doesn't support stop token - for idx, _ in enumerate(response_data["samples"]): - response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, "").strip() + for idx, response in enumerate(response_data["samples"]): + response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, '').strip() for stop_token in self.stop: if stop_token in response_data["samples"][idx]: response_data["samples"][idx] = response_data["samples"][idx].split(stop_token)[0].strip() @@ -798,7 +746,7 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t for choice in response_data: if "0" in choice: samples.append(choice["0"]) - finish_reason.append("Stop") + finish_reason.append('Stop') return { "samples": samples, @@ -810,75 +758,68 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t # ============== LLAMA ChatCompletionsModel ================= # =========================================================== class LLAMAChatCompletionsModel(LLAMACompletionsModel): - """ + ''' LLaMa ChatCompletionsModel is a 
wrapper around LLaMaCompletionsModel that formats the prompt for chat completion. - This chat completion model should be only used as assistant, - and shouldn't be used to simulate user. It is not possible - to pass a system prompt do describe how the model would behave, - So we only use the model as assistant to reply for questions made by GPT simulated users. - """ - # pylint: disable=keyword-arg-before-vararg - def __init__(self, name="LLAMAChatCompletionsModel", *args, **kwargs): + This chat completion model should be only used as assistant, and shouldn't be used to simulate user. It is not possible + to pass a system prompt do describe how the model would behave, So we only use the model as assistant to reply for questions + made by GPT simulated users. + ''' + + def __init__(self, name='LLAMAChatCompletionsModel', *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" - def format_request_data(self, prompt: List[dict], **request_params): # type: ignore[override] + def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override] # Caption images if available if len(self.image_captions.keys()): - for message in prompt: - message["content"] = replace_prompt_captions( - message["content"], + for message in messages: + message['content'] = replace_prompt_captions( + message['content'], captions=self.image_captions, ) - # For LLaMa we don't pass the prompt (user persona) as a system message - # since LLama doesn't support system message - # LLama only supports user, and assistant messages. - # The messages sequence has to start with User message/ It can't have two user or + # For LLaMa we don't pass the prompt (user persona) as a system message since LLama doesn't support system message + # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can't have two user or # two assistant consecutive messages. - # so if we set the system meta prompt as a user message, - # and if we have the first two messages made by user then we + # so if we set the system meta prompt as a user message, and if we have the first two messages made by user then we # combine the two messages in one message. 
- for _, x in enumerate(prompt): - if x["role"] == "system": - x["role"] = "user" - if len(prompt) > 1 and prompt[0]["role"] == "user" and prompt[1]["role"] == "user": - prompt[0] = {"role": "user", "content": prompt[0]["content"] + "\n" + prompt[1]["content"]} - del prompt[1] + for idx, x in enumerate(messages): + if x['role'] == 'system': + x['role'] = 'user' + if len(messages) > 1 and messages[0]['role'] == 'user' and messages[1]['role'] == 'user': + messages[0] = {'role': 'user', 'content': messages[0]['content'] + '\n' + messages[1]['content']} + del messages[1] # request_data = {"messages": messages, **self.get_model_params()} request_data = { - "input_data": { - "input_string": prompt, - "parameters": {"temperature": self.temperature, "max_new_tokens": self.max_tokens}, - }, + "input_data": + { + "input_string": messages, + "parameters": {"temperature": self.temperature, "max_new_tokens": self.max_tokens} + }, } request_data.update(request_params) return request_data async def get_conversation_completion( - self, - messages: List[dict], - session: RetryClient, - role: str = "assistant", - **request_params, + self, + messages: List[dict], + session: RetryClient, + role: str = "assistant", + **request_params, ) -> dict: - """ + ''' Query the model a single time with a message. - :param messages: List of messages to query the model with. - Expected format: [{"role": "user", "content": "Hello!"}, ...] - :type messages: List[dict] - :param session: aiohttp RetryClient object to query the model with. - :type session: RetryClient - :param role: Not used for this model, since it is a chat model. - :type role: str - :keyword request_params: Additional parameters to pass to the model. - :return: Dictionary containing the response from the model. - :rtype: dict - """ + Parameters + ---------- + messages: List of messages to query the model with. Expected format: [{"role": "user", "content": "Hello!"}, ...] + session: aiohttp RetryClient object to query the model with. + role: Not used for this model, since it is a chat model. + request_params: Additional parameters to pass to the model. 
+ ''' request_data = self.format_request_data( messages=messages, @@ -889,18 +830,17 @@ async def get_conversation_completion( request_data=request_data, ) - # pylint: disable=arguments-differ def _parse_response(self, response_data: dict) -> dict: # type: ignore[override] # https://platform.openai.com/docs/api-reference/chat samples = [] finish_reason = [] # for choice in response_data: - if "output" in response_data: - samples.append(response_data["output"]) - finish_reason.append("Stop") + if 'output' in response_data: + samples.append(response_data['output']) + finish_reason.append('Stop') return { "samples": samples, "finish_reason": finish_reason, # "id": response_data["id"] - } + } \ No newline at end of file From 8cea9c33a47a35a9b8f74a68a019b052495f3945 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 6 Mar 2024 18:34:21 -0800 Subject: [PATCH 02/11] quotes --- .../simulator/_model_tools/models.py | 132 +++++++++--------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index 931830aa7a80..404d76e387ed 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -25,7 +25,7 @@ def get_model_class_from_url(endpoint_url: str): - '''Convert an endpoint URL to the appropriate model class.''' + """Convert an endpoint URL to the appropriate model class.""" endpoint_path = urlparse(endpoint_url).path # remove query params if endpoint_path.endswith("chat/completions"): @@ -83,9 +83,9 @@ async def on_request_end(self, session, trace_config_ctx, params): # =========================================================== class LLMBase(ABC): - ''' + """ Base class for all LLM models. - ''' + """ def __init__(self, endpoint_url: str, name: str = "unknown", additional_headers: Optional[dict] = {}): self.endpoint_url = endpoint_url @@ -113,7 +113,7 @@ async def get_completion( session: RetryClient, **request_params, ) -> dict: - ''' + """ Query the model a single time with a prompt. Parameters @@ -121,7 +121,7 @@ async def get_completion( prompt: Prompt str to query model with. session: aiohttp RetryClient object to use for the request. **request_params: Additional parameters to pass to the request. - ''' + """ request_data = self.format_request_data(prompt, **request_params) return await self.request_api( session=session, @@ -211,9 +211,9 @@ def __repr__(self): # =========================================================== class OpenAICompletionsModel(LLMBase): - ''' + """ Object for calling a Completions-style API for OpenAI models. 
- ''' + """ prompt_idx_key = "__prompt_idx__" max_stop_tokens = 4 @@ -230,7 +230,7 @@ class OpenAICompletionsModel(LLMBase): def __init__( self, *, endpoint_url: str, - name: str = 'OpenAICompletionsModel', + name: str = "OpenAICompletionsModel", additional_headers: Optional[dict] = {}, api_version: Optional[str] = "2023-03-15-preview", token_manager: APITokenManager, @@ -263,7 +263,7 @@ def __init__( if not stop: stop = [] # Else if stop sequence is given as a string (Ex: "["\n", ""]"), convert - elif type(stop) is str and stop.startswith('[') and stop.endswith(']'): + elif type(stop) is str and stop.startswith("[") and stop.endswith("]"): stop = eval(stop) elif type(stop) is str: stop = [stop] @@ -287,9 +287,9 @@ def get_model_params(self): def format_request_data(self, prompt: str, **request_params) -> Dict[str, str]: - ''' + """ Format the request data for the OpenAI API. - ''' + """ # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -309,7 +309,7 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - ''' + """ Query the model a single time with a message. Parameters @@ -318,10 +318,10 @@ async def get_conversation_completion( session: aiohttp RetryClient object to query the model with. role: Role of the user sending the message. request_params: Additional parameters to pass to the model. - ''' + """ prompt = [] for message in messages: - prompt.append(f"{self.CHAT_START_TOKEN}{message['role']}\n{message['content']}\n{self.CHAT_END_TOKEN}\n") + prompt.append(f"{self.CHAT_START_TOKEN}{message["role"]}\n{message["content"]}\n{self.CHAT_END_TOKEN}\n") prompt_string: str = "".join(prompt) prompt_string += f"{self.CHAT_START_TOKEN}{role}\n" @@ -341,7 +341,7 @@ async def get_all_completions( # type: ignore[override] request_error_rate_threshold: float = 0.5, **request_params, ) -> List[dict]: - ''' + """ Run a batch of prompts through the model and return the results in the order given. Parameters @@ -352,7 +352,7 @@ async def get_all_completions( # type: ignore[override] api_call_delay_seconds: Number of seconds to wait between API requests. request_error_rate_threshold: Maximum error rate allowed before raising an error. request_params: Additional parameters to pass to the API. 
- ''' + """ if api_call_max_parallel_count > 1: self.logger.info(f"Using {api_call_max_parallel_count} parallel workers to query the API..") @@ -406,7 +406,7 @@ async def request_api_parallel( """ logger_tasks: List = [] # to await for logging to finish - while True: # process data from queue until it's empty + while True: # process data from queue until it"s empty try: request_data = request_datas.pop() prompt_idx = request_data.pop(self.prompt_idx_key) @@ -416,7 +416,7 @@ async def request_api_parallel( session=session, request_data=request_data, ) - await self._add_successful_response(response['time_taken']) + await self._add_successful_response(response["time_taken"]) except Exception as e: response = { "request": request_data, @@ -469,7 +469,7 @@ async def request_api( headers = { "Content-Type": "application/json", "X-CV": f"{uuid.uuid4()}", - "X-ModelType": self.model or '', + "X-ModelType": self.model or "", } if self.token_manager.auth_header == "Bearer": @@ -539,12 +539,12 @@ def _parse_response(self, response_data: dict, request_data: Optional[dict] = No # =========================================================== class OpenAIChatCompletionsModel(OpenAICompletionsModel): - ''' + """ OpenAIChatCompletionsModel is a wrapper around OpenAICompletionsModel that formats the prompt for chat completion. - ''' + """ - def __init__(self, name='OpenAIChatCompletionsModel', *args, **kwargs): + def __init__(self, name="OpenAIChatCompletionsModel", *args, **kwargs): super().__init__(name=name, *args, **kwargs) @@ -552,8 +552,8 @@ def format_request_data(self, messages: List[dict], **request_params): # type: # Caption images if available if len(self.image_captions.keys()): for message in messages: - message['content'] = replace_prompt_captions( - message['content'], + message["content"] = replace_prompt_captions( + message["content"], captions=self.image_captions, ) @@ -569,7 +569,7 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - ''' + """ Query the model a single time with a message. Parameters @@ -578,7 +578,7 @@ async def get_conversation_completion( session: aiohttp RetryClient object to query the model with. role: Not used for this model, since it is a chat model. request_params: Additional parameters to pass to the model. - ''' + """ request_data = self.format_request_data( messages=messages, **request_params, @@ -595,7 +595,7 @@ async def get_completion( session: RetryClient, **request_params, ) -> dict: - ''' + """ Query a ChatCompletions model with a single prompt. Note: entire message will be inserted into a "system" call. Parameters @@ -603,7 +603,7 @@ async def get_completion( prompt: Prompt str to query model with. session: aiohttp RetryClient object to use for the request. **request_params: Additional parameters to pass to the request. 
- ''' + """ messages = [{"role": "system", "content": prompt}] request_data = self.format_request_data( @@ -643,10 +643,10 @@ def _parse_response(self, response_data: dict, request_data: Optional[dict] = No finish_reason = [] for choice in response_data["choices"]: - if 'message' in choice and 'content' in choice['message']: - samples.append(choice['message']['content']) - if 'message' in choice and 'finish_reason' in choice['message']: - finish_reason.append(choice['message']['finish_reason']) + if "message" in choice and "content" in choice["message"]: + samples.append(choice["message"]["content"]) + if "message" in choice and "finish_reason" in choice["message"]: + finish_reason.append(choice["message"]["finish_reason"]) return { "samples": samples, @@ -659,13 +659,13 @@ def _parse_response(self, response_data: dict, request_data: Optional[dict] = No # =========================================================== class OpenAIMultiModalCompletionsModel(OpenAICompletionsModel): - ''' + """ Wrapper around OpenAICompletionsModel that formats the prompt for multimodal completions containing images. - ''' + """ model_param_names = ["temperature", "max_tokens", "top_p", "n", "stop"] - def __init__(self, name='OpenAIMultiModalCompletionsModel', images_dir: Optional[str] = None, *args, **kwargs): + def __init__(self, name="OpenAIMultiModalCompletionsModel", images_dir: Optional[str] = None, *args, **kwargs): self.images_dir = images_dir super().__init__(name=name, *args, **kwargs) @@ -683,13 +683,13 @@ def format_request_data(self, prompt: str, **request_params) -> dict: def _log_request(self, request: dict) -> None: - '''Log prompt, ignoring image data if multimodal.''' + """Log prompt, ignoring image data if multimodal.""" loggable_prompt_transcript = { - 'transcript': [ - (c if c['type'] != 'image' else {'type': 'image', 'data': '...'}) - for c in request['transcript'] + "transcript": [ + (c if c["type"] != "image" else {"type": "image", "data": "..."}) + for c in request["transcript"] ], - **{k: v for k, v in request.items() if k != 'transcript'} + **{k: v for k, v in request.items() if k != "transcript"} } super()._log_request(loggable_prompt_transcript) @@ -699,20 +699,20 @@ def _log_request(self, request: dict) -> None: # =========================================================== class LLAMACompletionsModel(OpenAICompletionsModel): - ''' + """ Object for calling a Completions-style API for LLAMA models. - ''' + """ def __init__( - self, name: str = 'LLAMACompletionsModel', *args, **kwargs): + self, name: str = "LLAMACompletionsModel", *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" def format_request_data(self, prompt: str, **request_params): - ''' + """ Format the request data for the OpenAI API. 
- ''' + """ # Caption images if available if len(self.image_captions.keys()): prompt = replace_prompt_captions( @@ -731,12 +731,12 @@ def format_request_data(self, prompt: str, **request_params): return request_data def _parse_response(self, response_data: dict, request_data: dict) -> dict: # type: ignore[override] - prompt = request_data['input_data']['input_string'][0] + prompt = request_data["input_data"]["input_string"][0] # remove prompt text from each response as llama model returns prompt + completion instead of only completion - # remove any text after the stop tokens, since llama doesn't support stop token + # remove any text after the stop tokens, since llama doesn"t support stop token for idx, response in enumerate(response_data["samples"]): - response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, '').strip() + response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, "").strip() for stop_token in self.stop: if stop_token in response_data["samples"][idx]: response_data["samples"][idx] = response_data["samples"][idx].split(stop_token)[0].strip() @@ -746,7 +746,7 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t for choice in response_data: if "0" in choice: samples.append(choice["0"]) - finish_reason.append('Stop') + finish_reason.append("Stop") return { "samples": samples, @@ -758,15 +758,15 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t # ============== LLAMA ChatCompletionsModel ================= # =========================================================== class LLAMAChatCompletionsModel(LLAMACompletionsModel): - ''' + """ LLaMa ChatCompletionsModel is a wrapper around LLaMaCompletionsModel that formats the prompt for chat completion. - This chat completion model should be only used as assistant, and shouldn't be used to simulate user. It is not possible + This chat completion model should be only used as assistant, and shouldn"t be used to simulate user. It is not possible to pass a system prompt do describe how the model would behave, So we only use the model as assistant to reply for questions made by GPT simulated users. - ''' + """ - def __init__(self, name='LLAMAChatCompletionsModel', *args, **kwargs): + def __init__(self, name="LLAMAChatCompletionsModel", *args, **kwargs): super().__init__(name=name, *args, **kwargs) # set authentication header to Bearer, as llama apis always uses the bearer auth_header self.token_manager.auth_header = "Bearer" @@ -775,21 +775,21 @@ def format_request_data(self, messages: List[dict], **request_params): # type: # Caption images if available if len(self.image_captions.keys()): for message in messages: - message['content'] = replace_prompt_captions( - message['content'], + message["content"] = replace_prompt_captions( + message["content"], captions=self.image_captions, ) - # For LLaMa we don't pass the prompt (user persona) as a system message since LLama doesn't support system message - # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can't have two user or + # For LLaMa we don"t pass the prompt (user persona) as a system message since LLama doesn"t support system message + # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can"t have two user or # two assistant consecutive messages. 
# so if we set the system meta prompt as a user message, and if we have the first two messages made by user then we # combine the two messages in one message. for idx, x in enumerate(messages): - if x['role'] == 'system': - x['role'] = 'user' - if len(messages) > 1 and messages[0]['role'] == 'user' and messages[1]['role'] == 'user': - messages[0] = {'role': 'user', 'content': messages[0]['content'] + '\n' + messages[1]['content']} + if x["role"] == "system": + x["role"] = "user" + if len(messages) > 1 and messages[0]["role"] == "user" and messages[1]["role"] == "user": + messages[0] = {"role": "user", "content": messages[0]["content"] + "\n" + messages[1]["content"]} del messages[1] # request_data = {"messages": messages, **self.get_model_params()} @@ -810,7 +810,7 @@ async def get_conversation_completion( role: str = "assistant", **request_params, ) -> dict: - ''' + """ Query the model a single time with a message. Parameters @@ -819,7 +819,7 @@ async def get_conversation_completion( session: aiohttp RetryClient object to query the model with. role: Not used for this model, since it is a chat model. request_params: Additional parameters to pass to the model. - ''' + """ request_data = self.format_request_data( messages=messages, @@ -835,9 +835,9 @@ def _parse_response(self, response_data: dict) -> dict: # type: ignore[override samples = [] finish_reason = [] # for choice in response_data: - if 'output' in response_data: - samples.append(response_data['output']) - finish_reason.append('Stop') + if "output" in response_data: + samples.append(response_data["output"]) + finish_reason.append("Stop") return { "samples": samples, From bea237e466286888f05d0ec685eaa2082fd88332 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 07:45:10 -0800 Subject: [PATCH 03/11] Spellcheck fixes --- .../synthetic/simulator/_model_tools/models.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index 404d76e387ed..ead2dc644ee1 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -734,7 +734,7 @@ def _parse_response(self, response_data: dict, request_data: dict) -> dict: # t prompt = request_data["input_data"]["input_string"][0] # remove prompt text from each response as llama model returns prompt + completion instead of only completion - # remove any text after the stop tokens, since llama doesn"t support stop token + # remove any text after the stop tokens, since llama does not support stop token for idx, response in enumerate(response_data["samples"]): response_data["samples"][idx] = response_data["samples"][idx].replace(prompt, "").strip() for stop_token in self.stop: @@ -761,7 +761,7 @@ class LLAMAChatCompletionsModel(LLAMACompletionsModel): """ LLaMa ChatCompletionsModel is a wrapper around LLaMaCompletionsModel that formats the prompt for chat completion. - This chat completion model should be only used as assistant, and shouldn"t be used to simulate user. It is not possible + This chat completion model should be only used as assistant, and should not be used to simulate user. 
It is not possible to pass a system prompt do describe how the model would behave, So we only use the model as assistant to reply for questions made by GPT simulated users. """ @@ -780,8 +780,9 @@ def format_request_data(self, messages: List[dict], **request_params): # type: captions=self.image_captions, ) - # For LLaMa we don"t pass the prompt (user persona) as a system message since LLama doesn"t support system message - # LLama only supports user, and assistant messages. The messages sequence has to start with User message/ It can"t have two user or + # For LLaMa we do not pass the prompt (user persona) as a system message since LLama does not support system message + # LLama only supports user, and assistant messages. + # The messages sequence has to start with User message/ It can not have two user or # two assistant consecutive messages. # so if we set the system meta prompt as a user message, and if we have the first two messages made by user then we # combine the two messages in one message. From 45073cc695f56f1f7aa766445e2f07c619e4081a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 08:13:43 -0800 Subject: [PATCH 04/11] ignore the models for doc generation --- doc/sphinx/individual_build_conf.py | 2 +- .../ai/generative/synthetic/simulator/simulator/simulator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx/individual_build_conf.py b/doc/sphinx/individual_build_conf.py index 83950ae4cb25..a289c668e3ed 100644 --- a/doc/sphinx/individual_build_conf.py +++ b/doc/sphinx/individual_build_conf.py @@ -107,7 +107,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ['_build', '*/synthetic/simulator/_model_tools/models.py'] # The reST default role (used for this markup: `text`) to use for all # documents. diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py index 5e668943c1b5..f6a9162c30aa 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py @@ -1,7 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
# --------------------------------------------------------- - +# pylint: disable=E0401 # needed for 'list' type annotations on 3.8 from __future__ import annotations From 08af5b31b34ebd75e71a65902a3e28588f982ef6 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 08:32:24 -0800 Subject: [PATCH 05/11] Fixed the quotes on f strings --- .../ai/generative/synthetic/simulator/_model_tools/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py index ead2dc644ee1..246eccb89f35 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/_model_tools/models.py @@ -321,7 +321,7 @@ async def get_conversation_completion( """ prompt = [] for message in messages: - prompt.append(f"{self.CHAT_START_TOKEN}{message["role"]}\n{message["content"]}\n{self.CHAT_END_TOKEN}\n") + prompt.append(f"{self.CHAT_START_TOKEN}{message['role']}\n{message['content']}\n{self.CHAT_END_TOKEN}\n") prompt_string: str = "".join(prompt) prompt_string += f"{self.CHAT_START_TOKEN}{role}\n" From 7584cc9eea23c271d01f9a572e07502fb75fa90f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 7 Mar 2024 09:27:33 -0800 Subject: [PATCH 06/11] pylint skip file --- .../synthetic/simulator/simulator/_proxy_completion_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py index c33b2ba22c7f..0000c858380d 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_proxy_completion_model.py @@ -1,6 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. 
# --------------------------------------------------------- +#pylint: skip-file from typing import List import uuid import time @@ -30,8 +31,8 @@ def __init__(self, name, template_key, template_parameters, *args, **kwargs): super().__init__(name=name, *args, **kwargs) - def format_request_data(self, prompt: List[dict], **request_params): # type: ignore[override] - request_data = {"messages": prompt, **self.get_model_params()} + def format_request_data(self, messages: List[dict], **request_params): # type: ignore[override] + request_data = {"messages": messages, **self.get_model_params()} request_data.update(request_params) return request_data From d727177f72d2e5b2091204397ba8ae490bdedc1f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 11:34:59 -0700 Subject: [PATCH 07/11] Support for summarization --- .../simulator/_callback_conversation_bot.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py index 007201fe9baf..8cdf2f45ec8c 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/_callback_conversation_bot.py @@ -1,6 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- +#pylint: skip-file import copy from typing import List, Tuple @@ -29,8 +30,22 @@ async def generate_response( self.user_template, conversation_history, self.user_template_parameters ) msg_copy = copy.deepcopy(chat_protocol_message) - result = await self.callback(msg_copy) - + result = {} + try: + result = await self.callback(msg_copy) + except Exception as exc: + if "status_code" in dir(exc) and 400 <= exc.status_code < 500 and "response was filtered" in exc.message: + result = { + "messages": [{ + "content": ("Error: The response was filtered due to the prompt " + "triggering Azure OpenAI's content management policy. 
" + "Please modify your prompt and retry."), + "role": "assistant" + }], + "finish_reason": ["stop"], + "id": None, + "template_parameters": {} + } self.logger.info("Using user provided callback returning response.") time_taken = 0 @@ -54,6 +69,9 @@ def _to_chat_protocol(self, template, conversation_history, template_parameters) for _, m in enumerate(conversation_history): messages.append({"content": m.message, "role": m.role.value}) + if template_parameters.get("file_content", None) and any('File contents:' not in message['content'] for message in messages): + messages.append({"content": f"File contents: {template_parameters['file_content']}", "role": "user"}) + return { "template_parameters": template_parameters, "messages": messages, From 8b895ee8f3637e05325391ee85798a29212ddc9a Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 15:08:49 -0700 Subject: [PATCH 08/11] Adding a limit of 2 conversation turns for all but conversation simulators --- .../generative/synthetic/simulator/simulator/simulator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py index f6a9162c30aa..2554ad75e489 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/synthetic/simulator/simulator/simulator.py @@ -194,7 +194,7 @@ def _join_conversation_starter(self, parameters, to_join): async def simulate_async( self, template: "Template", - max_conversation_turns: int, + max_conversation_turns: int = 2, parameters: Optional[List[dict]] = None, jailbreak: bool = False, api_call_retry_limit: int = 3, @@ -208,6 +208,7 @@ async def simulate_async( :keyword template: An instance of the Template class defining the conversation structure. :paramtype template: Template :keyword max_conversation_turns: The maximum number of conversation turns to simulate. + Defaults to 2, change only applies to chat templates. :paramtype max_conversation_turns: int :keyword parameters: A list of dictionaries containing the parameter values to be used in the simulations. Defaults to an empty list. 
@@ -239,7 +240,8 @@ async def simulate_async( if not isinstance(parameters, list): raise ValueError(f"Expect parameters to be a list of dictionary, but found {type(parameters)}") - + if "conversation" not in template.template_name: + max_conversation_turns = 2 if template.content_harm: self._ensure_service_dependencies() self.adversarial = True From 92d6d8ec26ca9a702a3e9acb5c902478d9b80537 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 15:10:34 -0700 Subject: [PATCH 09/11] exclude synthetic from mypy --- sdk/ai/azure-ai-generative/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/ai/azure-ai-generative/pyproject.toml b/sdk/ai/azure-ai-generative/pyproject.toml index 9e37165cc0f0..c12b3e784e21 100644 --- a/sdk/ai/azure-ai-generative/pyproject.toml +++ b/sdk/ai/azure-ai-generative/pyproject.toml @@ -13,7 +13,7 @@ strict_sphinx = true [tool.mypy] python_version = "3.10" -exclude = ["azure/ai/generative/index/_langchain/vendor", "tests", "setup.py", "samples", "azure/ai/generative/evaluate/pf_templates/built_in_metrics"] +exclude = ["azure/ai/generative/index/_langchain/vendor", "tests", "setup.py", "samples", "azure/ai/generative/evaluate/pf_templates/built_in_metrics", "azure/ai/generative/synthetic"] warn_unused_configs = true follow_imports = "skip" ignore_missing_imports = true From 4742b0418dfe61f741a0e6f63e1b7c9b7c25e83d Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 11 Mar 2024 15:59:55 -0700 Subject: [PATCH 10/11] Another lint fix --- .../azure/ai/generative/index/_utils/logging.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py index 7f6a47408fea..b6c56c305b7d 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_utils/logging.py @@ -1,6 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- +# pylint: disable=W0125 """Logging utilities.""" import inspect import logging From 975b0b37a37d0da3c77bbf2ff704f5e550a6ca4f Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 12 Mar 2024 09:36:48 -0700 Subject: [PATCH 11/11] Skip the file causing linting issues --- .../azure/ai/generative/index/_embeddings/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py index c2938372b55d..6aa5846c8063 100644 --- a/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py +++ b/sdk/ai/azure-ai-generative/azure/ai/generative/index/_embeddings/__init__.py @@ -2,6 +2,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- +# pylint: skip-file """Embeddings generation and management tools.""" import contextlib import copy