chore: go live #5

Merged · 1 commit · Feb 15, 2024
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
@@ -2,10 +2,10 @@ name: CI
on:
  push:
    branches:
-      - main
+      - stainless
  pull_request:
    branches:
-      - main
+      - stainless

jobs:
  lint:
4 changes: 2 additions & 2 deletions README.md
@@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()`
print(completion.id)
```

-These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object.
+These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object.

-The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.
+The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content.

#### `.with_streaming_response`

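The README lines in the hunk above describe `.with_raw_response` returning an `APIResponse`. A minimal usage sketch, assuming the standard client setup; the message, model name, and header inspection are illustrative and not part of this diff:

```python
from groq import Groq

client = Groq()  # assumes GROQ_API_KEY is set in the environment

# .with_raw_response returns the raw APIResponse wrapper instead of the parsed model.
response = client.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
    model="mixtral-8x7b-32768",
)
print(response.headers)  # raw HTTP headers from the underlying response

completion = response.parse()  # the object `chat.completions.create()` would have returned
print(completion.id)
```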
4 changes: 2 additions & 2 deletions bin/check-release-environment
@@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then
errors+=("The GROQ_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.")
fi

-len=${#errors[@]}
+lenErrors=${#errors[@]}

-if [[ len -gt 0 ]]; then
+if [[ lenErrors -gt 0 ]]; then
echo -e "Found the following errors in the release environment:\n"

for error in "${errors[@]}"; do
bin/check-test-server: mode changed 100644 → 100755 (no content changes)
bin/test: mode changed 100644 → 100755 (no content changes)
14 changes: 2 additions & 12 deletions examples/chat_completion.py
@@ -10,43 +10,33 @@
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
-{
-"role": "system",
-"content": "you are a helpful assistant."
-},
+{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
-}
+},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# If set, partial message deltas will be sent.
stream=False,
)
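Pieced together from the hunk above, the reformatted example reduces to roughly the sketch below; the client construction and the final print statement are assumed from the parts of the file the diff does not show:

```python
from groq import Groq

client = Groq()  # assumes GROQ_API_KEY is set in the environment

chat_completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "you are a helpful assistant."},
        {"role": "user", "content": "Explain the importance of low latency LLMs"},
    ],
    # The language model which will generate the completion.
    model="mixtral-8x7b-32768",
    # Optional parameters described in the comments above.
    temperature=0.5,
    max_tokens=1024,
    top_p=1,
    stop=None,
    stream=False,
)

# Print the completion returned by the LLM.
print(chat_completion.choices[0].message.content)
```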
16 changes: 4 additions & 12 deletions examples/chat_completion_async.py
@@ -2,6 +2,7 @@

from groq import AsyncGroq


async def main():
client = AsyncGroq()

@@ -13,48 +14,39 @@ async def main():
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
-{
-"role": "system",
-"content": "you are a helpful assistant."
-},
+{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
-}
+},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become
# deterministic and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# If set, partial message deltas will be sent.
stream=False,
)

# Print the completion returned by the LLM.
print(chat_completion.choices[0].message.content)


asyncio.run(main())
15 changes: 4 additions & 11 deletions examples/chat_completion_async_streaming.py
@@ -2,6 +2,7 @@

from groq import AsyncGroq


async def main():
client = AsyncGroq()

@@ -13,39 +14,30 @@ async def main():
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
-{
-"role": "system",
-"content": "you are a helpful assistant."
-},
+{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
-}
+},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become
# deterministic and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
stream=True,
@@ -55,4 +47,5 @@ async def main():
async for chunk in stream:
print(chunk.choices[0].delta.content, end="")


asyncio.run(main())
16 changes: 3 additions & 13 deletions examples/chat_completion_stop.py
@@ -10,37 +10,28 @@
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
-{
-"role": "system",
-"content": "you are a helpful assistant."
-},
+{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
}
"content": 'Count to 10. Your response must begin with "1, ". example: 1, 2, 3, ...',
},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
@@ -49,7 +40,6 @@
# If multiple stop values are needed, an array of string may be passed,
# stop=[", 6", ", six", ", Six"]
stop=", 6",

# If set, partial message deltas will be sent.
stream=False,
)
14 changes: 2 additions & 12 deletions examples/chat_completion_streaming.py
@@ -10,43 +10,33 @@
# Set an optional system message. This sets the behavior of the
# assistant and can be used to provide specific instructions for
# how it should behave throughout the conversation.
-{
-"role": "system",
-"content": "you are a helpful assistant."
-},
+{"role": "system", "content": "you are a helpful assistant."},
# Set a user message for the assistant to respond to.
{
"role": "user",
"content": "Explain the importance of low latency LLMs",
-}
+},
],

# The language model which will generate the completion.
model="mixtral-8x7b-32768",

#
# Optional parameters
#

# Controls randomness: lowering results in less random completions.
# As the temperature approaches zero, the model will become deterministic
# and repetitive.
temperature=0.5,

# The maximum number of tokens to generate. Requests can use up to
# 2048 tokens shared between prompt and completion.
max_tokens=1024,

# Controls diversity via nucleus sampling: 0.5 means half of all
# likelihood-weighted options are considered.
top_p=1,

# A stop sequence is a predefined or user-specified text string that
# signals an AI to stop generating content, ensuring its responses
# remain focused and concise. Examples include punctuation marks and
# markers like "[end]".
stop=None,

# If set, partial message deltas will be sent.
stream=True,
)
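The hunk above ends at `stream=True`. Assuming the synchronous example consumes the stream the same way the async variant shown earlier does, the truncated tail of the file would look roughly like this:

```python
# Hypothetical continuation: iterate the stream and print each delta as it arrives.
for chunk in chat_completion:
    print(chunk.choices[0].delta.content, end="")
```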
4 changes: 0 additions & 4 deletions src/groq/_streaming.py
@@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]:
iterator = self._iter_events()

for sse in iterator:
-if sse.data.startswith("[DONE]"):
-    break
yield process_data(data=sse.json(), cast_to=cast_to, response=response)

# Ensure the entire stream is consumed
@@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]:

async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
async for sse in self._decoder.aiter(self.response.aiter_lines()):
-if sse.data.startswith("[DONE]"):
-    break
yield sse

async def __stream__(self) -> AsyncIterator[_T]:
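For context, the removed lines are the two copies of the SSE end-of-stream sentinel check; with them gone, every event reaching the iterator is handed to `process_data` and the stream ends when the underlying connection is exhausted. A standalone sketch of the pattern being removed (the helper name is illustrative, not from the source):

```python
from typing import Any, Iterator


def iter_payloads(sse_events: Iterator[Any]) -> Iterator[Any]:
    """Sketch of the "[DONE]" sentinel check that this diff deletes."""
    for sse in sse_events:
        # OpenAI-style SSE streams send a literal "[DONE]" data line to mark the end.
        if sse.data.startswith("[DONE]"):
            break
        yield sse.json()
```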