From 4de80dbe4c27d6ecbcc2d2b6192d27ade5da3866 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:01:42 +0000 Subject: [PATCH 1/6] feat: Add initial Stainless SDK --- .release-please-manifest.json | 2 +- CHANGELOG.md | 15 -- bin/check-test-server | 0 bin/test | 0 examples/chat_completion.py | 55 ----- examples/chat_completion_async.py | 62 ------ examples/chat_completion_async_streaming.py | 60 ------ examples/chat_completion_stop.py | 58 ----- examples/chat_completion_streaming.py | 56 ----- pyproject.toml | 2 +- src/groq/_streaming.py | 4 - src/groq/_version.py | 2 +- src/groq/resources/chat/completions.py | 198 +----------------- src/groqcloud/lib/.keep | 4 + .../lib/chat_completion_chunk.py | 5 +- 15 files changed, 14 insertions(+), 509 deletions(-) mode change 100755 => 100644 bin/check-test-server mode change 100755 => 100644 bin/test delete mode 100644 examples/chat_completion.py delete mode 100644 examples/chat_completion_async.py delete mode 100644 examples/chat_completion_async_streaming.py delete mode 100644 examples/chat_completion_stop.py delete mode 100644 examples/chat_completion_streaming.py create mode 100644 src/groqcloud/lib/.keep rename src/{groq => groqcloud}/lib/chat_completion_chunk.py (97%) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index da59f99..3d2ac0b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.4.0" + ".": "0.1.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index a3b8a29..f643576 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,20 +1,5 @@ # Changelog -## 0.4.0 (2024-02-14) - -Full Changelog: [v0.1.0...v0.4.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.4.0) - -### Features - -* Add initial Stainless SDK ([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00)) -* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9)) -* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([2241036](https://github.com/groq/groq-python/commit/2241036e9dbee6629ad7ebce5e6f4f5e5f1028ce)) - - -### Chores - -* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([13665ad](https://github.com/groq/groq-python/commit/13665ad76705513d99cbaa497ccccc694932f2c3)) - ## 0.1.0 (2024-02-10) Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0) diff --git a/bin/check-test-server b/bin/check-test-server old mode 100755 new mode 100644 diff --git a/bin/test b/bin/test old mode 100755 new mode 100644 diff --git a/examples/chat_completion.py b/examples/chat_completion.py deleted file mode 100644 index 77511d0..0000000 --- a/examples/chat_completion.py +++ /dev/null @@ -1,55 +0,0 @@ -from groq import Groq - -client = Groq() - -chat_completion = client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. 
- { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become deterministic - # and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # If set, partial message deltas will be sent. - stream=False, -) - -# Print the completion returned by the LLM. -print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py deleted file mode 100644 index 99b8fc4..0000000 --- a/examples/chat_completion_async.py +++ /dev/null @@ -1,62 +0,0 @@ -import asyncio - -from groq import AsyncGroq - - -async def main(): - client = AsyncGroq() - - chat_completion = await client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become - # deterministic and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # If set, partial message deltas will be sent. - stream=False, - ) - - # Print the completion returned by the LLM. - print(chat_completion.choices[0].message.content) - - -asyncio.run(main()) diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py deleted file mode 100644 index 39105f9..0000000 --- a/examples/chat_completion_async_streaming.py +++ /dev/null @@ -1,60 +0,0 @@ -import asyncio - -from groq import AsyncGroq - - -async def main(): - client = AsyncGroq() - - stream = await client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. 
- { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become - # deterministic and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - stream=True, - ) - - # Print the incremental deltas returned by the LLM. - async for chunk in stream: - print(chunk.choices[0].delta.content, end="") - - -asyncio.run(main()) diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py deleted file mode 100644 index 4abb63b..0000000 --- a/examples/chat_completion_stop.py +++ /dev/null @@ -1,58 +0,0 @@ -from groq import Groq - -client = Groq() - -chat_completion = client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become deterministic - # and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - # For this example, we will use ", 6" so that the llm stops counting at 5. - # If multiple stop values are needed, an array of string may be passed, - # stop=[", 6", ", six", ", Six"] - stop=", 6", - - # If set, partial message deltas will be sent. - stream=False, -) - -# Print the completion returned by the LLM. 
-print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py deleted file mode 100644 index 78d2607..0000000 --- a/examples/chat_completion_streaming.py +++ /dev/null @@ -1,56 +0,0 @@ -from groq import Groq - -client = Groq() - -stream = client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become deterministic - # and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # If set, partial message deltas will be sent. - stream=True, -) - -# Print the incremental deltas returned by the LLM. -for chunk in stream: - print(chunk.choices[0].delta.content, end="") diff --git a/pyproject.toml b/pyproject.toml index 5eb419f..f3c156b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "groq" -version = "0.4.0" +version = "0.1.0" description = "The official Python library for the groq API" readme = "README.md" license = "Apache-2.0" diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index 2769874..ac0ea8a 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: - if sse.data.startswith("[DONE]"): - break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): - if sse.data.startswith("[DONE]"): - break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groq/_version.py b/src/groq/_version.py index 1612b72..0282ec9 100644 --- a/src/groq/_version.py +++ b/src/groq/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. 
__title__ = "groq" -__version__ = "0.4.0" # x-release-please-version +__version__ = "0.1.0" # x-release-please-version diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 67b7ab8..9f332f5 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Dict, List, Union, Literal, Iterable, Optional, overload +from typing import Dict, List, Union, Iterable, Optional import httpx @@ -16,12 +16,10 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ..._streaming import Stream, AsyncStream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, ) -from ...lib.chat_completion_chunk import ChatCompletionChunk __all__ = ["Completions", "AsyncCompletions"] @@ -35,7 +33,6 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) - @overload def create( self, *, @@ -50,7 +47,7 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -64,98 +61,6 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... - - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - ... 
- - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - ... - - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -200,8 +105,6 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], ) @@ -214,7 +117,6 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) - @overload async def create( self, *, @@ -229,7 +131,7 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -243,98 +145,6 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... - - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - ... 
- - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - ... - - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -379,8 +189,6 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], ) diff --git a/src/groqcloud/lib/.keep b/src/groqcloud/lib/.keep new file mode 100644 index 0000000..5e2c99f --- /dev/null +++ b/src/groqcloud/lib/.keep @@ -0,0 +1,4 @@ +File generated from our OpenAPI spec by Stainless. + +This directory can be used to store custom files to expand the SDK. +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. 
\ No newline at end of file diff --git a/src/groq/lib/chat_completion_chunk.py b/src/groqcloud/lib/chat_completion_chunk.py similarity index 97% rename from src/groq/lib/chat_completion_chunk.py rename to src/groqcloud/lib/chat_completion_chunk.py index e4e3533..2aabbcf 100644 --- a/src/groq/lib/chat_completion_chunk.py +++ b/src/groqcloud/lib/chat_completion_chunk.py @@ -16,6 +16,8 @@ "ChoiceDeltaToolCall", "ChoiceDeltaToolCallFunction", ] + + class ChoiceDeltaFunctionCall(BaseModel): arguments: Optional[str] = None """ @@ -28,6 +30,7 @@ class ChoiceDeltaFunctionCall(BaseModel): name: Optional[str] = None """The name of the function to call.""" + class ChoiceLogprobsContentTopLogprob(BaseModel): token: Optional[str] = None @@ -97,4 +100,4 @@ class ChatCompletionChunk(BaseModel): object: Optional[str] = None - system_fingerprint: Optional[str] = None \ No newline at end of file + system_fingerprint: Optional[str] = None From 75ea081a84bbcf15702dcb53fd6411c8de497c83 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:04:59 +0000 Subject: [PATCH 2/6] chore: go live (#5) --- .github/workflows/ci.yml | 4 +- README.md | 4 +- bin/check-release-environment | 4 +- bin/check-test-server | 0 bin/test | 0 src/groq/resources/chat/completions.py | 16 +-- src/groq/types/chat/chat_completion.py | 16 +-- .../types/chat/completion_create_params.py | 16 +-- tests/api_resources/chat/test_completions.py | 124 ++++++++++++++++-- 9 files changed, 140 insertions(+), 44 deletions(-) mode change 100644 => 100755 bin/check-test-server mode change 100644 => 100755 bin/test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebfa5ce..aea1868 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - main + - stainless pull_request: branches: - - main + - stainless jobs: lint: diff --git a/README.md b/README.md index 0c14ce0..5c0abdd 100644 --- a/README.md +++ b/README.md @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/bin/check-release-environment b/bin/check-release-environment index 29306d8..e35a371 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then errors+=("The GROQ_PYPI_TOKEN secret has not been set. 
Please set it in either this repository's secrets or your organization secrets.") fi -len=${#errors[@]} +lenErrors=${#errors[@]} -if [[ len -gt 0 ]]; then +if [[ lenErrors -gt 0 ]]; then echo -e "Found the following errors in the release environment:\n" for error in "${errors[@]}"; do diff --git a/bin/check-test-server b/bin/check-test-server old mode 100644 new mode 100755 diff --git a/bin/test b/bin/test old mode 100644 new mode 100755 diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 9f332f5..f1c5a02 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -36,12 +36,12 @@ def with_streaming_response(self) -> CompletionsWithStreamingResponse: def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, @@ -80,12 +80,12 @@ def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, @@ -120,12 +120,12 @@ def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: async def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, @@ -164,12 +164,12 @@ async def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py index 77365e4..1ff1f5d 100644 --- a/src/groq/types/chat/chat_completion.py +++ b/src/groq/types/chat/chat_completion.py @@ -54,21 +54,21 @@ class ChoiceMessageToolCall(BaseModel): class ChoiceMessage(BaseModel): - content: Optional[str] = None + content: str - role: Optional[str] = None + role: str tool_calls: Optional[List[ChoiceMessageToolCall]] = None class Choice(BaseModel): - finish_reason: Optional[str] = None + finish_reason: str - index: Optional[int] = None + index: int - logprobs: Optional[ChoiceLogprobs] = None + logprobs: ChoiceLogprobs - message: Optional[ChoiceMessage] = None + message: ChoiceMessage class Usage(BaseModel): @@ -86,9 +86,9 @@ class Usage(BaseModel): class ChatCompletion(BaseModel): - id: Optional[str] = None + choices: List[Choice] - 
choices: Optional[List[Choice]] = None + id: Optional[str] = None created: Optional[int] = None diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py index 1aea783..0f9712b 100644 --- a/src/groq/types/chat/completion_create_params.py +++ b/src/groq/types/chat/completion_create_params.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Dict, List, Union, Iterable, Optional -from typing_extensions import Annotated, TypedDict +from typing_extensions import Required, Annotated, TypedDict from ..._utils import PropertyInfo @@ -22,6 +22,10 @@ class CompletionCreateParams(TypedDict, total=False): + messages: Required[Iterable[Message]] + + model: Required[str] + frequency_penalty: float logit_bias: Dict[str, int] @@ -30,10 +34,6 @@ class CompletionCreateParams(TypedDict, total=False): max_tokens: int - messages: Iterable[Message] - - model: str - n: int presence_penalty: float @@ -78,11 +78,11 @@ class MessageToolCall(TypedDict, total=False): class Message(TypedDict, total=False): - content: str + content: Required[str] - name: str + role: Required[str] - role: str + name: str tool_call_id: str """ToolMessage Fields""" diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 1eabfe7..1fdfc34 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -19,16 +19,28 @@ class TestCompletions: @parametrize def test_method_create(self, client: Groq) -> None: - completion = client.chat.completions.create() + completion = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: Groq) -> None: completion = client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -128,6 +140,10 @@ def test_method_create_with_all_params(self, client: Groq) -> None: }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -176,7 +192,23 @@ def test_method_create_with_all_params(self, client: Groq) -> None: @parametrize def test_raw_response_create(self, client: Groq) -> None: - response = client.chat.completions.with_raw_response.create() + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -185,7 +217,23 @@ def test_raw_response_create(self, client: Groq) -> None: @parametrize def test_streaming_response_create(self, client: Groq) -> None: - with client.chat.completions.with_streaming_response.create() as response: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -200,16 +248,28 @@ class TestAsyncCompletions: @parametrize async def test_method_create(self, async_client: AsyncGroq) -> None: - completion = await async_client.chat.completions.create() + completion = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> None: completion = await async_client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -309,6 +369,10 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -357,7 +421,23 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N @parametrize async def test_raw_response_create(self, async_client: AsyncGroq) -> None: - response = await async_client.chat.completions.with_raw_response.create() + response = await async_client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -366,7 +446,23 @@ async def test_raw_response_create(self, async_client: AsyncGroq) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncGroq) -> None: - async with async_client.chat.completions.with_streaming_response.create() as response: + async with async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" From 3199ab3360e85bdc9ea56ab0120ac9f83376ebf5 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:11:12 +0000 Subject: [PATCH 3/6] release: 0.4.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 17 +++++++++++++++++ pyproject.toml | 2 +- src/groq/_version.py | 2 +- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3d2ac0b..da59f99 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0" + ".": "0.4.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f643576..ced1d0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## 0.4.0 (2024-02-15) + +Full Changelog: [v0.1.0...v0.4.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.4.0) + +### Features + +* Add initial Stainless SDK ([d5a8512](https://github.com/groq/groq-python/commit/d5a851262e04e625dde130367ed91d8f95683599)) +* Add initial Stainless SDK 
([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00)) +* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9)) +* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([8d92c08](https://github.com/groq/groq-python/commit/8d92c086e320c2715e02bc79807ff872e84c0b0f)) + + +### Chores + +* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([ba81c42](https://github.com/groq/groq-python/commit/ba81c42d6d0fd6d47819e0d58962235cb70ca4f1)) +* go live ([#5](https://github.com/groq/groq-python/issues/5)) ([af9a838](https://github.com/groq/groq-python/commit/af9a838e240bb0f7385bc33fb18ce246427ca2f7)) + ## 0.1.0 (2024-02-10) Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0) diff --git a/pyproject.toml b/pyproject.toml index f3c156b..5eb419f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "groq" -version = "0.1.0" +version = "0.4.0" description = "The official Python library for the groq API" readme = "README.md" license = "Apache-2.0" diff --git a/src/groq/_version.py b/src/groq/_version.py index 0282ec9..1612b72 100644 --- a/src/groq/_version.py +++ b/src/groq/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. __title__ = "groq" -__version__ = "0.1.0" # x-release-please-version +__version__ = "0.4.0" # x-release-please-version From 7068aee42f6cc1ef39fdebd0b6f3eff0c60fc997 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Wed, 14 Feb 2024 15:40:49 -0800 Subject: [PATCH 4/6] Add streaming support --- src/groq/_streaming.py | 4 + .../lib/chat_completion_chunk.py | 23 +- src/groq/resources/chat/completions.py | 199 +++++++++++++++++- src/groqcloud/lib/.keep | 4 - 4 files changed, 212 insertions(+), 18 deletions(-) rename src/{groqcloud => groq}/lib/chat_completion_chunk.py (82%) delete mode 100644 src/groqcloud/lib/.keep diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index ac0ea8a..2769874 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,6 +53,8 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: + if sse.data.startswith("[DONE]"): + break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -106,6 +108,8 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): + if sse.data.startswith("[DONE]"): + break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groqcloud/lib/chat_completion_chunk.py b/src/groq/lib/chat_completion_chunk.py similarity index 82% rename from src/groqcloud/lib/chat_completion_chunk.py rename to src/groq/lib/chat_completion_chunk.py index 2aabbcf..71c433f 100644 --- a/src/groqcloud/lib/chat_completion_chunk.py +++ b/src/groq/lib/chat_completion_chunk.py @@ -70,11 +70,12 @@ class ChoiceDeltaToolCall(BaseModel): class ChoiceDelta(BaseModel): - content: Optional[str] = None + content: str + + role: str function_call: Optional[ChoiceDeltaFunctionCall] = None - role: Optional[str] = None tool_calls: Optional[List[ChoiceDeltaToolCall]] = None @@ -82,22 +83,22 @@ class ChoiceDelta(BaseModel): class Choice(BaseModel): delta: ChoiceDelta - finish_reason: Optional[str] = None + 
finish_reason: str - index: Optional[int] = None + index: int - logprobs: Optional[ChoiceLogprobs] = None + logprobs: ChoiceLogprobs class ChatCompletionChunk(BaseModel): - id: Optional[str] = None + id: str - choices: Optional[List[Choice]] = None + choices: List[Choice] - created: Optional[int] = None + created: int - model: Optional[str] = None + model: str - object: Optional[str] = None + object: str - system_fingerprint: Optional[str] = None + system_fingerprint: str diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index f1c5a02..017591e 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional +from typing import Dict, List, Union, Iterable, Optional, overload +from typing_extensions import Literal import httpx @@ -16,10 +17,12 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) +from ..._streaming import Stream, AsyncStream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, ) +from ...lib.chat_completion_chunk import ChatCompletionChunk __all__ = ["Completions", "AsyncCompletions"] @@ -33,6 +36,7 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) + @overload def create( self, *, @@ -47,7 +51,7 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -61,6 +65,98 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... + + @overload + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + ... 
+ + @overload + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + ... + + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -105,6 +201,8 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], ) @@ -117,6 +215,7 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) + @overload async def create( self, *, @@ -131,7 +230,7 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -145,6 +244,98 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... + + @overload + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + ... 
+ + @overload + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + ... + + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -189,6 +380,8 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], ) diff --git a/src/groqcloud/lib/.keep b/src/groqcloud/lib/.keep deleted file mode 100644 index 5e2c99f..0000000 --- a/src/groqcloud/lib/.keep +++ /dev/null @@ -1,4 +0,0 @@ -File generated from our OpenAPI spec by Stainless. - -This directory can be used to store custom files to expand the SDK. -It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. 
\ No newline at end of file From 6886dc1627287a9e7e1408c61385679220cb4aa9 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Thu, 15 Feb 2024 16:06:02 -0800 Subject: [PATCH 5/6] Add examples --- examples/chat_completion.py | 55 ++++++++++++++++++ examples/chat_completion_async.py | 62 +++++++++++++++++++++ examples/chat_completion_async_streaming.py | 60 ++++++++++++++++++++ examples/chat_completion_stop.py | 58 +++++++++++++++++++ examples/chat_completion_streaming.py | 56 +++++++++++++++++++ 5 files changed, 291 insertions(+) create mode 100644 examples/chat_completion.py create mode 100644 examples/chat_completion_async.py create mode 100644 examples/chat_completion_async_streaming.py create mode 100644 examples/chat_completion_stop.py create mode 100644 examples/chat_completion_streaming.py diff --git a/examples/chat_completion.py b/examples/chat_completion.py new file mode 100644 index 0000000..77511d0 --- /dev/null +++ b/examples/chat_completion.py @@ -0,0 +1,55 @@ +from groq import Groq + +client = Groq() + +chat_completion = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + }, + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py new file mode 100644 index 0000000..0d485a2 --- /dev/null +++ b/examples/chat_completion_async.py @@ -0,0 +1,62 @@ +import asyncio + +from groq import AsyncGroq + + +async def main() -> None: + client = AsyncGroq() + + chat_completion = await client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + }, + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. 
+        # As the temperature approaches zero, the model will become
+        # deterministic and repetitive.
+        temperature=0.5,
+
+        # The maximum number of tokens to generate. Requests can use up to
+        # 2048 tokens shared between prompt and completion.
+        max_tokens=1024,
+
+        # Controls diversity via nucleus sampling: 0.5 means half of all
+        # likelihood-weighted options are considered.
+        top_p=1,
+
+        # A stop sequence is a predefined or user-specified text string that
+        # signals an AI to stop generating content, ensuring its responses
+        # remain focused and concise. Examples include punctuation marks and
+        # markers like "[end]".
+        stop=None,
+
+        # If set, partial message deltas will be sent.
+        stream=False,
+    )
+
+    # Print the completion returned by the LLM.
+    print(chat_completion.choices[0].message.content)
+
+
+asyncio.run(main())
diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py
new file mode 100644
index 0000000..3ba5edd
--- /dev/null
+++ b/examples/chat_completion_async_streaming.py
@@ -0,0 +1,60 @@
+import asyncio
+
+from groq import AsyncGroq
+
+
+async def main() -> None:
+    client = AsyncGroq()
+
+    stream = await client.chat.completions.create(
+        #
+        # Required parameters
+        #
+        messages=[
+            # Set an optional system message. This sets the behavior of the
+            # assistant and can be used to provide specific instructions for
+            # how it should behave throughout the conversation.
+            {
+                "role": "system",
+                "content": "you are a helpful assistant."
+            },
+            # Set a user message for the assistant to respond to.
+            {
+                "role": "user",
+                "content": "Explain the importance of low latency LLMs",
+            },
+        ],
+
+        # The language model which will generate the completion.
+        model="mixtral-8x7b-32768",
+
+        #
+        # Optional parameters
+        #
+
+        # Controls randomness: lowering results in less random completions.
+        # As the temperature approaches zero, the model will become
+        # deterministic and repetitive.
+        temperature=0.5,
+
+        # The maximum number of tokens to generate. Requests can use up to
+        # 2048 tokens shared between prompt and completion.
+        max_tokens=1024,
+
+        # A stop sequence is a predefined or user-specified text string that
+        # signals an AI to stop generating content, ensuring its responses
+        # remain focused and concise. Examples include punctuation marks and
+        # markers like "[end]".
+        stop=None,
+
+        # If set, partial message deltas will be sent as they are generated,
+        # instead of a single message when the completion finishes.
+        stream=True,
+    )
+
+    # Print the incremental deltas returned by the LLM.
+    async for chunk in stream:
+        print(chunk.choices[0].delta.content, end="")
+
+
+asyncio.run(main())
diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py
new file mode 100644
index 0000000..4abb63b
--- /dev/null
+++ b/examples/chat_completion_stop.py
@@ -0,0 +1,58 @@
+from groq import Groq
+
+client = Groq()
+
+chat_completion = client.chat.completions.create(
+    #
+    # Required parameters
+    #
+    messages=[
+        # Set an optional system message. This sets the behavior of the
+        # assistant and can be used to provide specific instructions for
+        # how it should behave throughout the conversation.
+        {
+            "role": "system",
+            "content": "you are a helpful assistant."
+        },
+        # Set a user message for the assistant to respond to.
+        {
+            "role": "user",
+            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
+        },
+    ],
+
+    # The language model which will generate the completion.
+ model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + # For this example, we will use ", 6" so that the llm stops counting at 5. + # If multiple stop values are needed, an array of string may be passed, + # stop=[", 6", ", six", ", Six"] + stop=", 6", + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py new file mode 100644 index 0000000..78d2607 --- /dev/null +++ b/examples/chat_completion_streaming.py @@ -0,0 +1,56 @@ +from groq import Groq + +client = Groq() + +stream = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + }, + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=True, +) + +# Print the incremental deltas returned by the LLM. 
+for chunk in stream: + print(chunk.choices[0].delta.content, end="") From 9638e4fdebef3303f6ccafeec7cc3faa7568c212 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Thu, 15 Feb 2024 18:30:19 -0800 Subject: [PATCH 6/6] Fix Readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5c0abdd..eb5367f 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ chat_completion = client.chat.completions.create( ], model="mixtral-8x7b-32768", ) -print(chat_completion.choices_0.message.content) +print(chat_completion.choices[0].message.content) ``` While you can provide an `api_key` keyword argument, @@ -71,7 +71,7 @@ async def main() -> None: ], model="mixtral-8x7b-32768", ) - print(chat_completion.choices_0.message.content) + print(chat_completion.choices[0].message.content) asyncio.run(main())
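The completions hunk earlier in this series wires streaming through `stream=stream or False` and `stream_cls=AsyncStream[ChatCompletionChunk]`, so the async `create` call is expected to return a plain `ChatCompletion` when `stream` is false or omitted, and an `AsyncStream` of `ChatCompletionChunk` objects when `stream=True`. The sketch below exercises both paths; it reuses the `AsyncGroq` client and `mixtral-8x7b-32768` model from the examples, and the `or ""` guard on `delta.content` is a defensive addition for illustration (the committed examples print the delta directly).

```python
import asyncio

from groq import AsyncGroq


async def main() -> None:
    client = AsyncGroq()
    messages = [
        {"role": "user", "content": "Explain the importance of low latency LLMs"},
    ]

    # stream omitted (or False): create() returns a ChatCompletion.
    completion = await client.chat.completions.create(
        messages=messages,
        model="mixtral-8x7b-32768",
    )
    print(completion.choices[0].message.content)

    # stream=True: create() returns an AsyncStream of ChatCompletionChunk.
    stream = await client.chat.completions.create(
        messages=messages,
        model="mixtral-8x7b-32768",
        stream=True,
    )
    async for chunk in stream:
        # delta.content can be None on some chunks, so fall back to "".
        print(chunk.choices[0].delta.content or "", end="")
    print()


asyncio.run(main())
```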
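The stop-sequence example passes a single string, but its inline comment notes that a list may be supplied when several stop values are needed. The following is a minimal sketch of that variant, assuming the same `Groq` client and model as the committed example; the extra stop strings are illustrative rather than part of the patch.

```python
from groq import Groq

client = Groq()

# Ask the model to count, but cut generation off before it reaches six.
# Passing a list means generation stops at the first matching sequence.
chat_completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "you are a helpful assistant."},
        {
            "role": "user",
            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
        },
    ],
    model="mixtral-8x7b-32768",
    temperature=0.5,
    max_tokens=1024,
    # Multiple stop values: generation halts when any one of them appears.
    stop=[", 6", ", six", ", Six"],
)

print(chat_completion.choices[0].message.content)
```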