From 4de80dbe4c27d6ecbcc2d2b6192d27ade5da3866 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:01:42 +0000 Subject: [PATCH 1/6] feat: Add initial Stainless SDK --- .release-please-manifest.json | 2 +- CHANGELOG.md | 15 -- bin/check-test-server | 0 bin/test | 0 examples/chat_completion.py | 55 ----- examples/chat_completion_async.py | 62 ------ examples/chat_completion_async_streaming.py | 60 ------ examples/chat_completion_stop.py | 58 ----- examples/chat_completion_streaming.py | 56 ----- pyproject.toml | 2 +- src/groq/_streaming.py | 4 - src/groq/_version.py | 2 +- src/groq/resources/chat/completions.py | 198 +----------------- src/groqcloud/lib/.keep | 4 + .../lib/chat_completion_chunk.py | 5 +- 15 files changed, 14 insertions(+), 509 deletions(-) mode change 100755 => 100644 bin/check-test-server mode change 100755 => 100644 bin/test delete mode 100644 examples/chat_completion.py delete mode 100644 examples/chat_completion_async.py delete mode 100644 examples/chat_completion_async_streaming.py delete mode 100644 examples/chat_completion_stop.py delete mode 100644 examples/chat_completion_streaming.py create mode 100644 src/groqcloud/lib/.keep rename src/{groq => groqcloud}/lib/chat_completion_chunk.py (97%) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index da59f99..3d2ac0b 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.4.0" + ".": "0.1.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index a3b8a29..f643576 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,20 +1,5 @@ # Changelog -## 0.4.0 (2024-02-14) - -Full Changelog: [v0.1.0...v0.4.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.4.0) - -### Features - -* Add initial Stainless SDK ([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00)) -* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9)) -* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([2241036](https://github.com/groq/groq-python/commit/2241036e9dbee6629ad7ebce5e6f4f5e5f1028ce)) - - -### Chores - -* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([13665ad](https://github.com/groq/groq-python/commit/13665ad76705513d99cbaa497ccccc694932f2c3)) - ## 0.1.0 (2024-02-10) Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0) diff --git a/bin/check-test-server b/bin/check-test-server old mode 100755 new mode 100644 diff --git a/bin/test b/bin/test old mode 100755 new mode 100644 diff --git a/examples/chat_completion.py b/examples/chat_completion.py deleted file mode 100644 index 77511d0..0000000 --- a/examples/chat_completion.py +++ /dev/null @@ -1,55 +0,0 @@ -from groq import Groq - -client = Groq() - -chat_completion = client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. 
- { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become deterministic - # and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # If set, partial message deltas will be sent. - stream=False, -) - -# Print the completion returned by the LLM. -print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py deleted file mode 100644 index 99b8fc4..0000000 --- a/examples/chat_completion_async.py +++ /dev/null @@ -1,62 +0,0 @@ -import asyncio - -from groq import AsyncGroq - - -async def main(): - client = AsyncGroq() - - chat_completion = await client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become - # deterministic and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # If set, partial message deltas will be sent. - stream=False, - ) - - # Print the completion returned by the LLM. - print(chat_completion.choices[0].message.content) - - -asyncio.run(main()) diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py deleted file mode 100644 index 39105f9..0000000 --- a/examples/chat_completion_async_streaming.py +++ /dev/null @@ -1,60 +0,0 @@ -import asyncio - -from groq import AsyncGroq - - -async def main(): - client = AsyncGroq() - - stream = await client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. 
- { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become - # deterministic and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - stream=True, - ) - - # Print the incremental deltas returned by the LLM. - async for chunk in stream: - print(chunk.choices[0].delta.content, end="") - - -asyncio.run(main()) diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py deleted file mode 100644 index 4abb63b..0000000 --- a/examples/chat_completion_stop.py +++ /dev/null @@ -1,58 +0,0 @@ -from groq import Groq - -client = Groq() - -chat_completion = client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become deterministic - # and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - # For this example, we will use ", 6" so that the llm stops counting at 5. - # If multiple stop values are needed, an array of string may be passed, - # stop=[", 6", ", six", ", Six"] - stop=", 6", - - # If set, partial message deltas will be sent. - stream=False, -) - -# Print the completion returned by the LLM. 
-print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py deleted file mode 100644 index 78d2607..0000000 --- a/examples/chat_completion_streaming.py +++ /dev/null @@ -1,56 +0,0 @@ -from groq import Groq - -client = Groq() - -stream = client.chat.completions.create( - # - # Required parameters - # - messages=[ - # Set an optional system message. This sets the behavior of the - # assistant and can be used to provide specific instructions for - # how it should behave throughout the conversation. - { - "role": "system", - "content": "you are a helpful assistant." - }, - # Set a user message for the assistant to respond to. - { - "role": "user", - "content": "Explain the importance of low latency LLMs", - }, - ], - - # The language model which will generate the completion. - model="mixtral-8x7b-32768", - - # - # Optional parameters - # - - # Controls randomness: lowering results in less random completions. - # As the temperature approaches zero, the model will become deterministic - # and repetitive. - temperature=0.5, - - # The maximum number of tokens to generate. Requests can use up to - # 2048 tokens shared between prompt and completion. - max_tokens=1024, - - # Controls diversity via nucleus sampling: 0.5 means half of all - # likelihood-weighted options are considered. - top_p=1, - - # A stop sequence is a predefined or user-specified text string that - # signals an AI to stop generating content, ensuring its responses - # remain focused and concise. Examples include punctuation marks and - # markers like "[end]". - stop=None, - - # If set, partial message deltas will be sent. - stream=True, -) - -# Print the incremental deltas returned by the LLM. -for chunk in stream: - print(chunk.choices[0].delta.content, end="") diff --git a/pyproject.toml b/pyproject.toml index 5eb419f..f3c156b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "groq" -version = "0.4.0" +version = "0.1.0" description = "The official Python library for the groq API" readme = "README.md" license = "Apache-2.0" diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index 2769874..ac0ea8a 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,8 +53,6 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: - if sse.data.startswith("[DONE]"): - break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -108,8 +106,6 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): - if sse.data.startswith("[DONE]"): - break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groq/_version.py b/src/groq/_version.py index 1612b72..0282ec9 100644 --- a/src/groq/_version.py +++ b/src/groq/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. 
__title__ = "groq" -__version__ = "0.4.0" # x-release-please-version +__version__ = "0.1.0" # x-release-please-version diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 67b7ab8..9f332f5 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Dict, List, Union, Literal, Iterable, Optional, overload +from typing import Dict, List, Union, Iterable, Optional import httpx @@ -16,12 +16,10 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ..._streaming import Stream, AsyncStream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, ) -from ...lib.chat_completion_chunk import ChatCompletionChunk __all__ = ["Completions", "AsyncCompletions"] @@ -35,7 +33,6 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) - @overload def create( self, *, @@ -50,7 +47,7 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -64,98 +61,6 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... - - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - ... 
- - @overload - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - ... - - def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -200,8 +105,6 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], ) @@ -214,7 +117,6 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) - @overload async def create( self, *, @@ -229,7 +131,7 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: bool | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -243,98 +145,6 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... - - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - ... 
- - @overload - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - ... - - async def create( - self, - *, - frequency_penalty: float | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, - tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -379,8 +189,6 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], ) diff --git a/src/groqcloud/lib/.keep b/src/groqcloud/lib/.keep new file mode 100644 index 0000000..5e2c99f --- /dev/null +++ b/src/groqcloud/lib/.keep @@ -0,0 +1,4 @@ +File generated from our OpenAPI spec by Stainless. + +This directory can be used to store custom files to expand the SDK. +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. 
\ No newline at end of file diff --git a/src/groq/lib/chat_completion_chunk.py b/src/groqcloud/lib/chat_completion_chunk.py similarity index 97% rename from src/groq/lib/chat_completion_chunk.py rename to src/groqcloud/lib/chat_completion_chunk.py index e4e3533..2aabbcf 100644 --- a/src/groq/lib/chat_completion_chunk.py +++ b/src/groqcloud/lib/chat_completion_chunk.py @@ -16,6 +16,8 @@ "ChoiceDeltaToolCall", "ChoiceDeltaToolCallFunction", ] + + class ChoiceDeltaFunctionCall(BaseModel): arguments: Optional[str] = None """ @@ -28,6 +30,7 @@ class ChoiceDeltaFunctionCall(BaseModel): name: Optional[str] = None """The name of the function to call.""" + class ChoiceLogprobsContentTopLogprob(BaseModel): token: Optional[str] = None @@ -97,4 +100,4 @@ class ChatCompletionChunk(BaseModel): object: Optional[str] = None - system_fingerprint: Optional[str] = None \ No newline at end of file + system_fingerprint: Optional[str] = None From 75ea081a84bbcf15702dcb53fd6411c8de497c83 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:04:59 +0000 Subject: [PATCH 2/6] chore: go live (#5) --- .github/workflows/ci.yml | 4 +- README.md | 4 +- bin/check-release-environment | 4 +- bin/check-test-server | 0 bin/test | 0 src/groq/resources/chat/completions.py | 16 +-- src/groq/types/chat/chat_completion.py | 16 +-- .../types/chat/completion_create_params.py | 16 +-- tests/api_resources/chat/test_completions.py | 124 ++++++++++++++++-- 9 files changed, 140 insertions(+), 44 deletions(-) mode change 100644 => 100755 bin/check-test-server mode change 100644 => 100755 bin/test diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ebfa5ce..aea1868 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,10 @@ name: CI on: push: branches: - - main + - stainless pull_request: branches: - - main + - stainless jobs: lint: diff --git a/README.md b/README.md index 0c14ce0..5c0abdd 100644 --- a/README.md +++ b/README.md @@ -261,9 +261,9 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion.id) ``` -These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) object. +These methods return an [`APIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/main/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/groq/groq-python/tree/stainless/src/groq/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` diff --git a/bin/check-release-environment b/bin/check-release-environment index 29306d8..e35a371 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -6,9 +6,9 @@ if [ -z "${PYPI_TOKEN}" ]; then errors+=("The GROQ_PYPI_TOKEN secret has not been set. 
Please set it in either this repository's secrets or your organization secrets.") fi -len=${#errors[@]} +lenErrors=${#errors[@]} -if [[ len -gt 0 ]]; then +if [[ lenErrors -gt 0 ]]; then echo -e "Found the following errors in the release environment:\n" for error in "${errors[@]}"; do diff --git a/bin/check-test-server b/bin/check-test-server old mode 100644 new mode 100755 diff --git a/bin/test b/bin/test old mode 100644 new mode 100755 diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index 9f332f5..f1c5a02 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -36,12 +36,12 @@ def with_streaming_response(self) -> CompletionsWithStreamingResponse: def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, @@ -80,12 +80,12 @@ def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, @@ -120,12 +120,12 @@ def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: async def create( self, *, + messages: Iterable[completion_create_params.Message], + model: str, frequency_penalty: float | NotGiven = NOT_GIVEN, logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, logprobs: bool | NotGiven = NOT_GIVEN, max_tokens: int | NotGiven = NOT_GIVEN, - messages: Iterable[completion_create_params.Message] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, n: int | NotGiven = NOT_GIVEN, presence_penalty: float | NotGiven = NOT_GIVEN, response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, @@ -164,12 +164,12 @@ async def create( "/openai/v1/chat/completions", body=maybe_transform( { + "messages": messages, + "model": model, "frequency_penalty": frequency_penalty, "logit_bias": logit_bias, "logprobs": logprobs, "max_tokens": max_tokens, - "messages": messages, - "model": model, "n": n, "presence_penalty": presence_penalty, "response_format": response_format, diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py index 77365e4..1ff1f5d 100644 --- a/src/groq/types/chat/chat_completion.py +++ b/src/groq/types/chat/chat_completion.py @@ -54,21 +54,21 @@ class ChoiceMessageToolCall(BaseModel): class ChoiceMessage(BaseModel): - content: Optional[str] = None + content: str - role: Optional[str] = None + role: str tool_calls: Optional[List[ChoiceMessageToolCall]] = None class Choice(BaseModel): - finish_reason: Optional[str] = None + finish_reason: str - index: Optional[int] = None + index: int - logprobs: Optional[ChoiceLogprobs] = None + logprobs: ChoiceLogprobs - message: Optional[ChoiceMessage] = None + message: ChoiceMessage class Usage(BaseModel): @@ -86,9 +86,9 @@ class Usage(BaseModel): class ChatCompletion(BaseModel): - id: Optional[str] = None + choices: List[Choice] - 
choices: Optional[List[Choice]] = None + id: Optional[str] = None created: Optional[int] = None diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py index 1aea783..0f9712b 100644 --- a/src/groq/types/chat/completion_create_params.py +++ b/src/groq/types/chat/completion_create_params.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Dict, List, Union, Iterable, Optional -from typing_extensions import Annotated, TypedDict +from typing_extensions import Required, Annotated, TypedDict from ..._utils import PropertyInfo @@ -22,6 +22,10 @@ class CompletionCreateParams(TypedDict, total=False): + messages: Required[Iterable[Message]] + + model: Required[str] + frequency_penalty: float logit_bias: Dict[str, int] @@ -30,10 +34,6 @@ class CompletionCreateParams(TypedDict, total=False): max_tokens: int - messages: Iterable[Message] - - model: str - n: int presence_penalty: float @@ -78,11 +78,11 @@ class MessageToolCall(TypedDict, total=False): class Message(TypedDict, total=False): - content: str + content: Required[str] - name: str + role: Required[str] - role: str + name: str tool_call_id: str """ToolMessage Fields""" diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 1eabfe7..1fdfc34 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -19,16 +19,28 @@ class TestCompletions: @parametrize def test_method_create(self, client: Groq) -> None: - completion = client.chat.completions.create() + completion = client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: Groq) -> None: completion = client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -128,6 +140,10 @@ def test_method_create_with_all_params(self, client: Groq) -> None: }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -176,7 +192,23 @@ def test_method_create_with_all_params(self, client: Groq) -> None: @parametrize def test_raw_response_create(self, client: Groq) -> None: - response = client.chat.completions.with_raw_response.create() + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -185,7 +217,23 @@ def test_raw_response_create(self, client: Groq) -> None: @parametrize def test_streaming_response_create(self, client: Groq) -> None: - with client.chat.completions.with_streaming_response.create() as response: + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -200,16 +248,28 @@ class TestAsyncCompletions: @parametrize async def test_method_create(self, async_client: AsyncGroq) -> None: - completion = await async_client.chat.completions.create() + completion = await async_client.chat.completions.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert_matches_type(ChatCompletion, completion, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> None: completion = await async_client.chat.completions.create( - frequency_penalty=0, - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, messages=[ { "content": "string", @@ -309,6 +369,10 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N }, ], model="string", + frequency_penalty=0, + logit_bias={"foo": 0}, + logprobs=True, + max_tokens=0, n=0, presence_penalty=0, response_format={"type": "string"}, @@ -357,7 +421,23 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N @parametrize async def test_raw_response_create(self, async_client: AsyncGroq) -> None: - response = await async_client.chat.completions.with_raw_response.create() + response = await async_client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -366,7 +446,23 @@ async def test_raw_response_create(self, async_client: AsyncGroq) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncGroq) -> None: - async with async_client.chat.completions.with_streaming_response.create() as response: + async with async_client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + { + "content": "string", + "role": "string", + }, + ], + model="string", + ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" From 3199ab3360e85bdc9ea56ab0120ac9f83376ebf5 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Thu, 15 Feb 2024 23:11:12 +0000 Subject: [PATCH 3/6] release: 0.4.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 17 +++++++++++++++++ pyproject.toml | 2 +- src/groq/_version.py | 2 +- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 3d2ac0b..da59f99 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.1.0" + ".": "0.4.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f643576..ced1d0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## 0.4.0 (2024-02-15) + +Full Changelog: [v0.1.0...v0.4.0](https://github.com/groq/groq-python/compare/v0.1.0...v0.4.0) + +### Features + +* Add initial Stainless SDK ([d5a8512](https://github.com/groq/groq-python/commit/d5a851262e04e625dde130367ed91d8f95683599)) +* Add initial Stainless SDK 
([316de2c](https://github.com/groq/groq-python/commit/316de2ccfeb76e36fe34bb8656ea90a8d42a7d00)) +* create default branch ([7e00266](https://github.com/groq/groq-python/commit/7e00266e3c691d92d508e753e2c14c03297c09f9)) +* update via SDK Studio ([#3](https://github.com/groq/groq-python/issues/3)) ([8d92c08](https://github.com/groq/groq-python/commit/8d92c086e320c2715e02bc79807ff872e84c0b0f)) + + +### Chores + +* go live ([#2](https://github.com/groq/groq-python/issues/2)) ([ba81c42](https://github.com/groq/groq-python/commit/ba81c42d6d0fd6d47819e0d58962235cb70ca4f1)) +* go live ([#5](https://github.com/groq/groq-python/issues/5)) ([af9a838](https://github.com/groq/groq-python/commit/af9a838e240bb0f7385bc33fb18ce246427ca2f7)) + ## 0.1.0 (2024-02-10) Full Changelog: [v0.0.1...v0.1.0](https://github.com/definitive-io/groqcloud-python/compare/v0.0.1...v0.1.0) diff --git a/pyproject.toml b/pyproject.toml index f3c156b..5eb419f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "groq" -version = "0.1.0" +version = "0.4.0" description = "The official Python library for the groq API" readme = "README.md" license = "Apache-2.0" diff --git a/src/groq/_version.py b/src/groq/_version.py index 0282ec9..1612b72 100644 --- a/src/groq/_version.py +++ b/src/groq/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. __title__ = "groq" -__version__ = "0.1.0" # x-release-please-version +__version__ = "0.4.0" # x-release-please-version From 7068aee42f6cc1ef39fdebd0b6f3eff0c60fc997 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Wed, 14 Feb 2024 15:40:49 -0800 Subject: [PATCH 4/6] Add streaming support --- src/groq/_streaming.py | 4 + .../lib/chat_completion_chunk.py | 23 +- src/groq/resources/chat/completions.py | 199 +++++++++++++++++- src/groqcloud/lib/.keep | 4 - 4 files changed, 212 insertions(+), 18 deletions(-) rename src/{groqcloud => groq}/lib/chat_completion_chunk.py (82%) delete mode 100644 src/groqcloud/lib/.keep diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index ac0ea8a..2769874 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -53,6 +53,8 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: + if sse.data.startswith("[DONE]"): + break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -106,6 +108,8 @@ async def __aiter__(self) -> AsyncIterator[_T]: async def _iter_events(self) -> AsyncIterator[ServerSentEvent]: async for sse in self._decoder.aiter(self.response.aiter_lines()): + if sse.data.startswith("[DONE]"): + break yield sse async def __stream__(self) -> AsyncIterator[_T]: diff --git a/src/groqcloud/lib/chat_completion_chunk.py b/src/groq/lib/chat_completion_chunk.py similarity index 82% rename from src/groqcloud/lib/chat_completion_chunk.py rename to src/groq/lib/chat_completion_chunk.py index 2aabbcf..71c433f 100644 --- a/src/groqcloud/lib/chat_completion_chunk.py +++ b/src/groq/lib/chat_completion_chunk.py @@ -70,11 +70,12 @@ class ChoiceDeltaToolCall(BaseModel): class ChoiceDelta(BaseModel): - content: Optional[str] = None + content: str + + role: str function_call: Optional[ChoiceDeltaFunctionCall] = None - role: Optional[str] = None tool_calls: Optional[List[ChoiceDeltaToolCall]] = None @@ -82,22 +83,22 @@ class ChoiceDelta(BaseModel): class Choice(BaseModel): delta: ChoiceDelta - finish_reason: Optional[str] = None + 
finish_reason: str - index: Optional[int] = None + index: int - logprobs: Optional[ChoiceLogprobs] = None + logprobs: ChoiceLogprobs class ChatCompletionChunk(BaseModel): - id: Optional[str] = None + id: str - choices: Optional[List[Choice]] = None + choices: List[Choice] - created: Optional[int] = None + created: int - model: Optional[str] = None + model: str - object: Optional[str] = None + object: str - system_fingerprint: Optional[str] = None + system_fingerprint: str diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index f1c5a02..017591e 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,7 +2,8 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional +from typing import Dict, List, Union, Iterable, Optional, overload +from typing_extensions import Literal import httpx @@ -16,10 +17,12 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) +from ..._streaming import Stream, AsyncStream from ...types.chat import ChatCompletion, completion_create_params from ..._base_client import ( make_request_options, ) +from ...lib.chat_completion_chunk import ChatCompletionChunk __all__ = ["Completions", "AsyncCompletions"] @@ -33,6 +36,7 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) + @overload def create( self, *, @@ -47,7 +51,7 @@ def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -61,6 +65,98 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... + + @overload + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + ... 
+ + @overload + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + ... + + def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -105,6 +201,8 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], ) @@ -117,6 +215,7 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) + @overload async def create( self, *, @@ -131,7 +230,7 @@ async def create( response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, seed: int | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, @@ -145,6 +244,98 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: + ... + + @overload + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Literal[True], + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + ... 
+ + @overload + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: bool, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + ... + + async def create( + self, + *, + messages: Iterable[completion_create_params.Message], + model: str, + frequency_penalty: float | NotGiven = NOT_GIVEN, + logit_bias: Dict[str, int] | NotGiven = NOT_GIVEN, + logprobs: bool | NotGiven = NOT_GIVEN, + max_tokens: int | NotGiven = NOT_GIVEN, + n: int | NotGiven = NOT_GIVEN, + presence_penalty: float | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: int | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: completion_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[completion_create_params.Tool] | NotGiven = NOT_GIVEN, + top_logprobs: int | NotGiven = NOT_GIVEN, + top_p: float | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a completion for a chat prompt @@ -189,6 +380,8 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], ) diff --git a/src/groqcloud/lib/.keep b/src/groqcloud/lib/.keep deleted file mode 100644 index 5e2c99f..0000000 --- a/src/groqcloud/lib/.keep +++ /dev/null @@ -1,4 +0,0 @@ -File generated from our OpenAPI spec by Stainless. - -This directory can be used to store custom files to expand the SDK. -It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. 
\ No newline at end of file From 6886dc1627287a9e7e1408c61385679220cb4aa9 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Thu, 15 Feb 2024 16:06:02 -0800 Subject: [PATCH 5/6] Add examples --- examples/chat_completion.py | 55 ++++++++++++++++++ examples/chat_completion_async.py | 62 +++++++++++++++++++++ examples/chat_completion_async_streaming.py | 60 ++++++++++++++++++++ examples/chat_completion_stop.py | 58 +++++++++++++++++++ examples/chat_completion_streaming.py | 56 +++++++++++++++++++ 5 files changed, 291 insertions(+) create mode 100644 examples/chat_completion.py create mode 100644 examples/chat_completion_async.py create mode 100644 examples/chat_completion_async_streaming.py create mode 100644 examples/chat_completion_stop.py create mode 100644 examples/chat_completion_streaming.py diff --git a/examples/chat_completion.py b/examples/chat_completion.py new file mode 100644 index 0000000..77511d0 --- /dev/null +++ b/examples/chat_completion.py @@ -0,0 +1,55 @@ +from groq import Groq + +client = Groq() + +chat_completion = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + }, + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py new file mode 100644 index 0000000..0d485a2 --- /dev/null +++ b/examples/chat_completion_async.py @@ -0,0 +1,62 @@ +import asyncio + +from groq import AsyncGroq + + +async def main() -> None: + client = AsyncGroq() + + chat_completion = await client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + }, + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. 
+        # As the temperature approaches zero, the model will become
+        # deterministic and repetitive.
+        temperature=0.5,
+
+        # The maximum number of tokens to generate. Requests can use up to
+        # 2048 tokens shared between prompt and completion.
+        max_tokens=1024,
+
+        # Controls diversity via nucleus sampling: 0.5 means half of all
+        # likelihood-weighted options are considered.
+        top_p=1,
+
+        # A stop sequence is a predefined or user-specified text string that
+        # signals an AI to stop generating content, ensuring its responses
+        # remain focused and concise. Examples include punctuation marks and
+        # markers like "[end]".
+        stop=None,
+
+        # If set, partial message deltas will be sent.
+        stream=False,
+    )
+
+    # Print the completion returned by the LLM.
+    print(chat_completion.choices[0].message.content)
+
+
+asyncio.run(main())
diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py
new file mode 100644
index 0000000..3ba5edd
--- /dev/null
+++ b/examples/chat_completion_async_streaming.py
@@ -0,0 +1,60 @@
+import asyncio
+
+from groq import AsyncGroq
+
+
+async def main() -> None:
+    client = AsyncGroq()
+
+    stream = await client.chat.completions.create(
+        #
+        # Required parameters
+        #
+        messages=[
+            # Set an optional system message. This sets the behavior of the
+            # assistant and can be used to provide specific instructions for
+            # how it should behave throughout the conversation.
+            {
+                "role": "system",
+                "content": "you are a helpful assistant."
+            },
+            # Set a user message for the assistant to respond to.
+            {
+                "role": "user",
+                "content": "Explain the importance of low latency LLMs",
+            },
+        ],
+
+        # The language model which will generate the completion.
+        model="mixtral-8x7b-32768",
+
+        #
+        # Optional parameters
+        #
+
+        # Controls randomness: lowering results in less random completions.
+        # As the temperature approaches zero, the model will become
+        # deterministic and repetitive.
+        temperature=0.5,
+
+        # The maximum number of tokens to generate. Requests can use up to
+        # 2048 tokens shared between prompt and completion.
+        max_tokens=1024,
+
+        # A stop sequence is a predefined or user-specified text string that
+        # signals an AI to stop generating content, ensuring its responses
+        # remain focused and concise. Examples include punctuation marks and
+        # markers like "[end]".
+        stop=None,
+
+        # If set, partial message deltas will be sent as they are generated,
+        # instead of a single message when the completion finishes.
+        stream=True,
+    )
+
+    # Print the incremental deltas returned by the LLM.
+    async for chunk in stream:
+        print(chunk.choices[0].delta.content, end="")
+
+
+asyncio.run(main())
diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py
new file mode 100644
index 0000000..4abb63b
--- /dev/null
+++ b/examples/chat_completion_stop.py
@@ -0,0 +1,58 @@
+from groq import Groq
+
+client = Groq()
+
+chat_completion = client.chat.completions.create(
+    #
+    # Required parameters
+    #
+    messages=[
+        # Set an optional system message. This sets the behavior of the
+        # assistant and can be used to provide specific instructions for
+        # how it should behave throughout the conversation.
+        {
+            "role": "system",
+            "content": "you are a helpful assistant."
+        },
+        # Set a user message for the assistant to respond to.
+        {
+            "role": "user",
+            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
+        },
+    ],
+
+    # The language model which will generate the completion.
+ model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + # For this example, we will use ", 6" so that the llm stops counting at 5. + # If multiple stop values are needed, an array of string may be passed, + # stop=[", 6", ", six", ", Six"] + stop=", 6", + + # If set, partial message deltas will be sent. + stream=False, +) + +# Print the completion returned by the LLM. +print(chat_completion.choices[0].message.content) diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py new file mode 100644 index 0000000..78d2607 --- /dev/null +++ b/examples/chat_completion_streaming.py @@ -0,0 +1,56 @@ +from groq import Groq + +client = Groq() + +stream = client.chat.completions.create( + # + # Required parameters + # + messages=[ + # Set an optional system message. This sets the behavior of the + # assistant and can be used to provide specific instructions for + # how it should behave throughout the conversation. + { + "role": "system", + "content": "you are a helpful assistant." + }, + # Set a user message for the assistant to respond to. + { + "role": "user", + "content": "Explain the importance of low latency LLMs", + }, + ], + + # The language model which will generate the completion. + model="mixtral-8x7b-32768", + + # + # Optional parameters + # + + # Controls randomness: lowering results in less random completions. + # As the temperature approaches zero, the model will become deterministic + # and repetitive. + temperature=0.5, + + # The maximum number of tokens to generate. Requests can use up to + # 2048 tokens shared between prompt and completion. + max_tokens=1024, + + # Controls diversity via nucleus sampling: 0.5 means half of all + # likelihood-weighted options are considered. + top_p=1, + + # A stop sequence is a predefined or user-specified text string that + # signals an AI to stop generating content, ensuring its responses + # remain focused and concise. Examples include punctuation marks and + # markers like "[end]". + stop=None, + + # If set, partial message deltas will be sent. + stream=True, +) + +# Print the incremental deltas returned by the LLM. 
+for chunk in stream: + print(chunk.choices[0].delta.content, end="") From 9638e4fdebef3303f6ccafeec7cc3faa7568c212 Mon Sep 17 00:00:00 2001 From: Graden Rea Date: Thu, 15 Feb 2024 18:30:19 -0800 Subject: [PATCH 6/6] Fix Readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5c0abdd..eb5367f 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ chat_completion = client.chat.completions.create( ], model="mixtral-8x7b-32768", ) -print(chat_completion.choices_0.message.content) +print(chat_completion.choices[0].message.content) ``` While you can provide an `api_key` keyword argument, @@ -71,7 +71,7 @@ async def main() -> None: ], model="mixtral-8x7b-32768", ) - print(chat_completion.choices_0.message.content) + print(chat_completion.choices[0].message.content) asyncio.run(main())
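The completions hunk earlier in this series wires streaming through `stream=stream or False` and `stream_cls=AsyncStream[ChatCompletionChunk]`, so the async `create` call is expected to return a plain `ChatCompletion` when `stream` is false or omitted, and an `AsyncStream` of `ChatCompletionChunk` objects when `stream=True`. The sketch below exercises both paths; it reuses the `AsyncGroq` client and `mixtral-8x7b-32768` model from the examples, and the `or ""` guard on `delta.content` is a defensive addition for illustration (the committed examples print the delta directly).

```python
import asyncio

from groq import AsyncGroq


async def main() -> None:
    client = AsyncGroq()
    messages = [
        {"role": "user", "content": "Explain the importance of low latency LLMs"},
    ]

    # stream omitted (or False): create() returns a ChatCompletion.
    completion = await client.chat.completions.create(
        messages=messages,
        model="mixtral-8x7b-32768",
    )
    print(completion.choices[0].message.content)

    # stream=True: create() returns an AsyncStream of ChatCompletionChunk.
    stream = await client.chat.completions.create(
        messages=messages,
        model="mixtral-8x7b-32768",
        stream=True,
    )
    async for chunk in stream:
        # delta.content can be None on some chunks, so fall back to "".
        print(chunk.choices[0].delta.content or "", end="")
    print()


asyncio.run(main())
```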
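The stop-sequence example passes a single string, but its inline comment notes that a list may be supplied when several stop values are needed. The following is a minimal sketch of that variant, assuming the same `Groq` client and model as the committed example; the extra stop strings are illustrative rather than part of the patch.

```python
from groq import Groq

client = Groq()

# Ask the model to count, but cut generation off before it reaches six.
# Passing a list means generation stops at the first matching sequence.
chat_completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": "you are a helpful assistant."},
        {
            "role": "user",
            "content": "Count to 10. Your response must begin with \"1, \". example: 1, 2, 3, ...",
        },
    ],
    model="mixtral-8x7b-32768",
    temperature=0.5,
    max_tokens=1024,
    # Multiple stop values: generation halts when any one of them appears.
    stop=[", 6", ", six", ", Six"],
)

print(chat_completion.choices[0].message.content)
```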