diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
old mode 100755
new mode 100644
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
old mode 100755
new mode 100644
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
old mode 100755
new mode 100644
diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml
old mode 100755
new mode 100644
diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml
old mode 100755
new mode 100644
diff --git a/.gitignore b/.gitignore
old mode 100755
new mode 100644
diff --git a/.python-version b/.python-version
old mode 100755
new mode 100644
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
old mode 100755
new mode 100644
index f7014c3..a713055
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.11.0"
+  ".": "0.12.0"
 }
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
old mode 100755
new mode 100644
index 2740b98..d23e89e
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 7
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-1f0d266ba97b03672f10d33a6dc6e324af9a95646f978ffbff6a31f3907bbfe7.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-2e2427d7a1c97af4cb989c736fabccf1531532dd3487d330c851db96d6c5da1c.yml
diff --git a/Brewfile b/Brewfile
old mode 100755
new mode 100644
diff --git a/CHANGELOG.md b/CHANGELOG.md
old mode 100755
new mode 100644
index aee292b..d56901f
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,26 @@
 # Changelog
 
+## 0.12.0 (2024-11-12)
+
+Full Changelog: [v0.11.0...v0.12.0](https://github.com/groq/groq-python/compare/v0.11.0...v0.12.0)
+
+### Features
+
+* **api:** api update ([#127](https://github.com/groq/groq-python/issues/127)) ([2f20c22](https://github.com/groq/groq-python/commit/2f20c227860ec41c9255b5b7dd7d0af2dd98c7f9))
+* **api:** api update ([#129](https://github.com/groq/groq-python/issues/129)) ([41c4f28](https://github.com/groq/groq-python/commit/41c4f280afb004f41994d8456d63f21acd6da6a9))
+
+
+### Bug Fixes
+
+* GitHub Terraform: Create/Update .github/workflows/stale.yaml [skip ci] ([537cc49](https://github.com/groq/groq-python/commit/537cc4977b4fcc1c7679abcc9ca4ddf2fbafdcd2))
+
+
+### Chores
+
+* rebuild project due to codegen change ([#130](https://github.com/groq/groq-python/issues/130)) ([e08a00f](https://github.com/groq/groq-python/commit/e08a00f13f68b8041293f02064d7b6d692a07cd9))
+* rebuild project due to codegen change ([#131](https://github.com/groq/groq-python/issues/131)) ([8a3c31d](https://github.com/groq/groq-python/commit/8a3c31d2aa3cb855e6f1feef169be48447adca61))
+* rebuild project due to codegen change ([#132](https://github.com/groq/groq-python/issues/132)) ([8287ed4](https://github.com/groq/groq-python/commit/8287ed44c4a668484f4cfc068b86a2132ab19714))
+
 ## 0.11.0 (2024-09-03)
 
 Full Changelog: [v0.10.0...v0.11.0](https://github.com/groq/groq-python/compare/v0.10.0...v0.11.0)
diff --git a/CODEOWNERS b/CODEOWNERS
old mode 100755
new mode 100644
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
old mode 100755
new mode 100644
index c384b8b..c04763a
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,9 +2,13 @@
 
 ### With Rye
 
-We use [Rye](https://rye.astral.sh/) to manage dependencies so we highly recommend [installing it](https://rye.astral.sh/guide/installation/) as it will automatically provision a Python environment with the expected Python version.
+We use [Rye](https://rye.astral.sh/) to manage dependencies because it will automatically provision a Python environment with the expected Python version. To set it up, run:
 
-After installing Rye, you'll just have to run this command:
+```sh
+$ ./scripts/bootstrap
+```
+
+Or [install Rye manually](https://rye.astral.sh/guide/installation/) and run:
 
 ```sh
 $ rye sync --all-features
@@ -31,25 +35,25 @@ $ pip install -r requirements-dev.lock
 
 ## Modifying/Adding code
 
-Most of the SDK is generated code, and any modified code will be overridden on the next generation. The
-`src/groq/lib/` and `examples/` directories are exceptions and will never be overridden.
+Most of the SDK is generated code. Modifications to code will be persisted between generations, but may
+result in merge conflicts between manual patches and changes from the generator. The generator will never
+modify the contents of the `src/groq/lib/` and `examples/` directories.
 
 ## Adding and running examples
 
-All files in the `examples/` directory are not modified by the Stainless generator and can be freely edited or
-added to.
+Files in the `examples/` directory are never modified by the generator and can be freely edited or added to.
 
-```bash
+```py
 # add an example to examples/<your-example>.py
 
 #!/usr/bin/env -S rye run python
 …
 ```
 
-```
-chmod +x examples/<your-example>.py
+```sh
+$ chmod +x examples/<your-example>.py
 # run the example against your api
-./examples/<your-example>.py
+$ ./examples/<your-example>.py
 ```
 
 ## Using the repository from source
@@ -58,8 +62,8 @@ If you’d like to use the repository from source, you can either install from g
 
 To install via git:
 
-```bash
-pip install git+ssh://git@github.com/groq/groq-python#main.git
+```sh
+$ pip install git+ssh://git@github.com/groq/groq-python.git
 ```
 
 Alternatively, you can build from source and install the wheel file:
@@ -68,29 +72,29 @@ Building this package will create two files in the `dist/` directory, a `.tar.gz
 
 To create a distributable version of the library, all you have to do is run this command:
 
-```bash
-rye build
+```sh
+$ rye build
 # or
-python -m build
+$ python -m build
 ```
 
 Then to install:
 
 ```sh
-pip install ./path-to-wheel-file.whl
+$ pip install ./path-to-wheel-file.whl
 ```
 
 ## Running tests
 
 Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests.
 
-```bash
+```sh
 # you will need npm installed
-npx prism mock path/to/your/openapi.yml
+$ npx prism mock path/to/your/openapi.yml
 ```
 
-```bash
-rye run pytest
+```sh
+$ ./scripts/test
 ```
 
 ## Linting and formatting
@@ -100,14 +104,14 @@ This repository uses [ruff](https://github.com/astral-sh/ruff) and
 
 To lint:
 
-```bash
-rye run lint
+```sh
+$ ./scripts/lint
 ```
 
 To format and fix all ruff issues automatically:
 
-```bash
-rye run format
+```sh
+$ ./scripts/format
 ```
 
 ## Publishing and releases
diff --git a/LICENSE b/LICENSE
old mode 100755
new mode 100644
diff --git a/README.md b/README.md
old mode 100755
new mode 100644
index 5f8dbae..f690c80
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [![PyPI version](https://img.shields.io/pypi/v/groq.svg)](https://pypi.org/project/groq/)
 
-The Groq Python library provides convenient access to the Groq REST API from any Python 3.7+
+The Groq Python library provides convenient access to the Groq REST API from any Python 3.8+
 application. The library includes type definitions for all request params and response fields,
 and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx).
 
@@ -28,8 +28,7 @@ import os
 from groq import Groq
 
 client = Groq(
-    # This is the default and can be omitted
-    api_key=os.environ.get("GROQ_API_KEY"),
+    api_key=os.environ.get("GROQ_API_KEY"),  # This is the default and can be omitted
 )
 
 chat_completion = client.chat.completions.create(
@@ -59,8 +58,7 @@ import asyncio
 from groq import AsyncGroq
 
 client = AsyncGroq(
-    # This is the default and can be omitted
-    api_key=os.environ.get("GROQ_API_KEY"),
+    api_key=os.environ.get("GROQ_API_KEY"),  # This is the default and can be omitted
 )
 
 
@@ -374,6 +372,21 @@ We take backwards-compatibility seriously and work hard to ensure you can rely o
 
 We are keen for your feedback; please open an [issue](https://www.github.com/groq/groq-python/issues) with questions, bugs, or suggestions.
 
+### Determining the installed version
+
+If you've upgraded to the latest version but aren't seeing the new features you were expecting, then your Python environment is likely still using an older version.
+
+You can determine the version that is being used at runtime with:
+
+```py
+import groq
+print(groq.__version__)
+```
+
 ## Requirements
 
-Python 3.7 or higher.
+Python 3.8 or higher.
+
+## Contributing
+
+See [the contributing documentation](./CONTRIBUTING.md).
diff --git a/SECURITY.md b/SECURITY.md
old mode 100755
new mode 100644
diff --git a/api.md b/api.md
old mode 100755
new mode 100644
diff --git a/bin/check-release-environment b/bin/check-release-environment
old mode 100755
new mode 100644
diff --git a/bin/publish-pypi b/bin/publish-pypi
old mode 100755
new mode 100644
diff --git a/examples/.keep b/examples/.keep
old mode 100755
new mode 100644
diff --git a/examples/chat_completion.py b/examples/chat_completion.py
old mode 100755
new mode 100644
diff --git a/examples/chat_completion_async.py b/examples/chat_completion_async.py
old mode 100755
new mode 100644
diff --git a/examples/chat_completion_async_streaming.py b/examples/chat_completion_async_streaming.py
old mode 100755
new mode 100644
diff --git a/examples/chat_completion_stop.py b/examples/chat_completion_stop.py
old mode 100755
new mode 100644
diff --git a/examples/chat_completion_streaming.py b/examples/chat_completion_streaming.py
old mode 100755
new mode 100644
diff --git a/mypy.ini b/mypy.ini
old mode 100755
new mode 100644
diff --git a/noxfile.py b/noxfile.py
old mode 100755
new mode 100644
diff --git a/pyproject.toml b/pyproject.toml
old mode 100755
new mode 100644
index 9196a48..f9e1aa2
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "groq"
-version = "0.11.0"
+version = "0.12.0"
 description = "The official Python library for the groq API"
 dynamic = ["readme"]
 license = "Apache-2.0"
@@ -15,13 +15,11 @@ dependencies = [
     "distro>=1.7.0, <2",
     "sniffio",
     "cached-property; python_version < '3.8'",
-
 ]
-requires-python = ">= 3.7"
+requires-python = ">= 3.8"
 classifiers = [
   "Typing :: Typed",
   "Intended Audience :: Developers",
-  "Programming Language :: Python :: 3.7",
   "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
@@ -36,8 +34,6 @@ classifiers = [
   "License :: OSI Approved :: Apache Software License"
 ]
 
-
-
 [project.urls]
 Homepage = "https://github.com/groq/groq-python"
 Repository = "https://github.com/groq/groq-python"
@@ -59,7 +55,6 @@ dev-dependencies = [
     "dirty-equals>=0.6.0",
     "importlib-metadata>=6.7.0",
     "rich>=13.7.1",
-
 ]
 
 [tool.rye.scripts]
@@ -67,11 +62,11 @@ format = { chain = [
   "format:ruff",
   "format:docs",
   "fix:ruff",
+  # run formatting again to fix any inconsistencies when imports are stripped
+  "format:ruff",
 ]}
-"format:black" = "black ."
 "format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md"
 "format:ruff" = "ruff format"
-"format:isort" = "isort ."
 
 "lint" = { chain = [
   "check:ruff",
@@ -129,10 +124,6 @@ path = "README.md"
 pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)'
 replacement = '[\1](https://github.com/groq/groq-python/tree/main/\g<2>)'
 
-[tool.black]
-line-length = 120
-target-version = ["py37"]
-
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 addopts = "--tb=short"
@@ -147,7 +138,7 @@ filterwarnings = [
 # there are a couple of flags that are still disabled by
 # default in strict mode as they are experimental and niche.
 typeCheckingMode = "strict"
-pythonVersion = "3.7"
+pythonVersion = "3.8"
 
 exclude = [
     "_dev",
diff --git a/release-please-config.json b/release-please-config.json
old mode 100755
new mode 100644
diff --git a/requirements-dev.lock b/requirements-dev.lock
old mode 100755
new mode 100644
index 6b7d864..88dcb05
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -16,8 +16,6 @@ anyio==4.4.0
     # via httpx
 argcomplete==3.1.2
     # via nox
-attrs==23.1.0
-    # via pytest
 certifi==2023.7.22
     # via httpcore
     # via httpx
@@ -28,8 +26,9 @@ distlib==0.3.7
     # via virtualenv
 distro==1.8.0
     # via groq
-exceptiongroup==1.1.3
+exceptiongroup==1.2.2
     # via anyio
+    # via pytest
 filelock==3.12.4
     # via virtualenv
 h11==0.14.0
@@ -49,7 +48,7 @@ markdown-it-py==3.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py
-mypy==1.10.1
+mypy==1.13.0
 mypy-extensions==1.0.0
     # via mypy
 nodeenv==1.8.0
@@ -60,27 +59,25 @@ packaging==23.2
     # via pytest
 platformdirs==3.11.0
     # via virtualenv
-pluggy==1.3.0
-    # via pytest
-py==1.11.0
+pluggy==1.5.0
     # via pytest
-pydantic==2.7.1
+pydantic==2.9.2
     # via groq
-pydantic-core==2.18.2
+pydantic-core==2.23.4
     # via pydantic
 pygments==2.18.0
     # via rich
-pyright==1.1.374
-pytest==7.1.1
+pyright==1.1.380
+pytest==8.3.3
     # via pytest-asyncio
-pytest-asyncio==0.21.1
+pytest-asyncio==0.24.0
 python-dateutil==2.8.2
     # via time-machine
 pytz==2023.3.post1
     # via dirty-equals
 respx==0.20.2
 rich==13.7.1
-ruff==0.5.6
+ruff==0.6.9
 setuptools==68.2.2
     # via nodeenv
 six==1.16.0
@@ -90,10 +87,10 @@ sniffio==1.3.0
     # via groq
     # via httpx
 time-machine==2.9.0
-tomli==2.0.1
+tomli==2.0.2
     # via mypy
     # via pytest
-typing-extensions==4.8.0
+typing-extensions==4.12.2
     # via anyio
     # via groq
     # via mypy
diff --git a/requirements.lock b/requirements.lock
old mode 100755
new mode 100644
index 874a3cb..d2a8ddb
--- a/requirements.lock
+++ b/requirements.lock
@@ -19,7 +19,7 @@ certifi==2023.7.22
     # via httpx
 distro==1.8.0
     # via groq
-exceptiongroup==1.1.3
+exceptiongroup==1.2.2
     # via anyio
 h11==0.14.0
     # via httpcore
@@ -30,15 +30,15 @@ httpx==0.25.2
 idna==3.4
     # via anyio
     # via httpx
-pydantic==2.7.1
+pydantic==2.9.2
     # via groq
-pydantic-core==2.18.2
+pydantic-core==2.23.4
     # via pydantic
 sniffio==1.3.0
     # via anyio
     # via groq
     # via httpx
-typing-extensions==4.8.0
+typing-extensions==4.12.2
     # via anyio
     # via groq
     # via pydantic
diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py
old mode 100755
new mode 100644
diff --git a/src/groq/__init__.py b/src/groq/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/_base_client.py b/src/groq/_base_client.py
old mode 100755
new mode 100644
index 2311063..f94212c
--- a/src/groq/_base_client.py
+++ b/src/groq/_base_client.py
@@ -143,6 +143,12 @@ def __init__(
         self.url = url
         self.params = params
 
+    @override
+    def __repr__(self) -> str:
+        if self.url:
+            return f"{self.__class__.__name__}(url={self.url})"
+        return f"{self.__class__.__name__}(params={self.params})"
+
 
 class BasePage(GenericModel, Generic[_T]):
     """
@@ -400,14 +406,7 @@ def _make_status_error(
     ) -> _exceptions.APIStatusError:
         raise NotImplementedError()
 
-    def _remaining_retries(
-        self,
-        remaining_retries: Optional[int],
-        options: FinalRequestOptions,
-    ) -> int:
-        return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries)
-
-    def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers:
+    def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0) -> httpx.Headers:
         custom_headers = options.headers or {}
         headers_dict = _merge_mappings(self.default_headers, custom_headers)
         self._validate_headers(headers_dict, custom_headers)
@@ -419,6 +418,11 @@ def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers:
         if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
             headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
 
+        # Don't set the retry count header if it was already set or removed by the caller. We check
+        # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
+        if "x-stainless-retry-count" not in (header.lower() for header in custom_headers):
+            headers["x-stainless-retry-count"] = str(retries_taken)
+
         return headers
 
     def _prepare_url(self, url: str) -> URL:
@@ -440,6 +444,8 @@ def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder:
     def _build_request(
         self,
         options: FinalRequestOptions,
+        *,
+        retries_taken: int = 0,
     ) -> httpx.Request:
         if log.isEnabledFor(logging.DEBUG):
             log.debug("Request options: %s", model_dump(options, exclude_unset=True))
@@ -455,7 +461,7 @@ def _build_request(
             else:
                 raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")
 
-        headers = self._build_headers(options)
+        headers = self._build_headers(options, retries_taken=retries_taken)
         params = _merge_mappings(self.default_query, options.params)
         content_type = headers.get("Content-Type")
         files = options.files
@@ -489,12 +495,17 @@ def _build_request(
             if not files:
                 files = cast(HttpxRequestFiles, ForceMultipartDict())
 
+        prepared_url = self._prepare_url(options.url)
+        if "_" in prepared_url.host:
+            # work around https://github.com/encode/httpx/discussions/2880
+            kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")}
+
         # TODO: report this error to httpx
         return self._client.build_request(  # pyright: ignore[reportUnknownMemberType]
             headers=headers,
             timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout,
             method=options.method,
-            url=self._prepare_url(options.url),
+            url=prepared_url,
             # the `Query` type that we use is incompatible with qs'
             # `Params` type as it needs to be typed as `Mapping[str, object]`
             # so that passing a `TypedDict` doesn't cause an error.
@@ -684,7 +695,8 @@ def _calculate_retry_timeout(
         if retry_after is not None and 0 < retry_after <= 60:
             return retry_after
 
-        nb_retries = max_retries - remaining_retries
+        # Also cap retry count to 1000 to avoid any potential overflows with `pow`
+        nb_retries = min(max_retries - remaining_retries, 1000)
 
         # Apply exponential backoff, but not more than the max.
         sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
@@ -933,12 +945,17 @@ def request(
         stream: bool = False,
         stream_cls: type[_StreamT] | None = None,
     ) -> ResponseT | _StreamT:
+        if remaining_retries is not None:
+            retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
+        else:
+            retries_taken = 0
+
         return self._request(
             cast_to=cast_to,
             options=options,
             stream=stream,
             stream_cls=stream_cls,
-            remaining_retries=remaining_retries,
+            retries_taken=retries_taken,
         )
 
     def _request(
@@ -946,7 +963,7 @@ def _request(
         *,
         cast_to: Type[ResponseT],
         options: FinalRequestOptions,
-        remaining_retries: int | None,
+        retries_taken: int,
         stream: bool,
         stream_cls: type[_StreamT] | None,
     ) -> ResponseT | _StreamT:
@@ -958,8 +975,8 @@ def _request(
         cast_to = self._maybe_override_cast_to(cast_to, options)
         options = self._prepare_options(options)
 
-        retries = self._remaining_retries(remaining_retries, options)
-        request = self._build_request(options)
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        request = self._build_request(options, retries_taken=retries_taken)
         self._prepare_request(request)
 
         kwargs: HttpxSendArgs = {}
@@ -977,11 +994,11 @@ def _request(
         except httpx.TimeoutException as err:
             log.debug("Encountered httpx.TimeoutException", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -992,11 +1009,11 @@ def _request(
         except Exception as err:
             log.debug("Encountered Exception", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -1019,13 +1036,13 @@ def _request(
         except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
             log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
 
-            if retries > 0 and self._should_retry(err.response):
+            if remaining_retries > 0 and self._should_retry(err.response):
                 err.response.close()
                 return self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
-                    err.response.headers,
+                    retries_taken=retries_taken,
+                    response_headers=err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
                 )
@@ -1044,26 +1061,26 @@ def _request(
             response=response,
             stream=stream,
             stream_cls=stream_cls,
-            retries_taken=options.get_max_retries(self.max_retries) - retries,
+            retries_taken=retries_taken,
         )
 
     def _retry_request(
         self,
         options: FinalRequestOptions,
         cast_to: Type[ResponseT],
-        remaining_retries: int,
-        response_headers: httpx.Headers | None,
         *,
+        retries_taken: int,
+        response_headers: httpx.Headers | None,
         stream: bool,
         stream_cls: type[_StreamT] | None,
     ) -> ResponseT | _StreamT:
-        remaining = remaining_retries - 1
-        if remaining == 1:
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        if remaining_retries == 1:
             log.debug("1 retry left")
         else:
-            log.debug("%i retries left", remaining)
+            log.debug("%i retries left", remaining_retries)
 
-        timeout = self._calculate_retry_timeout(remaining, options, response_headers)
+        timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
         log.info("Retrying request to %s in %f seconds", options.url, timeout)
 
         # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
@@ -1073,7 +1090,7 @@ def _retry_request(
         return self._request(
             options=options,
             cast_to=cast_to,
-            remaining_retries=remaining,
+            retries_taken=retries_taken + 1,
             stream=stream,
             stream_cls=stream_cls,
         )
@@ -1491,12 +1508,17 @@ async def request(
         stream_cls: type[_AsyncStreamT] | None = None,
         remaining_retries: Optional[int] = None,
     ) -> ResponseT | _AsyncStreamT:
+        if remaining_retries is not None:
+            retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
+        else:
+            retries_taken = 0
+
         return await self._request(
             cast_to=cast_to,
             options=options,
             stream=stream,
             stream_cls=stream_cls,
-            remaining_retries=remaining_retries,
+            retries_taken=retries_taken,
         )
 
     async def _request(
@@ -1506,7 +1528,7 @@ async def _request(
         *,
         stream: bool,
         stream_cls: type[_AsyncStreamT] | None,
-        remaining_retries: int | None,
+        retries_taken: int,
     ) -> ResponseT | _AsyncStreamT:
         if self._platform is None:
             # `get_platform` can make blocking IO calls so we
@@ -1521,8 +1543,8 @@ async def _request(
         cast_to = self._maybe_override_cast_to(cast_to, options)
         options = await self._prepare_options(options)
 
-        retries = self._remaining_retries(remaining_retries, options)
-        request = self._build_request(options)
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        request = self._build_request(options, retries_taken=retries_taken)
         await self._prepare_request(request)
 
         kwargs: HttpxSendArgs = {}
@@ -1538,11 +1560,11 @@ async def _request(
         except httpx.TimeoutException as err:
             log.debug("Encountered httpx.TimeoutException", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return await self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -1553,11 +1575,11 @@ async def _request(
         except Exception as err:
             log.debug("Encountered Exception", exc_info=True)
 
-            if retries > 0:
+            if remaining_retries > 0:
                 return await self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
+                    retries_taken=retries_taken,
                     stream=stream,
                     stream_cls=stream_cls,
                     response_headers=None,
@@ -1575,13 +1597,13 @@ async def _request(
         except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
             log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
 
-            if retries > 0 and self._should_retry(err.response):
+            if remaining_retries > 0 and self._should_retry(err.response):
                 await err.response.aclose()
                 return await self._retry_request(
                     input_options,
                     cast_to,
-                    retries,
-                    err.response.headers,
+                    retries_taken=retries_taken,
+                    response_headers=err.response.headers,
                     stream=stream,
                     stream_cls=stream_cls,
                 )
@@ -1600,26 +1622,26 @@ async def _request(
             response=response,
             stream=stream,
             stream_cls=stream_cls,
-            retries_taken=options.get_max_retries(self.max_retries) - retries,
+            retries_taken=retries_taken,
         )
 
     async def _retry_request(
         self,
         options: FinalRequestOptions,
         cast_to: Type[ResponseT],
-        remaining_retries: int,
-        response_headers: httpx.Headers | None,
         *,
+        retries_taken: int,
+        response_headers: httpx.Headers | None,
         stream: bool,
         stream_cls: type[_AsyncStreamT] | None,
     ) -> ResponseT | _AsyncStreamT:
-        remaining = remaining_retries - 1
-        if remaining == 1:
+        remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+        if remaining_retries == 1:
             log.debug("1 retry left")
         else:
-            log.debug("%i retries left", remaining)
+            log.debug("%i retries left", remaining_retries)
 
-        timeout = self._calculate_retry_timeout(remaining, options, response_headers)
+        timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
         log.info("Retrying request to %s in %f seconds", options.url, timeout)
 
         await anyio.sleep(timeout)
@@ -1627,7 +1649,7 @@ async def _retry_request(
         return await self._request(
             options=options,
             cast_to=cast_to,
-            remaining_retries=remaining,
+            retries_taken=retries_taken + 1,
             stream=stream,
             stream_cls=stream_cls,
         )
diff --git a/src/groq/_client.py b/src/groq/_client.py
old mode 100755
new mode 100644
diff --git a/src/groq/_compat.py b/src/groq/_compat.py
old mode 100755
new mode 100644
index 21fe694..4794129
--- a/src/groq/_compat.py
+++ b/src/groq/_compat.py
@@ -2,7 +2,7 @@
 
 from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload
 from datetime import date, datetime
-from typing_extensions import Self
+from typing_extensions import Self, Literal
 
 import pydantic
 from pydantic.fields import FieldInfo
@@ -133,15 +133,19 @@ def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str:
 def model_dump(
     model: pydantic.BaseModel,
     *,
-    exclude: IncEx = None,
+    exclude: IncEx | None = None,
     exclude_unset: bool = False,
     exclude_defaults: bool = False,
+    warnings: bool = True,
+    mode: Literal["json", "python"] = "python",
 ) -> dict[str, Any]:
-    if PYDANTIC_V2:
+    if PYDANTIC_V2 or hasattr(model, "model_dump"):
         return model.model_dump(
+            mode=mode,
             exclude=exclude,
             exclude_unset=exclude_unset,
             exclude_defaults=exclude_defaults,
+            warnings=warnings,
         )
     return cast(
         "dict[str, Any]",
diff --git a/src/groq/_constants.py b/src/groq/_constants.py
old mode 100755
new mode 100644
diff --git a/src/groq/_exceptions.py b/src/groq/_exceptions.py
old mode 100755
new mode 100644
diff --git a/src/groq/_files.py b/src/groq/_files.py
old mode 100755
new mode 100644
diff --git a/src/groq/_models.py b/src/groq/_models.py
old mode 100755
new mode 100644
index d386eaa..6cb469e
--- a/src/groq/_models.py
+++ b/src/groq/_models.py
@@ -37,6 +37,7 @@
     PropertyInfo,
     is_list,
     is_given,
+    json_safe,
     lru_cache,
     is_mapping,
     parse_date,
@@ -176,7 +177,7 @@ def __str__(self) -> str:
     # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836.
     @classmethod
     @override
-    def construct(
+    def construct(  # pyright: ignore[reportIncompatibleMethodOverride]
         cls: Type[ModelT],
         _fields_set: set[str] | None = None,
         **values: object,
@@ -248,8 +249,8 @@ def model_dump(
             self,
             *,
             mode: Literal["json", "python"] | str = "python",
-            include: IncEx = None,
-            exclude: IncEx = None,
+            include: IncEx | None = None,
+            exclude: IncEx | None = None,
             by_alias: bool = False,
             exclude_unset: bool = False,
             exclude_defaults: bool = False,
@@ -279,8 +280,8 @@ def model_dump(
             Returns:
                 A dictionary representation of the model.
             """
-            if mode != "python":
-                raise ValueError("mode is only supported in Pydantic v2")
+            if mode not in {"json", "python"}:
+                raise ValueError("mode must be either 'json' or 'python'")
             if round_trip != False:
                 raise ValueError("round_trip is only supported in Pydantic v2")
             if warnings != True:
@@ -289,7 +290,7 @@ def model_dump(
                 raise ValueError("context is only supported in Pydantic v2")
             if serialize_as_any != False:
                 raise ValueError("serialize_as_any is only supported in Pydantic v2")
-            return super().dict(  # pyright: ignore[reportDeprecated]
+            dumped = super().dict(  # pyright: ignore[reportDeprecated]
                 include=include,
                 exclude=exclude,
                 by_alias=by_alias,
@@ -298,13 +299,15 @@ def model_dump(
                 exclude_none=exclude_none,
             )
 
+            return cast(dict[str, Any], json_safe(dumped)) if mode == "json" else dumped
+
         @override
         def model_dump_json(
             self,
             *,
             indent: int | None = None,
-            include: IncEx = None,
-            exclude: IncEx = None,
+            include: IncEx | None = None,
+            exclude: IncEx | None = None,
             by_alias: bool = False,
             exclude_unset: bool = False,
             exclude_defaults: bool = False,
diff --git a/src/groq/_qs.py b/src/groq/_qs.py
old mode 100755
new mode 100644
diff --git a/src/groq/_resource.py b/src/groq/_resource.py
old mode 100755
new mode 100644
diff --git a/src/groq/_response.py b/src/groq/_response.py
old mode 100755
new mode 100644
index d7ae9cd..95a0361
--- a/src/groq/_response.py
+++ b/src/groq/_response.py
@@ -192,6 +192,9 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T:
         if cast_to == float:
             return cast(R, float(response.text))
 
+        if cast_to == bool:
+            return cast(R, response.text.lower() == "true")
+
         origin = get_origin(cast_to) or cast_to
 
         if origin == APIResponse:
diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py
old mode 100755
new mode 100644
diff --git a/src/groq/_types.py b/src/groq/_types.py
old mode 100755
new mode 100644
index f85d73b..453e1a0
--- a/src/groq/_types.py
+++ b/src/groq/_types.py
@@ -16,7 +16,7 @@
     Optional,
     Sequence,
 )
-from typing_extensions import Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
+from typing_extensions import Set, Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
 
 import httpx
 import pydantic
@@ -193,7 +193,9 @@ def get(self, __key: str) -> str | None: ...
 
 # Note: copied from Pydantic
 # https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49
-IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None"
+IncEx: TypeAlias = Union[
+    Set[int], Set[str], Mapping[int, Union["IncEx", Literal[True]]], Mapping[str, Union["IncEx", Literal[True]]]
+]
 
 PostParser = Callable[[Any], Any]
 
diff --git a/src/groq/_utils/__init__.py b/src/groq/_utils/__init__.py
old mode 100755
new mode 100644
index 3efe66c..a7cff3c
--- a/src/groq/_utils/__init__.py
+++ b/src/groq/_utils/__init__.py
@@ -6,6 +6,7 @@
     is_list as is_list,
     is_given as is_given,
     is_tuple as is_tuple,
+    json_safe as json_safe,
     lru_cache as lru_cache,
     is_mapping as is_mapping,
     is_tuple_t as is_tuple_t,
diff --git a/src/groq/_utils/_logs.py b/src/groq/_utils/_logs.py
old mode 100755
new mode 100644
diff --git a/src/groq/_utils/_proxy.py b/src/groq/_utils/_proxy.py
old mode 100755
new mode 100644
diff --git a/src/groq/_utils/_reflection.py b/src/groq/_utils/_reflection.py
old mode 100755
new mode 100644
diff --git a/src/groq/_utils/_streams.py b/src/groq/_utils/_streams.py
old mode 100755
new mode 100644
diff --git a/src/groq/_utils/_sync.py b/src/groq/_utils/_sync.py
old mode 100755
new mode 100644
diff --git a/src/groq/_utils/_transform.py b/src/groq/_utils/_transform.py
old mode 100755
new mode 100644
index 47e262a..a6b62ca
--- a/src/groq/_utils/_transform.py
+++ b/src/groq/_utils/_transform.py
@@ -173,6 +173,11 @@ def _transform_recursive(
         # Iterable[T]
         or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
     ):
+        # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
+        # intended as an iterable, so we don't transform it.
+        if isinstance(data, dict):
+            return cast(object, data)
+
         inner_type = extract_type_arg(stripped_type, 0)
         return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
 
@@ -186,7 +191,7 @@ def _transform_recursive(
         return data
 
     if isinstance(data, pydantic.BaseModel):
-        return model_dump(data, exclude_unset=True)
+        return model_dump(data, exclude_unset=True, mode="json")
 
     annotated_type = _get_annotated_type(annotation)
     if annotated_type is None:
@@ -311,6 +316,11 @@ async def _async_transform_recursive(
         # Iterable[T]
         or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
     ):
+        # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually
+        # intended as an iterable, so we don't transform it.
+        if isinstance(data, dict):
+            return cast(object, data)
+
         inner_type = extract_type_arg(stripped_type, 0)
         return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
 
@@ -324,7 +334,7 @@ async def _async_transform_recursive(
         return data
 
     if isinstance(data, pydantic.BaseModel):
-        return model_dump(data, exclude_unset=True)
+        return model_dump(data, exclude_unset=True, mode="json")
 
     annotated_type = _get_annotated_type(annotation)
     if annotated_type is None:
diff --git a/src/groq/_utils/_typing.py b/src/groq/_utils/_typing.py
old mode 100755
new mode 100644
diff --git a/src/groq/_utils/_utils.py b/src/groq/_utils/_utils.py
old mode 100755
new mode 100644
index 2fc5a1c..e5811bb
--- a/src/groq/_utils/_utils.py
+++ b/src/groq/_utils/_utils.py
@@ -16,6 +16,7 @@
     overload,
 )
 from pathlib import Path
+from datetime import date, datetime
 from typing_extensions import TypeGuard
 
 import sniffio
@@ -363,12 +364,13 @@ def file_from_path(path: str) -> FileTypes:
 
 def get_required_header(headers: HeadersLike, header: str) -> str:
     lower_header = header.lower()
-    if isinstance(headers, Mapping):
-        for k, v in headers.items():
+    if is_mapping_t(headers):
+        # mypy doesn't understand the type narrowing here
+        for k, v in headers.items():  # type: ignore
             if k.lower() == lower_header and isinstance(v, str):
                 return v
 
-    """ to deal with the case where the header looks like Stainless-Event-Id """
+    # to deal with the case where the header looks like Stainless-Event-Id
     intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize())
 
     for normalized_header in [header, lower_header, header.upper(), intercaps_header]:
@@ -394,3 +396,19 @@ def lru_cache(*, maxsize: int | None = 128) -> Callable[[CallableT], CallableT]:
         maxsize=maxsize,
     )
     return cast(Any, wrapper)  # type: ignore[no-any-return]
+
+
+def json_safe(data: object) -> object:
+    """Translates a mapping / sequence recursively in the same fashion
+    as `pydantic` v2's `model_dump(mode="json")`.
+    """
+    if is_mapping(data):
+        return {json_safe(key): json_safe(value) for key, value in data.items()}
+
+    if is_iterable(data) and not isinstance(data, (str, bytes, bytearray)):
+        return [json_safe(item) for item in data]
+
+    if isinstance(data, (datetime, date)):
+        return data.isoformat()
+
+    return data
diff --git a/src/groq/_version.py b/src/groq/_version.py
old mode 100755
new mode 100644
index 653c26f..da943c2
--- a/src/groq/_version.py
+++ b/src/groq/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "groq"
-__version__ = "0.11.0"  # x-release-please-version
+__version__ = "0.12.0"  # x-release-please-version
diff --git a/src/groq/lib/.keep b/src/groq/lib/.keep
old mode 100755
new mode 100644
diff --git a/src/groq/py.typed b/src/groq/py.typed
old mode 100755
new mode 100644
diff --git a/src/groq/resources/__init__.py b/src/groq/resources/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/resources/audio/__init__.py b/src/groq/resources/audio/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/resources/audio/audio.py b/src/groq/resources/audio/audio.py
old mode 100755
new mode 100644
index 728ba29..f33b8c2
--- a/src/groq/resources/audio/audio.py
+++ b/src/groq/resources/audio/audio.py
@@ -35,10 +35,21 @@ def translations(self) -> Translations:
 
     @cached_property
     def with_raw_response(self) -> AudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AudioWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AudioWithStreamingResponse(self)
 
 
@@ -53,10 +64,21 @@ def translations(self) -> AsyncTranslations:
 
     @cached_property
     def with_raw_response(self) -> AsyncAudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncAudioWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncAudioWithStreamingResponse(self)
 
 
diff --git a/src/groq/resources/audio/transcriptions.py b/src/groq/resources/audio/transcriptions.py
old mode 100755
new mode 100644
index f39363c..df3d9e9
--- a/src/groq/resources/audio/transcriptions.py
+++ b/src/groq/resources/audio/transcriptions.py
@@ -32,10 +32,21 @@
 class Transcriptions(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> TranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return TranscriptionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return TranscriptionsWithStreamingResponse(self)
 
     def create(
@@ -230,10 +241,21 @@ def create(
 class AsyncTranscriptions(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncTranscriptionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncTranscriptionsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/groq/resources/audio/translations.py b/src/groq/resources/audio/translations.py
old mode 100755
new mode 100644
index 101a797..e689512
--- a/src/groq/resources/audio/translations.py
+++ b/src/groq/resources/audio/translations.py
@@ -32,10 +32,21 @@
 class Translations(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> TranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return TranslationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> TranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return TranslationsWithStreamingResponse(self)
 
     def create(
@@ -111,10 +122,21 @@ def create(
 class AsyncTranslations(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncTranslationsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncTranslationsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/groq/resources/chat/__init__.py b/src/groq/resources/chat/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/resources/chat/chat.py b/src/groq/resources/chat/chat.py
old mode 100755
new mode 100644
index d14d055..1b2ea5a
--- a/src/groq/resources/chat/chat.py
+++ b/src/groq/resources/chat/chat.py
@@ -23,10 +23,21 @@ def completions(self) -> Completions:
 
     @cached_property
     def with_raw_response(self) -> ChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return ChatWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return ChatWithStreamingResponse(self)
 
 
@@ -37,10 +48,21 @@ def completions(self) -> AsyncCompletions:
 
     @cached_property
     def with_raw_response(self) -> AsyncChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncChatWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncChatWithStreamingResponse(self)
 
 
diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py
old mode 100755
new mode 100644
index e70cff9..bcd0117
--- a/src/groq/resources/chat/completions.py
+++ b/src/groq/resources/chat/completions.py
@@ -35,10 +35,21 @@
 class Completions(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return CompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return CompletionsWithStreamingResponse(self)
 
     @overload
@@ -324,10 +335,21 @@ def create(
 class AsyncCompletions(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncCompletionsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncCompletionsWithStreamingResponse(self)
 
     @overload
diff --git a/src/groq/resources/embeddings.py b/src/groq/resources/embeddings.py
old mode 100755
new mode 100644
index d52fd5e..8e439dd
--- a/src/groq/resources/embeddings.py
+++ b/src/groq/resources/embeddings.py
@@ -30,10 +30,21 @@
 class Embeddings(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> EmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return EmbeddingsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return EmbeddingsWithStreamingResponse(self)
 
     def create(
@@ -95,10 +106,21 @@ def create(
 class AsyncEmbeddings(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncEmbeddingsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncEmbeddingsWithStreamingResponse(self)
 
     async def create(
diff --git a/src/groq/resources/models.py b/src/groq/resources/models.py
old mode 100755
new mode 100644
index e81a21c..0cb9c71
--- a/src/groq/resources/models.py
+++ b/src/groq/resources/models.py
@@ -24,10 +24,21 @@
 class Models(SyncAPIResource):
     @cached_property
     def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return ModelsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return ModelsWithStreamingResponse(self)
 
     def retrieve(
@@ -119,10 +130,21 @@ def delete(
 class AsyncModels(AsyncAPIResource):
     @cached_property
     def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
         return AsyncModelsWithRawResponse(self)
 
     @cached_property
     def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
         return AsyncModelsWithStreamingResponse(self)
 
     async def retrieve(
diff --git a/src/groq/types/__init__.py b/src/groq/types/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/audio/__init__.py b/src/groq/types/audio/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/audio/transcription.py b/src/groq/types/audio/transcription.py
old mode 100755
new mode 100644
index 0b6ab39..edb5f22
--- a/src/groq/types/audio/transcription.py
+++ b/src/groq/types/audio/transcription.py
@@ -1,7 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 
-
 from ..._models import BaseModel
 
 __all__ = ["Transcription"]
diff --git a/src/groq/types/audio/transcription_create_params.py b/src/groq/types/audio/transcription_create_params.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/audio/translation.py b/src/groq/types/audio/translation.py
old mode 100755
new mode 100644
index 3d9ede2..7c0e905
--- a/src/groq/types/audio/translation.py
+++ b/src/groq/types/audio/translation.py
@@ -1,7 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 
-
 from ..._models import BaseModel
 
 __all__ = ["Translation"]
diff --git a/src/groq/types/audio/translation_create_params.py b/src/groq/types/audio/translation_create_params.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/__init__.py b/src/groq/types/chat/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_assistant_message_param.py b/src/groq/types/chat/chat_completion_assistant_message_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_content_part_image_param.py b/src/groq/types/chat/chat_completion_content_part_image_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_content_part_param.py b/src/groq/types/chat/chat_completion_content_part_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_content_part_text_param.py b/src/groq/types/chat/chat_completion_content_part_text_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_function_call_option_param.py b/src/groq/types/chat/chat_completion_function_call_option_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_function_message_param.py b/src/groq/types/chat/chat_completion_function_message_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_message.py b/src/groq/types/chat/chat_completion_message.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_message_param.py b/src/groq/types/chat/chat_completion_message_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_message_tool_call.py b/src/groq/types/chat/chat_completion_message_tool_call.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_message_tool_call_param.py b/src/groq/types/chat/chat_completion_message_tool_call_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_named_tool_choice_param.py b/src/groq/types/chat/chat_completion_named_tool_choice_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_role.py b/src/groq/types/chat/chat_completion_role.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_system_message_param.py b/src/groq/types/chat/chat_completion_system_message_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_token_logprob.py b/src/groq/types/chat/chat_completion_token_logprob.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_tool_choice_option_param.py b/src/groq/types/chat/chat_completion_tool_choice_option_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_tool_message_param.py b/src/groq/types/chat/chat_completion_tool_message_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_tool_param.py b/src/groq/types/chat/chat_completion_tool_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/chat_completion_user_message_param.py b/src/groq/types/chat/chat_completion_user_message_param.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/completion_usage.py b/src/groq/types/completion_usage.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/create_embedding_response.py b/src/groq/types/create_embedding_response.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/embedding.py b/src/groq/types/embedding.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/embedding_create_params.py b/src/groq/types/embedding_create_params.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/model.py b/src/groq/types/model.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/model_deleted.py b/src/groq/types/model_deleted.py
old mode 100755
new mode 100644
index d9a48bb..7f81e1b
--- a/src/groq/types/model_deleted.py
+++ b/src/groq/types/model_deleted.py
@@ -1,7 +1,6 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 
-
 from .._models import BaseModel
 
 __all__ = ["ModelDeleted"]
diff --git a/src/groq/types/model_list_response.py b/src/groq/types/model_list_response.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/shared/__init__.py b/src/groq/types/shared/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/shared/error_object.py b/src/groq/types/shared/error_object.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/shared/function_definition.py b/src/groq/types/shared/function_definition.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/shared/function_parameters.py b/src/groq/types/shared/function_parameters.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/shared_params/__init__.py b/src/groq/types/shared_params/__init__.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/shared_params/function_definition.py b/src/groq/types/shared_params/function_definition.py
old mode 100755
new mode 100644
diff --git a/src/groq/types/shared_params/function_parameters.py b/src/groq/types/shared_params/function_parameters.py
old mode 100755
new mode 100644
diff --git a/tests/__init__.py b/tests/__init__.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/__init__.py b/tests/api_resources/__init__.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/audio/__init__.py b/tests/api_resources/audio/__init__.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/audio/test_transcriptions.py b/tests/api_resources/audio/test_transcriptions.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/audio/test_translations.py b/tests/api_resources/audio/test_translations.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/chat/__init__.py b/tests/api_resources/chat/__init__.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py
old mode 100755
new mode 100644
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
old mode 100755
new mode 100644
diff --git a/tests/conftest.py b/tests/conftest.py
old mode 100755
new mode 100644
index 21b8c75..3fad581
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
 import os
-import asyncio
 import logging
 from typing import TYPE_CHECKING, Iterator, AsyncIterator
 
 import pytest
+from pytest_asyncio import is_async_test
 
 from groq import Groq, AsyncGroq
 
@@ -17,11 +17,13 @@
 logging.getLogger("groq").setLevel(logging.DEBUG)
 
 
-@pytest.fixture(scope="session")
-def event_loop() -> Iterator[asyncio.AbstractEventLoop]:
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
+# automatically add `pytest.mark.asyncio()` to all of our async tests
+# so we don't have to add that boilerplate everywhere
+def pytest_collection_modifyitems(items: list[pytest.Function]) -> None:
+    pytest_asyncio_tests = (item for item in items if is_async_test(item))
+    session_scope_marker = pytest.mark.asyncio(loop_scope="session")
+    for async_test in pytest_asyncio_tests:
+        async_test.add_marker(session_scope_marker, append=False)
 
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
diff --git a/tests/sample_file.txt b/tests/sample_file.txt
old mode 100755
new mode 100644
diff --git a/tests/test_client.py b/tests/test_client.py
old mode 100755
new mode 100644
index fe06746..6eedb65
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -10,6 +10,7 @@
 import tracemalloc
 from typing import Any, Union, cast
 from unittest import mock
+from typing_extensions import Literal
 
 import httpx
 import pytest
@@ -679,6 +680,7 @@ class Model(BaseModel):
             [3, "", 0.5],
             [2, "", 0.5 * 2.0],
             [1, "", 0.5 * 4.0],
+            [-1100, "", 8],  # test large number potentially overflowing
         ],
     )
     @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
@@ -753,7 +755,14 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non
     @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
-    def test_retries_taken(self, client: Groq, failures_before_success: int, respx_mock: MockRouter) -> None:
+    @pytest.mark.parametrize("failure_mode", ["status", "exception"])
+    def test_retries_taken(
+        self,
+        client: Groq,
+        failures_before_success: int,
+        failure_mode: Literal["status", "exception"],
+        respx_mock: MockRouter,
+    ) -> None:
         client = client.with_options(max_retries=4)
 
         nb_retries = 0
@@ -762,6 +771,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
             nonlocal nb_retries
             if nb_retries < failures_before_success:
                 nb_retries += 1
+                if failure_mode == "exception":
+                    raise RuntimeError("oops")
                 return httpx.Response(500)
             return httpx.Response(200)
 
@@ -778,6 +789,69 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
         )
 
         assert response.retries_taken == failures_before_success
+        assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_omit_retry_count_header(self, client: Groq, failures_before_success: int, respx_mock: MockRouter) -> None:
+        client = client.with_options(max_retries=4)  # covers the largest parametrized failure count
+
+        nb_retries = 0  # failing responses served so far
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)  # keep failing until the configured count is reached
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": Omit()},  # Omit(): header expected to be dropped
+        )
+
+        assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0  # header absent
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_overwrite_retry_count_header(
+        self, client: Groq, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = client.with_options(max_retries=4)  # covers the largest parametrized failure count
+
+        nb_retries = 0  # failing responses served so far
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)  # keep failing until the configured count is reached
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": "42"},  # explicit caller-supplied value
+        )
+
+        assert response.http_request.headers.get("x-stainless-retry-count") == "42"  # caller's value is kept
 
 
 class TestAsyncGroq:
@@ -1425,6 +1499,7 @@ class Model(BaseModel):
             [3, "", 0.5],
             [2, "", 0.5 * 2.0],
             [1, "", 0.5 * 4.0],
+            [-1100, "", 8],  # test large number potentially overflowing
         ],
     )
     @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
@@ -1501,8 +1576,13 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter)
     @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
     @pytest.mark.respx(base_url=base_url)
     @pytest.mark.asyncio
+    @pytest.mark.parametrize("failure_mode", ["status", "exception"])
     async def test_retries_taken(
-        self, async_client: AsyncGroq, failures_before_success: int, respx_mock: MockRouter
+        self,
+        async_client: AsyncGroq,
+        failures_before_success: int,
+        failure_mode: Literal["status", "exception"],
+        respx_mock: MockRouter,
     ) -> None:
         client = async_client.with_options(max_retries=4)
 
@@ -1512,6 +1592,8 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
             nonlocal nb_retries
             if nb_retries < failures_before_success:
                 nb_retries += 1
+                if failure_mode == "exception":
+                    raise RuntimeError("oops")
                 return httpx.Response(500)
             return httpx.Response(200)
 
@@ -1528,3 +1610,70 @@ def retry_handler(_request: httpx.Request) -> httpx.Response:
         )
 
         assert response.retries_taken == failures_before_success
+        assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    async def test_omit_retry_count_header(
+        self, async_client: AsyncGroq, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = async_client.with_options(max_retries=4)  # covers the largest parametrized failure count
+
+        nb_retries = 0  # failing responses served so far
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)  # keep failing until the configured count is reached
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": Omit()},  # Omit(): header expected to be dropped
+        )
+
+        assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0  # header absent
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("groq._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    async def test_overwrite_retry_count_header(
+        self, async_client: AsyncGroq, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = async_client.with_options(max_retries=4)  # covers the largest parametrized failure count
+
+        nb_retries = 0  # failing responses served so far
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)  # keep failing until the configured count is reached
+            return httpx.Response(200)
+
+        respx_mock.post("/openai/v1/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "content",
+                    "role": "system",
+                }
+            ],
+            model="string",
+            extra_headers={"x-stainless-retry-count": "42"},  # explicit caller-supplied value
+        )
+
+        assert response.http_request.headers.get("x-stainless-retry-count") == "42"  # caller's value is kept
diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py
old mode 100755
new mode 100644
diff --git a/tests/test_extract_files.py b/tests/test_extract_files.py
old mode 100755
new mode 100644
diff --git a/tests/test_files.py b/tests/test_files.py
old mode 100755
new mode 100644
diff --git a/tests/test_models.py b/tests/test_models.py
old mode 100755
new mode 100644
index bd5c305..e486da8
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -245,7 +245,7 @@ class Model(BaseModel):
     assert m.foo is True
 
     m = Model.construct(foo="CARD_HOLDER")
-    assert m.foo is "CARD_HOLDER"
+    assert m.foo == "CARD_HOLDER"
 
     m = Model.construct(foo={"bar": False})
     assert isinstance(m.foo, Submodel1)
@@ -520,19 +520,15 @@ class Model(BaseModel):
     assert m3.to_dict(exclude_none=True) == {}
     assert m3.to_dict(exclude_defaults=True) == {}
 
-    if PYDANTIC_V2:
-
-        class Model2(BaseModel):
-            created_at: datetime
+    class Model2(BaseModel):
+        created_at: datetime
 
-        time_str = "2024-03-21T11:39:01.275859"
-        m4 = Model2.construct(created_at=time_str)
-        assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)}
-        assert m4.to_dict(mode="json") == {"created_at": time_str}
-    else:
-        with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"):
-            m.to_dict(mode="json")
+    time_str = "2024-03-21T11:39:01.275859"
+    m4 = Model2.construct(created_at=time_str)
+    assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)}
+    assert m4.to_dict(mode="json") == {"created_at": time_str}
 
+    if not PYDANTIC_V2:
         with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"):
             m.to_dict(warnings=False)
 
@@ -558,9 +554,6 @@ class Model(BaseModel):
     assert m3.model_dump(exclude_none=True) == {}
 
     if not PYDANTIC_V2:
-        with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"):
-            m.model_dump(mode="json")
-
         with pytest.raises(ValueError, match="round_trip is only supported in Pydantic v2"):
             m.model_dump(round_trip=True)
 
diff --git a/tests/test_qs.py b/tests/test_qs.py
old mode 100755
new mode 100644
diff --git a/tests/test_required_args.py b/tests/test_required_args.py
old mode 100755
new mode 100644
diff --git a/tests/test_response.py b/tests/test_response.py
old mode 100755
new mode 100644
index b33a361..0111eb5
--- a/tests/test_response.py
+++ b/tests/test_response.py
@@ -190,6 +190,56 @@ async def test_async_response_parse_annotated_type(async_client: AsyncGroq) -> N
     assert obj.bar == 2
 
 
+@pytest.mark.parametrize(  # "true"/"false" bodies in any letter case and the bool each should parse to
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+def test_response_parse_bool(client: Groq, content: str, expected: bool) -> None:
+    response = APIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,  # the bool target is supplied to parse() below instead
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = response.parse(to=bool)
+    assert result is expected  # identity check: must be the exact bool singleton
+
+
+@pytest.mark.parametrize(  # "true"/"false" bodies in any letter case and the bool each should parse to
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+async def test_async_response_parse_bool(async_client: AsyncGroq, content: str, expected: bool) -> None:
+    response = AsyncAPIResponse(
+        raw=httpx.Response(200, content=content),
+        client=async_client,  # async fixture, so the annotation matches what pytest injects
+        stream=False,
+        stream_cls=None,
+        cast_to=str,  # the bool target is supplied to parse() below instead
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = await response.parse(to=bool)
+    assert result is expected  # identity check: must be the exact bool singleton
+
+
 class OtherModel(BaseModel):
     a: str
 
diff --git a/tests/test_streaming.py b/tests/test_streaming.py
old mode 100755
new mode 100644
diff --git a/tests/test_transform.py b/tests/test_transform.py
old mode 100755
new mode 100644
index b1d6cbc..e29e15c
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -177,17 +177,32 @@ class DateDict(TypedDict, total=False):
     foo: Annotated[date, PropertyInfo(format="iso8601")]
 
 
+class DatetimeModel(BaseModel):  # model-typed input for the datetime assertions in test_iso8601_format
+    foo: datetime
+
+
+class DateModel(BaseModel):  # model-typed input for the date assertions in test_iso8601_format
+    foo: Optional[date]  # Optional so the foo=None case can be constructed
+
+
 @parametrize
 @pytest.mark.asyncio
 async def test_iso8601_format(use_async: bool) -> None:
     dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00")
+    tz = "Z" if PYDANTIC_V2 else "+00:00"  # expected UTC suffix for the model case differs by Pydantic major version
     assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692+00:00"}  # type: ignore[comparison-overlap]
+    assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692" + tz}  # type: ignore[comparison-overlap]
 
     dt = dt.replace(tzinfo=None)
     assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692"}  # type: ignore[comparison-overlap]
+    assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692"}  # type: ignore[comparison-overlap]
 
     assert await transform({"foo": None}, DateDict, use_async) == {"foo": None}  # type: ignore[comparison-overlap]
+    assert await transform(DateModel(foo=None), Any, use_async) == {"foo": None}  # type: ignore
     assert await transform({"foo": date.fromisoformat("2023-02-23")}, DateDict, use_async) == {"foo": "2023-02-23"}  # type: ignore[comparison-overlap]
+    assert await transform(DateModel(foo=date.fromisoformat("2023-02-23")), DateDict, use_async) == {
+        "foo": "2023-02-23"
+    }  # type: ignore[comparison-overlap]
 
 
 @parametrize
diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py
old mode 100755
new mode 100644
diff --git a/tests/test_utils/test_typing.py b/tests/test_utils/test_typing.py
old mode 100755
new mode 100644
diff --git a/tests/utils.py b/tests/utils.py
old mode 100755
new mode 100644