diff --git a/.stats.yml b/.stats.yml index 1de8464..3d0c6fd 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ configured_endpoints: 7 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-45aff5d28bf67a962a928a38a6cf90ae3034bc824d2e65036d8784643e4995db.yml +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/groqcloud%2Fgroqcloud-36fcf453a77cbc8279361577a1e785a3a86ef7bcbde2195270a83e93cbc4b8b3.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c3719d..0108a3a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,42 +1,5 @@ # Changelog -## 0.7.0 (2024-05-22) - -Full Changelog: [v0.6.0...v0.7.0](https://github.com/groq/groq-python/compare/v0.6.0...v0.7.0) - -### Features - -* **api:** Improve types ([e46d3ed](https://github.com/groq/groq-python/commit/e46d3ed8591ef543a6f50c4443825fd547b74ab4)) - - -### Chores - -* fix array indices in README examples ([bacc106](https://github.com/groq/groq-python/commit/bacc10611d74ae9369093bbf44a451182b82e253)) -* patch streaming ([9287ee7](https://github.com/groq/groq-python/commit/9287ee77e505187bb504729fad95edde41663ad0)) - -## 0.6.0 (2024-05-16) - -Full Changelog: [v0.5.0...v0.6.0](https://github.com/groq/groq-python/compare/v0.5.0...v0.6.0) - -### Features - -* **api:** OpenAPI spec update via Stainless API ([#39](https://github.com/groq/groq-python/issues/39)) ([53a8b27](https://github.com/groq/groq-python/commit/53a8b27365c085227e449812779c5bafadada9fb)) -* **api:** OpenAPI spec update via Stainless API ([#45](https://github.com/groq/groq-python/issues/45)) ([18bce01](https://github.com/groq/groq-python/commit/18bce01c835fbc0485ef78f12b28caead37d15c4)) -* **api:** update via SDK Studio ([#35](https://github.com/groq/groq-python/issues/35)) ([5ca3b78](https://github.com/groq/groq-python/commit/5ca3b785626145a87a27d64a4a4c3eae1b1fa8de)) -* **api:** update via SDK Studio ([#38](https://github.com/groq/groq-python/issues/38)) ([dc73eeb](https://github.com/groq/groq-python/commit/dc73eeb92a441123596b3f3420a757074ee62345)) -* **api:** update via SDK Studio ([#40](https://github.com/groq/groq-python/issues/40)) ([7227816](https://github.com/groq/groq-python/commit/7227816b92397d913f8aeceda64127e2db99f720)) -* **api:** update via SDK Studio ([#41](https://github.com/groq/groq-python/issues/41)) ([3fdb317](https://github.com/groq/groq-python/commit/3fdb31786699d3298d5064faac77e26dbc7852d8)) -* **api:** update via SDK Studio ([#42](https://github.com/groq/groq-python/issues/42)) ([d270e51](https://github.com/groq/groq-python/commit/d270e51d63edc138b0f0afa0b53c2f84a69b1f73)) -* **api:** update via SDK Studio ([#43](https://github.com/groq/groq-python/issues/43)) ([0e04381](https://github.com/groq/groq-python/commit/0e043816e5cbcdf5def7583d20312c6f5e29808f)) -* **api:** update via SDK Studio ([#44](https://github.com/groq/groq-python/issues/44)) ([3807b4c](https://github.com/groq/groq-python/commit/3807b4c67981aec711d3e60e57ec0e5e9493f8a0)) -* **api:** update via SDK Studio ([#46](https://github.com/groq/groq-python/issues/46)) ([ed1ea2a](https://github.com/groq/groq-python/commit/ed1ea2a99f95d4d353c3dd9baf7fc1c5d37cc20a)) - - -### Chores - -* **examples:** embedding example ([961159c](https://github.com/groq/groq-python/commit/961159c9c83a21cd9631af4802e4b538ebbbb186)) -* patch streaming ([c8eaad3](https://github.com/groq/groq-python/commit/c8eaad35abe33b692e029495157948db3a09af3c)) - ## 0.5.0 (2024-04-11) Full Changelog: 
[v0.4.2...v0.5.0](https://github.com/groq/groq-python/compare/v0.4.2...v0.5.0) diff --git a/README.md b/README.md index 8ed555c..a8b47a5 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,13 @@ pip install groq The full API of this library can be found in [api.md](api.md). ```python +import os from groq import Groq -client = Groq() +client = Groq( + # This is the default and can be omitted + api_key=os.environ.get("GROQ_API_KEY"), +) chat_completion = client.chat.completions.create( messages=[ @@ -35,9 +39,9 @@ chat_completion = client.chat.completions.create( "content": "Explain the importance of low latency LLMs", } ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ) -print(chat_completion.choices[0].message.content) +print(chat_completion.choices_0.message.content) ``` While you can provide an `api_key` keyword argument, @@ -50,10 +54,14 @@ so that your API Key is not stored in source control. Simply import `AsyncGroq` instead of `Groq` and use `await` with each API call: ```python +import os import asyncio from groq import AsyncGroq -client = AsyncGroq() +client = AsyncGroq( + # This is the default and can be omitted + api_key=os.environ.get("GROQ_API_KEY"), +) async def main() -> None: @@ -64,9 +72,9 @@ async def main() -> None: "content": "Explain the importance of low latency LLMs", } ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ) - print(chat_completion.choices[0].message.content) + print(chat_completion.choices_0.message.content) asyncio.run(main()) @@ -110,7 +118,7 @@ try: "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ) except groq.APIConnectionError as e: print("The server could not be reached") @@ -165,7 +173,7 @@ client.with_options(max_retries=5).chat.completions.create( "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ) ``` @@ -200,7 +208,7 @@ client.with_options(timeout=5.0).chat.completions.create( "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ) ``` @@ -248,7 +256,7 @@ response = client.chat.completions.with_raw_response.create( "role": "user", "content": "Explain the importance of low latency LLMs", }], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ) print(response.headers.get('X-My-Header')) @@ -278,7 +286,7 @@ with client.chat.completions.with_streaming_response.create( "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ) as response: print(response.headers.get("X-My-Header")) diff --git a/api.md b/api.md index de3ee49..41c0971 100644 --- a/api.md +++ b/api.md @@ -1,14 +1,16 @@ -# Embeddings - -Types: +# Shared Types ```python -from groq.types import CreateEmbeddingResponse, Embedding +from groq.types import ErrorObject, FunctionDefinition, FunctionParameters ``` -Methods: +# Completions -- client.embeddings.create(\*\*params) -> CreateEmbeddingResponse +Types: + +```python +from groq.types import CompletionUsage +``` # Chat @@ -17,13 +19,45 @@ Methods: Types: ```python -from groq.types.chat import ChatCompletion +from groq.types.chat import ( + ChatCompletion, + ChatCompletionAssistantMessageParam, + ChatCompletionChunk, + ChatCompletionContentPart, + ChatCompletionContentPartImage, + ChatCompletionContentPartText, + ChatCompletionFunctionCallOption, + ChatCompletionFunctionMessageParam, + ChatCompletionMessage, + ChatCompletionMessageParam, + 
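As context for the README hunks above, here is a minimal sketch of the non-streaming quickstart call as this diff documents it, assuming `GROQ_API_KEY` is exported. Note that `choices` on the returned `ChatCompletion` is a list (see the model defined later in this diff), so entries are reached by index.

```python
import os

from groq import Groq

# Mirrors the README: api_key defaults to the GROQ_API_KEY environment variable
# and can be omitted entirely.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Explain the importance of low latency LLMs",
        }
    ],
    model="llama3-8b-8192",
)

# ChatCompletion.choices is a List[Choice], so the first result is choices[0].
print(chat_completion.choices[0].message.content)
```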
ChatCompletionMessageToolCall, + ChatCompletionNamedToolChoice, + ChatCompletionRole, + ChatCompletionSystemMessageParam, + ChatCompletionTokenLogprob, + ChatCompletionTool, + ChatCompletionToolChoiceOption, + ChatCompletionToolMessageParam, + ChatCompletionUserMessageParam, +) ``` Methods: - client.chat.completions.create(\*\*params) -> ChatCompletion +# Embeddings + +Types: + +```python +from groq.types import CreateEmbeddingResponse, Embedding +``` + +Methods: + +- client.embeddings.create(\*\*params) -> CreateEmbeddingResponse + # Audio ## Transcriptions @@ -55,11 +89,11 @@ Methods: Types: ```python -from groq.types import Model, ModelList +from groq.types import Model, ModelDeleted, ModelListResponse ``` Methods: - client.models.retrieve(model) -> Model -- client.models.list() -> ModelList -- client.models.delete(model) -> None +- client.models.list() -> ModelListResponse +- client.models.delete(model) -> ModelDeleted diff --git a/src/groq/_client.py b/src/groq/_client.py index 1a5b0fa..87f6486 100644 --- a/src/groq/_client.py +++ b/src/groq/_client.py @@ -46,8 +46,8 @@ class Groq(SyncAPIClient): - embeddings: resources.Embeddings chat: resources.Chat + embeddings: resources.Embeddings audio: resources.Audio models: resources.Models with_raw_response: GroqWithRawResponse @@ -107,8 +107,8 @@ def __init__( _strict_response_validation=_strict_response_validation, ) - self.embeddings = resources.Embeddings(self) self.chat = resources.Chat(self) + self.embeddings = resources.Embeddings(self) self.audio = resources.Audio(self) self.models = resources.Models(self) self.with_raw_response = GroqWithRawResponse(self) @@ -220,8 +220,8 @@ def _make_status_error( class AsyncGroq(AsyncAPIClient): - embeddings: resources.AsyncEmbeddings chat: resources.AsyncChat + embeddings: resources.AsyncEmbeddings audio: resources.AsyncAudio models: resources.AsyncModels with_raw_response: AsyncGroqWithRawResponse @@ -281,8 +281,8 @@ def __init__( _strict_response_validation=_strict_response_validation, ) - self.embeddings = resources.AsyncEmbeddings(self) self.chat = resources.AsyncChat(self) + self.embeddings = resources.AsyncEmbeddings(self) self.audio = resources.AsyncAudio(self) self.models = resources.AsyncModels(self) self.with_raw_response = AsyncGroqWithRawResponse(self) @@ -395,32 +395,32 @@ def _make_status_error( class GroqWithRawResponse: def __init__(self, client: Groq) -> None: - self.embeddings = resources.EmbeddingsWithRawResponse(client.embeddings) self.chat = resources.ChatWithRawResponse(client.chat) + self.embeddings = resources.EmbeddingsWithRawResponse(client.embeddings) self.audio = resources.AudioWithRawResponse(client.audio) self.models = resources.ModelsWithRawResponse(client.models) class AsyncGroqWithRawResponse: def __init__(self, client: AsyncGroq) -> None: - self.embeddings = resources.AsyncEmbeddingsWithRawResponse(client.embeddings) self.chat = resources.AsyncChatWithRawResponse(client.chat) + self.embeddings = resources.AsyncEmbeddingsWithRawResponse(client.embeddings) self.audio = resources.AsyncAudioWithRawResponse(client.audio) self.models = resources.AsyncModelsWithRawResponse(client.models) class GroqWithStreamedResponse: def __init__(self, client: Groq) -> None: - self.embeddings = resources.EmbeddingsWithStreamingResponse(client.embeddings) self.chat = resources.ChatWithStreamingResponse(client.chat) + self.embeddings = resources.EmbeddingsWithStreamingResponse(client.embeddings) self.audio = resources.AudioWithStreamingResponse(client.audio) self.models = 
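The api.md and models hunks above change `models.list()` to return `ModelListResponse` and make `models.delete()` return a `ModelDeleted` body instead of `None`. A hedged sketch of exercising the new return types; the field layouts of `ModelListResponse` and `ModelDeleted` are not shown in this diff, so the example only relies on the models' reprs.

```python
from groq import Groq

client = Groq()  # reads GROQ_API_KEY from the environment by default

# list() now deserializes into ModelListResponse instead of the old ModelList.
models = client.models.list()
print(models)

# retrieve() is unchanged and still returns a single Model.
model = client.models.retrieve("llama3-8b-8192")
print(model)

# delete() now returns a ModelDeleted body rather than None, so the result can be inspected.
deleted = client.models.delete("ft:my-fine-tune")  # hypothetical fine-tuned model name
print(deleted)
```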
resources.ModelsWithStreamingResponse(client.models) class AsyncGroqWithStreamedResponse: def __init__(self, client: AsyncGroq) -> None: - self.embeddings = resources.AsyncEmbeddingsWithStreamingResponse(client.embeddings) self.chat = resources.AsyncChatWithStreamingResponse(client.chat) + self.embeddings = resources.AsyncEmbeddingsWithStreamingResponse(client.embeddings) self.audio = resources.AsyncAudioWithStreamingResponse(client.audio) self.models = resources.AsyncModelsWithStreamingResponse(client.models) diff --git a/src/groq/_streaming.py b/src/groq/_streaming.py index 73cf684..70e074f 100644 --- a/src/groq/_streaming.py +++ b/src/groq/_streaming.py @@ -55,8 +55,6 @@ def __stream__(self) -> Iterator[_T]: iterator = self._iter_events() for sse in iterator: - if sse.data.startswith("[DONE]"): - break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed @@ -121,8 +119,6 @@ async def __stream__(self) -> AsyncIterator[_T]: iterator = self._iter_events() async for sse in iterator: - if sse.data.startswith("[DONE]"): - break yield process_data(data=sse.json(), cast_to=cast_to, response=response) # Ensure the entire stream is consumed diff --git a/src/groq/resources/__init__.py b/src/groq/resources/__init__.py index 9fe7790..4f29788 100644 --- a/src/groq/resources/__init__.py +++ b/src/groq/resources/__init__.py @@ -34,18 +34,18 @@ ) __all__ = [ - "Embeddings", - "AsyncEmbeddings", - "EmbeddingsWithRawResponse", - "AsyncEmbeddingsWithRawResponse", - "EmbeddingsWithStreamingResponse", - "AsyncEmbeddingsWithStreamingResponse", "Chat", "AsyncChat", "ChatWithRawResponse", "AsyncChatWithRawResponse", "ChatWithStreamingResponse", "AsyncChatWithStreamingResponse", + "Embeddings", + "AsyncEmbeddings", + "EmbeddingsWithRawResponse", + "AsyncEmbeddingsWithRawResponse", + "EmbeddingsWithStreamingResponse", + "AsyncEmbeddingsWithStreamingResponse", "Audio", "AsyncAudio", "AudioWithRawResponse", diff --git a/src/groq/resources/chat/completions.py b/src/groq/resources/chat/completions.py index d171b64..94eaafd 100644 --- a/src/groq/resources/chat/completions.py +++ b/src/groq/resources/chat/completions.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional, overload +from typing import Dict, List, Union, Iterable, Optional from typing_extensions import Literal import httpx @@ -20,13 +20,14 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ..._streaming import Stream, AsyncStream from ...types.chat import completion_create_params from ..._base_client import ( make_request_options, ) -from ...lib.chat_completion_chunk import ChatCompletionChunk from ...types.chat.chat_completion import ChatCompletion +from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam __all__ = ["Completions", "AsyncCompletions"] @@ -40,12 +41,11 @@ def with_raw_response(self) -> CompletionsWithRawResponse: def with_streaming_response(self) -> CompletionsWithStreamingResponse: return CompletionsWithStreamingResponse(self) - @overload def create( self, *, - messages: Iterable[completion_create_params.Message], - model: str, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, Literal["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"]], 
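The raw and streamed response wrapper classes reordered above back the `with_raw_response` / `with_streaming_response` accessors shown in the README hunks. A small sketch of reading a response header through each path; the header name is purely illustrative.

```python
from groq import Groq

client = Groq()

# .with_raw_response exposes the underlying HTTP response (e.g. its headers)
# alongside the parsed body.
response = client.chat.completions.with_raw_response.create(
    messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
    model="llama3-8b-8192",
)
print(response.headers.get("X-My-Header"))

# .with_streaming_response defers reading the body until it is consumed,
# and is used as a context manager.
with client.chat.completions.with_streaming_response.create(
    messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
    model="llama3-8b-8192",
) as streamed:
    print(streamed.headers.get("X-My-Header"))
```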
frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, @@ -57,10 +57,10 @@ def create( response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, + tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: Optional[str] | NotGiven = NOT_GIVEN, @@ -71,104 +71,6 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... - - @overload - def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, - functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - ... 
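The consolidated `create` signature above now types `tools` as `Iterable[ChatCompletionToolParam]` and `tool_choice` as `ChatCompletionToolChoiceOptionParam`. Below is a hedged sketch of passing a single function tool; the inner `name`/`description`/`parameters` schema assumes the usual OpenAI-compatible `FunctionDefinition` shape, which lives in `groq.types.shared` and is not shown in this diff.

```python
from groq import Groq

client = Groq()

# One tool, shaped like ChatCompletionToolParam:
# {"type": "function", "function": <FunctionDefinition>}.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical function for illustration
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "What's the weather in Toronto?"}],
    model="llama3-70b-8192",
    tools=tools,
    tool_choice="auto",  # ChatCompletionToolChoiceOptionParam: "none", "auto", or a named tool
)
print(completion.choices[0].finish_reason)
```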
- - @overload - def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, - functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - ... - - def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, - functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: """ Creates a model response for the given chat conversation. 
@@ -218,36 +120,39 @@ def create( response_format: An object specifying the format that the model must output. - Setting to `{ "type": "json" }` enables JSON mode, which guarantees the message - the model generates is valid JSON. + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. - Important: when using JSON mode, you must also instruct the model to produce - JSON yourself via a system or user message. Without this, the model may generate - an unending stream of whitespace until the generation reaches the token limit, - resulting in a long-running and seemingly "stuck" request. Also note that the - message content may be partially cut off if finish_reason="length", which - indicates the generation exceeded max_tokens or the conversation exceeded the - max context length. + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. - seed: If specified, our system will sample deterministically, such that repeated - requests with the same seed and parameters will return the same result. + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. Determinism is not guaranteed, and you should refer to the + `system_fingerprint` response parameter to monitor changes in the backend. stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: If set, partial message deltas will be sent. Tokens will be sent as data-only - server-sent events as they become available, with the stream terminated by a - data: [DONE]. [Example code](/docs/text-chat#streaming-a-chat-completion). + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. [Example code](/docs/text-chat#streaming-a-chat-completion). temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both - tool_choice: Controls which (if any) function is called by the model. Specifying a particular - function via a structured object like + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. 
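The reworded docstring above stresses that JSON mode needs both `response_format={"type": "json_object"}` and an explicit instruction to produce JSON in a system or user message. A minimal sketch of that combination:

```python
import json

from groq import Groq

client = Groq()

completion = client.chat.completions.create(
    messages=[
        # JSON mode still requires telling the model to emit JSON in a message.
        {"role": "system", "content": "You are a helpful assistant. Reply only with a JSON object."},
        {"role": "user", "content": "List three causes of high LLM latency."},
    ],
    model="llama3-8b-8192",
    response_format={"type": "json_object"},
)

# In JSON mode the message content is guaranteed to be valid JSON.
print(json.loads(completion.choices[0].message.content or "{}"))
```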
Use this to provide a list of functions the model may generate JSON inputs @@ -305,8 +210,6 @@ def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], ) @@ -319,12 +222,11 @@ def with_raw_response(self) -> AsyncCompletionsWithRawResponse: def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: return AsyncCompletionsWithStreamingResponse(self) - @overload async def create( self, *, - messages: Iterable[completion_create_params.Message], - model: str, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, Literal["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"]], frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, @@ -336,10 +238,10 @@ async def create( response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream: Optional[bool] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, + tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN, + tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN, top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, user: Optional[str] | NotGiven = NOT_GIVEN, @@ -350,104 +252,6 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ChatCompletion: - ... - - @overload - async def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, - functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Literal[True], - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
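The async resource mirrors the sync signature change above. For reference, a sketch of the awaited form as the README hunks document it, again indexing into `choices`:

```python
import asyncio
import os

from groq import AsyncGroq


async def main() -> None:
    client = AsyncGroq(api_key=os.environ.get("GROQ_API_KEY"))

    chat_completion = await client.chat.completions.create(
        messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
        model="llama3-8b-8192",
    )
    print(chat_completion.choices[0].message.content)


asyncio.run(main())
```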
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - ... - - @overload - async def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, - functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: bool, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - ... - - async def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN, - functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[completion_create_params.ToolChoice] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[completion_create_params.Tool]] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: Optional[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: """ Creates a model response for the given chat conversation. @@ -497,36 +301,39 @@ async def create( response_format: An object specifying the format that the model must output. - Setting to `{ "type": "json" }` enables JSON mode, which guarantees the message - the model generates is valid JSON. + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. - Important: when using JSON mode, you must also instruct the model to produce - JSON yourself via a system or user message. Without this, the model may generate - an unending stream of whitespace until the generation reaches the token limit, - resulting in a long-running and seemingly "stuck" request. Also note that the - message content may be partially cut off if finish_reason="length", which - indicates the generation exceeded max_tokens or the conversation exceeded the - max context length. + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. - seed: If specified, our system will sample deterministically, such that repeated - requests with the same seed and parameters will return the same result. + seed: If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. Determinism is not guaranteed, and you should refer to the + `system_fingerprint` response parameter to monitor changes in the backend. stop: Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence. stream: If set, partial message deltas will be sent. Tokens will be sent as data-only - server-sent events as they become available, with the stream terminated by a - data: [DONE]. [Example code](/docs/text-chat#streaming-a-chat-completion). + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. [Example code](/docs/text-chat#streaming-a-chat-completion). temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both - tool_choice: Controls which (if any) function is called by the model. Specifying a particular - function via a structured object like + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. tools: A list of tools the model may call. Currently, only functions are supported as a tool. 
Use this to provide a list of functions the model may generate JSON inputs @@ -584,8 +391,6 @@ async def create( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], ) diff --git a/src/groq/resources/embeddings.py b/src/groq/resources/embeddings.py index 74f4604..e58980b 100644 --- a/src/groq/resources/embeddings.py +++ b/src/groq/resources/embeddings.py @@ -42,7 +42,7 @@ def create( self, *, input: Union[str, List[str]], - model: str, + model: Union[str, Literal["nomic-embed-text-v1_5"]], encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -107,7 +107,7 @@ async def create( self, *, input: Union[str, List[str]], - model: str, + model: Union[str, Literal["nomic-embed-text-v1_5"]], encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. diff --git a/src/groq/resources/models.py b/src/groq/resources/models.py index c5fff9b..7aada47 100644 --- a/src/groq/resources/models.py +++ b/src/groq/resources/models.py @@ -4,7 +4,7 @@ import httpx -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import ( @@ -17,7 +17,8 @@ from .._base_client import ( make_request_options, ) -from ..types.model_list import ModelList +from ..types.model_deleted import ModelDeleted +from ..types.model_list_response import ModelListResponse __all__ = ["Models", "AsyncModels"] @@ -73,14 +74,14 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ModelList: + ) -> ModelListResponse: """get all available models""" return self._get( "/openai/v1/models", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ModelList, + cast_to=ModelListResponse, ) def delete( @@ -93,7 +94,7 @@ def delete( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: + ) -> ModelDeleted: """ Delete a model @@ -108,13 +109,12 @@ def delete( """ if not model: raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} return self._delete( f"/openai/v1/models/{model}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=NoneType, + cast_to=ModelDeleted, ) @@ -169,14 +169,14 @@ async def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ModelList: + ) -> ModelListResponse: """get all available models""" return await self._get( "/openai/v1/models", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=ModelList, + cast_to=ModelListResponse, ) async def delete( @@ -189,7 +189,7 @@ 
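The embeddings hunks above add the `"nomic-embed-text-v1_5"` literal to the `model` parameter. A hedged sketch of a batch request; the example assumes the OpenAI-compatible `CreateEmbeddingResponse` layout (a `data` list of `Embedding` objects), which is not spelled out in this diff.

```python
from groq import Groq

client = Groq()

response = client.embeddings.create(
    input=["Fast inference matters", "Latency is a feature"],  # input accepts str or List[str]
    model="nomic-embed-text-v1_5",
    encoding_format="float",  # "float" or "base64"
)

# Assumed response shape: response.data is a list of Embedding objects.
for item in response.data:
    print(len(item.embedding))
```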
async def delete( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: + ) -> ModelDeleted: """ Delete a model @@ -204,13 +204,12 @@ async def delete( """ if not model: raise ValueError(f"Expected a non-empty value for `model` but received {model!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} return await self._delete( f"/openai/v1/models/{model}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=NoneType, + cast_to=ModelDeleted, ) diff --git a/src/groq/types/__init__.py b/src/groq/types/__init__.py index 11fb431..bb0aada 100644 --- a/src/groq/types/__init__.py +++ b/src/groq/types/__init__.py @@ -3,7 +3,14 @@ from __future__ import annotations from .model import Model as Model +from .shared import ( + ErrorObject as ErrorObject, + FunctionDefinition as FunctionDefinition, + FunctionParameters as FunctionParameters, +) from .embedding import Embedding as Embedding -from .model_list import ModelList as ModelList +from .model_deleted import ModelDeleted as ModelDeleted +from .completion_usage import CompletionUsage as CompletionUsage +from .model_list_response import ModelListResponse as ModelListResponse from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse diff --git a/src/groq/types/chat/__init__.py b/src/groq/types/chat/__init__.py index fa7a61a..756dbea 100644 --- a/src/groq/types/chat/__init__.py +++ b/src/groq/types/chat/__init__.py @@ -3,4 +3,38 @@ from __future__ import annotations from .chat_completion import ChatCompletion as ChatCompletion +from .chat_completion_role import ChatCompletionRole as ChatCompletionRole +from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam +from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam +from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob +from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall +from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam +from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam +from .chat_completion_user_message_param import ChatCompletionUserMessageParam as ChatCompletionUserMessageParam +from .chat_completion_system_message_param import ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam +from .chat_completion_function_message_param import ( + ChatCompletionFunctionMessageParam as ChatCompletionFunctionMessageParam, +) +from .chat_completion_assistant_message_param import ( + ChatCompletionAssistantMessageParam as ChatCompletionAssistantMessageParam, +) +from .chat_completion_content_part_text_param import ( + ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam, +) +from .chat_completion_message_tool_call_param import ( + ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam, +) +from .chat_completion_named_tool_choice_param import ( + ChatCompletionNamedToolChoiceParam as 
ChatCompletionNamedToolChoiceParam, +) +from .chat_completion_content_part_image_param import ( + ChatCompletionContentPartImageParam as ChatCompletionContentPartImageParam, +) +from .chat_completion_tool_choice_option_param import ( + ChatCompletionToolChoiceOptionParam as ChatCompletionToolChoiceOptionParam, +) +from .chat_completion_function_call_option_param import ( + ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam, +) diff --git a/src/groq/types/chat/chat_completion.py b/src/groq/types/chat/chat_completion.py index 9e36c15..4f8b04b 100644 --- a/src/groq/types/chat/chat_completion.py +++ b/src/groq/types/chat/chat_completion.py @@ -1,103 +1,66 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Optional +from typing_extensions import Literal from ..._models import BaseModel +from ..completion_usage import CompletionUsage +from .chat_completion_message import ChatCompletionMessage +from .chat_completion_token_logprob import ChatCompletionTokenLogprob -__all__ = [ - "ChatCompletion", - "Choice", - "ChoiceLogprobs", - "ChoiceLogprobsContent", - "ChoiceLogprobsContentTopLogprob", - "ChoiceMessage", - "ChoiceMessageToolCall", - "ChoiceMessageToolCallFunction", - "Usage", -] - - -class ChoiceLogprobsContentTopLogprob(BaseModel): - token: Optional[str] = None - - bytes: Optional[List[int]] = None - - logprob: Optional[float] = None - - -class ChoiceLogprobsContent(BaseModel): - token: Optional[str] = None - - bytes: Optional[List[int]] = None - - logprob: Optional[float] = None - - top_logprobs: Optional[List[ChoiceLogprobsContentTopLogprob]] = None +__all__ = ["ChatCompletion", "Choice", "ChoiceLogprobs"] class ChoiceLogprobs(BaseModel): - content: Optional[List[ChoiceLogprobsContent]] = None - - -class ChoiceMessageToolCallFunction(BaseModel): - arguments: Optional[str] = None - - name: Optional[str] = None - - -class ChoiceMessageToolCall(BaseModel): - id: Optional[str] = None - - function: Optional[ChoiceMessageToolCallFunction] = None - - type: Optional[str] = None - - -class ChoiceMessage(BaseModel): - content: str - - role: str - - tool_calls: Optional[List[ChoiceMessageToolCall]] = None + content: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message content tokens with log probability information.""" class Choice(BaseModel): - finish_reason: str - - index: int - - logprobs: ChoiceLogprobs - - message: ChoiceMessage + finish_reason: Literal["stop", "length", "tool_calls", "function_call"] + """The reason the model stopped generating tokens. + This will be `stop` if the model hit a natural stop point or a provided stop + sequence, `length` if the maximum number of tokens specified in the request was + reached, `tool_calls` if the model called a tool, or `function_call` + (deprecated) if the model called a function. 
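With `Choice.finish_reason` now typed as `Literal["stop", "length", "tool_calls", "function_call"]` (see the model above), callers can branch on it directly. A small sketch of dispatching on the finished choice:

```python
from groq import Groq

client = Groq()

completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "Explain the importance of low latency LLMs"}],
    model="llama3-8b-8192",
    max_tokens=64,
)

choice = completion.choices[0]
if choice.finish_reason == "length":
    print("Output was truncated by max_tokens.")
elif choice.finish_reason == "tool_calls":
    # message.tool_calls is Optional[List[ChatCompletionMessageToolCall]].
    for call in choice.message.tool_calls or []:
        print(call.id, call.function.name, call.function.arguments)
else:
    print(choice.message.content)
```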
+ """ -class Usage(BaseModel): - completion_time: Optional[float] = None - - completion_tokens: Optional[int] = None - - prompt_time: Optional[float] = None - - prompt_tokens: Optional[int] = None - - queue_time: Optional[float] = None + index: int + """The index of the choice in the list of choices.""" - total_time: Optional[float] = None + logprobs: Optional[ChoiceLogprobs] = None + """Log probability information for the choice.""" - total_tokens: Optional[int] = None + message: ChatCompletionMessage + """A chat completion message generated by the model.""" class ChatCompletion(BaseModel): + id: str + """A unique identifier for the chat completion.""" + choices: List[Choice] + """A list of chat completion choices. - id: Optional[str] = None + Can be more than one if `n` is greater than 1. + """ - created: Optional[int] = None + created: int + """The Unix timestamp (in seconds) of when the chat completion was created.""" - model: Optional[str] = None + model: str + """The model used for the chat completion.""" - object: Optional[str] = None + object: Literal["chat.completion"] + """The object type, which is always `chat.completion`.""" system_fingerprint: Optional[str] = None + """This fingerprint represents the backend configuration that the model runs with. + + Can be used in conjunction with the `seed` request parameter to understand when + backend changes have been made that might impact determinism. + """ - usage: Optional[Usage] = None + usage: Optional[CompletionUsage] = None + """Usage statistics for the completion request.""" diff --git a/src/groq/types/chat/chat_completion_assistant_message_param.py b/src/groq/types/chat/chat_completion_assistant_message_param.py new file mode 100644 index 0000000..e1e3994 --- /dev/null +++ b/src/groq/types/chat/chat_completion_assistant_message_param.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam + +__all__ = ["ChatCompletionAssistantMessageParam", "FunctionCall"] + + +class FunctionCall(TypedDict, total=False): + arguments: Required[str] + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionAssistantMessageParam(TypedDict, total=False): + role: Required[Literal["assistant"]] + """The role of the messages author, in this case `assistant`.""" + + content: Optional[str] + """The contents of the assistant message. + + Required unless `tool_calls` or `function_call` is specified. + """ + + function_call: FunctionCall + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the + model. + """ + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. 
+ """ + + tool_calls: Iterable[ChatCompletionMessageToolCallParam] + """The tool calls generated by the model, such as function calls.""" diff --git a/src/groq/types/chat/chat_completion_content_part_image_param.py b/src/groq/types/chat/chat_completion_content_part_image_param.py new file mode 100644 index 0000000..c1e0841 --- /dev/null +++ b/src/groq/types/chat/chat_completion_content_part_image_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartImageParam", "ImageURL"] + + +class ImageURL(TypedDict, total=False): + url: Required[str] + """Either a URL of the image or the base64 encoded image data.""" + + detail: Literal["auto", "low", "high"] + """Specifies the detail level of the image.""" + + +class ChatCompletionContentPartImageParam(TypedDict, total=False): + image_url: Required[ImageURL] + + type: Required[Literal["image_url"]] + """The type of the content part.""" diff --git a/src/groq/types/chat/chat_completion_content_part_param.py b/src/groq/types/chat/chat_completion_content_part_param.py new file mode 100644 index 0000000..f9b5f71 --- /dev/null +++ b/src/groq/types/chat/chat_completion_content_part_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam +from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam + +__all__ = ["ChatCompletionContentPartParam"] + +ChatCompletionContentPartParam = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam] diff --git a/src/groq/types/chat/chat_completion_content_part_text_param.py b/src/groq/types/chat/chat_completion_content_part_text_param.py new file mode 100644 index 0000000..a270744 --- /dev/null +++ b/src/groq/types/chat/chat_completion_content_part_text_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartTextParam"] + + +class ChatCompletionContentPartTextParam(TypedDict, total=False): + text: Required[str] + """The text content.""" + + type: Required[Literal["text"]] + """The type of the content part.""" diff --git a/src/groq/types/chat/chat_completion_function_call_option_param.py b/src/groq/types/chat/chat_completion_function_call_option_param.py new file mode 100644 index 0000000..2bc014a --- /dev/null +++ b/src/groq/types/chat/chat_completion_function_call_option_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["ChatCompletionFunctionCallOptionParam"] + + +class ChatCompletionFunctionCallOptionParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" diff --git a/src/groq/types/chat/chat_completion_function_message_param.py b/src/groq/types/chat/chat_completion_function_message_param.py new file mode 100644 index 0000000..5af12bf --- /dev/null +++ b/src/groq/types/chat/chat_completion_function_message_param.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionFunctionMessageParam"] + + +class ChatCompletionFunctionMessageParam(TypedDict, total=False): + content: Required[Optional[str]] + """The contents of the function message.""" + + name: Required[str] + """The name of the function to call.""" + + role: Required[Literal["function"]] + """The role of the messages author, in this case `function`.""" diff --git a/src/groq/types/chat/chat_completion_message.py b/src/groq/types/chat/chat_completion_message.py new file mode 100644 index 0000000..8db7d17 --- /dev/null +++ b/src/groq/types/chat/chat_completion_message.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .chat_completion_message_tool_call import ChatCompletionMessageToolCall + +__all__ = ["ChatCompletionMessage", "FunctionCall"] + + +class FunctionCall(BaseModel): + arguments: str + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: str + """The name of the function to call.""" + + +class ChatCompletionMessage(BaseModel): + content: Optional[str] = None + """The contents of the message.""" + + role: Literal["assistant"] + """The role of the author of this message.""" + + function_call: Optional[FunctionCall] = None + """Deprecated and replaced by `tool_calls`. + + The name and arguments of a function that should be called, as generated by the + model. + """ + + tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None + """The tool calls generated by the model, such as function calls.""" diff --git a/src/groq/types/chat/chat_completion_message_param.py b/src/groq/types/chat/chat_completion_message_param.py new file mode 100644 index 0000000..a3644a5 --- /dev/null +++ b/src/groq/types/chat/chat_completion_message_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
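`ChatCompletionMessageParam` (defined just below) is a union over the role-specific TypedDicts, so a multi-turn history is simply a list mixing those shapes. A short sketch of passing a prior assistant turn back in:

```python
from groq import Groq

client = Groq()

messages = [
    {"role": "system", "content": "You are a terse assistant."},
    {"role": "user", "content": "What is Groq?"},
    {"role": "assistant", "content": "Groq builds fast LLM inference hardware and APIs."},
    {"role": "user", "content": "Why does low latency matter?"},
]

completion = client.chat.completions.create(messages=messages, model="llama3-8b-8192")
print(completion.choices[0].message.content)
```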
+ +from __future__ import annotations + +from typing import Union + +from .chat_completion_tool_message_param import ChatCompletionToolMessageParam +from .chat_completion_user_message_param import ChatCompletionUserMessageParam +from .chat_completion_system_message_param import ChatCompletionSystemMessageParam +from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam +from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam + +__all__ = ["ChatCompletionMessageParam"] + +ChatCompletionMessageParam = Union[ + ChatCompletionSystemMessageParam, + ChatCompletionUserMessageParam, + ChatCompletionAssistantMessageParam, + ChatCompletionToolMessageParam, + ChatCompletionFunctionMessageParam, +] diff --git a/src/groq/types/chat/chat_completion_message_tool_call.py b/src/groq/types/chat/chat_completion_message_tool_call.py new file mode 100644 index 0000000..4fec667 --- /dev/null +++ b/src/groq/types/chat/chat_completion_message_tool_call.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionMessageToolCall", "Function"] + + +class Function(BaseModel): + arguments: str + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: str + """The name of the function to call.""" + + +class ChatCompletionMessageToolCall(BaseModel): + id: str + """The ID of the tool call.""" + + function: Function + """The function that the model called.""" + + type: Literal["function"] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/src/groq/types/chat/chat_completion_message_tool_call_param.py b/src/groq/types/chat/chat_completion_message_tool_call_param.py new file mode 100644 index 0000000..f616c36 --- /dev/null +++ b/src/groq/types/chat/chat_completion_message_tool_call_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionMessageToolCallParam", "Function"] + + +class Function(TypedDict, total=False): + arguments: Required[str] + """ + The arguments to call the function with, as generated by the model in JSON + format. Note that the model does not always generate valid JSON, and may + hallucinate parameters not defined by your function schema. Validate the + arguments in your code before calling your function. + """ + + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionMessageToolCallParam(TypedDict, total=False): + id: Required[str] + """The ID of the tool call.""" + + function: Required[Function] + """The function that the model called.""" + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/src/groq/types/chat/chat_completion_named_tool_choice_param.py b/src/groq/types/chat/chat_completion_named_tool_choice_param.py new file mode 100644 index 0000000..369f8b4 --- /dev/null +++ b/src/groq/types/chat/chat_completion_named_tool_choice_param.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. 
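The named-tool-choice and tool-message params above support a full tool-calling round trip: force a specific function, read the tool call from the response, then answer it with a `tool` message keyed by `tool_call_id`. A hedged sketch under the same assumptions as the earlier tool example (the `get_weather` function and its schema are hypothetical):

```python
import json

from groq import Groq

client = Groq()

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # hypothetical function
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

messages = [{"role": "user", "content": "What's the weather in Toronto?"}]

# Force the named tool via ChatCompletionNamedToolChoiceParam.
first = client.chat.completions.create(
    messages=messages,
    model="llama3-70b-8192",
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)

tool_calls = first.choices[0].message.tool_calls or []
call = tool_calls[0]  # tool_choice forced the call, so one entry is expected
args = json.loads(call.function.arguments)  # the model may emit invalid JSON; validate in real code

# Echo the assistant's tool call, then respond with a tool message tied to its id.
messages.append(
    {
        "role": "assistant",
        "tool_calls": [
            {
                "id": call.id,
                "type": "function",
                "function": {"name": call.function.name, "arguments": call.function.arguments},
            }
        ],
    }
)
messages.append(
    {
        "role": "tool",
        "tool_call_id": call.id,
        "content": json.dumps({"city": args.get("city"), "temp_c": 21}),  # stubbed tool result
    }
)

second = client.chat.completions.create(messages=messages, model="llama3-70b-8192", tools=tools)
print(second.choices[0].message.content)
```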
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionNamedToolChoiceParam", "Function"] + + +class Function(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" + + +class ChatCompletionNamedToolChoiceParam(TypedDict, total=False): + function: Required[Function] + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/src/groq/types/chat/chat_completion_role.py b/src/groq/types/chat/chat_completion_role.py new file mode 100644 index 0000000..1fd8388 --- /dev/null +++ b/src/groq/types/chat/chat_completion_role.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +__all__ = ["ChatCompletionRole"] + +ChatCompletionRole = Literal["system", "user", "assistant", "tool", "function"] diff --git a/src/groq/types/chat/chat_completion_system_message_param.py b/src/groq/types/chat/chat_completion_system_message_param.py new file mode 100644 index 0000000..94bb3f6 --- /dev/null +++ b/src/groq/types/chat/chat_completion_system_message_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionSystemMessageParam"] + + +class ChatCompletionSystemMessageParam(TypedDict, total=False): + content: Required[str] + """The contents of the system message.""" + + role: Required[Literal["system"]] + """The role of the messages author, in this case `system`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ diff --git a/src/groq/types/chat/chat_completion_token_logprob.py b/src/groq/types/chat/chat_completion_token_logprob.py new file mode 100644 index 0000000..c69e258 --- /dev/null +++ b/src/groq/types/chat/chat_completion_token_logprob.py @@ -0,0 +1,57 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel + +__all__ = ["ChatCompletionTokenLogprob", "TopLogprob"] + + +class TopLogprob(BaseModel): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. + """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + +class ChatCompletionTokenLogprob(BaseModel): + token: str + """The token.""" + + bytes: Optional[List[int]] = None + """A list of integers representing the UTF-8 bytes representation of the token. + + Useful in instances where characters are represented by multiple tokens and + their byte representations must be combined to generate the correct text + representation. Can be `null` if there is no bytes representation for the token. 
+ """ + + logprob: float + """The log probability of this token, if it is within the top 20 most likely + tokens. + + Otherwise, the value `-9999.0` is used to signify that the token is very + unlikely. + """ + + top_logprobs: List[TopLogprob] + """List of the most likely tokens and their log probability, at this token + position. + + In rare cases, there may be fewer than the number of requested `top_logprobs` + returned. + """ diff --git a/src/groq/types/chat/chat_completion_tool_choice_option_param.py b/src/groq/types/chat/chat_completion_tool_choice_option_param.py new file mode 100644 index 0000000..9c0ae22 --- /dev/null +++ b/src/groq/types/chat/chat_completion_tool_choice_option_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal + +from .chat_completion_named_tool_choice_param import ChatCompletionNamedToolChoiceParam + +__all__ = ["ChatCompletionToolChoiceOptionParam"] + +ChatCompletionToolChoiceOptionParam = Union[Literal["none", "auto"], ChatCompletionNamedToolChoiceParam] diff --git a/src/groq/types/chat/chat_completion_tool_message_param.py b/src/groq/types/chat/chat_completion_tool_message_param.py new file mode 100644 index 0000000..5c590e0 --- /dev/null +++ b/src/groq/types/chat/chat_completion_tool_message_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionToolMessageParam"] + + +class ChatCompletionToolMessageParam(TypedDict, total=False): + content: Required[str] + """The contents of the tool message.""" + + role: Required[Literal["tool"]] + """The role of the messages author, in this case `tool`.""" + + tool_call_id: Required[str] + """Tool call that this message is responding to.""" diff --git a/src/groq/types/chat/chat_completion_tool_param.py b/src/groq/types/chat/chat_completion_tool_param.py new file mode 100644 index 0000000..0cf6ea7 --- /dev/null +++ b/src/groq/types/chat/chat_completion_tool_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from ...types import shared_params + +__all__ = ["ChatCompletionToolParam"] + + +class ChatCompletionToolParam(TypedDict, total=False): + function: Required[shared_params.FunctionDefinition] + + type: Required[Literal["function"]] + """The type of the tool. Currently, only `function` is supported.""" diff --git a/src/groq/types/chat/chat_completion_user_message_param.py b/src/groq/types/chat/chat_completion_user_message_param.py new file mode 100644 index 0000000..5c15322 --- /dev/null +++ b/src/groq/types/chat/chat_completion_user_message_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_param import ChatCompletionContentPartParam + +__all__ = ["ChatCompletionUserMessageParam"] + + +class ChatCompletionUserMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]] + """The contents of the user message.""" + + role: Required[Literal["user"]] + """The role of the messages author, in this case `user`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. + """ diff --git a/src/groq/types/chat/completion_create_params.py b/src/groq/types/chat/completion_create_params.py index f05c425..9bb5ef4 100644 --- a/src/groq/types/chat/completion_create_params.py +++ b/src/groq/types/chat/completion_create_params.py @@ -5,38 +5,20 @@ from typing import Dict, List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = [ - "CompletionCreateParams", - "Message", - "MessageChatCompletionRequestSystemMessage", - "MessageChatCompletionRequestUserMessage", - "MessageChatCompletionRequestUserMessageContentArrayOfContentPart", - "MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartText", - "MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartImage", - "MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartImageImageURL", - "MessageChatCompletionRequestAssistantMessage", - "MessageChatCompletionRequestAssistantMessageFunctionCall", - "MessageChatCompletionRequestAssistantMessageToolCall", - "MessageChatCompletionRequestAssistantMessageToolCallFunction", - "MessageChatCompletionRequestToolMessage", - "MessageChatCompletionRequestFunctionMessage", - "FunctionCall", - "FunctionCallChatCompletionFunctionCallOption", - "Function", - "ResponseFormat", - "ToolChoice", - "ToolChoiceChatToolChoice", - "ToolChoiceChatToolChoiceFunction", - "Tool", - "ToolFunction", -] +from ...types import shared_params +from .chat_completion_tool_param import ChatCompletionToolParam +from .chat_completion_message_param import ChatCompletionMessageParam +from .chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam +from .chat_completion_function_call_option_param import ChatCompletionFunctionCallOptionParam + +__all__ = ["CompletionCreateParams", "FunctionCall", "Function", "ResponseFormat"] class CompletionCreateParams(TypedDict, total=False): - messages: Required[Iterable[Message]] + messages: Required[Iterable[ChatCompletionMessageParam]] """A list of messages comprising the conversation so far.""" - model: Required[str] + model: Required[Union[str, Literal["gemma-7b-it", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768"]]] """ID of the model to use. For details on which models are compatible with the Chat API, see available @@ -106,22 +88,19 @@ class CompletionCreateParams(TypedDict, total=False): response_format: Optional[ResponseFormat] """An object specifying the format that the model must output. - Setting to `{ "type": "json" }` enables JSON mode, which guarantees the message - the model generates is valid JSON. + Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + message the model generates is valid JSON. 
- Important: when using JSON mode, you must also instruct the model to produce - JSON yourself via a system or user message. Without this, the model may generate - an unending stream of whitespace until the generation reaches the token limit, - resulting in a long-running and seemingly "stuck" request. Also note that the - message content may be partially cut off if finish_reason="length", which - indicates the generation exceeded max_tokens or the conversation exceeded the - max context length. + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. """ seed: Optional[int] """ - If specified, our system will sample deterministically, such that repeated - requests with the same seed and parameters will return the same result. + If specified, our system will make a best effort to sample deterministically, + such that repeated requests with the same `seed` and parameters should return + the same result. Determinism is not guaranteed, and you should refer to the + `system_fingerprint` response parameter to monitor changes in the backend. """ stop: Union[Optional[str], List[str], None] @@ -133,9 +112,10 @@ class CompletionCreateParams(TypedDict, total=False): stream: Optional[bool] """If set, partial message deltas will be sent. - Tokens will be sent as data-only server-sent events as they become available, - with the stream terminated by a data: [DONE]. - [Example code](/docs/text-chat#streaming-a-chat-completion). + Tokens will be sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. [Example code](/docs/text-chat#streaming-a-chat-completion). """ temperature: Optional[float] @@ -146,15 +126,20 @@ class CompletionCreateParams(TypedDict, total=False): this or top_p but not both """ - tool_choice: Optional[ToolChoice] - """Controls which (if any) function is called by the model. - - Specifying a particular function via a structured object like + tool_choice: Optional[ChatCompletionToolChoiceOptionParam] + """ + Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that function. + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. """ - tools: Optional[Iterable[Tool]] + tools: Optional[Iterable[ChatCompletionToolParam]] """A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of @@ -185,185 +170,7 @@ class CompletionCreateParams(TypedDict, total=False): """ -class MessageChatCompletionRequestSystemMessage(TypedDict, total=False): - content: Required[str] - """The contents of the system message.""" - - role: Required[Literal["system"]] - """The role of the messages author, in this case `system`.""" - - name: str - """An optional name for the participant. - - Provides the model information to differentiate between participants of the same - role. 
- """ - - tool_call_id: Optional[str] - - -class MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartText( - TypedDict, total=False -): - text: Required[str] - """The text content.""" - - type: Required[Literal["text"]] - """The type of the content part.""" - - -class MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartImageImageURL( - TypedDict, total=False -): - url: Required[str] - """Either a URL of the image or the base64 encoded image data.""" - - detail: Literal["auto", "low", "high"] - """Specifies the detail level of the image.""" - - -class MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartImage( - TypedDict, total=False -): - image_url: Required[ - MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartImageImageURL - ] - - type: Required[Literal["image_url"]] - """The type of the content part.""" - - -MessageChatCompletionRequestUserMessageContentArrayOfContentPart = Union[ - MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartText, - MessageChatCompletionRequestUserMessageContentArrayOfContentPartChatCompletionRequestMessageContentPartImage, -] - - -class MessageChatCompletionRequestUserMessage(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageChatCompletionRequestUserMessageContentArrayOfContentPart]]] - """The contents of the user message.""" - - role: Required[Literal["user"]] - """The role of the messages author, in this case `user`.""" - - name: Optional[str] - """An optional name for the participant. - - Provides the model information to differentiate between participants of the same - role. - """ - - tool_call_id: Optional[str] - - -class MessageChatCompletionRequestAssistantMessageFunctionCall(TypedDict, total=False): - arguments: Required[str] - """ - The arguments to call the function with, as generated by the model in JSON - format. Note that the model does not always generate valid JSON, and may - hallucinate parameters not defined by your function schema. Validate the - arguments in your code before calling your function. - """ - - name: Required[str] - """The name of the function to call.""" - - -class MessageChatCompletionRequestAssistantMessageToolCallFunction(TypedDict, total=False): - arguments: Required[str] - """ - The arguments to call the function with, as generated by the model in JSON - format. Note that the model does not always generate valid JSON, and may - hallucinate parameters not defined by your function schema. Validate the - arguments in your code before calling your function. - """ - - name: Required[str] - """The name of the function to call.""" - - -class MessageChatCompletionRequestAssistantMessageToolCall(TypedDict, total=False): - id: Required[str] - """The ID of the tool call.""" - - function: Required[MessageChatCompletionRequestAssistantMessageToolCallFunction] - """The function that the model called.""" - - type: Required[Literal["function"]] - """The type of the tool. Currently, only `function` is supported.""" - - -class MessageChatCompletionRequestAssistantMessage(TypedDict, total=False): - role: Required[Literal["assistant"]] - """The role of the messages author, in this case `assistant`.""" - - content: Optional[str] - """The contents of the assistant message. - - Required unless `tool_calls` or `function_call` is specified. 
- """ - - function_call: MessageChatCompletionRequestAssistantMessageFunctionCall - """Deprecated and replaced by `tool_calls`. - - The name and arguments of a function that should be called, as generated by the - model. - """ - - name: str - """An optional name for the participant. - - Provides the model information to differentiate between participants of the same - role. - """ - - tool_call_id: Optional[str] - - tool_calls: Iterable[MessageChatCompletionRequestAssistantMessageToolCall] - """The tool calls generated by the model, such as function calls.""" - - -class MessageChatCompletionRequestToolMessage(TypedDict, total=False): - content: Required[str] - """The contents of the tool message.""" - - role: Required[Literal["tool"]] - """The role of the messages author, in this case `tool`.""" - - tool_call_id: Required[str] - """Tool call that this message is responding to.""" - - name: Optional[str] - - -class MessageChatCompletionRequestFunctionMessage(TypedDict, total=False): - content: Required[Optional[str]] - """The contents of the function message.""" - - name: Required[str] - """The name of the function to call.""" - - role: Required[Literal["function"]] - """The role of the messages author, in this case `function`.""" - - tool_call_id: Optional[str] - - -Message = Union[ - MessageChatCompletionRequestSystemMessage, - MessageChatCompletionRequestUserMessage, - MessageChatCompletionRequestAssistantMessage, - MessageChatCompletionRequestToolMessage, - MessageChatCompletionRequestFunctionMessage, -] - - -class FunctionCallChatCompletionFunctionCallOption(TypedDict, total=False): - name: Required[str] - """The name of the function to call.""" - - -FunctionCall = Union[Literal["none", "auto"], FunctionCallChatCompletionFunctionCallOption] +FunctionCall = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] class Function(TypedDict, total=False): @@ -380,7 +187,7 @@ class Function(TypedDict, total=False): how to call the function. """ - parameters: Dict[str, object] + parameters: shared_params.FunctionParameters """The parameters the functions accepts, described as a JSON Schema object. See the [guide](/docs/guides/text-generation/function-calling) for examples, and @@ -392,32 +199,5 @@ class Function(TypedDict, total=False): class ResponseFormat(TypedDict, total=False): - type: str - - -class ToolChoiceChatToolChoiceFunction(TypedDict, total=False): - name: Required[str] - """The name of the function to call.""" - - -class ToolChoiceChatToolChoice(TypedDict, total=False): - function: Required[ToolChoiceChatToolChoiceFunction] - - type: Required[Literal["function"]] - - -ToolChoice = Union[Literal["none", "auto"], ToolChoiceChatToolChoice] - - -class ToolFunction(TypedDict, total=False): - name: Required[str] - - description: str - - parameters: Dict[str, object] - - -class Tool(TypedDict, total=False): - function: Required[ToolFunction] - - type: Required[Literal["function"]] + type: Literal["text", "json_object"] + """Must be one of `text` or `json_object`.""" diff --git a/src/groq/types/completion_usage.py b/src/groq/types/completion_usage.py new file mode 100644 index 0000000..5ca4ff3 --- /dev/null +++ b/src/groq/types/completion_usage.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional + +from .._models import BaseModel + +__all__ = ["CompletionUsage"] + + +class CompletionUsage(BaseModel): + completion_tokens: int + """Number of tokens in the generated completion.""" + + prompt_tokens: int + """Number of tokens in the prompt.""" + + total_tokens: int + """Total number of tokens used in the request (prompt + completion).""" + + completion_time: Optional[float] = None + """Time spent generating tokens.""" + + prompt_time: Optional[float] = None + """Time spent processing input tokens.""" + + queue_time: Optional[float] = None + """Time the request spent queued.""" + + total_time: Optional[float] = None + """Completion time and prompt time combined.""" diff --git a/src/groq/types/embedding_create_params.py b/src/groq/types/embedding_create_params.py index 57f9371..67a5991 100644 --- a/src/groq/types/embedding_create_params.py +++ b/src/groq/types/embedding_create_params.py @@ -17,7 +17,7 @@ class EmbeddingCreateParams(TypedDict, total=False): cannot be an empty string, and any array must be 2048 dimensions or less. """ - model: Required[str] + model: Required[Union[str, Literal["nomic-embed-text-v1_5"]]] """ID of the model to use.""" encoding_format: Literal["float", "base64"] diff --git a/src/groq/types/model.py b/src/groq/types/model.py index ed49a06..2631ee8 100644 --- a/src/groq/types/model.py +++ b/src/groq/types/model.py @@ -1,6 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing_extensions import Literal from .._models import BaseModel @@ -8,10 +8,14 @@ class Model(BaseModel): - id: Optional[str] = None + id: str + """The model identifier, which can be referenced in the API endpoints.""" - created: Optional[int] = None + created: int + """The Unix timestamp (in seconds) when the model was created.""" - object: Optional[str] = None + object: Literal["model"] + """The object type, which is always "model".""" - owned_by: Optional[str] = None + owned_by: str + """The organization that owns the model.""" diff --git a/src/groq/types/model_deleted.py b/src/groq/types/model_deleted.py new file mode 100644 index 0000000..d9a48bb --- /dev/null +++ b/src/groq/types/model_deleted.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + + +from .._models import BaseModel + +__all__ = ["ModelDeleted"] + + +class ModelDeleted(BaseModel): + id: str + + deleted: bool + + object: str diff --git a/src/groq/types/model_list.py b/src/groq/types/model_list.py deleted file mode 100644 index 8315560..0000000 --- a/src/groq/types/model_list.py +++ /dev/null @@ -1,14 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional - -from .model import Model -from .._models import BaseModel - -__all__ = ["ModelList"] - - -class ModelList(BaseModel): - data: Optional[List[Model]] = None - - object: Optional[str] = None diff --git a/src/groq/types/model_list_response.py b/src/groq/types/model_list_response.py new file mode 100644 index 0000000..8f83544 --- /dev/null +++ b/src/groq/types/model_list_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+ +from typing import List +from typing_extensions import Literal + +from .model import Model +from .._models import BaseModel + +__all__ = ["ModelListResponse"] + + +class ModelListResponse(BaseModel): + data: List[Model] + + object: Literal["list"] diff --git a/src/groq/types/shared/__init__.py b/src/groq/types/shared/__init__.py new file mode 100644 index 0000000..e085744 --- /dev/null +++ b/src/groq/types/shared/__init__.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .error_object import ErrorObject as ErrorObject +from .function_definition import FunctionDefinition as FunctionDefinition +from .function_parameters import FunctionParameters as FunctionParameters diff --git a/src/groq/types/shared/error_object.py b/src/groq/types/shared/error_object.py new file mode 100644 index 0000000..32d7045 --- /dev/null +++ b/src/groq/types/shared/error_object.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["ErrorObject"] + + +class ErrorObject(BaseModel): + code: Optional[str] = None + + message: str + + param: Optional[str] = None + + type: str diff --git a/src/groq/types/shared/function_definition.py b/src/groq/types/shared/function_definition.py new file mode 100644 index 0000000..b452cec --- /dev/null +++ b/src/groq/types/shared/function_definition.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .function_parameters import FunctionParameters + +__all__ = ["FunctionDefinition"] + + +class FunctionDefinition(BaseModel): + name: str + """The name of the function to be called. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: Optional[str] = None + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: Optional[FunctionParameters] = None + """The parameters the functions accepts, described as a JSON Schema object. + + See the [guide](/docs/guides/text-generation/function-calling) for examples, and + the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) + for documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. + """ diff --git a/src/groq/types/shared/function_parameters.py b/src/groq/types/shared/function_parameters.py new file mode 100644 index 0000000..c9524e4 --- /dev/null +++ b/src/groq/types/shared/function_parameters.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict + +__all__ = ["FunctionParameters"] + +FunctionParameters = Dict[str, object] diff --git a/src/groq/types/shared_params/__init__.py b/src/groq/types/shared_params/__init__.py new file mode 100644 index 0000000..ef638cb --- /dev/null +++ b/src/groq/types/shared_params/__init__.py @@ -0,0 +1,4 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .function_definition import FunctionDefinition as FunctionDefinition +from .function_parameters import FunctionParameters as FunctionParameters diff --git a/src/groq/types/shared_params/function_definition.py b/src/groq/types/shared_params/function_definition.py new file mode 100644 index 0000000..843f5c6 --- /dev/null +++ b/src/groq/types/shared_params/function_definition.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from ...types import shared_params + +__all__ = ["FunctionDefinition"] + + +class FunctionDefinition(TypedDict, total=False): + name: Required[str] + """The name of the function to be called. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: str + """ + A description of what the function does, used by the model to choose when and + how to call the function. + """ + + parameters: shared_params.FunctionParameters + """The parameters the functions accepts, described as a JSON Schema object. + + See the [guide](/docs/guides/text-generation/function-calling) for examples, and + the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) + for documentation about the format. + + Omitting `parameters` defines a function with an empty parameter list. + """ diff --git a/src/groq/types/shared_params/function_parameters.py b/src/groq/types/shared_params/function_parameters.py new file mode 100644 index 0000000..5b40efb --- /dev/null +++ b/src/groq/types/shared_params/function_parameters.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict + +__all__ = ["FunctionParameters"] + +FunctionParameters = Dict[str, object] diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 4f0945d..5842d13 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -24,15 +24,7 @@ def test_method_create(self, client: Groq) -> None: { "content": "string", "role": "system", - }, - { - "content": "string", - "role": "system", - }, - { - "content": "string", - "role": "system", - }, + } ], model="string", ) @@ -46,20 +38,7 @@ def test_method_create_with_all_params(self, client: Groq) -> None: "content": "string", "role": "system", "name": "string", - "tool_call_id": "string", - }, - { - "content": "string", - "role": "system", - "name": "string", - "tool_call_id": "string", - }, - { - "content": "string", - "role": "system", - "name": "string", - "tool_call_id": "string", - }, + } ], model="string", frequency_penalty=-2, @@ -69,57 +48,47 @@ def test_method_create_with_all_params(self, client: Groq) -> None: "description": "string", "name": "string", "parameters": {"foo": "bar"}, - }, - { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, + } ], logit_bias={"foo": 0}, logprobs=True, max_tokens=0, n=1, presence_penalty=-2, - response_format={"type": "string"}, + response_format={"type": "json_object"}, seed=0, stop="\n", - stream=False, - temperature=0, + stream=True, + temperature=1, tool_choice="none", tools=[ { + "type": "function", "function": { "description": "string", "name": "string", "parameters": {"foo": "bar"}, }, - 
"type": "function", }, { + "type": "function", "function": { "description": "string", "name": "string", "parameters": {"foo": "bar"}, }, - "type": "function", }, { + "type": "function", "function": { "description": "string", "name": "string", "parameters": {"foo": "bar"}, }, - "type": "function", }, ], top_logprobs=0, - top_p=0, + top_p=1, user="string", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -131,15 +100,7 @@ def test_raw_response_create(self, client: Groq) -> None: { "content": "string", "role": "system", - }, - { - "content": "string", - "role": "system", - }, - { - "content": "string", - "role": "system", - }, + } ], model="string", ) @@ -156,15 +117,7 @@ def test_streaming_response_create(self, client: Groq) -> None: { "content": "string", "role": "system", - }, - { - "content": "string", - "role": "system", - }, - { - "content": "string", - "role": "system", - }, + } ], model="string", ) as response: @@ -187,15 +140,7 @@ async def test_method_create(self, async_client: AsyncGroq) -> None: { "content": "string", "role": "system", - }, - { - "content": "string", - "role": "system", - }, - { - "content": "string", - "role": "system", - }, + } ], model="string", ) @@ -209,20 +154,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N "content": "string", "role": "system", "name": "string", - "tool_call_id": "string", - }, - { - "content": "string", - "role": "system", - "name": "string", - "tool_call_id": "string", - }, - { - "content": "string", - "role": "system", - "name": "string", - "tool_call_id": "string", - }, + } ], model="string", frequency_penalty=-2, @@ -232,57 +164,47 @@ async def test_method_create_with_all_params(self, async_client: AsyncGroq) -> N "description": "string", "name": "string", "parameters": {"foo": "bar"}, - }, - { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, + } ], logit_bias={"foo": 0}, logprobs=True, max_tokens=0, n=1, presence_penalty=-2, - response_format={"type": "string"}, + response_format={"type": "json_object"}, seed=0, stop="\n", - stream=False, - temperature=0, + stream=True, + temperature=1, tool_choice="none", tools=[ { + "type": "function", "function": { "description": "string", "name": "string", "parameters": {"foo": "bar"}, }, - "type": "function", }, { + "type": "function", "function": { "description": "string", "name": "string", "parameters": {"foo": "bar"}, }, - "type": "function", }, { + "type": "function", "function": { "description": "string", "name": "string", "parameters": {"foo": "bar"}, }, - "type": "function", }, ], top_logprobs=0, - top_p=0, + top_p=1, user="string", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -294,15 +216,7 @@ async def test_raw_response_create(self, async_client: AsyncGroq) -> None: { "content": "string", "role": "system", - }, - { - "content": "string", - "role": "system", - }, - { - "content": "string", - "role": "system", - }, + } ], model="string", ) @@ -319,15 +233,7 @@ async def test_streaming_response_create(self, async_client: AsyncGroq) -> None: { "content": "string", "role": "system", - }, - { - "content": "string", - "role": "system", - }, - { - "content": "string", - "role": "system", - }, + } ], model="string", ) as response: diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index e07c0d8..55969db 100644 --- a/tests/api_resources/test_models.py +++ 
b/tests/api_resources/test_models.py @@ -8,7 +8,7 @@ import pytest from groq import Groq, AsyncGroq -from groq.types import Model, ModelList +from groq.types import Model, ModelDeleted, ModelListResponse from tests.utils import assert_matches_type base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") @@ -58,7 +58,7 @@ def test_path_params_retrieve(self, client: Groq) -> None: @parametrize def test_method_list(self, client: Groq) -> None: model = client.models.list() - assert_matches_type(ModelList, model, path=["response"]) + assert_matches_type(ModelListResponse, model, path=["response"]) @parametrize def test_raw_response_list(self, client: Groq) -> None: @@ -67,7 +67,7 @@ def test_raw_response_list(self, client: Groq) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() - assert_matches_type(ModelList, model, path=["response"]) + assert_matches_type(ModelListResponse, model, path=["response"]) @parametrize def test_streaming_response_list(self, client: Groq) -> None: @@ -76,7 +76,7 @@ def test_streaming_response_list(self, client: Groq) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() - assert_matches_type(ModelList, model, path=["response"]) + assert_matches_type(ModelListResponse, model, path=["response"]) assert cast(Any, response.is_closed) is True @@ -85,7 +85,7 @@ def test_method_delete(self, client: Groq) -> None: model = client.models.delete( "string", ) - assert model is None + assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize def test_raw_response_delete(self, client: Groq) -> None: @@ -96,7 +96,7 @@ def test_raw_response_delete(self, client: Groq) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() - assert model is None + assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize def test_streaming_response_delete(self, client: Groq) -> None: @@ -107,7 +107,7 @@ def test_streaming_response_delete(self, client: Groq) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = response.parse() - assert model is None + assert_matches_type(ModelDeleted, model, path=["response"]) assert cast(Any, response.is_closed) is True @@ -163,7 +163,7 @@ async def test_path_params_retrieve(self, async_client: AsyncGroq) -> None: @parametrize async def test_method_list(self, async_client: AsyncGroq) -> None: model = await async_client.models.list() - assert_matches_type(ModelList, model, path=["response"]) + assert_matches_type(ModelListResponse, model, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncGroq) -> None: @@ -172,7 +172,7 @@ async def test_raw_response_list(self, async_client: AsyncGroq) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = await response.parse() - assert_matches_type(ModelList, model, path=["response"]) + assert_matches_type(ModelListResponse, model, path=["response"]) @parametrize async def test_streaming_response_list(self, async_client: AsyncGroq) -> None: @@ -181,7 +181,7 @@ async def test_streaming_response_list(self, async_client: AsyncGroq) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = await response.parse() - assert_matches_type(ModelList, model, path=["response"]) + 
assert_matches_type(ModelListResponse, model, path=["response"]) assert cast(Any, response.is_closed) is True @@ -190,7 +190,7 @@ async def test_method_delete(self, async_client: AsyncGroq) -> None: model = await async_client.models.delete( "string", ) - assert model is None + assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncGroq) -> None: @@ -201,7 +201,7 @@ async def test_raw_response_delete(self, async_client: AsyncGroq) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = await response.parse() - assert model is None + assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize async def test_streaming_response_delete(self, async_client: AsyncGroq) -> None: @@ -212,7 +212,7 @@ async def test_streaming_response_delete(self, async_client: AsyncGroq) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" model = await response.parse() - assert model is None + assert_matches_type(ModelDeleted, model, path=["response"]) assert cast(Any, response.is_closed) is True diff --git a/tests/test_client.py b/tests/test_client.py index aef9164..0983b1d 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -19,7 +19,7 @@ from groq import Groq, AsyncGroq, APIResponseValidationError from groq._models import BaseModel, FinalRequestOptions from groq._constants import RAW_RESPONSE_HEADER -from groq._exceptions import APIStatusError, APITimeoutError, APIResponseValidationError +from groq._exceptions import GroqError, APIStatusError, APITimeoutError, APIResponseValidationError from groq._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options from .utils import update_env @@ -315,6 +315,15 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = Groq(base_url=base_url, api_key=api_key, _strict_response_validation=True) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + assert request.headers.get("Authorization") == f"Bearer {api_key}" + + with pytest.raises(GroqError): + client2 = Groq(base_url=base_url, api_key=None, _strict_response_validation=True) + _ = client2 + def test_default_query_option(self) -> None: client = Groq( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"query_param": "bar"} @@ -700,7 +709,7 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ), ), cast_to=httpx.Response, @@ -730,7 +739,7 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ), ), cast_to=httpx.Response, @@ -1019,6 +1028,15 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = AsyncGroq(base_url=base_url, api_key=api_key, _strict_response_validation=True) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + assert request.headers.get("Authorization") == 
f"Bearer {api_key}" + + with pytest.raises(GroqError): + client2 = AsyncGroq(base_url=base_url, api_key=None, _strict_response_validation=True) + _ = client2 + def test_default_query_option(self) -> None: client = AsyncGroq( base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"query_param": "bar"} @@ -1408,7 +1426,7 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ), ), cast_to=httpx.Response, @@ -1438,7 +1456,7 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) "content": "Explain the importance of low latency LLMs", }, ], - model="mixtral-8x7b-32768", + model="llama3-8b-8192", ), ), cast_to=httpx.Response,