Merge pull request #36 from simonw/main
[pull] main from simonw:main
irthomasthomas authored Sep 14, 2024
2 parents 4c6c761 + d654c95 commit b01c627
Showing 5 changed files with 43 additions and 2 deletions.
11 changes: 11 additions & 0 deletions docs/changelog.md
@@ -1,5 +1,16 @@
# Changelog

(v0_16)=
## 0.16 (2024-09-12)

- OpenAI models now use the internal `self.get_key()` mechanism, which means they can be used from Python code in a way that will pick up keys that have been configured using `llm keys set` or the `OPENAI_API_KEY` environment variable. [#552](https://github.com/simonw/llm/issues/552). This code now works correctly:
```python
import llm
print(llm.get_model("gpt-4o-mini").prompt("hi"))
```
- New documented API methods: `llm.get_default_model()`, `llm.set_default_model(alias)`, `llm.get_default_embedding_model()`, `llm.set_default_embedding_model(alias)`. A short example follows this list. [#553](https://github.com/simonw/llm/issues/553)
- Support for OpenAI's new [o1 family](https://openai.com/o1/) of preview models, `llm -m o1-preview "prompt"` and `llm -m o1-mini "prompt"`. These models are currently only available to [tier 5](https://platform.openai.com/docs/guides/rate-limits/usage-tiers?context=tier-five) OpenAI API users, though this may change in the future. [#570](https://github.com/simonw/llm/issues/570)
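A minimal sketch of the new default-model helpers (the `3-small` embedding alias is an assumption; `set_default_model` persists the chosen alias to LLM's configuration):

```python
import llm

# Set and read back the default chat model
llm.set_default_model("gpt-4o-mini")
print(llm.get_default_model())  # gpt-4o-mini

# Same pattern for the default embedding model
# ("3-small" is an assumed alias for text-embedding-3-small)
llm.set_default_embedding_model("3-small")
print(llm.get_default_embedding_model())
```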

(v0_15)=
## 0.15 (2024-07-18)

2 changes: 2 additions & 0 deletions docs/openai-models.md
@@ -41,6 +41,8 @@ OpenAI Chat: gpt-4-turbo-2024-04-09
OpenAI Chat: gpt-4-turbo (aliases: gpt-4-turbo-preview, 4-turbo, 4t)
OpenAI Chat: gpt-4o (aliases: 4o)
OpenAI Chat: gpt-4o-mini (aliases: 4o-mini)
OpenAI Chat: o1-preview
OpenAI Chat: o1-mini
OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
```
<!-- [[[end]]] -->
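The two new entries resolve from the Python API as well; a minimal sketch (the output strings follow the `Chat.__str__` shown in the `openai_models.py` diff below):

```python
import llm

# str(model) renders as "OpenAI Chat: <model_id>"
print(llm.get_model("o1-preview"))  # OpenAI Chat: o1-preview
print(llm.get_model("o1-mini"))     # OpenAI Chat: o1-mini
```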
20 changes: 20 additions & 0 deletions docs/usage.md
@@ -345,6 +345,26 @@ OpenAI Chat: gpt-4o-mini (aliases: 4o-mini)
logit_bias: dict, str
seed: int
json_object: boolean
OpenAI Chat: o1-preview
temperature: float
max_tokens: int
top_p: float
frequency_penalty: float
presence_penalty: float
stop: str
logit_bias: dict, str
seed: int
json_object: boolean
OpenAI Chat: o1-mini
temperature: float
max_tokens: int
top_p: float
frequency_penalty: float
presence_penalty: float
stop: str
logit_bias: dict, str
seed: int
json_object: boolean
OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct)
temperature: float
What sampling temperature to use, between 0 and 2. Higher values like
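The options listed for each model map onto keyword arguments in the Python API; a minimal sketch (the option values are arbitrary):

```python
import llm

# Options from the listing above are passed as keyword arguments to prompt()
model = llm.get_model("gpt-4o-mini")
response = model.prompt("Suggest a name for a cat", temperature=1.2, max_tokens=20)
print(response.text())
```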
10 changes: 9 additions & 1 deletion llm/default_plugins/openai_models.py
@@ -35,6 +35,9 @@ def register_models(register):
# GPT-4o
register(Chat("gpt-4o"), aliases=("4o",))
register(Chat("gpt-4o-mini"), aliases=("4o-mini",))
# o1
register(Chat("o1-preview", can_stream=False, allows_system_prompt=False))
register(Chat("o1-mini", can_stream=False, allows_system_prompt=False))
# The -instruct completion model
register(
Completion("gpt-3.5-turbo-instruct", default_max_tokens=256),
@@ -248,7 +251,6 @@ def validate_logit_bias(cls, logit_bias):
class Chat(Model):
needs_key = "openai"
key_env_var = "OPENAI_API_KEY"
can_stream: bool = True

default_max_tokens = None

@@ -268,6 +270,8 @@ def __init__(
api_version=None,
api_engine=None,
headers=None,
can_stream=True,
allows_system_prompt=True,
):
self.model_id = model_id
self.key = key
@@ -277,12 +281,16 @@ def __init__(
self.api_version = api_version
self.api_engine = api_engine
self.headers = headers
self.can_stream = can_stream
self.allows_system_prompt = allows_system_prompt

def __str__(self):
return "OpenAI Chat: {}".format(self.model_id)

def execute(self, prompt, stream, response, conversation=None):
messages = []
if prompt.system and not self.allows_system_prompt:
raise NotImplementedError("Model does not support system prompts")
current_system = None
if conversation is not None:
for prev_response in conversation.responses:
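A minimal sketch of how the two new constructor flags surface at call time (the system-prompt string is arbitrary; responses in `llm` are lazy, so the check fires when the response is consumed):

```python
import llm

model = llm.get_model("o1-preview")
print(model.can_stream)  # False: o1 models are registered with can_stream=False

# The new allows_system_prompt check in execute() raises once the
# response is evaluated, not when prompt() is called
try:
    model.prompt("hi", system="Be terse").text()
except NotImplementedError as err:
    print(err)  # Model does not support system prompts
```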
2 changes: 1 addition & 1 deletion setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
import os

VERSION = "0.15"
VERSION = "0.16"


def get_long_description():
