openai: better error messages; fix greedy matching (#2327)
* better error message; fix greedy matching

* Update lm_eval/models/openai_completions.py

Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>

* Update lm_eval/models/openai_completions.py

Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>

* pre-commit

---------

Co-authored-by: Hailey Schoelkopf <65563625+haileyschoelkopf@users.noreply.github.com>
baberabb and haileyschoelkopf authored Sep 26, 2024
1 parent 00f5537 commit 1bc6c93
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions lm_eval/models/openai_completions.py
@@ -69,11 +69,11 @@ def parse_logprobs(
             for choice, ctxlen in zip(out["choices"], ctxlens):
                 assert ctxlen > 0, "Context length must be greater than 0"
                 logprobs = sum(choice["logprobs"]["token_logprobs"][ctxlen:-1])
-                tokens = choice["logprobs"]["token_logprobs"][ctxlen:-1]
+                tokens_logprobs = choice["logprobs"]["token_logprobs"][ctxlen:-1]
                 top_logprobs = choice["logprobs"]["top_logprobs"][ctxlen:-1]
                 is_greedy = True
-                for tok, top in zip(tokens, top_logprobs):
-                    if tok != max(top, key=top.get):
+                for tok, top in zip(tokens_logprobs, top_logprobs):
+                    if tok != max(top.values()):
                         is_greedy = False
                         break
                 res.append((logprobs, is_greedy))
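
In the old code, `tok` held a token logprob (a float) while `max(top, key=top.get)` returned the highest-probability token string, so the comparison could never match and `is_greedy` came back `False` for every request. The fix compares the chosen token's logprob against the largest logprob in the corresponding `top_logprobs` dict. A minimal sketch of the fixed check on a hand-built payload (the token strings and logprob values are illustrative, not API output):

    # Illustrative values only: a choice shaped like an OpenAI completions
    # `logprobs` payload, with one prompt token followed by generated tokens.
    choice = {
        "logprobs": {
            "token_logprobs": [None, -0.1, -2.3, -0.05],   # logprob of each returned token
            "top_logprobs": [                              # candidate token -> logprob, per position
                None,
                {" the": -0.1, " a": -1.9},    # chosen logprob equals the max -> greedy step
                {" cat": -0.4, " dog": -2.3},  # chosen logprob is below the max -> not greedy
                {" sat": -0.05, " ran": -3.0},
            ],
        }
    }

    ctxlen = 1  # treat the first returned token as prompt context
    tokens_logprobs = choice["logprobs"]["token_logprobs"][ctxlen:-1]
    top_logprobs = choice["logprobs"]["top_logprobs"][ctxlen:-1]

    is_greedy = True
    for tok, top in zip(tokens_logprobs, top_logprobs):
        if tok != max(top.values()):  # compare a logprob to a logprob, not to a token string
            is_greedy = False
            break

    print(is_greedy)  # False: the second scored token was not the argmax choice
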
@@ -190,14 +190,18 @@ def api_key(self):
         key = os.environ.get("OPENAI_API_KEY", None)
         if key is None:
             raise ValueError(
-                "API key not found. Please set the OPENAI_API_KEY environment variable."
+                "API key not found. Please set the `OPENAI_API_KEY` environment variable."
             )
         return key

     def loglikelihood(self, requests, **kwargs):
         assert (
-            self.model != "gpt-3.5-turbo"
-        ), "Loglikelihood is not supported for gpt-3.5-turbo"
+            self.model
+            in [
+                "babbage-002",
+                "davinci-002",
+            ]
+        ), f"Prompt loglikelihoods are only supported by OpenAI's API for {['babbage-002', 'davinci-002']}."
         return super().loglikelihood(requests, **kwargs)

     def chat_template(self, chat_template: Union[bool, str] = False) -> Optional[str]:
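
The guard flips from a blacklist (`gpt-3.5-turbo`) to a whitelist: per the new error message, `babbage-002` and `davinci-002` are the only models for which OpenAI's completions API still returns the prompt logprobs that loglikelihood scoring needs. A minimal sketch of that whitelist behaviour, using a stand-in helper rather than the harness class:

    # Stand-in sketch, not harness code: reject loglikelihood requests for models
    # outside the whitelist, mirroring the assertion added in this commit.
    SUPPORTED_LOGPROB_MODELS = ["babbage-002", "davinci-002"]

    def check_loglikelihood_support(model: str) -> None:
        assert model in SUPPORTED_LOGPROB_MODELS, (
            f"Prompt loglikelihoods are only supported by OpenAI's API for "
            f"{SUPPORTED_LOGPROB_MODELS}."
        )

    check_loglikelihood_support("davinci-002")        # passes silently
    try:
        check_loglikelihood_support("gpt-3.5-turbo")  # the model the old guard singled out
    except AssertionError as err:
        print(err)
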
@@ -226,6 +230,11 @@ def api_key(self):
         key = os.environ.get("OPENAI_API_KEY", None)
         if key is None:
             raise ValueError(
-                "API key not found. Please set the OPENAI_API_KEY environment variable."
+                "API key not found. Please set the `OPENAI_API_KEY` environment variable."
             )
         return key
+
+    def loglikelihood(self, requests, **kwargs):
+        raise NotImplementedError(
+            "Loglikelihood (and therefore `multiple_choice`-type tasks) is not supported for chat completions as OpenAI does not provide prompt logprobs. See https://github.com/EleutherAI/lm-evaluation-harness/issues/942#issuecomment-1777836312 or https://github.com/EleutherAI/lm-evaluation-harness/issues/1196 for more background on this limitation."
+        )
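
For background on the new `NotImplementedError`: a chat completions response only carries logprobs for the tokens the model generates, never for the prompt, so there is nothing to sum for a context/continuation pair. A rough, hand-written comparison of the two payload shapes (illustrative only, not captured API responses):

    # Illustrative shapes only -- hand-written, not real API output.

    # Legacy completions (with prompt echoing) score prompt tokens as well, which
    # is what loglikelihood needs: sum the continuation's logprobs given the context.
    completions_logprobs = {
        "tokens": ["The", " cat", " sat"],
        "token_logprobs": [None, -1.2, -0.3],  # prompt tokens get scored
        "top_logprobs": [None, {" cat": -1.2}, {" sat": -0.3}],
    }

    # Chat completions only score what the model generated; the prompt never
    # appears here, so a prompt loglikelihood cannot be reconstructed.
    chat_logprobs = {
        "content": [
            {"token": "Hello", "logprob": -0.01, "top_logprobs": []},
        ]
    }
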
