fix(commit): fix options for exclude and include
The options for excluding and including files were not working correctly. This commit fixes the issue by registering explicit flags for both options: -e/--exclude and -i/--include (a minimal usage sketch follows the changed-file summary below).
liblaf committed Dec 8, 2023
1 parent 725abf9 commit 47fbe99
Showing 3 changed files with 81 additions and 27 deletions.
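
The core of the change sits in the option declarations of ai_commit_cli/cmd/commit.py. Below is a minimal, illustrative sketch of the corrected declarations; the app wiring and the example output line are hypothetical and not part of the commit. Passing "-e"/"--exclude" and "-i"/"--include" to typer.Option registers both the short and the long flag for each option:

from typing import Annotated, Optional

import typer

app = typer.Typer()


@app.command()
def main(
    # Explicit parameter declarations: each option now accepts a short flag
    # (-e / -i) as well as the long flag (--exclude / --include).
    exclude: Annotated[Optional[list[str]], typer.Option("-e", "--exclude")] = None,
    include: Annotated[Optional[list[str]], typer.Option("-i", "--include")] = None,
) -> None:
    print(f"exclude={exclude} include={include}")


if __name__ == "__main__":
    app()

Because both options are list[str], they can be repeated on the command line, e.g. -e "*.lock" -e "docs/**" -i "src/**" (the patterns here are made-up examples).
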
4 changes: 4 additions & 0 deletions .github/workflows/ci.yaml
@@ -91,6 +91,10 @@ jobs:
uses: actions/download-artifact@v3
with:
path: artifacts
+ - name: Delete Existing Release
+ run: gh release --repo="${{ github.repository }}" delete dev --cleanup-tag
+ env:
+ GH_TOKEN: ${{ github.token }}
- name: Upload Release Assets
uses: softprops/action-gh-release@master
with:
47 changes: 29 additions & 18 deletions ai_commit_cli/cmd/commit.py
@@ -20,8 +20,8 @@ def main(
diff_file: Annotated[
Optional[pathlib.Path], typer.Option(exists=True, dir_okay=False)
] = None,
- exclude: Annotated[Optional[list[str]], typer.Option()] = None,
- include: Annotated[Optional[list[str]], typer.Option()] = None,
+ exclude: Annotated[Optional[list[str]], typer.Option("-e", "--exclude")] = None,
+ include: Annotated[Optional[list[str]], typer.Option("-i", "--include")] = None,
pre_commit: Annotated[bool, typer.Option()] = True,
prompt: Annotated[Optional[str], typer.Option()] = None,
prompt_file: Annotated[
@@ -109,22 +109,7 @@ def main(
{"role": "system", "content": prompt},
{"role": "user", "content": diff},
]
- try:
- num_system_tokens: int = token.num_tokens_from_messages(
- messages=[{"role": "system", "content": prompt}], model=model
- )
- num_user_tokens: int = token.num_tokens_from_messages(
- messages=[{"role": "user", "content": diff}], model=model
- )
- num_tokens: int = num_system_tokens + num_user_tokens
- logging.info(
- "Number of Tokens: %d (System) + %d (User) = %d",
- num_system_tokens,
- num_user_tokens,
- num_tokens,
- )
- except NotImplementedError as e:
- logging.error(e)
+ logging.debug(messages)
stream: openai.Stream[chat.ChatCompletionChunk] = client.chat.completions.create(
messages=messages,
model=model,
@@ -143,6 +128,32 @@ def main(
else:
message += chunk.choices[0].delta.content
live_panel.update(markdown.Markdown(commit.sanitize(message)))
+ try:
+ num_tokens_prompt: int = token.num_tokens_from_string(prompt, model=model)
+ num_tokens_diff: int = token.num_tokens_from_string(diff, model=model)
+ num_tokens_input: int = token.num_tokens_from_messages(messages, model=model)
+ num_tokens_output: int = token.num_tokens_from_string(message, model=model)
+ pricing_input: float
+ pricing_output: float
+ pricing_input, pricing_output = token.pricing(model)
+ logging.info(
+ "Input Tokens: %d = %d (Prompt) + %d (Diff) + %d",
+ num_tokens_input,
+ num_tokens_prompt,
+ num_tokens_diff,
+ num_tokens_input - (num_tokens_prompt + num_tokens_diff),
+ )
+ logging.info("Output Tokens: %d", num_tokens_output)
+ pricing_input *= num_tokens_input
+ pricing_output *= num_tokens_output
+ logging.info(
+ "Pricing: $%f = $%f (Input) + $%f (Output)",
+ pricing_input + pricing_output,
+ pricing_input,
+ pricing_output,
+ )
+ except NotImplementedError as e:
+ logging.error(e)
message = commit.sanitize(message)
confirm: bool = questionary.confirm(
message="Confirm the commit message?"
57 changes: 48 additions & 9 deletions ai_commit_cli/token.py
@@ -1,16 +1,49 @@
+ import logging
from collections.abc import Sequence

import tiktoken
from openai.types import chat


+ def pricing(model: str = "gpt-3.5-turbo-16k") -> tuple[float, float]:
+ if model.startswith("gpt-4-1106-preview"):
+ return 0.01 / 1e3, 0.03 / 1e3
+ elif model.startswith("gpt-4-32k"):
+ return 0.06 / 1e3, 0.12 / 1e3
+ elif model.startswith("gpt-4"):
+ return 0.03 / 1e3, 0.06 / 1e3
+ elif model.startswith("gpt-3.5"):
+ return 0.0010 / 1e3, 0.0020 / 1e3
+ raise NotImplementedError(f"price() is not implemented for model {model}.")
+
+
+ # https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
+ def num_tokens_from_string(string: str, model: str = "gpt-3.5-turbo-16k") -> int:
+ """Returns the number of tokens in a text string."""
+ encoding: tiktoken.Encoding
+ try:
+ encoding = tiktoken.encoding_for_model(model)
+ except KeyError:
+ logging.warning("model not found. Using cl100k_base encoding.")
+ encoding = tiktoken.get_encoding("cl100k_base")
+ num_tokens: int = len(encoding.encode(string))
+ return num_tokens
+
+
# https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
def num_tokens_from_messages(
messages: Sequence[chat.ChatCompletionMessageParam],
model: str = "gpt-3.5-turbo-16k",
) -> int:
"""Return the number of tokens used by a list of messages."""
- encoding: tiktoken.Encoding = tiktoken.encoding_for_model(model)
+ encoding: tiktoken.Encoding
+ tokens_per_message: int
+ tokens_per_name: int
+ try:
+ encoding = tiktoken.encoding_for_model(model)
+ except KeyError:
+ logging.warning("model not found. Using cl100k_base encoding.")
+ encoding = tiktoken.get_encoding("cl100k_base")
if model in {
"gpt-3.5-turbo-0613",
"gpt-3.5-turbo-16k-0613",
@@ -19,21 +52,27 @@ def num_tokens_from_messages(
"gpt-4-0613",
"gpt-4-32k-0613",
}:
- tokens_per_message: int = 3
- tokens_per_name: int = 1
+ tokens_per_message = 3
+ tokens_per_name = 1
elif model == "gpt-3.5-turbo-0301":
- # every message follows <|start|>{role/name}\n{content}<|end|>\n
- tokens_per_message: int = 4
- tokens_per_name: int = -1 # if there's a name, the role is omitted
+ tokens_per_message = (
+ 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
+ )
+ tokens_per_name = -1 # if there's a name, the role is omitted
elif "gpt-3.5-turbo" in model:
+ logging.warning(
+ "gpt-3.5-turbo may update over time. "
+ "Returning num tokens assuming gpt-3.5-turbo-0613."
+ )
return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
elif "gpt-4" in model:
+ logging.warning(
+ "gpt-4 may update over time. Returning num tokens assuming gpt-4-0613."
+ )
return num_tokens_from_messages(messages, model="gpt-4-0613")
else:
raise NotImplementedError(
f"""num_tokens_from_messages() is not implemented for model {model}. "
"See https://github.com/openai/openai-python/blob/main/chatml.md for "
"information on how messages are converted to tokens."""
f"num_tokens_from_messages() is not implemented for model {model}."
)
num_tokens: int = 0
for message in messages:
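
For reference, here is a minimal usage sketch of the new helpers in ai_commit_cli/token.py, mirroring what commit.py above now does once the completion has finished streaming. The placeholder strings are illustrative, not real prompts, diffs, or messages:

from ai_commit_cli import token

model = "gpt-3.5-turbo-16k"
prompt = "<system prompt>"  # placeholder
diff = "<git diff output>"  # placeholder
message = "<generated commit message>"  # placeholder

messages = [
    {"role": "system", "content": prompt},
    {"role": "user", "content": diff},
]

try:
    # Token counts for the request and for the generated message.
    num_tokens_input = token.num_tokens_from_messages(messages, model=model)
    num_tokens_output = token.num_tokens_from_string(message, model=model)
    # pricing() returns USD per token as (input, output).
    price_input, price_output = token.pricing(model)
    cost = price_input * num_tokens_input + price_output * num_tokens_output
    print(f"{num_tokens_input} input + {num_tokens_output} output tokens = ${cost:.6f}")
except NotImplementedError as e:
    # Raised when pricing() or num_tokens_from_messages() does not know the model.
    print(e)

With the gpt-3.5 rates from the pricing() table above ($0.0010 per 1K input tokens, $0.0020 per 1K output tokens), 1,000 input tokens and 200 output tokens would cost $0.0010 + $0.0004 = $0.0014.
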
