From 21ebc80d7333e881408c5944b4327be1c8c9d16c Mon Sep 17 00:00:00 2001
From: Nick Sullivan
Date: Wed, 26 Jul 2023 13:52:02 -0700
Subject: [PATCH] Refine token size calculation in sidekick function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #67

The previous token size calculation in the sidekick function was overly
simplistic. This commit introduces a more accurate calculation by rounding
the memory token size and adding a new function, calc_response_token_size,
that dynamically calculates the response token size based on the files.
The change also reserves a small buffer for the response and recalculates
the response token size in case the files change. This should lead to more
precise token size management and potentially improve the performance of
the sidekick function. 🚀📊
---
 aicodebot/cli.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/aicodebot/cli.py b/aicodebot/cli.py
index 068d7c1..dea40ea 100644
--- a/aicodebot/cli.py
+++ b/aicodebot/cli.py
@@ -480,12 +480,24 @@ def sidekick(request, verbose, no_files, max_file_tokens, files):  # noqa: PLR09

     # Generate the prompt and set up the model
     prompt = get_prompt("sidekick")
-    memory_token_size = model_token_limit * 0.1
+    memory_token_size = round(model_token_limit * 0.1)

     # Determine the max token size for the response
-    response_token_size = model_token_limit - (
-        memory_token_size + file_token_size + Coder.get_token_length(prompt.template)
-    )
+    def calc_response_token_size(files):
+        file_token_size = 0
+        for file in files:
+            file_token_size += Coder.get_token_length(Path(file).read_text())
+        prompt_token_size = Coder.get_token_length(prompt.template)
+        logger.trace(
+            f"File token size: {file_token_size}, memory token size: {memory_token_size}, "
+            f"prompt token size: {prompt_token_size}, model token limit: {model_token_limit}"
+        )
+        out = model_token_limit - (memory_token_size + file_token_size + prompt_token_size)
+        out = round(out * 0.95)  # Small buffer
+        logger.debug(f"Response max token size: {out}")
+        return out
+
+    response_token_size = calc_response_token_size(files)

     llm = Coder.get_llm(model_name, verbose, response_token_size, streaming=True)
     memory = ConversationTokenBufferMemory(
@@ -571,6 +583,8 @@ def sidekick(request, verbose, no_files, max_file_tokens, files):  # noqa: PLR09
         with Live(Markdown(""), auto_refresh=True) as live:
             callback = RichLiveCallbackHandler(live, bot_style)
             llm.callbacks = [callback]  # a fresh callback handler for each question
+            # Recalculate the response token size in case the files changed
+            llm.max_tokens = calc_response_token_size(files)
             chain.run({"task": human_input, "context": context, "languages": languages})
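
As a rough standalone illustration of the budget arithmetic this patch implements
(a sketch only: get_token_length stands in for aicodebot's Coder.get_token_length,
and this function signature is hypothetical, not the project's actual API):

    from pathlib import Path

    def calc_response_token_size(files, prompt_template, model_token_limit, get_token_length):
        # Reserve roughly 10% of the context window for conversation memory
        memory_token_size = round(model_token_limit * 0.1)
        # Tokens consumed by the attached files and the prompt template
        file_token_size = sum(get_token_length(Path(f).read_text()) for f in files)
        prompt_token_size = get_token_length(prompt_template)
        # Whatever remains is available for the response, minus a 5% safety buffer
        remaining = model_token_limit - (memory_token_size + file_token_size + prompt_token_size)
        return round(remaining * 0.95)

For example, a 4096-token model with no attached files and a 200-token prompt
leaves round((4096 - 410 - 0 - 200) * 0.95) = 3312 tokens for the response.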