From 21ebc80d7333e881408c5944b4327be1c8c9d16c Mon Sep 17 00:00:00 2001
From: Nick Sullivan
Date: Wed, 26 Jul 2023 13:52:02 -0700
Subject: [PATCH] Refine token size calculation in sidekick function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #67

The previous token size calculation in the sidekick function was overly
simplistic. This commit introduces a more accurate calculation by rounding
the memory token size and adding a new function, calc_response_token_size,
that dynamically calculates the response token size based on the files.
The change also reserves a small buffer for the response and recalculates
the response token size in case the files change. This should lead to more
precise token size management and potentially improve the performance of
the sidekick function. 🚀📊
---
 aicodebot/cli.py | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/aicodebot/cli.py b/aicodebot/cli.py
index 068d7c1..dea40ea 100644
--- a/aicodebot/cli.py
+++ b/aicodebot/cli.py
@@ -480,12 +480,24 @@ def sidekick(request, verbose, no_files, max_file_tokens, files):  # noqa: PLR09

     # Generate the prompt and set up the model
     prompt = get_prompt("sidekick")
-    memory_token_size = model_token_limit * 0.1
+    memory_token_size = round(model_token_limit * 0.1)

     # Determine the max token size for the response
-    response_token_size = model_token_limit - (
-        memory_token_size + file_token_size + Coder.get_token_length(prompt.template)
-    )
+    def calc_response_token_size(files):
+        file_token_size = 0
+        for file in files:
+            file_token_size += Coder.get_token_length(Path(file).read_text())
+        prompt_token_size = Coder.get_token_length(prompt.template)
+        logger.trace(
+            f"File token size: {file_token_size}, memory token size: {memory_token_size}, "
+            f"prompt token size: {prompt_token_size}, model token limit: {model_token_limit}"
+        )
+        out = model_token_limit - (memory_token_size + file_token_size + prompt_token_size)
+        out = round(out * 0.95)  # Small buffer
+        logger.debug(f"Response max token size: {out}")
+        return out
+
+    response_token_size = calc_response_token_size(files)

     llm = Coder.get_llm(model_name, verbose, response_token_size, streaming=True)
     memory = ConversationTokenBufferMemory(
@@ -571,6 +583,8 @@ def sidekick(request, verbose, no_files, max_file_tokens, files):  # noqa: PLR09
         with Live(Markdown(""), auto_refresh=True) as live:
             callback = RichLiveCallbackHandler(live, bot_style)
             llm.callbacks = [callback]  # a fresh callback handler for each question
+            # Recalculate the response token size in case the files changed
+            llm.max_tokens = calc_response_token_size(files)
             chain.run({"task": human_input, "context": context, "languages": languages})
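
As a rough standalone illustration of the budget arithmetic this patch implements
(a sketch only: get_token_length stands in for aicodebot's Coder.get_token_length,
and this function signature is hypothetical, not the project's actual API):

    from pathlib import Path

    def calc_response_token_size(files, prompt_template, model_token_limit, get_token_length):
        # Reserve roughly 10% of the context window for conversation memory
        memory_token_size = round(model_token_limit * 0.1)
        # Tokens consumed by the attached files and the prompt template
        file_token_size = sum(get_token_length(Path(f).read_text()) for f in files)
        prompt_token_size = get_token_length(prompt_template)
        # Whatever remains is available for the response, minus a 5% safety buffer
        remaining = model_token_limit - (memory_token_size + file_token_size + prompt_token_size)
        return round(remaining * 0.95)

For example, a 4096-token model with no attached files and a 200-token prompt
leaves round((4096 - 410 - 0 - 200) * 0.95) = 3312 tokens for the response.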