diff --git a/examples/basic/execute.py b/examples/basic/execute.py index dac82cbb..a24c2930 100644 --- a/examples/basic/execute.py +++ b/examples/basic/execute.py @@ -2,4 +2,10 @@ if __name__ == "__main__": test_dir = ".kaizen/tests" - pytest.main(["--timeout=60", "-v", test_dir, ]) + pytest.main( + [ + "--timeout=60", + "-v", + test_dir, + ] + ) diff --git a/examples/work_summarizer/main.py b/examples/work_summarizer/main.py new file mode 100644 index 00000000..9def25d2 --- /dev/null +++ b/examples/work_summarizer/main.py @@ -0,0 +1,56 @@ +from kaizen.reviewer.work_summarizer import WorkSummaryGenerator +import requests +from datetime import datetime, timedelta, timezone + +# GitHub repository information +GITHUB_OWNER = "Cloud-Code-AI" +GITHUB_REPO_NAME = "kaizen" + +# Get the current date and calculate the date 14 days ago +current_date = datetime.now(timezone.utc).date() +since_date = current_date - timedelta(days=14) + +# Convert the date to ISO format +since_date_iso = since_date.isoformat() + +# GitHub API endpoint for getting commits +commits_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/commits" + +# Add query parameters for the since date +params = {"since": since_date_iso} + +# Make the API request +commits_response = requests.get(commits_url, params=params) + +# Check if the request was successful +if commits_response.status_code != 200: + print("ERROR: Could not get GitHub commits") + exit(1) + +commits = commits_response.json() + +# Get the SHA hashes of the first and last commits +first_commit_sha = commits[0]["sha"] +last_commit_sha = commits[-1]["sha"] + +headers = {"Accept": "application/vnd.github.v3+json"} + +# Get the diff between the first and last commits +diff_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/compare/{last_commit_sha}...{first_commit_sha}" +diff_response = requests.get(diff_url, headers=headers) +diff_data = diff_response.json() + +# Extract file diffs +file_diffs = [] +for 
file_dict in diff_data["files"]: + if "patch" in file_dict: + file_diffs.append( + { + "file": file_dict["filename"], + "patch": file_dict["patch"], + "status": file_dict["status"], + } + ) + +work_summary_generator = WorkSummaryGenerator() +print(work_summary_generator.generate_work_summaries(diff_file_data=file_diffs)) diff --git a/kaizen/helpers/output.py b/kaizen/helpers/output.py index 7af6738f..aed90cee 100644 --- a/kaizen/helpers/output.py +++ b/kaizen/helpers/output.py @@ -84,14 +84,14 @@ def get_web_html(url): for svg in soup.find_all("svg"): svg.decompose() - + # Delete each comment for comment in soup.find_all(text=lambda text: isinstance(text, Comment)): comment.extract() - - for style_block in soup.find_all('style'): + + for style_block in soup.find_all("style"): style_block.decompose() - + pretty_html = soup.prettify() return pretty_html diff --git a/kaizen/helpers/parser.py b/kaizen/helpers/parser.py index 7469fbbb..01377c5d 100644 --- a/kaizen/helpers/parser.py +++ b/kaizen/helpers/parser.py @@ -1,11 +1,7 @@ import json import re -EXCLUDED_FILETYPES = [ - "json", - "css", - "xml" -] +EXCLUDED_FILETYPES = ["json", "css", "xml"] def extract_json(text): diff --git a/kaizen/llms/prompts.py b/kaizen/llms/prompts.py index bc61c42e..e2db71e9 100644 --- a/kaizen/llms/prompts.py +++ b/kaizen/llms/prompts.py @@ -152,3 +152,34 @@ URL: {URL} ```{WEB_CONTENT}``` """ + +WORK_SUMMARY_SYSTEM_PROMPT = """ +You are an AI assistant that explains technical code changes to non-technical audiences in a user-friendly manner. When presented with a git diff: + +1. Analyze and identify key changes (features, bug fixes, optimizations, refactoring). +2. Break down into sections discussing changes to specific code areas/files. +3. Provide plain language overviews explaining purpose and goals of the changes. +4. Avoid excessive jargon, use simple language. +5. Highlight impacts on user experience or overall system, if applicable. +6. 
Use examples and analogies to aid understanding. +7. Maintain consistent, easily readable tone and structure. +8. Rely only on the provided diff, do not open external resources. + +Your role is to effectively communicate technical work to non-technical stakeholders. +""" + +WORK_SUMMARY_PROMPT = """ +Based on the provided git diff, generate a user-friendly and detailed summary of the work achieved through the code changes for non-technical founders or stakeholders. + +Guidelines: + +1. Provide a high-level overview explaining the general purpose or goal. +2. Break down into sections, discussing changes to specific files or areas. +3. Explain changes in plain language, avoiding technical jargon. +4. Highlight new features, improvements, bug fixes, or optimizations. +5. Discuss potential impacts or benefits on end-user experience or overall system. +6. Use examples, analogies, or illustrations to aid understanding. +7. Maintain consistent tone and readable structure throughout. + +PATCH DATA: {PATCH_DATA} +""" diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py index 67f78a2a..5aea11ae 100644 --- a/kaizen/llms/provider.py +++ b/kaizen/llms/provider.py @@ -40,8 +40,19 @@ def chat_completion(self, prompt, user: str = None): ) return response["choices"][0]["message"]["content"] - def is_inside_token_limit(self, PROMPT, percetage=0.7): + def is_inside_token_limit(self, PROMPT, percentage=0.7): messages = [{"user": "role", "content": PROMPT}] - if litellm.token_counter(model=self.model, messages=messages) > litellm.get_max_tokens(self.model) * percetage: + if ( + litellm.token_counter(model=self.model, messages=messages) + > litellm.get_max_tokens(self.model) * percentage + ): return False return True + + def available_tokens(self, message, percentage=0.8): + return litellm.get_max_tokens(self.model) * percentage - litellm.token_counter( + self.model, message + ) + + def get_token_count(self, message): + return litellm.token_counter(self.model, message) diff 
--git a/kaizen/reviewer/code_review.py b/kaizen/reviewer/code_review.py index b08bb66c..e6cc1ed3 100644 --- a/kaizen/reviewer/code_review.py +++ b/kaizen/reviewer/code_review.py @@ -23,7 +23,7 @@ def review_pull_request( pull_request_files: List[Dict], user: Optional[str] = None, ): - + # If diff_text is smaller than 70% of model token prompt = CODE_REVIEW_PROMPT.format( PULL_REQUEST_TITLE=pull_request_title, @@ -43,7 +43,7 @@ def review_pull_request( for file in pull_request_files: patch_details = file["patch"] filename = file["filename"] - if filename.split('.')[-1] not in parser.EXCLUDED_FILETYPES: + if filename.split(".")[-1] not in parser.EXCLUDED_FILETYPES: prompt = FILE_CODE_REVIEW_PROMPT.format( PULL_REQUEST_TITLE=pull_request_title, PULL_REQUEST_DESC=pull_request_desc, diff --git a/kaizen/reviewer/work_summarizer.py b/kaizen/reviewer/work_summarizer.py new file mode 100644 index 00000000..3242ca23 --- /dev/null +++ b/kaizen/reviewer/work_summarizer.py @@ -0,0 +1,46 @@ +from typing import Optional, List, Dict +from kaizen.llms.provider import LLMProvider +from kaizen.llms.prompts import ( + WORK_SUMMARY_PROMPT, + WORK_SUMMARY_SYSTEM_PROMPT, +) +import logging + + +class WorkSummaryGenerator: + def __init__(self): + self.logger = logging.getLogger(__name__) + self.provider = LLMProvider(system_prompt=WORK_SUMMARY_SYSTEM_PROMPT) + + def generate_work_summaries( + self, + diff_file_data: List[Dict], + user: Optional[str] = None, + ): + available_tokens = self.provider.available_tokens(WORK_SUMMARY_PROMPT) + summaries = [] + combined_diff_data = "" + for file_dict in diff_file_data: + temp_prompt = combined_diff_data + temp_prompt += f"""\n---->\nFile Name: {file_dict["file"]}\nPatch: {file_dict["patch"]}\n Status: {file_dict["status"]}""" + if available_tokens - self.provider.get_token_count(temp_prompt) > 0: + combined_diff_data = temp_prompt + continue + + # Process the prompt + prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data) + response = 
self.provider.chat_completion(prompt, user=user) + summaries.append(response) + # Start the next batch with the current file's entry so it is not dropped + combined_diff_data = temp_prompt[len(combined_diff_data):] + + # Summarize any diff data left over after the loop + if combined_diff_data: + prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data) + response = self.provider.chat_completion(prompt, user=user) + summaries.append(response) + + if len(summaries) > 1: + # TODO Merge summaries + pass + + return summaries[0] if summaries else ""