From 5dff392644fe999b00d3d9ce3f8d17b789786a70 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Fri, 10 May 2024 09:55:24 -0400 Subject: [PATCH 1/2] feat: Added basic sample for work summary --- examples/work_summarizer/main.py | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 examples/work_summarizer/main.py diff --git a/examples/work_summarizer/main.py b/examples/work_summarizer/main.py new file mode 100644 index 00000000..d3fd53b9 --- /dev/null +++ b/examples/work_summarizer/main.py @@ -0,0 +1,65 @@ +# from kaizen.reviewer.work_summarizer import WorkSummarizer +import requests +import json +from datetime import datetime, timedelta, timezone + +# Replace with the owner and repository name +OWNER = 'Cloud-Code-AI' +REPO_NAME = 'kaizen' + +# Get the current date and calculate the date 5 days ago +today = datetime.now(timezone.utc).date() +week_ago = today - timedelta(days=14) + +# Convert the dates to ISO format +since = week_ago.isoformat() + +# GitHub API endpoint for getting commits +url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/commits" + +# Add query parameters for the since date +params = { + 'since': since +} + +# Make the API request +response = requests.get(url, params=params) + +# Check if the request was successful +if response.status_code != 200: + # Parse the JSON data + print("ERROR: Couldnt get github commits") + +commits = response.json() + +# print(commits[0]) + +# Get the SHA hashes of the two commits +commit1_sha = commits[0]["sha"] +commit2_sha = commits[-1]["sha"] + +headers = { + # "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json" +} +# print(json.dumps(commits)) + +# Get the diff between the two commits +diff_url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/compare/{commit2_sha}...{commit1_sha}" +response = requests.get(diff_url, headers=headers) +diff_data = response.json() + +print(diff_url) +# Print the diff +file_diffs = [] +for file in diff_data["files"]: + if "patch" in file: + file_diffs.append( + { + "file": file['filename'], + "patch": file["patch"], + "status": file["status"] + } + ) + +print(f"Diff Files: {json.dumps(file_diffs)}") \ No newline at end of file From 69a8e8c7fafdc61c1e9391e21efefc47de20c1c2 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Fri, 10 May 2024 11:33:44 -0400 Subject: [PATCH 2/2] feat: Added basic work summarizer --- examples/basic/execute.py | 8 +++- examples/work_summarizer/main.py | 73 +++++++++++++----------------- kaizen/helpers/output.py | 8 ++-- kaizen/helpers/parser.py | 6 +-- kaizen/llms/prompts.py | 31 +++++++++++++ kaizen/llms/provider.py | 15 +++++- kaizen/reviewer/code_review.py | 4 +- kaizen/reviewer/work_summarizer.py | 40 ++++++++++++++++ 8 files changed, 130 insertions(+), 55 deletions(-) create mode 100644 kaizen/reviewer/work_summarizer.py diff --git a/examples/basic/execute.py b/examples/basic/execute.py index dac82cbb..a24c2930 100644 --- a/examples/basic/execute.py +++ b/examples/basic/execute.py @@ -2,4 +2,10 @@ if __name__ == "__main__": test_dir = ".kaizen/tests" - pytest.main(["--timeout=60", "-v", test_dir, ]) + pytest.main( + [ + "--timeout=60", + "-v", + test_dir, + ] + ) diff --git a/examples/work_summarizer/main.py b/examples/work_summarizer/main.py index d3fd53b9..9def25d2 100644 --- a/examples/work_summarizer/main.py +++ b/examples/work_summarizer/main.py @@ -1,65 +1,56 @@ -# from kaizen.reviewer.work_summarizer import WorkSummarizer +from kaizen.reviewer.work_summarizer import WorkSummaryGenerator 
import requests -import json from datetime import datetime, timedelta, timezone -# Replace with the owner and repository name -OWNER = 'Cloud-Code-AI' -REPO_NAME = 'kaizen' +# GitHub repository information +GITHUB_OWNER = "Cloud-Code-AI" +GITHUB_REPO_NAME = "kaizen" -# Get the current date and calculate the date 5 days ago -today = datetime.now(timezone.utc).date() -week_ago = today - timedelta(days=14) +# Get the current date and calculate the date 14 days ago +current_date = datetime.now(timezone.utc).date() +since_date = current_date - timedelta(days=14) -# Convert the dates to ISO format -since = week_ago.isoformat() +# Convert the date to ISO format +since_date_iso = since_date.isoformat() # GitHub API endpoint for getting commits -url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/commits" +commits_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/commits" # Add query parameters for the since date -params = { - 'since': since -} +params = {"since": since_date_iso} # Make the API request -response = requests.get(url, params=params) +commits_response = requests.get(commits_url, params=params) # Check if the request was successful -if response.status_code != 200: - # Parse the JSON data - print("ERROR: Couldnt get github commits") +if commits_response.status_code != 200: + print("ERROR: Could not get GitHub commits") + exit(1) -commits = response.json() +commits = commits_response.json() -# print(commits[0]) +# Get the SHA hashes of the first and last commits +first_commit_sha = commits[0]["sha"] +last_commit_sha = commits[-1]["sha"] -# Get the SHA hashes of the two commits -commit1_sha = commits[0]["sha"] -commit2_sha = commits[-1]["sha"] +headers = {"Accept": "application/vnd.github.v3+json"} -headers = { - # "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json" -} -# print(json.dumps(commits)) +# Get the diff between the first and last commits +diff_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/compare/{last_commit_sha}...{first_commit_sha}" +diff_response = requests.get(diff_url, headers=headers) +diff_data = diff_response.json() -# Get the diff between the two commits -diff_url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/compare/{commit2_sha}...{commit1_sha}" -response = requests.get(diff_url, headers=headers) -diff_data = response.json() - -print(diff_url) -# Print the diff +# Extract file diffs file_diffs = [] -for file in diff_data["files"]: - if "patch" in file: +for file_dict in diff_data["files"]: + if "patch" in file_dict: file_diffs.append( { - "file": file['filename'], - "patch": file["patch"], - "status": file["status"] + "file": file_dict["filename"], + "patch": file_dict["patch"], + "status": file_dict["status"], } ) -print(f"Diff Files: {json.dumps(file_diffs)}") \ No newline at end of file +work_summary_generator = WorkSummaryGenerator() +print(work_summary_generator.generate_work_summaries(diff_file_data=file_diffs)) diff --git a/kaizen/helpers/output.py b/kaizen/helpers/output.py index 37b5157f..122f52f9 100644 --- a/kaizen/helpers/output.py +++ b/kaizen/helpers/output.py @@ -82,14 +82,14 @@ def get_web_html(url): for svg in soup.find_all("svg"): svg.decompose() - + # Delete each comment for comment in soup.find_all(text=lambda text: isinstance(text, Comment)): comment.extract() - - for style_block in soup.find_all('style'): + + for style_block in soup.find_all("style"): style_block.decompose() - + pretty_html = soup.prettify() return pretty_html diff --git 
a/kaizen/helpers/parser.py b/kaizen/helpers/parser.py index 7469fbbb..01377c5d 100644 --- a/kaizen/helpers/parser.py +++ b/kaizen/helpers/parser.py @@ -1,11 +1,7 @@ import json import re -EXCLUDED_FILETYPES = [ - "json", - "css", - "xml" -] +EXCLUDED_FILETYPES = ["json", "css", "xml"] def extract_json(text): diff --git a/kaizen/llms/prompts.py b/kaizen/llms/prompts.py index bc61c42e..e2db71e9 100644 --- a/kaizen/llms/prompts.py +++ b/kaizen/llms/prompts.py @@ -152,3 +152,34 @@ URL: {URL} ```{WEB_CONTENT}``` """ + +WORK_SUMMARY_SYSTEM_PROMPT = """ +You are an AI assistant that explains technical code changes to non-technical audiences in a user-friendly manner. When presented with a git diff: + +1. Analyze and identify key changes (features, bug fixes, optimizations, refactoring). +2. Break down into sections discussing changes to specific code areas/files. +3. Provide plain language overviews explaining purpose and goals of the changes. +4. Avoid excessive jargon, use simple language. +5. Highlight impacts on user experience or overall system, if applicable. +6. Use examples and analogies to aid understanding. +7. Maintain consistent, easily readable tone and structure. +8. Rely only on the provided diff, do not open external resources. + +Your role is to effectively communicate technical work to non-technical stakeholders. +""" + +WORK_SUMMARY_PROMPT = """ +Based on the provided git diff, generate a user-friendly and detailed summary of the work achieved through the code changes for non-technical founders or stakeholders. + +Guidelines: + +1. Provide a high-level overview explaining the general purpose or goal. +2. Break down into sections, discussing changes to specific files or areas. +3. Explain changes in plain language, avoiding technical jargon. +4. Highlight new features, improvements, bug fixes, or optimizations. +5. Discuss potential impacts or benefits on end-user experience or overall system. +6. Use examples, analogies, or illustrations to aid understanding. +7. Maintain consistent tone and readable structure throughout. 
+ +PATCH DATA: {PATCH_DATA} +""" diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py index 67f78a2a..5aea11ae 100644 --- a/kaizen/llms/provider.py +++ b/kaizen/llms/provider.py @@ -40,8 +40,19 @@ def chat_completion(self, prompt, user: str = None): ) return response["choices"][0]["message"]["content"] - def is_inside_token_limit(self, PROMPT, percetage=0.7): + def is_inside_token_limit(self, PROMPT, percentage=0.7): messages = [{"user": "role", "content": PROMPT}] - if litellm.token_counter(model=self.model, messages=messages) > litellm.get_max_tokens(self.model) * percetage: + if ( + litellm.token_counter(model=self.model, messages=messages) + > litellm.get_max_tokens(self.model) * percentage + ): return False return True + + def available_tokens(self, message, percentage=0.8): + return litellm.get_max_tokens(self.model) * percentage - litellm.token_counter( + self.model, message + ) + + def get_token_count(self, message): + return litellm.token_counter(self.model, message) diff --git a/kaizen/reviewer/code_review.py b/kaizen/reviewer/code_review.py index b08bb66c..e6cc1ed3 100644 --- a/kaizen/reviewer/code_review.py +++ b/kaizen/reviewer/code_review.py @@ -23,7 +23,7 @@ def review_pull_request( pull_request_files: List[Dict], user: Optional[str] = None, ): - + # If diff_text is smaller than 70% of model token prompt = CODE_REVIEW_PROMPT.format( PULL_REQUEST_TITLE=pull_request_title, @@ -43,7 +43,7 @@ def review_pull_request( for file in pull_request_files: patch_details = file["patch"] filename = file["filename"] - if filename.split('.')[-1] not in parser.EXCLUDED_FILETYPES: + if filename.split(".")[-1] not in parser.EXCLUDED_FILETYPES: prompt = FILE_CODE_REVIEW_PROMPT.format( PULL_REQUEST_TITLE=pull_request_title, PULL_REQUEST_DESC=pull_request_desc, diff --git a/kaizen/reviewer/work_summarizer.py b/kaizen/reviewer/work_summarizer.py new file mode 100644 index 00000000..3242ca23 --- /dev/null +++ b/kaizen/reviewer/work_summarizer.py @@ -0,0 +1,40 @@ +from typing import Optional, List, Dict +from kaizen.llms.provider import LLMProvider +from kaizen.llms.prompts import ( + WORK_SUMMARY_PROMPT, + WORK_SUMMARY_SYSTEM_PROMPT, +) +import logging + + +class WorkSummaryGenerator: + def __init__(self): + self.logger = logging.getLogger(__name__) + self.provider = LLMProvider(system_prompt=WORK_SUMMARY_SYSTEM_PROMPT) + + def generate_work_summaries( + self, + diff_file_data: List[Dict], + user: Optional[str] = None, + ): + available_tokens = self.provider.available_tokens(WORK_SUMMARY_PROMPT) + summaries = [] + combined_diff_data = "" + for file_dict in diff_file_data: + temp_prompt = combined_diff_data + temp_prompt += f"""\n---->\nFile Name: {file_dict["file"]}\nPatch: {file_dict["patch"]}\n Status: {file_dict["status"]}""" + if available_tokens - self.provider.get_token_count(temp_prompt) > 0: + combined_diff_data = temp_prompt + continue + + # Process the prompt + prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data) + response = self.provider.chat_completion(prompt, user=user) + summaries.append(response) + combined_diff_data = "" + + if len(summaries) > 1: + # TODO Merge summaries + pass + + return summaries[0]
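
A note on the chunking loop in generate_work_summaries added above: a summary is only produced when the accumulated diff overflows the token budget, so if every patch fits into a single prompt the loop never calls the model and `summaries[0]` raises an IndexError; the file that triggers the overflow is also dropped rather than carried into the next chunk. Below is a minimal sketch of one way to handle both cases. It reuses the names from the commit (WORK_SUMMARY_PROMPT and the LLMProvider methods available_tokens, get_token_count, chat_completion); the final flush, the carry-over of the overflowing file, and returning the full list of summaries are illustrative additions, not part of the committed code.

from typing import Dict, List, Optional

from kaizen.llms.prompts import WORK_SUMMARY_PROMPT
from kaizen.llms.provider import LLMProvider


def generate_work_summaries_sketch(
    provider: LLMProvider,
    diff_file_data: List[Dict],
    user: Optional[str] = None,
) -> List[str]:
    # Budget left over after the prompt template itself is accounted for.
    available_tokens = provider.available_tokens(WORK_SUMMARY_PROMPT)
    summaries = []
    combined_diff_data = ""

    for file_dict in diff_file_data:
        entry = (
            f"\n---->\nFile Name: {file_dict['file']}\n"
            f"Patch: {file_dict['patch']}\nStatus: {file_dict['status']}"
        )
        if available_tokens - provider.get_token_count(combined_diff_data + entry) > 0:
            # Still within budget: keep accumulating patches.
            combined_diff_data += entry
            continue

        # Budget exceeded: summarize what has been accumulated so far, then
        # start the next chunk with the current file instead of dropping it.
        # (A single oversized patch would still blow the budget; truncation
        # is left out of this sketch.)
        if combined_diff_data:
            prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
            summaries.append(provider.chat_completion(prompt, user=user))
        combined_diff_data = entry

    # Flush the final chunk so small diffs still produce a summary.
    if combined_diff_data:
        prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
        summaries.append(provider.chat_completion(prompt, user=user))

    return summaries

Returning the per-chunk summaries (or merging them with a follow-up prompt, as the TODO in the commit hints) avoids the single `summaries[0]` return silently discarding additional chunks; treat this as a sketch of the intended control flow rather than a drop-in replacement.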