92 add work summary reporting functionality #99

Merged · 3 commits · May 10, 2024
8 changes: 7 additions & 1 deletion examples/basic/execute.py
@@ -2,4 +2,10 @@

 if __name__ == "__main__":
     test_dir = ".kaizen/tests"
-    pytest.main(["--timeout=60", "-v", test_dir, ])
+    pytest.main(
+        [
+            "--timeout=60",
+            "-v",
+            test_dir,
+        ]
+    )
56 changes: 56 additions & 0 deletions examples/work_summarizer/main.py
@@ -0,0 +1,56 @@
from kaizen.reviewer.work_summarizer import WorkSummaryGenerator
import requests
from datetime import datetime, timedelta, timezone

# GitHub repository information
GITHUB_OWNER = "Cloud-Code-AI"
GITHUB_REPO_NAME = "kaizen"

# Get the current date and calculate the date 14 days ago
current_date = datetime.now(timezone.utc).date()
since_date = current_date - timedelta(days=14)

# Convert the date to ISO format
since_date_iso = since_date.isoformat()

# GitHub API endpoint for getting commits
commits_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/commits"

# Add query parameters for the since date
params = {"since": since_date_iso}

# Make the API request
commits_response = requests.get(commits_url, params=params)

# Check if the request was successful
if commits_response.status_code != 200:
    print("ERROR: Could not get GitHub commits")
    exit(1)

commits = commits_response.json()

# Get the SHA hashes of the newest and oldest commits in the window;
# the GitHub API returns commits newest-first, so commits[0] is the most
# recent and commits[-1] is the oldest
first_commit_sha = commits[0]["sha"]
last_commit_sha = commits[-1]["sha"]

headers = {"Accept": "application/vnd.github.v3+json"}

# Compare oldest...newest to get the cumulative diff across the window
diff_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/compare/{last_commit_sha}...{first_commit_sha}"
diff_response = requests.get(diff_url, headers=headers)
diff_data = diff_response.json()

# Extract file diffs
file_diffs = []
for file_dict in diff_data["files"]:
    if "patch" in file_dict:
        file_diffs.append(
            {
                "file": file_dict["filename"],
                "patch": file_dict["patch"],
                "status": file_dict["status"],
            }
        )

work_summary_generator = WorkSummaryGenerator()
print(work_summary_generator.generate_work_summaries(diff_file_data=file_diffs))
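
A note for reviewers: this commits request fetches only the first page of results (30 commits by default), and commits[0] raises an IndexError when the 14-day window is empty. A minimal guard one could add before reading the SHAs (a sketch, not part of this PR):

# Hypothetical guard, not in this PR: exit cleanly when no commits landed
# in the window, since commits[0] would raise IndexError on an empty list.
if not commits:
    print(f"No commits found since {since_date_iso}")
    exit(0)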
8 changes: 4 additions & 4 deletions kaizen/helpers/output.py
@@ -84,14 +84,14 @@ def get_web_html(url):

     for svg in soup.find_all("svg"):
         svg.decompose()

     # Delete each comment
     for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
         comment.extract()
-    for style_block in soup.find_all('style'):
+
+    for style_block in soup.find_all("style"):
         style_block.decompose()

     pretty_html = soup.prettify()
     return pretty_html

6 changes: 1 addition & 5 deletions kaizen/helpers/parser.py
@@ -1,11 +1,7 @@
import json
import re

-EXCLUDED_FILETYPES = [
-    "json",
-    "css",
-    "xml"
-]
+EXCLUDED_FILETYPES = ["json", "css", "xml"]


def extract_json(text):
31 changes: 31 additions & 0 deletions kaizen/llms/prompts.py
@@ -152,3 +152,34 @@
URL: {URL}
```{WEB_CONTENT}```
"""

WORK_SUMMARY_SYSTEM_PROMPT = """
You are an AI assistant that explains technical code changes to non-technical audiences in a user-friendly manner. When presented with a git diff:

1. Analyze and identify key changes (features, bug fixes, optimizations, refactoring).
2. Break down into sections discussing changes to specific code areas/files.
3. Provide plain language overviews explaining purpose and goals of the changes.
4. Avoid excessive jargon; use simple language.
5. Highlight impacts on user experience or overall system, if applicable.
6. Use examples and analogies to aid understanding.
7. Maintain consistent, easily readable tone and structure.
8. Rely only on the provided diff; do not open external resources.

Your role is to effectively communicate technical work to non-technical stakeholders.
"""

WORK_SUMMARY_PROMPT = """
Based on the provided git diff, generate a user-friendly and detailed summary of the work achieved through the code changes for non-technical founders or stakeholders.

Guidelines:

1. Provide a high-level overview explaining the general purpose or goal.
2. Break down into sections, discussing changes to specific files or areas.
3. Explain changes in plain language, avoiding technical jargon.
4. Highlight new features, improvements, bug fixes, or optimizations.
5. Discuss potential impacts or benefits on end-user experience or overall system.
6. Use examples, analogies, or illustrations to aid understanding.
7. Maintain consistent tone and readable structure throughout.

PATCH DATA: {PATCH_DATA}
"""
15 changes: 13 additions & 2 deletions kaizen/llms/provider.py
@@ -40,8 +40,19 @@ def chat_completion(self, prompt, user: str = None):
         )
         return response["choices"][0]["message"]["content"]

-    def is_inside_token_limit(self, PROMPT, percetage=0.7):
+    def is_inside_token_limit(self, PROMPT, percentage=0.7):
         messages = [{"user": "role", "content": PROMPT}]
-        if litellm.token_counter(model=self.model, messages=messages) > litellm.get_max_tokens(self.model) * percetage:
+        if (
+            litellm.token_counter(model=self.model, messages=messages)
+            > litellm.get_max_tokens(self.model) * percentage
+        ):
             return False
         return True
+
+    def available_tokens(self, message, percentage=0.8):
+        return litellm.get_max_tokens(self.model) * percentage - litellm.token_counter(
+            self.model, message
+        )
+
+    def get_token_count(self, message):
+        return litellm.token_counter(self.model, message)
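
For context, a sketch of how these token helpers might combine (the prompt text is hypothetical; the provider is configured the same way kaizen's reviewers configure it):

from kaizen.llms.provider import LLMProvider

provider = LLMProvider(system_prompt="You summarize code changes.")

draft = "Summarize this diff: ..."
if provider.is_inside_token_limit(draft):
    # Under 70% of the model's context window: safe to send in one call.
    summary = provider.chat_completion(draft)
else:
    # Remaining budget after 20% headroom; callers can chunk input to fit.
    budget = provider.available_tokens(draft)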
4 changes: 2 additions & 2 deletions kaizen/reviewer/code_review.py
@@ -23,7 +23,7 @@ def review_pull_request(
     pull_request_files: List[Dict],
     user: Optional[str] = None,
 ):
-
+    # If diff_text is smaller than 70% of the model's token limit
     prompt = CODE_REVIEW_PROMPT.format(
         PULL_REQUEST_TITLE=pull_request_title,
@@ -43,7 +43,7 @@
     for file in pull_request_files:
         patch_details = file["patch"]
         filename = file["filename"]
-        if filename.split('.')[-1] not in parser.EXCLUDED_FILETYPES:
+        if filename.split(".")[-1] not in parser.EXCLUDED_FILETYPES:
             prompt = FILE_CODE_REVIEW_PROMPT.format(
                 PULL_REQUEST_TITLE=pull_request_title,
                 PULL_REQUEST_DESC=pull_request_desc,
40 changes: 40 additions & 0 deletions kaizen/reviewer/work_summarizer.py
@@ -0,0 +1,40 @@
from typing import Optional, List, Dict
from kaizen.llms.provider import LLMProvider
from kaizen.llms.prompts import (
    WORK_SUMMARY_PROMPT,
    WORK_SUMMARY_SYSTEM_PROMPT,
)
import logging


class WorkSummaryGenerator:
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.provider = LLMProvider(system_prompt=WORK_SUMMARY_SYSTEM_PROMPT)

    def generate_work_summaries(
        self,
        diff_file_data: List[Dict],
        user: Optional[str] = None,
    ):
        available_tokens = self.provider.available_tokens(WORK_SUMMARY_PROMPT)
        summaries = []
        combined_diff_data = ""
        for file_dict in diff_file_data:
            temp_prompt = combined_diff_data
            temp_prompt += f"""\n---->\nFile Name: {file_dict["file"]}\nPatch: {file_dict["patch"]}\n Status: {file_dict["status"]}"""
            if available_tokens - self.provider.get_token_count(temp_prompt) > 0:
                combined_diff_data = temp_prompt
                continue

            # Process the prompt
            prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
            response = self.provider.chat_completion(prompt, user=user)
            summaries.append(response)
            combined_diff_data = ""

        if len(summaries) > 1:
            # TODO Merge summaries
            pass

        return summaries[0]
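
Two edges worth flagging in this accumulate-then-flush loop: whatever is still buffered in combined_diff_data when the loop ends is never summarized (so summaries[0] raises an IndexError whenever every file fits in a single chunk), and the file that triggers an overflow is discarded rather than carried into the next chunk. A minimal flush one might add before the final return (a sketch, not part of this PR):

        # Hypothetical flush, not in this PR: summarize any diff data
        # still buffered once the loop has consumed every file.
        if combined_diff_data:
            prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
            summaries.append(self.provider.chat_completion(prompt, user=user))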