From 5dff392644fe999b00d3d9ce3f8d17b789786a70 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Fri, 10 May 2024 09:55:24 -0400 Subject: [PATCH 1/2] feat: Added basic sample for work summary --- examples/work_summarizer/main.py | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 examples/work_summarizer/main.py diff --git a/examples/work_summarizer/main.py b/examples/work_summarizer/main.py new file mode 100644 index 00000000..d3fd53b9 --- /dev/null +++ b/examples/work_summarizer/main.py @@ -0,0 +1,65 @@ +# from kaizen.reviewer.work_summarizer import WorkSummarizer +import requests +import json +from datetime import datetime, timedelta, timezone + +# Replace with the owner and repository name +OWNER = 'Cloud-Code-AI' +REPO_NAME = 'kaizen' + +# Get the current date and calculate the date 5 days ago +today = datetime.now(timezone.utc).date() +week_ago = today - timedelta(days=14) + +# Convert the dates to ISO format +since = week_ago.isoformat() + +# GitHub API endpoint for getting commits +url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/commits" + +# Add query parameters for the since date +params = { + 'since': since +} + +# Make the API request +response = requests.get(url, params=params) + +# Check if the request was successful +if response.status_code != 200: + # Parse the JSON data + print("ERROR: Couldnt get github commits") + +commits = response.json() + +# print(commits[0]) + +# Get the SHA hashes of the two commits +commit1_sha = commits[0]["sha"] +commit2_sha = commits[-1]["sha"] + +headers = { + # "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json" +} +# print(json.dumps(commits)) + +# Get the diff between the two commits +diff_url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/compare/{commit2_sha}...{commit1_sha}" +response = requests.get(diff_url, headers=headers) +diff_data = response.json() + +print(diff_url) +# Print the diff +file_diffs = [] +for file in diff_data["files"]: + if "patch" in file: + file_diffs.append( + { + "file": file['filename'], + "patch": file["patch"], + "status": file["status"] + } + ) + +print(f"Diff Files: {json.dumps(file_diffs)}") \ No newline at end of file From 69a8e8c7fafdc61c1e9391e21efefc47de20c1c2 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Fri, 10 May 2024 11:33:44 -0400 Subject: [PATCH 2/2] feat: Added basic work summarizer --- examples/basic/execute.py | 8 +++- examples/work_summarizer/main.py | 73 +++++++++++++----------------- kaizen/helpers/output.py | 8 ++-- kaizen/helpers/parser.py | 6 +-- kaizen/llms/prompts.py | 31 +++++++++++++ kaizen/llms/provider.py | 15 +++++- kaizen/reviewer/code_review.py | 4 +- kaizen/reviewer/work_summarizer.py | 40 ++++++++++++++++ 8 files changed, 130 insertions(+), 55 deletions(-) create mode 100644 kaizen/reviewer/work_summarizer.py diff --git a/examples/basic/execute.py b/examples/basic/execute.py index dac82cbb..a24c2930 100644 --- a/examples/basic/execute.py +++ b/examples/basic/execute.py @@ -2,4 +2,10 @@ if __name__ == "__main__": test_dir = ".kaizen/tests" - pytest.main(["--timeout=60", "-v", test_dir, ]) + pytest.main( + [ + "--timeout=60", + "-v", + test_dir, + ] + ) diff --git a/examples/work_summarizer/main.py b/examples/work_summarizer/main.py index d3fd53b9..9def25d2 100644 --- a/examples/work_summarizer/main.py +++ b/examples/work_summarizer/main.py @@ -1,65 +1,56 @@ -# from kaizen.reviewer.work_summarizer import WorkSummarizer +from kaizen.reviewer.work_summarizer import WorkSummaryGenerator 
import requests -import json from datetime import datetime, timedelta, timezone -# Replace with the owner and repository name -OWNER = 'Cloud-Code-AI' -REPO_NAME = 'kaizen' +# GitHub repository information +GITHUB_OWNER = "Cloud-Code-AI" +GITHUB_REPO_NAME = "kaizen" -# Get the current date and calculate the date 5 days ago -today = datetime.now(timezone.utc).date() -week_ago = today - timedelta(days=14) +# Get the current date and calculate the date 14 days ago +current_date = datetime.now(timezone.utc).date() +since_date = current_date - timedelta(days=14) -# Convert the dates to ISO format -since = week_ago.isoformat() +# Convert the date to ISO format +since_date_iso = since_date.isoformat() # GitHub API endpoint for getting commits -url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/commits" +commits_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/commits" # Add query parameters for the since date -params = { - 'since': since -} +params = {"since": since_date_iso} # Make the API request -response = requests.get(url, params=params) +commits_response = requests.get(commits_url, params=params) # Check if the request was successful -if response.status_code != 200: - # Parse the JSON data - print("ERROR: Couldnt get github commits") +if commits_response.status_code != 200: + print("ERROR: Could not get GitHub commits") + exit(1) -commits = response.json() +commits = commits_response.json() -# print(commits[0]) +# Get the SHA hashes of the first and last commits +first_commit_sha = commits[0]["sha"] +last_commit_sha = commits[-1]["sha"] -# Get the SHA hashes of the two commits -commit1_sha = commits[0]["sha"] -commit2_sha = commits[-1]["sha"] +headers = {"Accept": "application/vnd.github.v3+json"} -headers = { - # "Authorization": f"token {access_token}", - "Accept": "application/vnd.github.v3+json" -} -# print(json.dumps(commits)) +# Get the diff between the first and last commits +diff_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/compare/{last_commit_sha}...{first_commit_sha}" +diff_response = requests.get(diff_url, headers=headers) +diff_data = diff_response.json() -# Get the diff between the two commits -diff_url = f"https://api.github.com/repos/{OWNER}/{REPO_NAME}/compare/{commit2_sha}...{commit1_sha}" -response = requests.get(diff_url, headers=headers) -diff_data = response.json() - -print(diff_url) -# Print the diff +# Extract file diffs file_diffs = [] -for file in diff_data["files"]: - if "patch" in file: +for file_dict in diff_data["files"]: + if "patch" in file_dict: file_diffs.append( { - "file": file['filename'], - "patch": file["patch"], - "status": file["status"] + "file": file_dict["filename"], + "patch": file_dict["patch"], + "status": file_dict["status"], } ) -print(f"Diff Files: {json.dumps(file_diffs)}") \ No newline at end of file +work_summary_generator = WorkSummaryGenerator() +print(work_summary_generator.generate_work_summaries(diff_file_data=file_diffs)) diff --git a/kaizen/helpers/output.py b/kaizen/helpers/output.py index 37b5157f..122f52f9 100644 --- a/kaizen/helpers/output.py +++ b/kaizen/helpers/output.py @@ -82,14 +82,14 @@ def get_web_html(url): for svg in soup.find_all("svg"): svg.decompose() - + # Delete each comment for comment in soup.find_all(text=lambda text: isinstance(text, Comment)): comment.extract() - - for style_block in soup.find_all('style'): + + for style_block in soup.find_all("style"): style_block.decompose() - + pretty_html = soup.prettify() return pretty_html diff --git 
a/kaizen/helpers/parser.py b/kaizen/helpers/parser.py index 7469fbbb..01377c5d 100644 --- a/kaizen/helpers/parser.py +++ b/kaizen/helpers/parser.py @@ -1,11 +1,7 @@ import json import re -EXCLUDED_FILETYPES = [ - "json", - "css", - "xml" -] +EXCLUDED_FILETYPES = ["json", "css", "xml"] def extract_json(text): diff --git a/kaizen/llms/prompts.py b/kaizen/llms/prompts.py index bc61c42e..e2db71e9 100644 --- a/kaizen/llms/prompts.py +++ b/kaizen/llms/prompts.py @@ -152,3 +152,34 @@ URL: {URL} ```{WEB_CONTENT}``` """ + +WORK_SUMMARY_SYSTEM_PROMPT = """ +You are an AI assistant that explains technical code changes to non-technical audiences in a user-friendly manner. When presented with a git diff: + +1. Analyze and identify key changes (features, bug fixes, optimizations, refactoring). +2. Break down into sections discussing changes to specific code areas/files. +3. Provide plain language overviews explaining purpose and goals of the changes. +4. Avoid excessive jargon, use simple language. +5. Highlight impacts on user experience or overall system, if applicable. +6. Use examples and analogies to aid understanding. +7. Maintain consistent, easily readable tone and structure. +8. Rely only on the provided diff, do not open external resources. + +Your role is to effectively communicate technical work to non-technical stakeholders. +""" + +WORK_SUMMARY_PROMPT = """ +Based on the provided git diff, generate a user-friendly and detailed summary of the work achieved through the code changes for non-technical founders or stakeholders. + +Guidelines: + +1. Provide a high-level overview explaining the general purpose or goal. +2. Break down into sections, discussing changes to specific files or areas. +3. Explain changes in plain language, avoiding technical jargon. +4. Highlight new features, improvements, bug fixes, or optimizations. +5. Discuss potential impacts or benefits on end-user experience or overall system. +6. Use examples, analogies, or illustrations to aid understanding. +7. Maintain consistent tone and readable structure throughout. 
+ +PATCH DATA: {PATCH_DATA} +""" diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py index 67f78a2a..5aea11ae 100644 --- a/kaizen/llms/provider.py +++ b/kaizen/llms/provider.py @@ -40,8 +40,19 @@ def chat_completion(self, prompt, user: str = None): ) return response["choices"][0]["message"]["content"] - def is_inside_token_limit(self, PROMPT, percetage=0.7): + def is_inside_token_limit(self, PROMPT, percentage=0.7): messages = [{"user": "role", "content": PROMPT}] - if litellm.token_counter(model=self.model, messages=messages) > litellm.get_max_tokens(self.model) * percetage: + if ( + litellm.token_counter(model=self.model, messages=messages) + > litellm.get_max_tokens(self.model) * percentage + ): return False return True + + def available_tokens(self, message, percentage=0.8): + return litellm.get_max_tokens(self.model) * percentage - litellm.token_counter( + self.model, message + ) + + def get_token_count(self, message): + return litellm.token_counter(self.model, message) diff --git a/kaizen/reviewer/code_review.py b/kaizen/reviewer/code_review.py index b08bb66c..e6cc1ed3 100644 --- a/kaizen/reviewer/code_review.py +++ b/kaizen/reviewer/code_review.py @@ -23,7 +23,7 @@ def review_pull_request( pull_request_files: List[Dict], user: Optional[str] = None, ): - + # If diff_text is smaller than 70% of model token prompt = CODE_REVIEW_PROMPT.format( PULL_REQUEST_TITLE=pull_request_title, @@ -43,7 +43,7 @@ def review_pull_request( for file in pull_request_files: patch_details = file["patch"] filename = file["filename"] - if filename.split('.')[-1] not in parser.EXCLUDED_FILETYPES: + if filename.split(".")[-1] not in parser.EXCLUDED_FILETYPES: prompt = FILE_CODE_REVIEW_PROMPT.format( PULL_REQUEST_TITLE=pull_request_title, PULL_REQUEST_DESC=pull_request_desc, diff --git a/kaizen/reviewer/work_summarizer.py b/kaizen/reviewer/work_summarizer.py new file mode 100644 index 00000000..3242ca23 --- /dev/null +++ b/kaizen/reviewer/work_summarizer.py @@ -0,0 +1,40 @@ +from typing import Optional, List, Dict +from kaizen.llms.provider import LLMProvider +from kaizen.llms.prompts import ( + WORK_SUMMARY_PROMPT, + WORK_SUMMARY_SYSTEM_PROMPT, +) +import logging + + +class WorkSummaryGenerator: + def __init__(self): + self.logger = logging.getLogger(__name__) + self.provider = LLMProvider(system_prompt=WORK_SUMMARY_SYSTEM_PROMPT) + + def generate_work_summaries( + self, + diff_file_data: List[Dict], + user: Optional[str] = None, + ): + available_tokens = self.provider.available_tokens(WORK_SUMMARY_PROMPT) + summaries = [] + combined_diff_data = "" + for file_dict in diff_file_data: + temp_prompt = combined_diff_data + temp_prompt += f"""\n---->\nFile Name: {file_dict["file"]}\nPatch: {file_dict["patch"]}\n Status: {file_dict["status"]}""" + if available_tokens - self.provider.get_token_count(temp_prompt) > 0: + combined_diff_data = temp_prompt + continue + + # Process the prompt + prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data) + response = self.provider.chat_completion(prompt, user=user) + summaries.append(response) + combined_diff_data = "" + + if len(summaries) > 1: + # TODO Merge summaries + pass + + return summaries[0]
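
A note on the chunking loop in generate_work_summaries added above: a summary is only produced when the accumulated diff overflows the token budget, so if every patch fits into a single prompt the loop never calls the model and `summaries[0]` raises an IndexError; the file that triggers the overflow is also dropped rather than carried into the next chunk. Below is a minimal sketch of one way to handle both cases. It reuses the names from the commit (WORK_SUMMARY_PROMPT and the LLMProvider methods available_tokens, get_token_count, chat_completion); the final flush, the carry-over of the overflowing file, and returning the full list of summaries are illustrative additions, not part of the committed code.

from typing import Dict, List, Optional

from kaizen.llms.prompts import WORK_SUMMARY_PROMPT
from kaizen.llms.provider import LLMProvider


def generate_work_summaries_sketch(
    provider: LLMProvider,
    diff_file_data: List[Dict],
    user: Optional[str] = None,
) -> List[str]:
    # Budget left over after the prompt template itself is accounted for.
    available_tokens = provider.available_tokens(WORK_SUMMARY_PROMPT)
    summaries = []
    combined_diff_data = ""

    for file_dict in diff_file_data:
        entry = (
            f"\n---->\nFile Name: {file_dict['file']}\n"
            f"Patch: {file_dict['patch']}\nStatus: {file_dict['status']}"
        )
        if available_tokens - provider.get_token_count(combined_diff_data + entry) > 0:
            # Still within budget: keep accumulating patches.
            combined_diff_data += entry
            continue

        # Budget exceeded: summarize what has been accumulated so far, then
        # start the next chunk with the current file instead of dropping it.
        # (A single oversized patch would still blow the budget; truncation
        # is left out of this sketch.)
        if combined_diff_data:
            prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
            summaries.append(provider.chat_completion(prompt, user=user))
        combined_diff_data = entry

    # Flush the final chunk so small diffs still produce a summary.
    if combined_diff_data:
        prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
        summaries.append(provider.chat_completion(prompt, user=user))

    return summaries

Returning the per-chunk summaries (or merging them with a follow-up prompt, as the TODO in the commit hints) avoids the single `summaries[0]` return silently discarding additional chunks; treat this as a sketch of the intended control flow rather than a drop-in replacement.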