92 add work summary reporting functionality #99

Merged · 3 commits · May 10, 2024
8 changes: 7 additions & 1 deletion examples/basic/execute.py
@@ -2,4 +2,10 @@

 if __name__ == "__main__":
     test_dir = ".kaizen/tests"
-    pytest.main(["--timeout=60", "-v", test_dir, ])
+    pytest.main(
+        [
+            "--timeout=60",
+            "-v",
+            test_dir,
+        ]
+    )
56 changes: 56 additions & 0 deletions examples/work_summarizer/main.py
@@ -0,0 +1,56 @@
from kaizen.reviewer.work_summarizer import WorkSummaryGenerator
import requests
from datetime import datetime, timedelta, timezone

# GitHub repository information
GITHUB_OWNER = "Cloud-Code-AI"
GITHUB_REPO_NAME = "kaizen"

# Get the current date and calculate the date 14 days ago
current_date = datetime.now(timezone.utc).date()
since_date = current_date - timedelta(days=14)

# Convert the date to ISO format
since_date_iso = since_date.isoformat()

# GitHub API endpoint for getting commits
commits_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/commits"

# Add query parameters for the since date
params = {"since": since_date_iso}

# Make the API request
commits_response = requests.get(commits_url, params=params)

# Check if the request was successful
if commits_response.status_code != 200:
    print("ERROR: Could not get GitHub commits")
    exit(1)

commits = commits_response.json()

# Get the SHA hashes of the newest and oldest commits in the window;
# the GitHub API returns commits newest-first, so commits[0] is the most
# recent and commits[-1] is the oldest
first_commit_sha = commits[0]["sha"]
last_commit_sha = commits[-1]["sha"]

headers = {"Accept": "application/vnd.github.v3+json"}

# Compare oldest...newest to get the cumulative diff across the window
diff_url = f"https://api.github.com/repos/{GITHUB_OWNER}/{GITHUB_REPO_NAME}/compare/{last_commit_sha}...{first_commit_sha}"
diff_response = requests.get(diff_url, headers=headers)
diff_data = diff_response.json()

# Extract file diffs
file_diffs = []
for file_dict in diff_data["files"]:
    if "patch" in file_dict:
        file_diffs.append(
            {
                "file": file_dict["filename"],
                "patch": file_dict["patch"],
                "status": file_dict["status"],
            }
        )

work_summary_generator = WorkSummaryGenerator()
print(work_summary_generator.generate_work_summaries(diff_file_data=file_diffs))
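
A note for reviewers: this commits request fetches only the first page of results (30 commits by default), and commits[0] raises an IndexError when the 14-day window is empty. A minimal guard one could add before reading the SHAs (a sketch, not part of this PR):

# Hypothetical guard, not in this PR: exit cleanly when no commits landed
# in the window, since commits[0] would raise IndexError on an empty list.
if not commits:
    print(f"No commits found since {since_date_iso}")
    exit(0)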
8 changes: 4 additions & 4 deletions kaizen/helpers/output.py
@@ -84,14 +84,14 @@ def get_web_html(url):

     for svg in soup.find_all("svg"):
         svg.decompose()

     # Delete each comment
     for comment in soup.find_all(text=lambda text: isinstance(text, Comment)):
         comment.extract()
-    for style_block in soup.find_all('style'):
+
+    for style_block in soup.find_all("style"):
         style_block.decompose()

     pretty_html = soup.prettify()
     return pretty_html

6 changes: 1 addition & 5 deletions kaizen/helpers/parser.py
@@ -1,11 +1,7 @@
import json
import re

-EXCLUDED_FILETYPES = [
-    "json",
-    "css",
-    "xml"
-]
+EXCLUDED_FILETYPES = ["json", "css", "xml"]


def extract_json(text):
31 changes: 31 additions & 0 deletions kaizen/llms/prompts.py
@@ -152,3 +152,34 @@
URL: {URL}
```{WEB_CONTENT}```
"""

WORK_SUMMARY_SYSTEM_PROMPT = """
You are an AI assistant that explains technical code changes to non-technical audiences in a user-friendly manner. When presented with a git diff:

1. Analyze and identify key changes (features, bug fixes, optimizations, refactoring).
2. Break down into sections discussing changes to specific code areas/files.
3. Provide plain language overviews explaining purpose and goals of the changes.
4. Avoid excessive jargon; use simple language.
5. Highlight impacts on user experience or overall system, if applicable.
6. Use examples and analogies to aid understanding.
7. Maintain consistent, easily readable tone and structure.
8. Rely only on the provided diff; do not open external resources.

Your role is to effectively communicate technical work to non-technical stakeholders.
"""

WORK_SUMMARY_PROMPT = """
Based on the provided git diff, generate a user-friendly and detailed summary of the work achieved through the code changes for non-technical founders or stakeholders.

Guidelines:

1. Provide a high-level overview explaining the general purpose or goal.
2. Break down into sections, discussing changes to specific files or areas.
3. Explain changes in plain language, avoiding technical jargon.
4. Highlight new features, improvements, bug fixes, or optimizations.
5. Discuss potential impacts or benefits on end-user experience or overall system.
6. Use examples, analogies, or illustrations to aid understanding.
7. Maintain consistent tone and readable structure throughout.

PATCH DATA: {PATCH_DATA}
"""
15 changes: 13 additions & 2 deletions kaizen/llms/provider.py
@@ -40,8 +40,19 @@ def chat_completion(self, prompt, user: str = None):
         )
         return response["choices"][0]["message"]["content"]

-    def is_inside_token_limit(self, PROMPT, percetage=0.7):
+    def is_inside_token_limit(self, PROMPT, percentage=0.7):
         messages = [{"user": "role", "content": PROMPT}]
-        if litellm.token_counter(model=self.model, messages=messages) > litellm.get_max_tokens(self.model) * percetage:
+        if (
+            litellm.token_counter(model=self.model, messages=messages)
+            > litellm.get_max_tokens(self.model) * percentage
+        ):
             return False
         return True
+
+    def available_tokens(self, message, percentage=0.8):
+        return litellm.get_max_tokens(self.model) * percentage - litellm.token_counter(
+            self.model, message
+        )
+
+    def get_token_count(self, message):
+        return litellm.token_counter(self.model, message)
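
For context, a sketch of how these token helpers might combine (the prompt text is hypothetical; the provider is configured the same way kaizen's reviewers configure it):

from kaizen.llms.provider import LLMProvider

provider = LLMProvider(system_prompt="You summarize code changes.")

draft = "Summarize this diff: ..."
if provider.is_inside_token_limit(draft):
    # Under 70% of the model's context window: safe to send in one call.
    summary = provider.chat_completion(draft)
else:
    # Remaining budget after 20% headroom; callers can chunk input to fit.
    budget = provider.available_tokens(draft)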
4 changes: 2 additions & 2 deletions kaizen/reviewer/code_review.py
@@ -23,7 +23,7 @@ def review_pull_request(
     pull_request_files: List[Dict],
     user: Optional[str] = None,
 ):
-
+    # If diff_text is smaller than 70% of the model's token limit
     prompt = CODE_REVIEW_PROMPT.format(
         PULL_REQUEST_TITLE=pull_request_title,
@@ -43,7 +43,7 @@
     for file in pull_request_files:
         patch_details = file["patch"]
         filename = file["filename"]
-        if filename.split('.')[-1] not in parser.EXCLUDED_FILETYPES:
+        if filename.split(".")[-1] not in parser.EXCLUDED_FILETYPES:
             prompt = FILE_CODE_REVIEW_PROMPT.format(
                 PULL_REQUEST_TITLE=pull_request_title,
                 PULL_REQUEST_DESC=pull_request_desc,
40 changes: 40 additions & 0 deletions kaizen/reviewer/work_summarizer.py
@@ -0,0 +1,40 @@
from typing import Optional, List, Dict
from kaizen.llms.provider import LLMProvider
from kaizen.llms.prompts import (
    WORK_SUMMARY_PROMPT,
    WORK_SUMMARY_SYSTEM_PROMPT,
)
import logging


class WorkSummaryGenerator:
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.provider = LLMProvider(system_prompt=WORK_SUMMARY_SYSTEM_PROMPT)

    def generate_work_summaries(
        self,
        diff_file_data: List[Dict],
        user: Optional[str] = None,
    ):
        available_tokens = self.provider.available_tokens(WORK_SUMMARY_PROMPT)
        summaries = []
        combined_diff_data = ""
        for file_dict in diff_file_data:
            temp_prompt = combined_diff_data
            temp_prompt += f"""\n---->\nFile Name: {file_dict["file"]}\nPatch: {file_dict["patch"]}\n Status: {file_dict["status"]}"""
            if available_tokens - self.provider.get_token_count(temp_prompt) > 0:
                combined_diff_data = temp_prompt
                continue

            # Process the prompt
            prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
            response = self.provider.chat_completion(prompt, user=user)
            summaries.append(response)
            combined_diff_data = ""

        if len(summaries) > 1:
            # TODO Merge summaries
            pass

        return summaries[0]
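
Two edges worth flagging in this accumulate-then-flush loop: whatever is still buffered in combined_diff_data when the loop ends is never summarized (so summaries[0] raises an IndexError whenever every file fits in a single chunk), and the file that triggers an overflow is discarded rather than carried into the next chunk. A minimal flush one might add before the final return (a sketch, not part of this PR):

        # Hypothetical flush, not in this PR: summarize any diff data
        # still buffered once the loop has consumed every file.
        if combined_diff_data:
            prompt = WORK_SUMMARY_PROMPT.format(PATCH_DATA=combined_diff_data)
            summaries.append(self.provider.chat_completion(prompt, user=user))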