From cb6844fc1331df18f70a217c275bf64467aa9811 Mon Sep 17 00:00:00 2001 From: Nick Sullivan Date: Sun, 23 Jul 2023 11:52:49 -0700 Subject: [PATCH] =?UTF-8?q?Enhance=20language=20identification=20and=20con?= =?UTF-8?q?text=20generation=20=F0=9F=A7=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incorporated language identification in the commit, debug, review, and sidekick functions of cli.py and updated the expert software engineer prompt in prompts.py to reflect multiple languages. This change allows for more context-aware and language-specific code analysis and commit message generation. 🌐 --- aicodebot/cli.py | 14 +++++++++----- aicodebot/prompts.py | 23 ++++++++++++----------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/aicodebot/cli.py b/aicodebot/cli.py index 994728d..e9ccf3f 100644 --- a/aicodebot/cli.py +++ b/aicodebot/cli.py @@ -120,6 +120,7 @@ def commit(verbose, response_token_size, yes, skip_pre_commit, files): # noqa: files = staged_files diff_context = Coder.git_diff_context() + languages = ",".join(Coder.identify_languages(files)) if not diff_context: console.print("No changes to commit. 
🤷") return @@ -166,7 +167,7 @@ def commit(verbose, response_token_size, yes, skip_pre_commit, files): # noqa: # Set up the chain chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose) - response = chain.run(diff_context) + response = chain.run({"diff_context": diff_context, "languages": languages}) commit_message_approved = click.confirm( "Do you want to use this commit message (type n to edit)?", default=True @@ -333,7 +334,7 @@ def debug(command, verbose): # Set up the chain chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose) - chain.run(error_output) + chain.run({"error_output": error_output, "languages": ["unix", "bash", "shell"]}) sys.exit(process.returncode) @@ -421,6 +422,7 @@ def review(commit, verbose, output_format, response_token_size, files): if not diff_context: console.print("No changes detected for review. 🤷") return + languages = ",".join(Coder.identify_languages(files)) # Load the prompt prompt = get_prompt("review", structured_output=output_format == "json") @@ -437,7 +439,7 @@ def review(commit, verbose, output_format, response_token_size, files): if output_format == "json": with console.status("Examining the diff and generating the review", spinner=DEFAULT_SPINNER): - response = chain.run(diff_context) + response = chain.run({"diff_context": diff_context, "languages": languages}) parsed_response = prompt.output_parser.parse(response) data = { @@ -458,7 +460,7 @@ def review(commit, verbose, output_format, response_token_size, files): llm.streaming = True llm.callbacks = [RichLiveCallbackHandler(live, bot_style)] - chain.run(diff_context) + chain.run({"diff_context": diff_context, "languages": languages}) @cli.command @@ -481,6 +483,7 @@ def sidekick(request, verbose, response_token_size, files): # noqa: PLR0915 # Style guides/reference code # git history context = generate_files_context(files) + languages = ",".join(Coder.identify_languages(files)) def show_file_context(files): console.print("Files loaded in this session:") @@ -552,6 
+555,7 @@ def show_file_context(files): console.print(f"✅ Dropped '{filename}' from the list of files.") context = generate_files_context(files) + languages = ",".join(Coder.identify_languages(files)) show_file_context(files) continue @@ -577,7 +581,7 @@ def show_file_context(files): callback = RichLiveCallbackHandler(live, bot_style) llm.callbacks = [callback] # a fresh callback handler for each question - chain.run({"task": human_input, "context": context}) + chain.run({"task": human_input, "context": context, "languages": languages}) if request: # If we were given a request, then we only want to run once diff --git a/aicodebot/prompts.py b/aicodebot/prompts.py index 38ce00a..3e6984f 100644 --- a/aicodebot/prompts.py +++ b/aicodebot/prompts.py @@ -188,9 +188,8 @@ def get_personality_prompt(): EXPERT_SOFTWARE_ENGINEER = """ You are an expert software engineer, versed in many programming languages, -especially Python. You follow software development best practices and you know how to +especially {languages}. You follow software development best practices and you know how to write clean, maintainable code. You are a champion for code quality. -You are terse and to the point. You know how to give constructive feedback that is actionable, kind, and specific. """ @@ -276,14 +275,15 @@ def generate_files_context(files): END DIFF Instructions for the commit message: - * Start with a short summary (<72 characters). + * Start with a short summary (less than 72 characters). * Follow with a blank line and detailed text, but only if necessary. If the summary is sufficient, then omit the detailed text. * Use imperative mood (e.g., "Add feature"). * Be in GitHub-flavored markdown format. - * Include contextually appropriate emojis (optional), but don't over do it. * Have a length that scales with the length of the diff context. If the DIFF is a small change, respond quickly with a terse message so we can go faster. 
+ * Do not repeat information that is already known from the git commit. + * Be terse. BEGIN SAMPLE COMMIT MESSAGE Update README with better instructions for installation @@ -293,8 +293,9 @@ def generate_files_context(files): new users get started faster. END SAMPLE COMMIT MESSAGE + Formatting instructions: Start your response with the commit message. No prefix or introduction. - Your entire response will be the commit message. + Your entire response will be the commit message. No quotation marks. """ ) @@ -355,7 +356,7 @@ def generate_files_context(files): * "COMMENTS" - there were some issues found, but they should not block the build and are informational only * "FAILED" - there were serious, blocking issues found that should be fixed before merging the code - The review message should be a markdown-formatted string for display with rich.Markdown or GitHub markdown. + The review message should be a markdown-formatted string for display with GitHub markdown. """ ) @@ -368,24 +369,24 @@ def get_prompt(command, structured_output=False): parser = PydanticOutputParser(pydantic_object=ReviewResult) return PromptTemplate( template=REVIEW_TEMPLATE + "\n{format_instructions}", - input_variables=["diff_context"], + input_variables=["diff_context", "languages"], partial_variables={"format_instructions": parser.get_format_instructions()}, output_parser=parser, ) else: return PromptTemplate( template=REVIEW_TEMPLATE + "\nRespond in markdown format", - input_variables=["diff_context"], + input_variables=["diff_context", "languages"], ) else: prompt_map = { "alignment": PromptTemplate(template=ALIGNMENT_TEMPLATE, input_variables=[]), - "commit": PromptTemplate(template=COMMIT_TEMPLATE, input_variables=["diff_context"]), - "debug": PromptTemplate(template=DEBUG_TEMPLATE, input_variables=["command_output"]), + "commit": PromptTemplate(template=COMMIT_TEMPLATE, input_variables=["diff_context", "languages"]), + "debug": PromptTemplate(template=DEBUG_TEMPLATE, 
input_variables=["command_output", "languages"]), "fun_fact": PromptTemplate(template=FUN_FACT_TEMPLATE, input_variables=["topic"]), "sidekick": PromptTemplate( - template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context"] + template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context", "languages"] ), }