# codegen.py

import subprocess

import os

from typing import List, Dict, Optional
from pydantic import BaseModel, Field
from dspy import TypedPredictor, Signature, InputField, OutputField

from sungen.utils.dspy_tools import init_dspy


# Define Pydantic models with docstrings and field descriptions

class UserInstruction(BaseModel):
    """
    Represents the user's initial instruction for code modification or generation.
    """
    # NOTE(review): the docstring above and the `description` strings below are
    # presumably surfaced through the model's JSON schema — confirm before
    # rewording them.

    # Required: the request text itself.
    instruction: str = Field(
        default=...,
        description="The user's instruction or request for code generation or modification.",
    )
    # Required: paths of the files to create or modify.
    target_files: List[str] = Field(
        default=...,
        description="List of file paths that are to be created or modified.",
    )
    # Optional: defaults to an empty list.
    context_files: List[str] = Field(
        default=[],
        description="List of file paths to be used as context during code generation.",
    )


class CodeFile(BaseModel):
    """
    Represents a code file with its path and content.
    """
    # NOTE(review): the docstring above and the `description` strings below are
    # presumably surfaced through the model's JSON schema — confirm before
    # rewording them.

    # Required: where the file lives on disk.
    path: str = Field(
        default=...,
        description="The file system path of the code file.",
    )
    # Required: the full text of the file.
    content: str = Field(
        default=...,
        description="The textual content of the code file.",
    )


class CodeState(BaseModel):
    """
    Represents the current state of the codebase, including all relevant code files.
    """
    # NOTE(review): the docstring above and the `description` string below are
    # presumably surfaced through the model's JSON schema — confirm before
    # rewording them.

    # Required: one CodeFile entry per tracked file.
    files: List[CodeFile] = Field(
        default=...,
        description="List of code files representing the current codebase.",
    )


class CodeChange(BaseModel):
    """
    Represents the changes to be applied to the codebase.
    """
    # NOTE(review): the docstring above and the `description` string below are
    # presumably surfaced through the model's JSON schema — confirm before
    # rewording them.

    # Required: file path -> complete replacement content for that file.
    file_changes: Dict[str, str] = Field(
        default=...,
        description="Mapping from file paths to their updated content.",
    )


class TestResult(BaseModel):
    """
    Represents the result of running tests on the codebase.
    """
    # NOTE(review): the docstring above and the `description` strings below are
    # presumably surfaced through the model's JSON schema — confirm before
    # rewording them.

    # Required: True when the test run succeeded.
    success: bool = Field(
        default=...,
        description="Indicates whether the tests passed successfully.",
    )
    # Required: what the test run wrote to standard output.
    output: str = Field(
        default=...,
        description="The standard output from running the tests.",
    )
    # Optional: defaults to None.
    error_messages: Optional[str] = Field(
        default=None,
        description="Captured error messages from failed tests, if any.",
    )


# Define Signature classes with docstrings and field descriptions

class CodeGenerationSignature(Signature):
    """
    Signature for generating code changes based on user instructions and the current code state.
    """
    # NOTE(review): dspy presumably uses the docstring above and the field
    # descriptions below when building the prompt — confirm before rewording.

    # --- Inputs ---
    # The request to fulfil, including the target/context file paths.
    instruction: UserInstruction = InputField(
        description="The user's instruction for code generation or modification."
    )
    # Snapshot of the files the change should be based on.
    code_state: CodeState = InputField(
        description="The current state of the codebase."
    )
    # Optional feedback from an earlier test run; defaults to None.
    error_messages: Optional[str] = InputField(
        default=None,
        description="Error messages from previous test failures, if any."
    )
    # --- Output ---
    # The proposed file contents, keyed by path (see CodeChange).
    code_change: CodeChange = OutputField(
        description="The generated code changes to be applied to the codebase."
    )


class ErrorFixingSignature(Signature):
    """
    Signature for generating code fixes based on error messages from failed tests.
    """
    # NOTE(review): dspy presumably uses the docstring above and the field
    # descriptions below when building the prompt — confirm before rewording.

    # --- Inputs ---
    # Required here (plain str), unlike the optional field on
    # CodeGenerationSignature.
    error_messages: str = InputField(
        description="Error messages resulting from failed tests."
    )
    # Snapshot of the files the fix should be based on.
    code_state: CodeState = InputField(
        description="The current state of the codebase."
    )
    # The original request, passed along with the errors.
    instruction: UserInstruction = InputField(
        description="The user's original instruction for code generation or modification."
    )
    # --- Output ---
    # The proposed file contents, keyed by path (see CodeChange).
    code_change: CodeChange = OutputField(
        description="The code changes generated to fix the identified errors."
    )


# Helper functions: load code from disk, invoke the predictors, apply changes, and run tests

def load_code_state(file_paths: List[str]) -> CodeState:
    """
    Read the given files from disk into a CodeState.

    A path that does not exist yet is still included, with empty content, so
    that it can be treated as a file to be created.

    Args:
        file_paths: Paths of the files to load; the order is preserved.

    Returns:
        A CodeState with one CodeFile per entry of ``file_paths``.

    Raises:
        OSError: If an existing path cannot be read (e.g. it is a directory
            or access is denied).
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    files = []
    for path in file_paths:
        try:
            # Open directly instead of checking os.path.exists() first: this
            # avoids a check-then-use race. UTF-8 is requested explicitly so
            # the result does not depend on the platform's locale encoding.
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
        except FileNotFoundError:
            content = ""  # Empty content for new files
        files.append(CodeFile(path=path, content=content))
    return CodeState(files=files)


def generate_code(
        user_instruction: UserInstruction,
        code_state: CodeState
) -> CodeChange:
    """
    Ask the code-generation predictor for an initial set of changes.

    Args:
        user_instruction: The request describing what to build or modify.
        code_state: The files the change should be based on.

    Returns:
        The ``code_change`` field of the predictor's result.
    """
    generator = TypedPredictor(CodeGenerationSignature)
    # First pass: there are no test failures to report yet.
    result = generator(
        instruction=user_instruction,
        code_state=code_state,
        error_messages=None,
    )
    return result.code_change


def fix_errors(
        error_messages: str,
        code_state: CodeState,
        user_instruction: UserInstruction
) -> CodeChange:
    """
    Ask the error-fixing predictor for changes that address test failures.

    Args:
        error_messages: Error text collected from the failed test run.
        code_state: The files as they currently stand.
        user_instruction: The original request, passed along with the errors.

    Returns:
        The ``code_change`` field of the predictor's result.
    """
    fixer = TypedPredictor(ErrorFixingSignature)
    result = fixer(
        error_messages=error_messages,
        code_state=code_state,
        instruction=user_instruction,
    )
    return result.code_change


def apply_code_change(code_state: CodeState, code_change: CodeChange) -> CodeState:
    """
    Write changed files to disk and return the resulting code state.

    Only files already present in ``code_state`` are written; entries in
    ``code_change.file_changes`` for any other path are ignored. Files without
    a matching entry are carried over unchanged.

    Args:
        code_state: The files currently being tracked.
        code_change: Mapping of file path to its complete new content.

    Returns:
        A new CodeState, in the same file order, reflecting the written content.

    Raises:
        OSError: If a directory cannot be created or a file cannot be written.
    """
    updated_files = []
    for code_file in code_state.files:
        if code_file.path not in code_change.file_changes:
            updated_files.append(code_file)
            continue

        new_content = code_change.file_changes[code_file.path]

        # A tracked file may not exist yet, and neither may its directory;
        # create the directory first so the write below cannot fail with
        # FileNotFoundError. A bare filename has no directory part.
        parent_dir = os.path.dirname(code_file.path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Write the new content to the file, as UTF-8 regardless of locale.
        with open(code_file.path, 'w', encoding='utf-8') as f:
            f.write(new_content)
        updated_files.append(CodeFile(path=code_file.path, content=new_content))
    return CodeState(files=updated_files)


def run_tests(test_cmd: str = "mix test") -> TestResult:
    """
    Run the project's test command and collect its outcome.

    Args:
        test_cmd: Shell command line used to run the tests. It is executed
            through the shell, so it must come from a trusted source.

    Returns:
        A TestResult where ``success`` is True when the command exits with
        status 0, ``output`` is the captured stdout, and ``error_messages`` is
        None on success or the captured stderr followed by stdout on failure.
    """
    result = subprocess.run(test_cmd, shell=True, capture_output=True, text=True)
    success = result.returncode == 0
    if success:
        error_messages = None
    else:
        # Test runners such as ExUnit report failing tests on stdout, while
        # stderr tends to carry compile errors. Keep both, so the failure
        # details are not lost when stderr happens to be empty.
        captured = (result.stderr, result.stdout)
        error_messages = "\n".join(part for part in captured if part)
    return TestResult(success=success, output=result.stdout, error_messages=error_messages)


def iterative_code_development(user_instruction: UserInstruction, max_iterations: int = 5):
    """
    Generate code, run the tests, and retry with error feedback until they pass.

    The first round asks for fresh code; every later round asks for a fix based
    on the error messages of the previous test run. Each round's changes are
    applied before the tests are run.

    Args:
        user_instruction: The request, including the target files to load.
        max_iterations: Upper bound on generate/fix rounds.

    Returns:
        The code state after the last applied change (or the freshly loaded
        state if no round ran).
    """
    code_state = load_code_state(user_instruction.target_files)
    error_messages = None  # only read from the second round onwards

    for round_number in range(1, max_iterations + 1):
        print(f"Iteration {round_number}")

        is_first_round = round_number == 1
        code_change = (
            generate_code(user_instruction, code_state)
            if is_first_round
            else fix_errors(error_messages, code_state, user_instruction)
        )

        code_state = apply_code_change(code_state, code_change)
        test_result = run_tests()

        if not test_result.success:
            print("Tests failed. Attempting to fix errors...")
            error_messages = test_result.error_messages
            continue

        print("All tests passed.")
        break
    else:
        # Reached only when the loop finished without a passing run.
        print("Maximum iterations reached. Tests are still failing.")

    return code_state


def main():
    """
    Run the iterative development loop on a sample instruction and print the result.
    """
    init_dspy()

    final_code_state = iterative_code_development(
        UserInstruction(
            instruction="Create a GenServer that plays ping-pong five times and stops.",
            target_files=["lib/ping_pong_server.ex", "test/ping_pong_server_test.exs"],
            context_files=[],
        )
    )

    print("Final code state:")
    for generated_file in final_code_state.files:
        print(f"File: {generated_file.path}")
        print(generated_file.content)


# Run the sample workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()