Skip to content

Commit

Permalink
Finish llm as validator
Browse files Browse the repository at this point in the history
  • Loading branch information
Sebastian Simon committed Sep 24, 2024
1 parent 9776389 commit e8ed4f8
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 69 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@ docs/_build/*
.cfgnet
.idea/*
.vscode
env/*
env/*
*.env
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ the `validate` command. Detected dependency conflicts will be displayed on scree

cfgnet validate <project_root>

To additionally validate the detected dependency conflicts with an LLM and prevent false warnings, run the `validate` command with the `--with_llm` flag and the path to the env file via `--env_file`. In the `.env` file, you must specify at least the OpenAI API key as follows: `OPENAI_API_KEY=<your-openai-api-key>`. You can further define `MODEL_NAME`, `TEMPERATURE`, and `MAX_TOKENS`; otherwise, the default values will be used.

cfgnet validate --with_llm --env_file=<path-to-env-file> <project_root>

To export the reference network for visualization, use the `export` command.
The `export` command additionally requires an `output` and a `format` option.
Expand Down
41 changes: 26 additions & 15 deletions src/cfgnet/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import time
import logging
import json
import click
from typing import List, Optional
import click
from dotenv import load_dotenv
from cfgnet.utility import logger
from cfgnet.network.network import Network
from cfgnet.network.network_configuration import NetworkConfiguration
Expand Down Expand Up @@ -94,11 +95,12 @@ def init(
@main.command()
@add_project_root_argument
@click.option("-l", "--with_llm", is_flag=False)
@click.option("-e", "--env_file", )
@click.option(
"-e",
"--env_file",
)
def validate(
project_root: str,
with_llm: bool,
env_file: Optional[str] = None
project_root: str, with_llm: bool, env_file: Optional[str] = None
):
"""Validate a reference network against a new network."""
project_name = os.path.basename(project_root)
Expand All @@ -120,26 +122,35 @@ def validate(
detected_conflicts = sum((conflict.count() for conflict in conflicts))

logging.error(
"Detected %s configuration conflicts", str(detected_conflicts)
"Detected %s configuration conflicts.", str(detected_conflicts)
)

completion_time = round((time.time() - start), 2)

logging.info("Done in [%s s]", completion_time)

if with_llm:

load_dotenv(dotenv_path=env_file)
validator = Validator()

for conflict in conflicts:
if validator.validate(conflict):
print(conflict)
validated_conflicts = [
conflict for conflict in conflicts if validator.validate(conflict)
]

logging.error(
"Validated %s configuration conflicts as correct.",
str(len(validated_conflicts)),
)

completion_time = round((time.time() - start), 2)
logging.info("Done in [%s s].", completion_time)

for conflict in validated_conflicts:
print(conflict)

else:
completion_time = round((time.time() - start), 2)
logging.info("Done in [%s s].", completion_time)

for conflict in conflicts:
print(conflict)


sys.exit(1)


Expand Down
8 changes: 4 additions & 4 deletions src/cfgnet/utility/util.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
from dataclasses import dataclass
from typing import Optional
from cfgnet.linker.link import Link


@dataclass
class Dependency:
    """Flattened view of a configuration dependency between two options.

    Each side of the dependency is described by its option name, the file it
    was found in, its value, its type, and the technology (concept) it
    belongs to.  Used as the input record for the LLM-based validator.
    """

    project: str
    option_name: str
    option_file: str
    option_value: str
    option_type: str
    option_technology: str
    dependent_option_name: str
    dependent_option_value: str
    dependent_option_type: str
    dependent_option_file: str
    dependent_option_technology: str


Expand All @@ -39,4 +39,4 @@ def transform(link: Link) -> Dependency:
dependent_option_technology=link.artifact_b.concept_name,
)

return dependency
return dependency
62 changes: 42 additions & 20 deletions src/cfgnet/validator/prompts.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,47 @@
from dataclasses import dataclass, field
from llama_index.core import PromptTemplate


@dataclass
class Templates:
    """Prompt templates for the LLM-based dependency validator.

    Holds the system, task, and output-format prompts.  Each field uses a
    lambda as ``default_factory`` so that every ``Templates`` instance gets
    its own ``PromptTemplate`` object — passing a template *instance* to
    ``default_factory`` directly would raise a TypeError, because
    ``default_factory`` must be a zero-argument callable.
    """

    # System prompt: frames the model as an expert and defines what a
    # value-equality dependency is.  Placeholder: {project}.
    system: PromptTemplate = field(
        default_factory=lambda: PromptTemplate(
            "You are a full-stack expert in validating intra-technology "
            "and cross-technology configuration dependencies.\n"
            "You will be presented with configuration options "
            "found in the software project '{project}'.\n\n"
            "Your task is to determine whether the given configuration "
            "options actually depend on each other based on value-equality.\n\n"
            "A value-equality dependency is present if two configuration "
            "options must have identical values in order to function correctly.\n"
            "Inconsistencies in these configuration values "
            "can lead to configuration errors.\n"
            "Importantly, configuration options may have equal values "
            "by accident, meaning that there is no actual dependency, "
            "but it just happens that they have equal values.\n"
            "If the values of configuration options are identical merely "
            "to ensure consistency within a software project, the options "
            "are not considered dependent."
        )
    )
    # Task prompt: describes the concrete option pair to evaluate.
    # Placeholders: {nameA}/{nameB}, {typeA}/{typeB}, {valueA}/{valueB},
    # {fileA}/{fileB}, {technologyA}/{technologyB}.
    task: PromptTemplate = field(
        default_factory=lambda: PromptTemplate(
            "Carefully evaluate whether configuration option {nameA} "
            "of type {typeA} with value {valueA} in {fileA} of technology "
            "{technologyA} depends on configuration option {nameB} of "
            "type {typeB} with value {valueB} in {fileB} of technology "
            "{technologyB} or vice versa."
        )
    )
    # Format prompt: requests a JSON object with "rationale" and
    # "isDependency" keys ({{ / }} are literal braces in PromptTemplate).
    format: PromptTemplate = field(
        default_factory=lambda: PromptTemplate(
            "Respond in a JSON format as shown below:\n"
            "{{\n"
            "\t“rationale”: string, // Provide a concise explanation "
            "of whether and why the configuration options depend on "
            "each other due to value-equality.\n"
            "\t“isDependency”: boolean // True if a dependency exists,"
            " or False otherwise.\n"
            "}}"
        )
    )
61 changes: 32 additions & 29 deletions src/cfgnet/validator/validator.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,48 @@
import os
import backoff
import logging
import json
from openai import OpenAI, RateLimitError, APIError, APIConnectionError, Timeout
from typing import List
from collections import Counter
import backoff
from openai import (
OpenAI,
RateLimitError,
APIError,
APIConnectionError,
Timeout,
)
from cfgnet.validator.prompts import Templates
from cfgnet.conflicts.conflict import Conflict
from cfgnet.utility.util import transform


class Validator:
def __init__(self) -> None:
    """Configure the validator from environment variables.

    ``MODEL_NAME``, ``TEMPERATURE``, and ``MAX_TOKENS`` are optional
    overrides with sensible defaults; ``OPENAI_API_KEY`` must be present
    in the environment (e.g. loaded from a ``.env`` file beforehand).
    """
    # os.getenv returns strings; generate() casts temperature/max_tokens
    # to float/int when building the API request.
    self.model_name = os.getenv("MODEL_NAME", "gpt-4o-mini-2024-07-18")
    self.temperature = os.getenv("TEMPERATURE", "0.4")
    self.max_tokens = os.getenv("MAX_TOKENS", "250")
    self.templates = Templates()
    self.client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# NOTE(review): including Exception in the retry tuple makes the specific
# OpenAI errors redundant — *every* failure is retried up to five times.
@backoff.on_exception(
    backoff.expo,
    (RateLimitError, APIError, APIConnectionError, Timeout, Exception),
    max_tries=5,
)
def generate(self, messages: List) -> str:
    """Send chat messages to the model and return the parsed JSON reply.

    :param messages: chat messages (system + user prompts) for the model.
    :return: the response parsed as a dict on success, or the raw empty
        content (logged as an error) when the model returned nothing.
    """
    response = self.client.chat.completions.create(
        model=self.model_name,
        messages=messages,
        # Settings come from the environment as strings; cast per call.
        temperature=float(self.temperature),
        response_format={"type": "json_object"},
        max_tokens=int(self.max_tokens),
    )

    response_content = response.choices[0].message.content

    # Check content before calling .strip(): the API may return None,
    # which would otherwise raise AttributeError here.
    if not response_content or not response_content.strip():
        logging.error("Response content was empty.")
        return response_content

    return json.loads(response_content, strict=False)

def validate(self, conflict: Conflict) -> bool:
"""
Expand All @@ -43,11 +51,15 @@ def validate(self, conflict: Conflict) -> bool:
:param conflict: detected dependency conflict.
:return: true if dependency else false.
"""
logging.info("Validate dependency conflict.")
logging.info(
"Validate detected dependency conflicts with %s", {self.model_name}
)

dependency = transform(link=conflict.link)

system_prompt = self.templates.system.format(project=dependency.project)
system_prompt = self.templates.system.format(
project=dependency.project
)
format_str = self.templates.format.format()
task_prompt = self.templates.task.format(
nameA=dependency.option_name,
Expand All @@ -66,18 +78,9 @@ def validate(self, conflict: Conflict) -> bool:

messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
{"role": "user", "content": user_prompt},
]

dependency_counter = Counter
for _ in range(self.repetition):
response = self.generate(messages=messages)
dependency_counter[response["isDependency"]] += 1

dominant_is_dependency = dependency_counter.most_common(1)[0][0]

return dominant_is_dependency


response = self.generate(messages=messages)


return response["isDependency"]

0 comments on commit e8ed4f8

Please sign in to comment.