From cd2aabbac19435028ca8fa3b407f9690dc143588 Mon Sep 17 00:00:00 2001 From: Sebastian Simon Date: Mon, 23 Sep 2024 16:28:37 +0200 Subject: [PATCH] Start implementing dependency validator --- pyproject.toml | 2 + src/cfgnet/utility/util.py | 39 +++++++++++++++++ src/cfgnet/validator/__init__.py | 0 src/cfgnet/validator/prompts.py | 26 +++++++++++ src/cfgnet/validator/validator.py | 72 +++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+) create mode 100644 src/cfgnet/validator/__init__.py create mode 100644 src/cfgnet/validator/prompts.py create mode 100644 src/cfgnet/validator/validator.py diff --git a/pyproject.toml b/pyproject.toml index d0cac359..89f3f25c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ sphinxcontrib-spelling = "^7.3.2" flatdict = "^4.0.1" dockerfile = "^3.2.0" apacheconfig = "^0.3.2" +openai = "^1.47.0" +backoff = "^2.2.1" [tool.poetry.dev-dependencies] black = { version = '*', allow-prereleases = true } diff --git a/src/cfgnet/utility/util.py b/src/cfgnet/utility/util.py index cd99a753..e96b131c 100644 --- a/src/cfgnet/utility/util.py +++ b/src/cfgnet/utility/util.py @@ -1,3 +1,42 @@ +from dataclasses import dataclass +from typing import Optional +from cfgnet.linker.link import Link + +@dataclass +class Dependency: + project: str + option_name: str + option_file: str + option_value: str + option_type: str + option_technology: str + dependent_option_name: str + dependent_option_value: str + dependent_option_type: str + dependent_option_file: str + dependent_option_technology: str + + def is_test_file(abs_file_path) -> bool: + """Check if a given file is a test file.""" test_indicators = ["/tests", "test", "tests"] return any(indicator in abs_file_path for indicator in test_indicators) + + +def transform(link: Link) -> Dependency: + """Transform a link into a dependency.""" + dependency = Dependency( + project=link.artifact_a.parent.name, + option_name=link.node_a.get_options(), + option_value=link.node_a.name, + 
option_file=link.artifact_a.rel_file_path, + option_type=link.node_a.config_type, + option_technology=link.artifact_a.concept_name, + dependent_option_name=link.node_b.get_options(), + dependent_option_value=link.node_b.name, + dependent_option_file=link.artifact_b.rel_file_path, + dependent_option_type=link.node_b.config_type, + dependent_option_technology=link.artifact_b.concept_name, + ) + + return dependency \ No newline at end of file diff --git a/src/cfgnet/validator/__init__.py b/src/cfgnet/validator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cfgnet/validator/prompts.py b/src/cfgnet/validator/prompts.py new file mode 100644 index 00000000..d4d78fcc --- /dev/null +++ b/src/cfgnet/validator/prompts.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from llama_index.core import PromptTemplate + +@dataclass +class Templates: + system: PromptTemplate = PromptTemplate( + "You are a full-stack expert in validating intra-technology and cross-technology configuration dependencies.\n" + "You will be presented with configuration options found in the software project '{project}'.\n\n" + "Your task is to determine whether the given configuration options actually depend on each other based on value-equality.\n\n" + "{dependency_str}\n\n" + "A value-equality dependency is present if two configuration options must have identical values in order to function correctly.\n" + "Inconsistencies in these configuration values can lead to configuration errors.\n" + "Importantly, configuration options may have equal values by accident, meaning that there is no actual dependency, but it just happens that they have equal values.\n" + "If the values of configuration options are identical merely to ensure consistency within a software project, the options are not considered dependent." 
+    )
+    task: PromptTemplate = PromptTemplate(
+        "Carefully evaluate whether configuration option {nameA} of type {typeA} with value {valueA} in {fileA} of technology {technologyA} "
+        "depends on configuration option {nameB} of type {typeB} with value {valueB} in {fileB} of technology {technologyB} or vice versa."
+    )
+    format: PromptTemplate = PromptTemplate(
+        "Respond in a JSON format as shown below:\n"
+        "{{\n"
+        "\t\"rationale\": string, // Provide a concise explanation of whether and why the configuration options depend on each other due to value-equality.\n"
+        "\t\"isDependency\": boolean // True if a dependency exists, or False otherwise.\n"
+        "}}"
+    )
diff --git a/src/cfgnet/validator/validator.py b/src/cfgnet/validator/validator.py
new file mode 100644
index 00000000..d5879319
--- /dev/null
+++ b/src/cfgnet/validator/validator.py
@@ -0,0 +1,72 @@
+import os
+import backoff
+import logging
+import json
+from openai import OpenAI, RateLimitError, APIError, APIConnectionError, Timeout
+from typing import List
+from cfgnet.validator.prompts import Templates
+from cfgnet.conflicts.conflict import Conflict
+from cfgnet.utility.util import transform
+
+
+class Validator:
+    def __init__(self) -> None:
+        self.model_name = os.getenv("MODEL_NAME", default="gpt-4o-mini-2024-07-18")
+        # os.getenv returns str when the variable is set; cast so the API
+        # client always receives the expected numeric types.
+        self.temperature = float(os.getenv("TEMPERATURE", default=0.4))
+        self.max_tokens = int(os.getenv("MAX_TOKENS", default=250))
+        self.templates = Templates()
+
+    @backoff.on_exception(backoff.expo, (RateLimitError, APIError, APIConnectionError, Timeout), max_tries=5)
+    def generate(self, messages: List) -> str:
+        client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
+
+        response = client.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            temperature=self.temperature,
+            response_format={"type": "json_object"},
+            max_tokens=self.max_tokens
+        )
+
+        response_content = response.choices[0].message.content
+
+        if not response_content or len(response_content.strip()) == 0:
+            logging.error("Response content was empty.")
+
+        return response_content
+
+    def validate(self, conflict: Conflict) -> bool:
+
+        dependency = transform(link=conflict.link)
+
+        system_prompt = self.templates.system.format(project=dependency.project)
+        format_str = self.templates.format.format()
+        task_prompt = self.templates.task.format(
+            nameA=dependency.option_name,
+            typeA=dependency.option_type,
+            valueA=dependency.option_value,
+            fileA=dependency.option_file,
+            technologyA=dependency.option_technology,
+            nameB=dependency.dependent_option_name,
+            typeB=dependency.dependent_option_type,
+            valueB=dependency.dependent_option_value,
+            fileB=dependency.dependent_option_file,
+            technologyB=dependency.dependent_option_technology,
+        )
+
+        user_prompt = f"{task_prompt}\n\n{format_str}"
+
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt}
+        ]
+
+        # TODO: Add multi-aggregation
+        response = self.generate(messages=messages)
+
+        # The model is instructed (format template) to answer with a JSON
+        # object containing an "isDependency" boolean; parse it so the
+        # method actually honors its declared bool return type.
+        return bool(json.loads(response)["isDependency"])