From cd2aabbac19435028ca8fa3b407f9690dc143588 Mon Sep 17 00:00:00 2001 From: Sebastian Simon Date: Mon, 23 Sep 2024 16:28:37 +0200 Subject: [PATCH] Start implementing dependency validator --- pyproject.toml | 2 + src/cfgnet/utility/util.py | 39 +++++++++++++++++ src/cfgnet/validator/__init__.py | 0 src/cfgnet/validator/prompts.py | 26 +++++++++++ src/cfgnet/validator/validator.py | 72 +++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+) create mode 100644 src/cfgnet/validator/__init__.py create mode 100644 src/cfgnet/validator/prompts.py create mode 100644 src/cfgnet/validator/validator.py diff --git a/pyproject.toml b/pyproject.toml index d0cac359..89f3f25c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,8 @@ sphinxcontrib-spelling = "^7.3.2" flatdict = "^4.0.1" dockerfile = "^3.2.0" apacheconfig = "^0.3.2" +openai = "^1.47.0" +backoff = "^2.2.1" [tool.poetry.dev-dependencies] black = { version = '*', allow-prereleases = true } diff --git a/src/cfgnet/utility/util.py b/src/cfgnet/utility/util.py index cd99a753..e96b131c 100644 --- a/src/cfgnet/utility/util.py +++ b/src/cfgnet/utility/util.py @@ -1,3 +1,42 @@ +from dataclasses import dataclass +from typing import Optional +from cfgnet.linker.link import Link + +@dataclass +class Dependency: + project: str + option_name: str + option_file: str + option_value: str + option_type: str + option_technology: str + dependent_option_name: str + dependent_option_value: str + dependent_option_type: str + dependent_option_file: str + dependent_option_technology: str + + def is_test_file(abs_file_path) -> bool: + """Check if a given file is a test file.""" test_indicators = ["/tests", "test", "tests"] return any(indicator in abs_file_path for indicator in test_indicators) + + +def transform(link: Link) -> Dependency: + """Transform a link into a dependency.""" + dependency = Dependency( + project=link.artifact_a.parent.name, + option_name=link.node_a.get_options(), + option_value=link.node_a.name, + 
option_file=link.artifact_a.rel_file_path, + option_type=link.node_a.config_type, + option_technology=link.artifact_a.concept_name, + dependent_option_name=link.node_b.get_options(), + dependent_option_value=link.node_b.name, + dependent_option_file=link.artifact_b.rel_file_path, + dependent_option_type=link.node_b.config_type, + dependent_option_technology=link.artifact_b.concept_name, + ) + + return dependency \ No newline at end of file diff --git a/src/cfgnet/validator/__init__.py b/src/cfgnet/validator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/cfgnet/validator/prompts.py b/src/cfgnet/validator/prompts.py new file mode 100644 index 00000000..d4d78fcc --- /dev/null +++ b/src/cfgnet/validator/prompts.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from llama_index.core import PromptTemplate + +@dataclass +class Templates: + system: PromptTemplate = PromptTemplate( + "You are a full-stack expert in validating intra-technology and cross-technology configuration dependencies.\n" + "You will be presented with configuration options found in the software project '{project}'.\n\n" + "Your task is to determine whether the given configuration options actually depend on each other based on value-equality.\n\n" + "{dependency_str}\n\n" + "A value-equality dependency is present if two configuration options must have identical values in order to function correctly.\n" + "Inconsistencies in these configuration values can lead to configuration errors.\n" + "Importantly, configuration options may have equal values by accident, meaning that there is no actual dependency, but it just happens that they have equal values.\n" + "If the values of configuration options are identical merely to ensure consistency within a software project, the options are not considered dependent." 
+    )
+    task: PromptTemplate = PromptTemplate(
+        "Carefully evaluate whether configuration option {nameA} of type {typeA} with value {valueA} in {fileA} of technology {technologyA} "
+        "depends on configuration option {nameB} of type {typeB} with value {valueB} in {fileB} of technology {technologyB} or vice versa."
+    )
+    format: PromptTemplate = PromptTemplate(
+        "Respond in a JSON format as shown below:\n"
+        "{{\n"
+        "\t\"rationale\": string, // Provide a concise explanation of whether and why the configuration options depend on each other due to value-equality.\n"
+        "\t\"isDependency\": boolean // True if a dependency exists, or False otherwise.\n"
+        "}}"
+    )
diff --git a/src/cfgnet/validator/validator.py b/src/cfgnet/validator/validator.py
new file mode 100644
index 00000000..d5879319
--- /dev/null
+++ b/src/cfgnet/validator/validator.py
@@ -0,0 +1,72 @@
+import os
+import backoff
+import logging
+import json
+from openai import OpenAI, RateLimitError, APIError, APIConnectionError, Timeout
+from typing import List
+from cfgnet.validator.prompts import Templates
+from cfgnet.conflicts.conflict import Conflict
+from cfgnet.utility.util import transform
+
+
+class Validator:
+    def __init__(self) -> None:
+        self.model_name = os.getenv("MODEL_NAME", default="gpt-4o-mini-2024-07-18")
+        # os.getenv returns str when the variable is set; cast so the API
+        # client always receives the expected numeric types.
+        self.temperature = float(os.getenv("TEMPERATURE", default=0.4))
+        self.max_tokens = int(os.getenv("MAX_TOKENS", default=250))
+        self.templates = Templates()
+
+    @backoff.on_exception(backoff.expo, (RateLimitError, APIError, APIConnectionError, Timeout), max_tries=5)
+    def generate(self, messages: List) -> str:
+        client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
+
+        response = client.chat.completions.create(
+            model=self.model_name,
+            messages=messages,
+            temperature=self.temperature,
+            response_format={"type": "json_object"},
+            max_tokens=self.max_tokens
+        )
+
+        response_content = response.choices[0].message.content
+
+        if not response_content or len(response_content.strip()) == 0:
+            logging.error("Response content was empty.")
+
+        return response_content
+
+    def validate(self, conflict: Conflict) -> bool:
+
+        dependency = transform(link=conflict.link)
+
+        system_prompt = self.templates.system.format(project=dependency.project)
+        format_str = self.templates.format.format()
+        task_prompt = self.templates.task.format(
+            nameA=dependency.option_name,
+            typeA=dependency.option_type,
+            valueA=dependency.option_value,
+            fileA=dependency.option_file,
+            technologyA=dependency.option_technology,
+            nameB=dependency.dependent_option_name,
+            typeB=dependency.dependent_option_type,
+            valueB=dependency.dependent_option_value,
+            fileB=dependency.dependent_option_file,
+            technologyB=dependency.dependent_option_technology,
+        )
+
+        user_prompt = f"{task_prompt}\n\n{format_str}"
+
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt}
+        ]
+
+        # TODO: Add multi-aggregation
+        response = self.generate(messages=messages)
+
+        # The model is instructed (format template) to answer with a JSON
+        # object containing an "isDependency" boolean; parse it so the
+        # method actually honors its declared bool return type.
+        return bool(json.loads(response)["isDependency"])