-
Notifications
You must be signed in to change notification settings - Fork 71
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/issue 25/create chat with repo component #38
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
api_key: "" | ||
api_base: "" | ||
db_path: './project_hierachy.json' | ||
log_file: './log.txt' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import gradio as gr | ||
|
||
class GradioInterface:
    """Gradio front-end for the repo-chat retrieval test.

    Construction is eager: building the instance assembles the Blocks UI
    and launches a shared Gradio app immediately (the launch call blocks).
    """

    def __init__(self, respond_function):
        # Callback invoked as respond_function(message, system_prompt);
        # it must return the six values wired to the outputs below.
        self.respond = respond_function
        self.setup_gradio_interface()

    def setup_gradio_interface(self):
        """Assemble the layout, wire the callbacks, and launch the app."""
        css = """
        .markdown-container:nth-of-type(2){
            max-height: 200px; /* 设置最大高度 */
            overflow-y: auto; /* 超出部分显示滚动条 */
        }

        .output-container:nth-of-type(5) {
            max-height: 150px;
            overflow-y: auto;
        }
        """
        with gr.Blocks(css=css) as demo:
            gr.Markdown("""
            # RepoChat Test
            This is a test for retrieval repo
            """)
            with gr.Row():
                with gr.Column(scale=2):
                    msg = gr.Textbox(label="Question Input")
                    gr.Markdown("### question")
                    question = gr.Markdown(label="qa")
                    with gr.Accordion(label="Advanced options", open=False):
                        system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")
                    output1 = gr.Textbox(label="RAG")

                with gr.Column(scale=1):
                    output2 = gr.Textbox(label="Embedding recall")
                with gr.Column(scale=1):
                    output3 = gr.Textbox(label="key words")
                    code = gr.Textbox(label="code")

            # Both the button and pressing Enter in the textbox submit.
            wired_inputs = [msg, system]
            wired_outputs = [msg, output1, output2, output3, code, question]
            submit_btn = gr.Button("Submit")
            submit_btn.click(self.respond, inputs=wired_inputs, outputs=wired_outputs)
            msg.submit(self.respond, inputs=wired_inputs, outputs=wired_outputs)

        gr.close_all()
        demo.queue().launch(share=True)
|
||
# Usage example: run this module directly to launch the UI with a stub
# responder that returns fixed placeholder strings.
if __name__ == "__main__":
    def respond_function(msg, system):
        # Replace with real logic; the tuple order must match the six
        # Gradio outputs: msg, RAG, embedding recall, key words, code, QA.
        return msg, "RAG_output", "Embedding_recall_output", "Key_words_output", "Code_output", "QA_output"

    gradio_interface = GradioInterface(respond_function)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import json | ||
|
||
class JsonFileProcessor:
    """Read-only helper around the project-hierarchy JSON database."""

    def __init__(self, file_path):
        # Default database location. search_in_json_nested still accepts an
        # explicit path so callers can query a different file.
        self.file_path = file_path

    def read_json_file(self):
        """Load the whole JSON database and return the parsed objects.

        Raises:
            FileNotFoundError: if the database file does not exist.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        with open(self.file_path, 'r', encoding='utf-8') as file:
            return json.load(file)

    def extract_md_contents(self):
        """Collect every "md_content" value under files[*].objects[*].

        Returns a flat list. Robustness fix: records missing the expected
        "files"/"objects" keys are skipped instead of raising KeyError,
        so a partially-built database no longer crashes the UI.
        """
        json_data = self.read_json_file()
        md_contents = []
        for file_entry in json_data.get("files", []):
            for obj in file_entry.get("objects", []):
                if "md_content" in obj:
                    md_contents.append(obj["md_content"])
        return md_contents

    def search_in_json_nested(self, file_path, search_text):
        """Depth-first search for the first dict whose "name" contains
        *search_text* (case-insensitive) anywhere in the JSON tree.

        Returns the matching dict, or a human-readable error string when
        nothing matches or the file cannot be read. Callers distinguish
        the two cases with ``isinstance(result, dict)``, so the string
        returns are part of the contract and are kept unchanged.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)

            def recursive_search(data_item):
                # Returns the first match, or None (implicitly) if absent.
                if isinstance(data_item, dict):
                    if 'name' in data_item and search_text.lower() in data_item['name'].lower():
                        return data_item
                    for key, value in data_item.items():
                        if isinstance(value, (dict, list)):
                            result = recursive_search(value)
                            if result:
                                return result
                elif isinstance(data_item, list):
                    for item in data_item:
                        result = recursive_search(item)
                        if result:
                            return result

            result = recursive_search(data)
            if result:
                return result
            return "No matching item found."
        except FileNotFoundError:
            return "File not found."
        except json.JSONDecodeError:
            return "Invalid JSON file."
        except Exception as e:
            # Broad catch keeps the chat UI alive on unexpected data shapes.
            return f"An error occurred: {e}"
|
||
# Ad-hoc smoke test: extract the markdown contents from a local database.
if __name__ == "__main__":
    processor = JsonFileProcessor("database.json")
    md_contents = processor.extract_md_contents()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import logging | ||
|
||
class LoggerManager:
    """Thin wrapper that configures a file-backed logger.

    NOTE(review): every instance configures the *same* underlying logger
    object (logging.getLogger(__name__)); the duplicate-handler guard
    below keeps repeated construction from multiplying log records.
    """

    def __init__(self, log_file, log_level=logging.DEBUG):
        import os  # local import: only needed to normalize handler paths

        self.logger = logging.getLogger(__name__)
        self.logger.setLevel(log_level)
        # Bug fix: the original unconditionally added a new FileHandler on
        # every construction, so each new instance caused every record to
        # be written once more. Skip if an equivalent handler is attached.
        target = os.path.abspath(log_file)
        already_attached = any(
            isinstance(h, logging.FileHandler) and h.baseFilename == target
            for h in self.logger.handlers
        )
        if not already_attached:
            file_handler = logging.FileHandler(log_file, encoding='utf-8')
            file_handler.setLevel(log_level)
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            file_handler.setFormatter(formatter)
            self.logger.addHandler(file_handler)

    def get_logger(self):
        """Return the configured logging.Logger instance."""
        return self.logger
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from repo_agent.chat_with_repo.gradio_ui import GradioInterface | ||
import yaml | ||
from rag import RepoAssistant | ||
|
||
|
||
def load_config(config_file):
    """Parse the YAML config file and return its contents (a dict).

    Consistency/robustness fix: utf-8 is forced explicitly so parsing
    does not depend on the platform's default locale encoding — every
    other file access in this codebase already passes encoding='utf-8'.
    """
    with open(config_file, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)
|
||
|
||
def main():
    """Wire up and launch the repo-chat demo.

    Steps: load config -> build the assistant -> extract markdown from
    the JSON database -> index it in the vector store -> start the UI.
    """
    config = load_config("config.yml")
    api_key = config['api_key']
    api_base = config['api_base']
    db_path = config['db_path']
    log_file = config['log_file']

    assistant = RepoAssistant(api_key, api_base, db_path, log_file)
    md_contents = assistant.json_data.extract_md_contents()
    assistant.chroma_data.create_vector_store(md_contents)
    GradioInterface(assistant.respond)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这个写法我持保留意见,我觉得当前 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这四部指的是1,传入相关配置,2.获取md内容3,放到向量数据库,4启动查询UI,主要rag已经封装了,上次说的能单例化的我基本都单例开了个类 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 我是针对 |
||
|
||
|
||
# Script entry point.
if __name__ == "__main__":
    main()
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
|
||
from llama_index.llms import OpenAI | ||
from logger import LoggerManager | ||
from repo_agent.chat_with_repo.json_handle import JsonFileProcessor | ||
class TextAnalysisTool:
    """LLM-backed helpers: keyword extraction, hierarchy trees, chat
    prompt formatting, and code lookup against the JSON database."""

    def __init__(self, llm, logger, db_path):
        # llm must expose .complete(prompt); logger is a LoggerManager.
        self.jsonsearch = JsonFileProcessor(db_path)
        self.llm = llm
        self.logger = logger.get_logger()
        self.db_path = db_path

    def keyword(self, query):
        """Ask the LLM for up to three keywords related to *query*."""
        prompt = f"Please provide a list of keywords related to the following query, requests output no more than 3 keywords, Input: {query}, Output:"
        return self.llm.complete(prompt)

    def tree(self, query):
        """Ask the LLM to render *query* as a hierarchy tree."""
        prompt = f"Please analyze the following text and generate a tree structure based on its hierarchy:\n\n{query}"
        return self.llm.complete(prompt)

    def format_chat_prompt(self, message, instruction):
        """Build the System/User/Assistant chat template string."""
        return f"System:{instruction}\nUser: {message}\nAssistant:"

    def queryblock(self, message):
        """Search the JSON database for *message* and return the matched
        code (or the search error string) as text."""
        search_result = self.jsonsearch.search_in_json_nested(self.db_path, message)
        if isinstance(search_result, dict):
            # NOTE(review): assumes every matched record carries a
            # 'code_content' key — raises KeyError otherwise; confirm schema.
            search_result = search_result['code_content']
        return str(search_result)

    def nerquery(self, message):
        """Extract a single function/class name from *message* via the LLM."""
        query1 = """
        The output must strictly be a pure function name or class name, without any additional characters.
        For example:
        Pure function names: calculateSum, processData
        Pure class names: MyClass, DataProcessor
        The output function name or class name should be only one.
        """
        query = f"Extract the most relevant class or function from the following{query1}input:\n{message}\nOutput:"
        response = self.llm.complete(query)
        self.logger.debug(f"Input: {message}, Output: {response}")
        return response
|
||
# Manual smoke test: fill in real credentials and paths before running.
if __name__ == "__main__":
    api_base = "https://api.openai.com/v1"
    api_key = "your_api_key"
    log_file = "your_logfile_path"
    db_path = "your_database_path"
    llm = OpenAI(api_key=api_key, api_base=api_base)
    logger = LoggerManager(log_file)
    test = TextAnalysisTool(llm, logger, db_path)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
在下个版本中使用
from loguru import logger
吧,是一个简化了 Logging 配置的库There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
好的,下次更新就换掉