Skip to content

Commit

Permalink
Merge pull request #56 from OpenBMB/git-status-check
Browse files Browse the repository at this point in the history
Git status check
  • Loading branch information
LOGIC-10 authored Feb 27, 2024
2 parents 0f7e4bf + 07c25d4 commit dc2e720
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 3,541 deletions.
3,430 changes: 0 additions & 3,430 deletions .project_doc_record/.project_hierarchy.json

This file was deleted.

5 changes: 0 additions & 5 deletions .project_doc_record/meta-info.json

This file was deleted.

16 changes: 11 additions & 5 deletions repo_agent/__main__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
from repo_agent.runner import Runner
import sys

from repo_agent.runner import Runner, delete_fake_files
from repo_agent.log import logger


runner = Runner()
if len(sys.argv) == 1:
runner = Runner()

# runner.meta_info.target_repo_hierarchical_tree.print_recursive()
runner.run()
# runner.meta_info.target_repo_hierarchical_tree.print_recursive()
runner.run()

logger.info("文档任务完成。")
logger.info("文档任务完成。")
elif len(sys.argv) == 2:
if sys.argv[1] == "clean":
delete_fake_files()
206 changes: 139 additions & 67 deletions repo_agent/doc_meta_info.py

Large diffs are not rendered by default.

37 changes: 30 additions & 7 deletions repo_agent/file_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import os, json
import ast
from tqdm import tqdm
from colorama import Fore, Style
import threading
from typing import Dict
from repo_agent.utils.meta_info_utils import latest_verison_substring
from repo_agent.config import CONFIG
from repo_agent.log import logger
from repo_agent.utils.gitignore_checker import GitignoreChecker
Expand Down Expand Up @@ -240,22 +242,42 @@ def generate_file_structure(self, file_path):

return file_objects

def generate_overall_structure(self) -> dict:
"""
Generate the overall structure of the repository.
Returns:
dict: A dictionary representing the structure of the repository.
def generate_overall_structure(self, file_path_reflections, jump_files) -> dict:
"""获取目标仓库的文件情况,通过AST-walk获取所有对象等情况。
对于jump_files: 不会parse,当做不存在
"""
repo_structure = {}
gitignore_checker = GitignoreChecker(
directory=self.repo_path,
gitignore_path=os.path.join(self.repo_path, ".gitignore"),
)


bar = tqdm(gitignore_checker.check_files_and_folders())
for not_ignored_files in bar:
normal_file_names = not_ignored_files
if not_ignored_files in jump_files:
print(f"{Fore.LIGHTYELLOW_EX}[File-Handler] Unstaged AddFile, ignore this file: {Style.RESET_ALL}{normal_file_names}")
continue
elif not_ignored_files.endswith(latest_verison_substring):
print(f"{Fore.LIGHTYELLOW_EX}[File-Handler] Skip Latest Version, Using Git-Status Version]: {Style.RESET_ALL}{normal_file_names}")
continue
# elif not_ignored_files.endswith(latest_version):
# """如果某文件被删除但没有暂存,文件系统有fake_file但没有对应的原始文件"""
# for k,v in file_path_reflections.items():
# if v == not_ignored_files and not os.path.exists(os.path.join(CONFIG["repo_path"], not_ignored_files)):
# print(f"{Fore.LIGHTYELLOW_EX}[Unstaged DeleteFile] load fake-file-content: {Style.RESET_ALL}{k}")
# normal_file_names = k #原来的名字
# break
# if normal_file_names == not_ignored_files:
# continue

# if not_ignored_files in file_path_reflections.keys():
# not_ignored_files = file_path_reflections[not_ignored_files] #获取fake_file_path
# print(f"{Fore.LIGHTYELLOW_EX}[Unstaged ChangeFile] load fake-file-content: {Style.RESET_ALL}{normal_file_names}")

try:
repo_structure[not_ignored_files] = self.generate_file_structure(
repo_structure[normal_file_names] = self.generate_file_structure(
not_ignored_files
)
except Exception as e:
Expand Down Expand Up @@ -286,6 +308,7 @@ def convert_to_markdown_file(self, file_path=None):
file_path = self.file_path

# Find the file object in json_data that matches file_path

file_dict = json_data.get(file_path)

if file_dict is None:
Expand Down
6 changes: 4 additions & 2 deletions repo_agent/multi_task_dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import time
import random
from typing import List, Callable, Dict, Any
from colorama import Fore, Style

from repo_agent.log import logger


Expand Down Expand Up @@ -77,8 +79,8 @@ def get_next_task(self, process_id: int):
) and self.task_dict[task_id].status == 0
if ready:
self.task_dict[task_id].status = 1
logger.info(
f"[{process_id}] get task_id {task_id}, remain task: {len(self.task_dict)}"
print(
f"{Fore.RED}[process {process_id}]{Style.RESET_ALL}: get task({task_id}), remain({len(self.task_dict)})"
)
if self.query_id % 10 == 0:
self.sync_func()
Expand Down
129 changes: 104 additions & 25 deletions repo_agent/runner.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,98 @@
import threading
import os, json
import json
import git
import itertools
from tqdm import tqdm
from typing import List
from functools import partial
import subprocess
import shutil
from concurrent.futures import ThreadPoolExecutor
from colorama import Fore, Style

from repo_agent.file_handler import FileHandler
from repo_agent.utils.meta_info_utils import latest_verison_substring
from repo_agent.change_detector import ChangeDetector
from repo_agent.project_manager import ProjectManager
from repo_agent.chat_engine import ChatEngine
from concurrent.futures import ThreadPoolExecutor
from repo_agent.doc_meta_info import MetaInfo, DocItem, DocItemType, DocItemStatus
from repo_agent.log import logger
from repo_agent.config import CONFIG
from repo_agent.multi_task_dispatch import worker
from tqdm import tqdm
from typing import List
from functools import partial
import subprocess
import shutil



def make_fake_files():
    """Snapshot unstaged git changes so later parsing sees a consistent tree.

    Inspects the git status of ``CONFIG["repo_path"]`` and handles three cases:
      1. Untracked (and added-but-unstaged) files: ignored — collected into
         ``jump_files`` so they are neither parsed nor documented.
      2. Modified-but-unstaged ``.py`` files: the on-disk file is renamed to a
         "fake file" (same path with the ``latest_verison_substring`` suffix)
         and the original path is recreated with the content recorded in the
         git index (``a_blob``).
      3. Deleted-but-unstaged ``.py`` files: an empty fake file marks the
         deletion and the original path is recreated with the index content.

    Note: files in the target repo must not already end with
    ``latest_verison_substring``; if one shows up in git status the process
    aborts so stale fake files are not mistaken for real sources.

    Returns:
        tuple[dict, list]: ``(file_path_reflections, jump_files)`` where
        ``file_path_reflections`` maps each real path to its fake-file path
        and ``jump_files`` lists paths to skip entirely.
    """
    # Start from a clean state in case a previous run left fake files behind.
    delete_fake_files()

    repo = git.Repo(CONFIG["repo_path"])
    unstaged_changes = repo.index.diff(None)  # tracked files with unstaged edits
    untracked_files = repo.untracked_files  # on disk but unknown to git

    jump_files = []  # these are not parsed, documented, or counted in references
    for file_name in untracked_files:
        if file_name.endswith(".py"):
            print(f"{Fore.LIGHTMAGENTA_EX}[SKIP untracked files]: {Style.RESET_ALL}{file_name}")
            jump_files.append(file_name)
    for diff_file in unstaged_changes.iter_change_type('A'):  # added but not staged: skip entirely
        if diff_file.a_path.endswith(latest_verison_substring):
            logger.error("FAKE_FILE_IN_GIT_STATUS detected! suggest to use `delete_fake_files` and re-generate document")
            # Abort with a non-zero status; the original bare exit() reported success (0).
            raise SystemExit(1)
        jump_files.append(diff_file.a_path)

    file_path_reflections = {}
    # Modified and deleted files, in either case without being staged.
    for diff_file in itertools.chain(
        unstaged_changes.iter_change_type('M'),
        unstaged_changes.iter_change_type('D'),
    ):
        if diff_file.a_path.endswith(latest_verison_substring):
            logger.error("FAKE_FILE_IN_GIT_STATUS detected! suggest to use `delete_fake_files` and re-generate document")
            raise SystemExit(1)  # abort with a non-zero status (see above)
        now_file_path = diff_file.a_path  # path relative to repo_path
        if now_file_path.endswith(".py"):
            # Content of the file as recorded in the git index (read as UTF-8 text).
            raw_file_content = diff_file.a_blob.data_stream.read().decode("utf-8")
            latest_file_path = now_file_path[:-3] + latest_verison_substring
            if os.path.exists(os.path.join(CONFIG["repo_path"], now_file_path)):
                # Modified file: preserve the working-tree version under the fake name.
                os.rename(os.path.join(CONFIG["repo_path"], now_file_path), os.path.join(CONFIG["repo_path"], latest_file_path))

                print(f"{Fore.LIGHTMAGENTA_EX}[Save Latest Version of Code]: {Style.RESET_ALL}{now_file_path} -> {latest_file_path}")
            else:
                # Deleted file: an empty fake file records that the deletion is unstaged.
                print(f"{Fore.LIGHTMAGENTA_EX}[Create Temp-File for Deleted(But not Staged) Files]: {Style.RESET_ALL}{now_file_path} -> {latest_file_path}")
                with open(os.path.join(CONFIG["repo_path"], latest_file_path), "w", encoding="utf-8"):
                    pass
            # encoding is explicit: the blob was decoded as UTF-8, so writing with the
            # platform default encoding could corrupt the content (e.g. on Windows).
            with open(os.path.join(CONFIG["repo_path"], now_file_path), "w", encoding="utf-8") as writer:
                writer.write(raw_file_content)
            file_path_reflections[now_file_path] = latest_file_path  # real path -> fake path
    return file_path_reflections, jump_files


def delete_fake_files():
    """Remove every fake file left by ``make_fake_files`` once the task is done.

    Walks the whole repo. For each file ending in ``latest_verison_substring``,
    the recreated ``.py`` at the original path is deleted; then the fake file is
    either deleted too (it was an empty deletion marker) or renamed back to the
    original ``.py`` name (it held the saved latest version of the code).
    """
    def _sweep(dir_path):
        # Depth-first walk over dir_path, including subdirectories.
        for entry in os.listdir(dir_path):
            full_path = os.path.join(dir_path, entry)
            if os.path.isdir(full_path):
                _sweep(full_path)
                continue
            if not full_path.endswith(latest_verison_substring):
                continue
            origin_name = full_path.replace(latest_verison_substring, ".py")
            os.remove(origin_name)
            if os.path.getsize(full_path) == 0:
                # Empty fake file == deletion marker: drop it as well.
                print(f"{Fore.LIGHTRED_EX}[Deleting Temp File]: {Style.RESET_ALL}{full_path[len(CONFIG['repo_path']):]}, {origin_name[len(CONFIG['repo_path']):]}")
                os.remove(full_path)
            else:
                # Non-empty fake file holds the latest version: restore it.
                print(f"{Fore.LIGHTRED_EX}[Recovering Latest Version]: {Style.RESET_ALL}{origin_name[len(CONFIG['repo_path']):]} <- {full_path[len(CONFIG['repo_path']):]}")
                os.rename(full_path, origin_name)

    _sweep(CONFIG["repo_path"])

def need_to_generate(doc_item: DocItem, ignore_list: List) -> bool:
"""只生成item的,文件及更高粒度都跳过。另外如果属于一个blacklist的文件也跳过"""
if doc_item.item_status == DocItemStatus.doc_up_to_date:
Expand Down Expand Up @@ -46,8 +121,7 @@ def load_whitelist():
), f"whitelist_path must be a json-file,and must exists: {CONFIG['whitelist_path']}"
with open(CONFIG["whitelist_path"], "r") as reader:
white_list_json_data = json.load(reader)
# for i in range(len(white_list_json_data)):
# white_list_json_data[i]["file_path"] = white_list_json_data[i]["file_path"].replace("https://github.com/huggingface/transformers/blob/v4.36.1/","")

return white_list_json_data
else:
return None
Expand All @@ -63,9 +137,10 @@ def __init__(self):

if not os.path.exists(
os.path.join(CONFIG["repo_path"], CONFIG["project_hierarchy"])
): # 如果不存在全局结构信息文件夹.project_hierarchy,就新建一个
self.meta_info = MetaInfo.init_from_project_path(CONFIG["repo_path"]) # 从repo_path中初始化一个meta_info, metainfo代表了整个项目的结构信息
self.meta_info.checkpoint( # 将初始化的meta_info写入到.project_doc_record文件夹中
):
file_path_reflections, jump_files = make_fake_files()
self.meta_info = MetaInfo.init_meta_info(file_path_reflections, jump_files)
self.meta_info.checkpoint(
target_dir_path=os.path.join(
CONFIG["repo_path"], CONFIG["project_hierarchy"]
)
Expand Down Expand Up @@ -109,9 +184,9 @@ def generate_doc_for_a_single_item(self, doc_item: DocItem):

ignore_list = CONFIG.get("ignore_list", [])
if not need_to_generate(doc_item, ignore_list):
logger.info(f"内容被忽略/文档已生成,跳过:{doc_item.get_full_name()}")
print(f"内容被忽略/文档已生成,跳过:{doc_item.get_full_name()}")
else:
logger.info(f" -- 正在生成{doc_item.get_full_name()} 对象文档...")
print(f" -- 正在生成文档 {Fore.LIGHTYELLOW_EX}{doc_item.item_type.name}: {doc_item.get_full_name()}{Style.RESET_ALL}")
file_handler = FileHandler(CONFIG["repo_path"], rel_file_path)
response_message = self.chat_engine.generate_doc(
doc_item=doc_item,
Expand Down Expand Up @@ -191,11 +266,11 @@ def first_generate(self):
def markdown_refresh(self):
"""将目前最新的document信息写入到一个markdown格式的文件夹里(不管markdown内容是不是变化了)"""
with self.runner_lock:
# 首先删除doc下所有内容,然后再重新写入 (这种方法有点问题吧?@yeyn)
markdown_docs_path = os.path.join(CONFIG["repo_path"], CONFIG["Markdown_Docs_folder"])
if os.path.exists(markdown_docs_path):
shutil.rmtree(markdown_docs_path)
os.mkdir(markdown_docs_path)
# 首先删除doc下所有内容,然后再重新写入
markdown_folder = os.path.join(CONFIG["repo_path"],CONFIG["Markdown_Docs_folder"])
if os.path.exists(markdown_folder):
shutil.rmtree(markdown_folder)
os.mkdir(markdown_folder)

file_item_list = self.meta_info.get_all_files()
for file_item in tqdm(file_item_list):
Expand Down Expand Up @@ -298,8 +373,9 @@ def run(self):
1.新建的文件没有文档,因此metainfo merge后还是没有文档
2.被删除的文件和obj,本来就不在新的meta里面,相当于文档被自动删除了
3.只需要观察被修改的文件,以及引用关系需要被通知的文件去重新生成文档"""
new_meta_info = MetaInfo.init_from_project_path(CONFIG["repo_path"]) # 从repo_path中初始化一个meta_info, metainfo代表了整个项目的结构信息
new_meta_info.load_doc_from_older_meta(self.meta_info) # 从老的meta_info中加载文档信息, 目的是跟上面的new_meta_info做merge,检测出new中的变更
file_path_reflections, jump_files = make_fake_files()
new_meta_info = MetaInfo.init_meta_info(file_path_reflections, jump_files)
new_meta_info.load_doc_from_older_meta(self.meta_info)

self.meta_info = new_meta_info # 更新自身的meta_info信息为new的信息
self.meta_info.in_generation_process = True # 将in_generation_process设置为True,表示检测到变更后正在生成文档的过程中
Expand All @@ -310,11 +386,12 @@ def run(self):

task_manager = self.meta_info.get_task_manager(self.meta_info.target_repo_hierarchical_tree,task_available_func=check_task_available_func)

for item_name, item_type in self.meta_info.deleted_items_from_older_meta:
print(f"{Fore.LIGHTMAGENTA_EX}[Dir/File/Obj Delete Dected]: {Style.RESET_ALL} {item_type} {item_name}")
self.meta_info.print_task_list(task_manager.task_dict)
if task_manager.all_success:
logger.info("No tasks in the queue, all documents are completed and up to date.")
else:
self.meta_info.print_task_list(task_manager.task_dict)

exit()
task_manager.sync_func = self.markdown_refresh
threads = [
threading.Thread(
Expand All @@ -340,6 +417,7 @@ def run(self):
logger.info(f"Doc has been forwarded to the latest version")

self.markdown_refresh()
delete_fake_files()

def add_new_item(self, file_handler, json_data):
"""
Expand Down Expand Up @@ -542,8 +620,8 @@ def update_existing_item(self, file_dict, file_handler, changes_in_pyfile):
changed_obj[0],
ref_obj["obj_referencer_list"],
)
logger.info(
f"正在生成 {file_handler.file_path}中的{changed_obj[0]} 对象文档..."
print(
f"正在生成 {Fore.CYAN}{file_handler.file_path}{Style.RESET_ALL}中的{Fore.CYAN}{changed_obj[0]}{Style.RESET_ALL}对象文档."
)
futures.append(future)

Expand Down Expand Up @@ -604,6 +682,7 @@ def get_new_objects(self, file_handler):


if __name__ == "__main__":

runner = Runner()

# runner.meta_info.target_repo_hierarchical_tree.print_recursive()
Expand Down
3 changes: 3 additions & 0 deletions repo_agent/utils/meta_info_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@


latest_verison_substring = "_latest_version.py"

0 comments on commit dc2e720

Please sign in to comment.