From e0c42a6db272addc011e0f84bbe40779a39d28ac Mon Sep 17 00:00:00 2001 From: ikun-moxiaofei <1091909200@qq.com> Date: Fri, 21 Jun 2024 14:30:24 +0800 Subject: [PATCH 1/8] refactor: Reconstructs the boot script and adds the one-click boot script that uses CPU mode and GPU mode in openaiAPI mode --- config.py | 19 ++- scripts/run_for_CPU.sh | 310 +++++++++++++++++++++++++++++++++++++++++ scripts/run_for_GPU.sh | 310 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 638 insertions(+), 1 deletion(-) create mode 100755 scripts/run_for_CPU.sh create mode 100755 scripts/run_for_GPU.sh diff --git a/config.py b/config.py index 55e021fe..4662799b 100644 --- a/config.py +++ b/config.py @@ -1,3 +1,15 @@ +# -*- coding: utf-8 -*- +def get_run_config_params(): + openai_api_base = "https://api.openai.com/v1" + openai_api_key = "sk-xxxxxxx" + openai_api_model_name = "gpt-3.5-turbo-1106" + openai_api_context_length = "4096" + workers = 4 + milvus_port = 19530 + qanything_port = 8777 + use_cpu = True + return f"{openai_api_base},{openai_api_key},{openai_api_model_name},{openai_api_context_length},{workers},{milvus_port},{qanything_port},{use_cpu}" + # 模型参数 llm_config = { # 回答的最大token数,一般来说对于国内模型一个中文不到1个token,国外模型一个中文1.5-2个token @@ -52,4 +64,9 @@ # 切割文件的相邻文本重合长度 "chunk_overlap": 0 } -#### 一般情况下,除非特殊需要,不要修改一下字段参数 #### \ No newline at end of file +#### 一般情况下,除非特殊需要,不要修改一下字段参数 #### + + +if __name__ == "__main__": + import sys + sys.stdout.write(''.join(get_run_config_params())) \ No newline at end of file diff --git a/scripts/run_for_CPU.sh b/scripts/run_for_CPU.sh new file mode 100755 index 00000000..e3e9c199 --- /dev/null +++ b/scripts/run_for_CPU.sh @@ -0,0 +1,310 @@ +#!/bin/bash +echo "Script started at $(date)." 
+chmod +x "$0" + +# 调用 Python 脚本并捕获输出 +IFS=',' # 设置字段分隔符为逗号 +openai_api_base_with_key=$(python config.py) + +# 使用 read 命令分割字符串 +read -r openai_api_base openai_api_key openai_api_model_name openai_api_context_length workers milvus_port qanything_port use_cpu <<< "$openai_api_base_with_key" +echo "openai_api_base" $openai_api_base +echo "openai_api_key" $openai_api_key +echo "openai_api_model_name" $openai_api_model_name +echo "openai_api_context_length" $openai_api_context_length +echo "workers" $workers +echo "milvus_port" $milvus_port +echo "qanything_port" $qanything_port +echo "use_cpu" $use_cpu + + +# 检查 Conda 是否安装,如果安装就执行一下逻辑,使用原有的conda进行安装 +if command -v conda >/dev/null 2>&1; then + echo "Conda is installed." + # 检查 Conda 是否为最新版本 + echo "Checking for Conda updates..." + if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then + echo "An update is available for Conda." + echo "Would you like to update it? (y/n)" + read -r user_response + + if [[ "$user_response" =~ ^[Yy] ]]; then + echo "Updating Conda..." + conda update -n base -c defaults conda -y + echo "Conda has been updated." + else + echo "Skipping Conda update." + fi + else + echo "Conda is already up to date." + fi + + + # 检查特定 Conda 环境是否存在 + ENV_NAME="qanything-python" + if conda info --envs | grep -q "$ENV_NAME"; then + echo "Conda environment '$ENV_NAME' already exists." + else + echo "Conda environment '$ENV_NAME' does not exist. Proceeding with installation." + conda create -n "$ENV_NAME" python=3.10 + if [ $? -eq 0 ]; then + echo "Conda environment '$ENV_NAME' created successfully." + else + echo "Failed to create Conda environment '$ENV_NAME'." + exit 1 + fi + fi + # 激活 Conda 环境 + echo "Activating conda environment '$ENV_NAME'..." 
+ ENV_NAME="qanything-python" + CONDA_INSTALL_PATH="$(conda info --base)" + echo $CONDA_INSTALL_PATH + chmod +x $CONDA_INSTALL_PATH + # 使用从 Conda 获取的路径来激活 Conda 环境 + source "$CONDA_INSTALL_PATH/bin/activate" "$ENV_NAME" + if [ $? -ne 0 ]; then + echo "Failed to activate conda environment '$ENV_NAME'." + exit 1 + fi + echo "Conda environment '$ENV_NAME' activated." + + # 使用 pip 从 requirements.txt 安装依赖 + set -x + set -e + pip install -r requirements.txt + if [ $? -ne 0 ]; then + echo "Failed to install dependencies from requirements.txt." + exit 1 + fi + # 判断操作系统 + if [ "$(uname)" = "Linux" ]; then + S="LinuxOrWSL" + else + S="M1mac" + fi + use_openai_api_option="true" + set -x + echo "启动命令是scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -c -o -b "$openai_api_base" -k "$openai_api_key" " + bash scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -c -o -b "$openai_api_base" -k "$openai_api_key" + set +x + set +e + if [ $? -ne 0 ]; then + echo "Failed to run the script for OpenAI API." + exit 1 + fi + echo "Script for OpenAI API executed successfully." + # 在脚本结束时记录时间 + echo "Script finished at $(date)." +else + echo "Conda is not installed." 
+fi + + +# 以下代码为用户未安装conda时执行的逻辑,该命令会在项目目录下安装一个conda并使用,该conda仅会在该脚本执行时间使用,不会填加到环境变量 +# 检查当前使用的 shell +SHELL_NAME=$(basename "$SHELL") + +# 设置对应的配置文件路径 +case "$SHELL_NAME" in + bash) + CONFIG_FILE="~/.bashrc" + ;; + zsh) + CONFIG_FILE="~/.zshrc" + ;; + *) + CONFIG_FILE="~/.profile" # 对于其他 shell,使用 .profile + ;; +esac + +echo "Config file for current shell is: $CONFIG_FILE" + +# 获取当前脚本运行的目录 +CURRENT_DIR=$(pwd) + +# 定义 Conda 安装路径为当前目录下的 anaconda3 文件夹 +CONDA_INSTALL_PATH="$CURRENT_DIR/anaconda3/bin" + +# 将 Conda 的 bin 目录添加到 PATH 环境变量中 +export PATH="$CONDA_INSTALL_PATH:$PATH" +#bash -c "source $CONFIG_FILE" + +# 定义 Anaconda 安装程序的 URL,根据不同操作系统设置 +if [ "$(uname)" = "Linux" ]; then + INSTALLER_URL="https://repo.anaconda.com/archive/Anaconda3-2024.02-1-Linux-x86_64.sh" + INSTALL_PATH="$(pwd)/anaconda3" +else + # 这里假设如果不是 Linux,则操作系统为 macOS + INSTALLER_URL="https://repo.anaconda.com/archive/Anaconda3-2024.02-1-MacOSX-x86_64.sh" + INSTALL_PATH="$(pwd)/anaconda3" +fi + + +# 赋予执行权限 +chmod +x "anaconda_installer.sh" + +bash -c "source $CONFIG_FILE" +# 检测是否已安装 Conda 并询问用户是否更新 +if command -v conda &> /dev/null; then + echo "Local Conda is already installed." + # 检查 Conda 是否为最新版本 + echo "Checking for Conda updates..." + if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then + echo "An update is available for Conda." + echo "Would you like to update it? (y/n)" + read -r user_response + + if [[ "$user_response" =~ ^[Yy] ]]; then + echo "Updating Conda..." + conda update -n base -c defaults conda -y + echo "Conda has been updated." + else + echo "Skipping Conda update." + fi + else + echo "Conda is already up to date." + fi +else + echo "Conda is not installed. Proceeding with a fresh installation." + # 下载 Anaconda 安装程序 + echo "Downloading Anaconda installer for $OS_TYPE..." + wget "$INSTALLER_URL" -O "anaconda_installer.sh" + + # 提示用户输入以确认安装 + read -p "Do you want to install Anaconda in $INSTALL_PATH? 
(y/n) " response + case "$response" in + [yY][eE][sS]|[yY]) + echo "Installing Anaconda..." + chmod +x "anaconda_installer.sh" + # 运行安装脚本 + yes | ./anaconda_installer.sh -b -p "$INSTALL_PATH" + ;; + *) + echo "Installation aborted." + exit 1 + ;; + esac + + # 检查安装是否成功 + if conda --version &> /dev/null; then + echo "Anaconda installation successful." + else + echo "Anaconda installation failed." + exit 1 + fi + + # 仅在脚本运行期间设置局部环境变量,不修改全局环境变量 + echo "Setting up local environment for Anaconda access." + export PATH="$INSTALL_PATH/bin:$PATH" + + # 可以使用 conda -V 来验证 Conda 是否可用 + echo "Verifying Conda installation with conda -V..." + conda -V + + # 获取当前脚本运行的目录,即项目路径 + PROJECT_DIR=$(pwd) + + # 定义 Conda 安装路径 +# CONDA_INSTALL_PATH="$PROJECT_DIR/anaconda3" + + # 定义 Conda 环境目录,使用项目路径下的 .conda/envs + CONDA_ENVS_DIR="$PROJECT_DIR/.conda/envs" + + # 确保 Conda 环境目录存在 + if [ ! -d "$CONDA_ENVS_DIR" ]; then + echo "Conda envs directory does not exist. Creating now at $CONDA_ENVS_DIR." + mkdir -p "$CONDA_ENVS_DIR" + if [ $? -ne 0 ]; then + echo "Failed to create Conda envs directory." + exit 1 + fi + fi + + # 确保当前用户有写权限 + if [ ! -w "$CONDA_ENVS_DIR" ]; then + echo "No write permission for the Conda envs directory. Attempting to fix permissions." + chmod u+w "$CONDA_ENVS_DIR" + if [ $? -ne 0 ]; then + echo "Failed to modify permissions for Conda envs directory." + exit 1 + fi + fi +# 脚本结束时,局部环境变量 PATH 的更改将不再影响后续命令 +fi + + +# 使用项目路径下的 Conda 安装来检查或创建环境 +ENV_NAME="qanything-python" + +# 检查环境是否存在 +if conda info --envs | grep -q "$ENV_NAME"; then + echo "Conda environment '$ENV_NAME' already exists." +else + echo "Creating conda environment '$ENV_NAME' with Python 3.10..." + "$CONDA_INSTALL_PATH/conda" create -n "$ENV_NAME" python=3.10 + if [ $? -ne 0 ]; then + echo "Failed to create conda environment '$ENV_NAME'." + exit 1 + fi + echo "Conda environment '$ENV_NAME' created successfully." +fi + +# 激活 Conda 环境 +echo "Activating conda environment '$ENV_NAME'..." 
+source "$CONDA_INSTALL_PATH/activate" "$ENV_NAME" +#conda activate qanything-python +echo "Conda environment created successfully." + +CONDA_INSTALL_PATH_2="$CURRENT_DIR/anaconda3" +echo "$CONDA_PREFIX" +echo "CONDA_INSTALL_PATH_2/envs/qanything-python" +# 检查激活是否成功 +if [ "$CONDA_PREFIX" != "$CONDA_INSTALL_PATH_2/envs/qanything-python" ]; then + echo "Failed to activate conda environment." + exit 1 +fi + + +# 安装 requirements.txt 中的依赖 +echo "Installing dependencies from requirements.txt..." +set -e # 使脚本在遇到错误时立即退出 +set -x # 打印出执行的每一条命令 + +conda install pip +pip install -r requirements.txt + +conda env list + +if [ $? -ne 0 ]; then + echo "Failed to install dependencies. Check the error messages above for details." + exit 1 +fi + +set +x # 关闭命令打印 +set +e # 关闭立即退出 + +echo "Dependencies installed successfully." + + +# 使用 Conda 环境运行脚本 +echo "Executing the script for OpenAI API in the 'qanything-python' environment." +# 判断操作系统 +if [ "$(uname)" = "Linux" ]; then + S="LinuxOrWSL" +else + S="M1mac" +fi +use_openai_api_option="true" +set -x +echo "启动命令是scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -c -o -b "$openai_api_base" -k "$openai_api_key" " +bash scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -c -o -b "$openai_api_base" -k "$openai_api_key" +set +x +if [ $? -ne 0 ]; then + echo "Failed to run the script for OpenAI API." + exit 1 +fi +echo "Script for OpenAI API executed successfully." + + +# 在脚本结束时记录时间 +echo "Script finished at $(date)." \ No newline at end of file diff --git a/scripts/run_for_GPU.sh b/scripts/run_for_GPU.sh new file mode 100755 index 00000000..64dafc26 --- /dev/null +++ b/scripts/run_for_GPU.sh @@ -0,0 +1,310 @@ +#!/bin/bash +echo "Script started at $(date)." 
+chmod +x "$0" + +# 调用 Python 脚本并捕获输出 +IFS=',' # 设置字段分隔符为逗号 +openai_api_base_with_key=$(python config.py) + +# 使用 read 命令分割字符串 +read -r openai_api_base openai_api_key openai_api_model_name openai_api_context_length workers milvus_port qanything_port use_cpu <<< "$openai_api_base_with_key" +echo "openai_api_base" $openai_api_base +echo "openai_api_key" $openai_api_key +echo "openai_api_model_name" $openai_api_model_name +echo "openai_api_context_length" $openai_api_context_length +echo "workers" $workers +echo "milvus_port" $milvus_port +echo "qanything_port" $qanything_port +echo "use_cpu" $use_cpu + + +# 检查 Conda 是否安装,如果安装就执行一下逻辑,使用原有的conda进行安装 +if command -v conda >/dev/null 2>&1; then + echo "Conda is installed." + # 检查 Conda 是否为最新版本 + echo "Checking for Conda updates..." + if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then + echo "An update is available for Conda." + echo "Would you like to update it? (y/n)" + read -r user_response + + if [[ "$user_response" =~ ^[Yy] ]]; then + echo "Updating Conda..." + conda update -n base -c defaults conda -y + echo "Conda has been updated." + else + echo "Skipping Conda update." + fi + else + echo "Conda is already up to date." + fi + + + # 检查特定 Conda 环境是否存在 + ENV_NAME="qanything-python" + if conda info --envs | grep -q "$ENV_NAME"; then + echo "Conda environment '$ENV_NAME' already exists." + else + echo "Conda environment '$ENV_NAME' does not exist. Proceeding with installation." + conda create -n "$ENV_NAME" python=3.10 + if [ $? -eq 0 ]; then + echo "Conda environment '$ENV_NAME' created successfully." + else + echo "Failed to create Conda environment '$ENV_NAME'." + exit 1 + fi + fi + # 激活 Conda 环境 + echo "Activating conda environment '$ENV_NAME'..." 
+ ENV_NAME="qanything-python" + CONDA_INSTALL_PATH="$(conda info --base)" + echo $CONDA_INSTALL_PATH + chmod +x $CONDA_INSTALL_PATH + # 使用从 Conda 获取的路径来激活 Conda 环境 + source "$CONDA_INSTALL_PATH/bin/activate" "$ENV_NAME" + if [ $? -ne 0 ]; then + echo "Failed to activate conda environment '$ENV_NAME'." + exit 1 + fi + echo "Conda environment '$ENV_NAME' activated." + + # 使用 pip 从 requirements.txt 安装依赖 + set -x + set -e + pip install -r requirements.txt + if [ $? -ne 0 ]; then + echo "Failed to install dependencies from requirements.txt." + exit 1 + fi + # 判断操作系统 + if [ "$(uname)" = "Linux" ]; then + S="LinuxOrWSL" + else + S="M1mac" + fi + use_openai_api_option="true" + set -x + echo "启动命令是scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -o -b "$openai_api_base" -k "$openai_api_key" " + bash scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -o -b "$openai_api_base" -k "$openai_api_key" + set +x + set +e + if [ $? -ne 0 ]; then + echo "Failed to run the script for OpenAI API." + exit 1 + fi + echo "Script for OpenAI API executed successfully." + # 在脚本结束时记录时间 + echo "Script finished at $(date)." +else + echo "Conda is not installed." 
+fi + + +# 以下代码为用户未安装conda时执行的逻辑,该命令会在项目目录下安装一个conda并使用,该conda仅会在该脚本执行时间使用,不会填加到环境变量 +# 检查当前使用的 shell +SHELL_NAME=$(basename "$SHELL") + +# 设置对应的配置文件路径 +case "$SHELL_NAME" in + bash) + CONFIG_FILE="~/.bashrc" + ;; + zsh) + CONFIG_FILE="~/.zshrc" + ;; + *) + CONFIG_FILE="~/.profile" # 对于其他 shell,使用 .profile + ;; +esac + +echo "Config file for current shell is: $CONFIG_FILE" + +# 获取当前脚本运行的目录 +CURRENT_DIR=$(pwd) + +# 定义 Conda 安装路径为当前目录下的 anaconda3 文件夹 +CONDA_INSTALL_PATH="$CURRENT_DIR/anaconda3/bin" + +# 将 Conda 的 bin 目录添加到 PATH 环境变量中 +export PATH="$CONDA_INSTALL_PATH:$PATH" +#bash -c "source $CONFIG_FILE" + +# 定义 Anaconda 安装程序的 URL,根据不同操作系统设置 +if [ "$(uname)" = "Linux" ]; then + INSTALLER_URL="https://repo.anaconda.com/archive/Anaconda3-2024.02-1-Linux-x86_64.sh" + INSTALL_PATH="$(pwd)/anaconda3" +else + # 这里假设如果不是 Linux,则操作系统为 macOS + INSTALLER_URL="https://repo.anaconda.com/archive/Anaconda3-2024.02-1-MacOSX-x86_64.sh" + INSTALL_PATH="$(pwd)/anaconda3" +fi + + +# 赋予执行权限 +chmod +x "anaconda_installer.sh" + +bash -c "source $CONFIG_FILE" +# 检测是否已安装 Conda 并询问用户是否更新 +if command -v conda &> /dev/null; then + echo "Local Conda is already installed." + # 检查 Conda 是否为最新版本 + echo "Checking for Conda updates..." + if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then + echo "An update is available for Conda." + echo "Would you like to update it? (y/n)" + read -r user_response + + if [[ "$user_response" =~ ^[Yy] ]]; then + echo "Updating Conda..." + conda update -n base -c defaults conda -y + echo "Conda has been updated." + else + echo "Skipping Conda update." + fi + else + echo "Conda is already up to date." + fi +else + echo "Conda is not installed. Proceeding with a fresh installation." + # 下载 Anaconda 安装程序 + echo "Downloading Anaconda installer for $OS_TYPE..." + wget "$INSTALLER_URL" -O "anaconda_installer.sh" + + # 提示用户输入以确认安装 + read -p "Do you want to install Anaconda in $INSTALL_PATH? 
(y/n) " response + case "$response" in + [yY][eE][sS]|[yY]) + echo "Installing Anaconda..." + chmod +x "anaconda_installer.sh" + # 运行安装脚本 + yes | ./anaconda_installer.sh -b -p "$INSTALL_PATH" + ;; + *) + echo "Installation aborted." + exit 1 + ;; + esac + + # 检查安装是否成功 + if conda --version &> /dev/null; then + echo "Anaconda installation successful." + else + echo "Anaconda installation failed." + exit 1 + fi + + # 仅在脚本运行期间设置局部环境变量,不修改全局环境变量 + echo "Setting up local environment for Anaconda access." + export PATH="$INSTALL_PATH/bin:$PATH" + + # 可以使用 conda -V 来验证 Conda 是否可用 + echo "Verifying Conda installation with conda -V..." + conda -V + + # 获取当前脚本运行的目录,即项目路径 + PROJECT_DIR=$(pwd) + + # 定义 Conda 安装路径 +# CONDA_INSTALL_PATH="$PROJECT_DIR/anaconda3" + + # 定义 Conda 环境目录,使用项目路径下的 .conda/envs + CONDA_ENVS_DIR="$PROJECT_DIR/.conda/envs" + + # 确保 Conda 环境目录存在 + if [ ! -d "$CONDA_ENVS_DIR" ]; then + echo "Conda envs directory does not exist. Creating now at $CONDA_ENVS_DIR." + mkdir -p "$CONDA_ENVS_DIR" + if [ $? -ne 0 ]; then + echo "Failed to create Conda envs directory." + exit 1 + fi + fi + + # 确保当前用户有写权限 + if [ ! -w "$CONDA_ENVS_DIR" ]; then + echo "No write permission for the Conda envs directory. Attempting to fix permissions." + chmod u+w "$CONDA_ENVS_DIR" + if [ $? -ne 0 ]; then + echo "Failed to modify permissions for Conda envs directory." + exit 1 + fi + fi +# 脚本结束时,局部环境变量 PATH 的更改将不再影响后续命令 +fi + + +# 使用项目路径下的 Conda 安装来检查或创建环境 +ENV_NAME="qanything-python" + +# 检查环境是否存在 +if conda info --envs | grep -q "$ENV_NAME"; then + echo "Conda environment '$ENV_NAME' already exists." +else + echo "Creating conda environment '$ENV_NAME' with Python 3.10..." + "$CONDA_INSTALL_PATH/conda" create -n "$ENV_NAME" python=3.10 + if [ $? -ne 0 ]; then + echo "Failed to create conda environment '$ENV_NAME'." + exit 1 + fi + echo "Conda environment '$ENV_NAME' created successfully." +fi + +# 激活 Conda 环境 +echo "Activating conda environment '$ENV_NAME'..." 
+source "$CONDA_INSTALL_PATH/activate" "$ENV_NAME" +#conda activate qanything-python +echo "Conda environment created successfully." + +CONDA_INSTALL_PATH_2="$CURRENT_DIR/anaconda3" +echo "$CONDA_PREFIX" +echo "CONDA_INSTALL_PATH_2/envs/qanything-python" +# 检查激活是否成功 +if [ "$CONDA_PREFIX" != "$CONDA_INSTALL_PATH_2/envs/qanything-python" ]; then + echo "Failed to activate conda environment." + exit 1 +fi + + +# 安装 requirements.txt 中的依赖 +echo "Installing dependencies from requirements.txt..." +set -e # 使脚本在遇到错误时立即退出 +set -x # 打印出执行的每一条命令 + +conda install pip +pip install -r requirements.txt + +conda env list + +if [ $? -ne 0 ]; then + echo "Failed to install dependencies. Check the error messages above for details." + exit 1 +fi + +set +x # 关闭命令打印 +set +e # 关闭立即退出 + +echo "Dependencies installed successfully." + + +# 使用 Conda 环境运行脚本 +echo "Executing the script for OpenAI API in the 'qanything-python' environment." +# 判断操作系统 +if [ "$(uname)" = "Linux" ]; then + S="LinuxOrWSL" +else + S="M1mac" +fi +use_openai_api_option="true" +set -x +echo "启动命令是scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -o -b "$openai_api_base" -k "$openai_api_key" " +bash scripts/base_run.sh -s "$S" -w "$workers" -m "$milvus_port" -q "$qanything_port" -o -b "$openai_api_base" -k "$openai_api_key" +set +x +if [ $? -ne 0 ]; then + echo "Failed to run the script for OpenAI API." + exit 1 +fi +echo "Script for OpenAI API executed successfully." + + +# 在脚本结束时记录时间 +echo "Script finished at $(date)." 
\ No newline at end of file From e7837ed0793ee9385a92417be9bfbb1588d780f3 Mon Sep 17 00:00:00 2001 From: ikun-moxiaofei <1091909200@qq.com> Date: Fri, 21 Jun 2024 17:01:46 +0800 Subject: [PATCH 2/8] fix: Use the .format() method to format the string, ensuring that the functions in config.py are compatible with Python 2, while optimizing some Chinese log output --- config.py | 6 +++++- scripts/run_for_CPU.sh | 25 ++++++++++++++----------- scripts/run_for_GPU.sh | 25 ++++++++++++++----------- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/config.py b/config.py index 4662799b..9e075cf6 100644 --- a/config.py +++ b/config.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +from __future__ import print_function # 确保 print 函数在 Python 2 中的行为与 Python 3 一致 + def get_run_config_params(): openai_api_base = "https://api.openai.com/v1" openai_api_key = "sk-xxxxxxx" @@ -8,7 +10,9 @@ def get_run_config_params(): milvus_port = 19530 qanything_port = 8777 use_cpu = True - return f"{openai_api_base},{openai_api_key},{openai_api_model_name},{openai_api_context_length},{workers},{milvus_port},{qanything_port},{use_cpu}" + # 使用 .format() 方法格式化字符串,以兼容 Python 2 + return "{},{},{},{},{},{},{}".format(openai_api_base, openai_api_key, openai_api_model_name, + openai_api_context_length, workers, milvus_port, qanything_port, use_cpu) # 模型参数 llm_config = { diff --git a/scripts/run_for_CPU.sh b/scripts/run_for_CPU.sh index e3e9c199..3d8376a6 100755 --- a/scripts/run_for_CPU.sh +++ b/scripts/run_for_CPU.sh @@ -8,14 +8,14 @@ openai_api_base_with_key=$(python config.py) # 使用 read 命令分割字符串 read -r openai_api_base openai_api_key openai_api_model_name openai_api_context_length workers milvus_port qanything_port use_cpu <<< "$openai_api_base_with_key" -echo "openai_api_base" $openai_api_base -echo "openai_api_key" $openai_api_key -echo "openai_api_model_name" $openai_api_model_name -echo "openai_api_context_length" $openai_api_context_length -echo "workers" $workers -echo "milvus_port" 
$milvus_port -echo "qanything_port" $qanything_port -echo "use_cpu" $use_cpu +echo "openai_api_base: " $openai_api_base +echo "openai_api_key: " $openai_api_key +echo "openai_api_model_name: " $openai_api_model_name +echo "openai_api_context_length: " $openai_api_context_length +echo "workers: " $workers +echo "milvus_port: " $milvus_port +echo "qanything_port: " $qanything_port +echo "use_cpu: " $use_cpu # 检查 Conda 是否安装,如果安装就执行一下逻辑,使用原有的conda进行安装 @@ -25,7 +25,7 @@ if command -v conda >/dev/null 2>&1; then echo "Checking for Conda updates..." if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then echo "An update is available for Conda." - echo "Would you like to update it? (y/n)" + echo "是否更新 Codna 为新版本 (y/n)" read -r user_response if [[ "$user_response" =~ ^[Yy] ]]; then @@ -51,6 +51,7 @@ if command -v conda >/dev/null 2>&1; then echo "Conda environment '$ENV_NAME' created successfully." else echo "Failed to create Conda environment '$ENV_NAME'." + echo "创建conda环境失败 '$ENV_NAME'." exit 1 fi fi @@ -64,6 +65,7 @@ if command -v conda >/dev/null 2>&1; then source "$CONDA_INSTALL_PATH/bin/activate" "$ENV_NAME" if [ $? -ne 0 ]; then echo "Failed to activate conda environment '$ENV_NAME'." + echo "激活conda环境失败 '$ENV_NAME'." exit 1 fi echo "Conda environment '$ENV_NAME' activated." @@ -151,7 +153,7 @@ if command -v conda &> /dev/null; then echo "Checking for Conda updates..." if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then echo "An update is available for Conda." - echo "Would you like to update it? (y/n)" + echo "是否更新 Codna 为新版本 (y/n)" read -r user_response if [[ "$user_response" =~ ^[Yy] ]]; then @@ -167,7 +169,7 @@ if command -v conda &> /dev/null; then else echo "Conda is not installed. Proceeding with a fresh installation." # 下载 Anaconda 安装程序 - echo "Downloading Anaconda installer for $OS_TYPE..." + echo "下载Anaconda安装程序到 $OS_TYPE..." 
wget "$INSTALLER_URL" -O "anaconda_installer.sh" # 提示用户输入以确认安装 @@ -194,6 +196,7 @@ else fi # 仅在脚本运行期间设置局部环境变量,不修改全局环境变量 + echo "仅在脚本运行期间为conda设置局部环境变量,不修改全局环境变量" echo "Setting up local environment for Anaconda access." export PATH="$INSTALL_PATH/bin:$PATH" diff --git a/scripts/run_for_GPU.sh b/scripts/run_for_GPU.sh index 64dafc26..d890e2f9 100755 --- a/scripts/run_for_GPU.sh +++ b/scripts/run_for_GPU.sh @@ -8,14 +8,14 @@ openai_api_base_with_key=$(python config.py) # 使用 read 命令分割字符串 read -r openai_api_base openai_api_key openai_api_model_name openai_api_context_length workers milvus_port qanything_port use_cpu <<< "$openai_api_base_with_key" -echo "openai_api_base" $openai_api_base -echo "openai_api_key" $openai_api_key -echo "openai_api_model_name" $openai_api_model_name -echo "openai_api_context_length" $openai_api_context_length -echo "workers" $workers -echo "milvus_port" $milvus_port -echo "qanything_port" $qanything_port -echo "use_cpu" $use_cpu +echo "openai_api_base: " $openai_api_base +echo "openai_api_key: " $openai_api_key +echo "openai_api_model_name: " $openai_api_model_name +echo "openai_api_context_length: " $openai_api_context_length +echo "workers: " $workers +echo "milvus_port: " $milvus_port +echo "qanything_port: " $qanything_port +echo "use_cpu: " $use_cpu # 检查 Conda 是否安装,如果安装就执行一下逻辑,使用原有的conda进行安装 @@ -25,7 +25,7 @@ if command -v conda >/dev/null 2>&1; then echo "Checking for Conda updates..." if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then echo "An update is available for Conda." - echo "Would you like to update it? (y/n)" + echo "是否更新 Codna 为新版本 (y/n)" read -r user_response if [[ "$user_response" =~ ^[Yy] ]]; then @@ -51,6 +51,7 @@ if command -v conda >/dev/null 2>&1; then echo "Conda environment '$ENV_NAME' created successfully." else echo "Failed to create Conda environment '$ENV_NAME'." + echo "创建conda环境失败 '$ENV_NAME'." 
exit 1 fi fi @@ -64,6 +65,7 @@ if command -v conda >/dev/null 2>&1; then source "$CONDA_INSTALL_PATH/bin/activate" "$ENV_NAME" if [ $? -ne 0 ]; then echo "Failed to activate conda environment '$ENV_NAME'." + echo "激活conda环境失败 '$ENV_NAME'." exit 1 fi echo "Conda environment '$ENV_NAME' activated." @@ -151,7 +153,7 @@ if command -v conda &> /dev/null; then echo "Checking for Conda updates..." if conda update --no-deps --dry-run -n base -c defaults conda | grep -q "will be updated"; then echo "An update is available for Conda." - echo "Would you like to update it? (y/n)" + echo "是否更新 Codna 为新版本 (y/n)" read -r user_response if [[ "$user_response" =~ ^[Yy] ]]; then @@ -167,7 +169,7 @@ if command -v conda &> /dev/null; then else echo "Conda is not installed. Proceeding with a fresh installation." # 下载 Anaconda 安装程序 - echo "Downloading Anaconda installer for $OS_TYPE..." + echo "下载Anaconda安装程序到 $OS_TYPE..." wget "$INSTALLER_URL" -O "anaconda_installer.sh" # 提示用户输入以确认安装 @@ -194,6 +196,7 @@ else fi # 仅在脚本运行期间设置局部环境变量,不修改全局环境变量 + echo "仅在脚本运行期间为conda设置局部环境变量,不修改全局环境变量" echo "Setting up local environment for Anaconda access." 
export PATH="$INSTALL_PATH/bin:$PATH" From 64a07f0e2cc76eabee7c17fdd1d4b0644949379c Mon Sep 17 00:00:00 2001 From: ikun-moxiaofei <1091909200@qq.com> Date: Mon, 24 Jun 2024 11:42:48 +0800 Subject: [PATCH 3/8] fix: Replace the len() function used to determine the file length when splitting the file twice with num_tokens --- qanything_kernel/core/local_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qanything_kernel/core/local_file.py b/qanything_kernel/core/local_file.py index c8f7b1f1..6b504ed0 100644 --- a/qanything_kernel/core/local_file.py +++ b/qanything_kernel/core/local_file.py @@ -224,7 +224,7 @@ def split_file_to_docs(self, ocr_engine: Callable, sentence_size=SENTENCE_SIZE, new_docs.append(doc) else: last_doc = new_docs[-1] - if len(last_doc.page_content) + len(doc.page_content) < min_length: + if num_tokens(last_doc.page_content) + num_tokens(doc.page_content) < min_length: last_doc.page_content += '\n' + doc.page_content else: new_docs.append(doc) From dcf36f98c2fc0fe463178c76eb0aac58a7acdb2b Mon Sep 17 00:00:00 2001 From: ikun-moxiaofei <1091909200@qq.com> Date: Mon, 24 Jun 2024 16:38:52 +0800 Subject: [PATCH 4/8] fix: Optimize the document segmentation function of online search, and also optimize the information display of failed document uploads. 
--- .../connector/database/mysql/mysql_client.py | 23 +++++++++++--- qanything_kernel/core/local_doc_qa.py | 31 ++++++++++++++++--- qanything_kernel/qanything_server/handler.py | 3 +- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/qanything_kernel/connector/database/mysql/mysql_client.py b/qanything_kernel/connector/database/mysql/mysql_client.py index e47b58a2..0adf355f 100644 --- a/qanything_kernel/connector/database/mysql/mysql_client.py +++ b/qanything_kernel/connector/database/mysql/mysql_client.py @@ -70,6 +70,21 @@ def create_tables_(self): """ self.execute_query_(query, (), commit=True) + # 旧的File不存在reason,补上默认值:' ' + # 如果存在File表,但是没有reason字段,那么添加reason字段 + query = "PRAGMA table_info(File)" + result = self.execute_query_(query, (), fetch=True) + if result: + reason_exist = False + for column_info in result: + if column_info[1] == 'reason': + reason_exist = True + break + if not reason_exist: + query = "ALTER TABLE File ADD COLUMN reason VARCHAR(512) DEFAULT ' '" + self.execute_query_(query, (), commit=True) + + query = """ CREATE TABLE IF NOT EXISTS Document ( docstore_id VARCHAR(64) PRIMARY KEY, @@ -424,9 +439,9 @@ def update_chunk_size(self, file_id, chunk_size): query = "UPDATE File SET chunk_size = ? WHERE file_id = ?" self.execute_query_(query, (chunk_size, file_id), commit=True) - def update_file_status(self, file_id, status): - query = "UPDATE File SET status = ? WHERE file_id = ?" - self.execute_query_(query, (status, file_id), commit=True) + def update_file_status(self, file_id, status, reason): + query = "UPDATE File SET status = ?, reason = ? WHERE file_id = ?" 
+ self.execute_query_(query, (status, reason, file_id), commit=True) def from_status_to_status(self, file_ids, from_status, to_status): file_ids_str = ','.join("'{}'".format(str(x)) for x in file_ids) @@ -436,7 +451,7 @@ def from_status_to_status(self, file_ids, from_status, to_status): # [文件] 获取指定知识库下面所有文件的id和名称 def get_files(self, user_id, kb_id): - query = "SELECT file_id, file_name, status, file_size, content_length, timestamp FROM File WHERE kb_id = ? AND kb_id IN (SELECT kb_id FROM KnowledgeBase WHERE user_id = ?) AND deleted = 0" + query = "SELECT file_id, file_name, status, file_size, content_length, timestamp, reason FROM File WHERE kb_id = ? AND kb_id IN (SELECT kb_id FROM KnowledgeBase WHERE user_id = ?) AND deleted = 0" return self.execute_query_(query, (kb_id, user_id), fetch=True) def get_file_path(self, file_id): diff --git a/qanything_kernel/core/local_doc_qa.py b/qanything_kernel/core/local_doc_qa.py index f36313a4..4b2d44fe 100644 --- a/qanything_kernel/core/local_doc_qa.py +++ b/qanything_kernel/core/local_doc_qa.py @@ -1,3 +1,5 @@ +from langchain.text_splitter import RecursiveCharacterTextSplitter + from qanything_kernel.configs.model_config import VECTOR_SEARCH_TOP_K, CHUNK_SIZE, VECTOR_SEARCH_SCORE_THRESHOLD, \ PROMPT_TEMPLATE, STREAMING, OCR_MODEL_PATH from typing import List @@ -11,13 +13,14 @@ from qanything_kernel.utils.custom_log import debug_logger, qa_logger from qanything_kernel.core.tools.web_search_tool import duckduckgo_search from qanything_kernel.dependent_server.ocr_server.ocr import OCRQAnything -from qanything_kernel.utils.general_utils import num_tokens +from qanything_kernel.utils.general_utils import num_tokens, get_time from .local_file import LocalFile import traceback import base64 import numpy as np import platform import cv2 +import re class LocalDocQA: @@ -34,6 +37,12 @@ def __init__(self): self.mode: str = None self.use_cpu: bool = True self.model: str = None + self.web_splitter = RecursiveCharacterTextSplitter( + 
separators=["\n\n", "\n", "。", "!", "!", "?", "?", ";", ";", "……", "…", "、", ",", ",", " ", ""], + chunk_size=800, + chunk_overlap=200, + length_function=num_tokens, + ) def get_ocr_result(self, input: dict): img_file = input['img64'] @@ -94,9 +103,13 @@ async def insert_files_to_faiss(self, user_id, kb_id, local_files: List[LocalFil except Exception as e: error_info = f'split error: {traceback.format_exc()}' debug_logger.error(error_info) - self.mysql_client.update_file_status(local_file.file_id, status='red') + self.mysql_client.update_file_status(local_file.file_id, status='red', reason='split或embedding失败,请检查文件类型,仅支持[md,txt,pdf,jpg,png,jpeg,docx,xlsx,pptx,eml,csv]') failed_list.append(local_file) continue + if len(local_file.docs) == 0: + self.mysql_client.update_file_status(local_file.file_id, status='red', reason='上传文件内容为空,请检查文件内容') + debug_logger.info(f'上传文件内容为空,请检查文件内容') + continue end = time.time() self.mysql_client.update_content_length(local_file.file_id, content_length) debug_logger.info(f'split time: {end - start} {len(local_file.docs)}') @@ -104,7 +117,7 @@ async def insert_files_to_faiss(self, user_id, kb_id, local_files: List[LocalFil add_ids = await self.faiss_client.add_document(local_file.docs) insert_time = time.time() debug_logger.info(f'insert time: {insert_time - end}') - self.mysql_client.update_file_status(local_file.file_id, status='green') + self.mysql_client.update_file_status(local_file.file_id, status='green', reason=" ") success_list.append(local_file) debug_logger.info( f"insert_to_faiss: success num: {len(success_list)}, failed num: {len(failed_list)}") @@ -125,6 +138,7 @@ async def local_doc_search(self, query, kb_ids): debug_logger.info(f"local doc search retrieval_documents: {retrieval_documents}") return retrieval_documents + @get_time def get_web_search(self, queries, top_k=None): if not top_k: top_k = self.top_k @@ -132,12 +146,19 @@ def get_web_search(self, queries, top_k=None): web_content, web_documents = 
duckduckgo_search(query) source_documents = [] for doc in web_documents: + debug_logger.info(doc) doc.metadata['retrieval_query'] = query # 添加查询到文档的元数据中 + file_name = re.sub(r'[\uFF01-\uFF5E\u3000-\u303F]', '', doc.metadata['title']) + doc.metadata['file_name'] = file_name + '.web' + doc.metadata['file_url'] = doc.metadata['source'] + doc.metadata['embed_version'] = self.embeddings.embed_version source_documents.append(doc) + if 'description' in doc.metadata: + desc_doc = Document(page_content=doc.metadata['description'], metadata=doc.metadata) + source_documents.append(desc_doc) + source_documents = self.web_splitter.split_documents(source_documents) return web_content, source_documents - - def web_page_search(self, query, top_k=None): # 防止get_web_search调用失败,需要try catch try: diff --git a/qanything_kernel/qanything_server/handler.py b/qanything_kernel/qanything_server/handler.py index 82ed2b43..b1f85382 100644 --- a/qanything_kernel/qanything_server/handler.py +++ b/qanything_kernel/qanything_server/handler.py @@ -191,12 +191,13 @@ async def list_docs(req: request): 'yellow': "faiss插入失败,请稍后再试", 'green': "上传成功"} for file_info in file_infos: status = file_info[2] + print(file_info) if status not in status_count: status_count[status] = 1 else: status_count[status] += 1 data.append({"file_id": file_info[0], "file_name": file_info[1], "status": file_info[2], "bytes": file_info[3], - "content_length": file_info[4], "timestamp": file_info[5], "msg": msg_map[file_info[2]]}) + "content_length": file_info[4], "timestamp": file_info[5], "msg": file_info[6]}) file_name = file_info[1] file_id = file_info[0] if file_name.endswith('.faq'): From 7853012cb3f9dd3205911e11f26bccf47c007e4f Mon Sep 17 00:00:00 2001 From: xixihahaliu <141105427+xixihahaliu@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:25:18 +0800 Subject: [PATCH 5/8] Update local_doc_qa.py Delete too long logs. 
--- qanything_kernel/core/local_doc_qa.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qanything_kernel/core/local_doc_qa.py b/qanything_kernel/core/local_doc_qa.py index 4b2d44fe..d7d1ed25 100644 --- a/qanything_kernel/core/local_doc_qa.py +++ b/qanything_kernel/core/local_doc_qa.py @@ -146,7 +146,6 @@ def get_web_search(self, queries, top_k=None): web_content, web_documents = duckduckgo_search(query) source_documents = [] for doc in web_documents: - debug_logger.info(doc) doc.metadata['retrieval_query'] = query # 添加查询到文档的元数据中 file_name = re.sub(r'[\uFF01-\uFF5E\u3000-\u303F]', '', doc.metadata['title']) doc.metadata['file_name'] = file_name + '.web' From 5fd7bc3299f71976d73e3fbbdad2fcea23645744 Mon Sep 17 00:00:00 2001 From: xixihahaliu <141105427+xixihahaliu@users.noreply.github.com> Date: Mon, 24 Jun 2024 17:27:31 +0800 Subject: [PATCH 6/8] Update handler.py Delete print log --- qanything_kernel/qanything_server/handler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qanything_kernel/qanything_server/handler.py b/qanything_kernel/qanything_server/handler.py index b1f85382..e6705792 100644 --- a/qanything_kernel/qanything_server/handler.py +++ b/qanything_kernel/qanything_server/handler.py @@ -191,7 +191,6 @@ async def list_docs(req: request): 'yellow': "faiss插入失败,请稍后再试", 'green': "上传成功"} for file_info in file_infos: status = file_info[2] - print(file_info) if status not in status_count: status_count[status] = 1 else: From 1fb1c7890459266671560994e850c9cc1485cdb7 Mon Sep 17 00:00:00 2001 From: xixihahaliu Date: Tue, 25 Jun 2024 18:23:25 +0800 Subject: [PATCH 7/8] fix: Parameter usage error. 
--- qanything_kernel/core/local_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qanything_kernel/core/local_file.py b/qanything_kernel/core/local_file.py index 09025107..d973a1f7 100644 --- a/qanything_kernel/core/local_file.py +++ b/qanything_kernel/core/local_file.py @@ -170,7 +170,7 @@ def split_file_to_docs(self, ocr_engine: Callable, sentence_size=SENTENCE_SIZE, else: try: from qanything_kernel.utils.loader.self_pdf_loader import PdfLoader - loader = PdfLoader(filename=self.file_path, root_dir=os.path.dirname(self.file_path)) + loader = PdfLoader(filename=self.file_path, save_dir=os.path.dirname(self.file_path)) markdown_dir = loader.load_to_markdown() docs = convert_markdown_to_langchaindoc(markdown_dir) docs = self.pdf_process(docs) From e9247c894cf9ed02612a8e897ee2fb35f78245cf Mon Sep 17 00:00:00 2001 From: xixihahaliu Date: Thu, 11 Jul 2024 11:00:48 +0800 Subject: [PATCH 8/8] refactor: Optimize log information, correct variable name errors. --- .../connector/embedding/embedding_onnx_backend.py | 1 + qanything_kernel/core/local_file.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/qanything_kernel/connector/embedding/embedding_onnx_backend.py b/qanything_kernel/connector/embedding/embedding_onnx_backend.py index 7eae6b5b..3aa4cf8d 100644 --- a/qanything_kernel/connector/embedding/embedding_onnx_backend.py +++ b/qanything_kernel/connector/embedding/embedding_onnx_backend.py @@ -21,6 +21,7 @@ def __init__(self, use_cpu: bool = False): def get_embedding(self, sentences, max_length): inputs_onnx = self._tokenizer(sentences, padding=True, truncation=True, max_length=max_length, return_tensors=self.return_tensors) + debug_logger.info(f'embedding input shape: {inputs_onnx["input_ids"].shape}') inputs_onnx = {k: v for k, v in inputs_onnx.items()} start_time = time.time() outputs_onnx = self._session.run(output_names=['output'], input_feed=inputs_onnx) diff --git a/qanything_kernel/core/local_file.py 
b/qanything_kernel/core/local_file.py index d973a1f7..2efb89b1 100644 --- a/qanything_kernel/core/local_file.py +++ b/qanything_kernel/core/local_file.py @@ -122,9 +122,9 @@ def table_process(doc): return new_docs @staticmethod - def pdf_process(dos: List[Document]): + def pdf_process(docs: List[Document]): new_docs = [] - for doc in dos: + for doc in docs: # metadata={'title_lst': ['#樊昊天个人简历', '##教育经历'], 'has_table': False} title_lst = doc.metadata['title_lst'] # 删除所有仅有多个#的title