
Commit 20a9372

Merge pull request #102 from netease-youdao/develop_for_v1.2.0
Fix known issues, optimize user experience.
2 parents baa64fd + 462f009

File tree

7 files changed: +262 −110 lines changed


FAQ_zh.md

Lines changed: 9 additions & 0 deletions
@@ -61,3 +61,12 @@ We recommend to activate the WSL integration in Docker Desktop settings.
 
 ## Pulling the image fails when running run.sh: ⠼ error getting credentials - err: exit status 1, out: `error getting credentials - err: exit status 1, out: `A specified logon session does not exist. It may already have been terminated.``
 - Solution: try pulling the image manually, e.g. run `docker pull milvusdb/milvus:v2.3.4`, then rerun `bash run.sh`.
+
+
+## Running run.sh reports "Illegal instruction (core dumped)", or the OCR service fails to start
+- Cause: the CPU does not support the AVX instruction set, which PaddleOCR depends on, so the OCR service cannot start.
+- Solution: enter the container and run `cat /proc/cpuinfo | grep -i avx`. If there is no output, the CPU does not support AVX; follow the fix on the Paddle website and reinstall paddleocr inside the container: https://www.paddlepaddle.org.cn/en/install/quick?docurl=/documentation/docs/en/install/pip/linux-pip_en.html#old-version-anchor-5-Choose%20CPU%2FGPU
+```
+# If you want to install the Paddle package with avx and openblas, you can use the following command to download the wheel package to the local, and then use python3 -m pip install [name].whl to install locally ([name] is the name of the wheel package):
+python3 -m pip download paddlepaddle==2.6.0 -f https://www.paddlepaddle.org.cn/whl/linux/openblas/avx/stable.html --no-index --no-deps
+```
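The AVX check described in the FAQ entry above can be wrapped in a small script. The following is a minimal sketch, not code from the project; the function name and messages are illustrative:

```shell
#!/bin/sh
# Minimal sketch of the AVX check from the FAQ entry above.
# has_avx returns 0 if /proc/cpuinfo advertises the avx flag, non-zero otherwise.
has_avx() {
    grep -qi 'avx' /proc/cpuinfo 2>/dev/null
}

if has_avx; then
    echo "avx supported: the default paddlepaddle wheel should work"
else
    echo "avx not supported: install the openblas/noavx paddlepaddle build instead"
fi
```

Run inside the container before reinstalling paddlepaddle; the printed message depends on the host CPU.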

README.md

Lines changed: 8 additions & 0 deletions
@@ -41,6 +41,8 @@
 - [What is QAnything](#What-is-QAnything)
 - [Key features](#Key-features)
 - [Architecture](#Architecture)
+- [Before You Start](#Before-You-Start)
+- [Latest Updates](#-Latest-Updates)
 - [Getting Started](#getting-started)
 - [Prerequisites](#prerequisites)
 - [Installation](#installation)
@@ -137,6 +139,12 @@ If you need to use it for commercial purposes, please follow the license of Qwen
 * [🛠️ Only use our BCEmbedding(embedding & rerank)](https://github.com/netease-youdao/BCEmbedding)
 * [📖 FAQ](FAQ_zh.md)
 
+## 🚀 Latest Updates
+
+- ***2024-01-29***: **Support for custom large models, including OpenAI API and other open-source large models, with a minimum GPU requirement of GTX 1050Ti, greatly improving deployment, debugging, and user experience.** - See More👉 [v1.2.0](https://github.com/netease-youdao/QAnything/releases/tag/v1.2.0)
+- ***2024-01-23***: **Enable rerank by default and fix various issues when starting on Windows.** - See More👉 [v1.1.1](https://github.com/netease-youdao/QAnything/releases/tag/v1.1.1)
+- ***2024-01-18***: **Support one-click startup, support Windows deployment, improve PDF, XLSX, HTML parsing efficiency.** - See More👉 [v1.1.0](https://github.com/netease-youdao/QAnything/releases/tag/v1.1.0)
+
 ## Getting Started
 
 ### Prerequisites

README_zh.md

Lines changed: 8 additions & 0 deletions
@@ -36,6 +36,8 @@
 - [What is QAnything](#什么是QAnything)
 - [Key features](#特点)
 - [Architecture](#架构)
+- [Before You Start](#开始之前)
+- [Latest Updates](#-最近更新-)
 - [Getting Started](#开始)
 - [Prerequisites](#必要条件)
 - [Installation](#下载安装)
@@ -128,6 +130,12 @@ The retrieval component used by QAnything, [BCEmbedding](https://github.com/netease-youdao/BCEmbedding)
 * [🛠️ Only use BCEmbedding(embedding & rerank)](https://github.com/netease-youdao/BCEmbedding)
 * [📖 FAQ](FAQ_zh.md)
 
+## 🚀 Latest Updates
+
+- ***2024-01-29***: **Support custom large models, including the OpenAI API and other open-source large models; the minimum GPU requirement drops to GTX 1050Ti, greatly improving the deployment and debugging experience.** - See More👉 [v1.2.0](https://github.com/netease-youdao/QAnything/releases/tag/v1.2.0)
+- ***2024-01-23***: **Enable rerank by default and fix various issues when starting on Windows.** - See More👉 [v1.1.1](https://github.com/netease-youdao/QAnything/releases/tag/v1.1.1)
+- ***2024-01-18***: **Support one-click startup and Windows deployment; improve PDF, XLSX, and HTML parsing.** - See More👉 [v1.1.0](https://github.com/netease-youdao/QAnything/releases/tag/v1.1.0)
+
 ## Getting Started
 
 ### Prerequisites

qanything_kernel/connector/llm/llm_for_openai_api.py

Lines changed: 1 addition & 1 deletion
@@ -151,7 +151,7 @@ def _call(self, prompt: str, history: List[List[str]], streaming: bool=False) ->
             top_p=self.top_p,
             stop=[self.stop_words] if self.stop_words is not None else None,
         )
-
+        logging.info(f"OPENAI RES: {response}")
         for event in response:
             if not isinstance(event, dict):
                 event = event.model_dump()
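The loop in this hunk tolerates both plain dict events and pydantic-style response chunks from the OpenAI client. A standalone sketch of that normalization pattern follows; `FakeChunk` and `normalize_events` are hypothetical stand-ins for illustration, not names from the project:

```python
# Sketch of the event-normalization pattern shown in the hunk above:
# streamed responses may yield plain dicts or objects exposing
# model_dump() (pydantic v2 style), so each event is coerced to a
# dict before its fields are accessed.

class FakeChunk:
    """Hypothetical stand-in for an OpenAI streaming chunk object."""

    def __init__(self, data):
        self._data = data

    def model_dump(self):
        # pydantic v2-style serialization to a plain dict
        return dict(self._data)


def normalize_events(events):
    normalized = []
    for event in events:
        if not isinstance(event, dict):
            event = event.model_dump()
        normalized.append(event)
    return normalized


events = [{"choices": []}, FakeChunk({"choices": [{"delta": {"content": "hi"}}]})]
print(normalize_events(events))
```

After normalization every element is a dict, so downstream code can index `event["choices"]` uniformly regardless of the client version's return type.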

run.sh

Lines changed: 64 additions & 18 deletions
@@ -51,6 +51,40 @@ Note: You can choose the most suitable Service Startup Command based on your own
     exit 1
 }
 
+# Check whether the master branch has new commits
+# Define colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Print a prominent notice
+print_important_notice() {
+    echo -e "${YELLOW}====================================================${NC}"
+    echo -e "${YELLOW}******************** IMPORTANT ********************${NC}"
+    echo -e "${YELLOW}====================================================${NC}"
+    echo
+    echo -e "${RED}New commits detected on the master branch; please run git pull to sync the latest code.${NC}"
+    echo
+    sleep 5
+}
+
+# Fetch the latest remote repository state
+git fetch origin master
+
+# Latest commit on the local master branch
+LOCAL=$(git rev-parse master)
+# Latest commit on the remote master branch
+REMOTE=$(git rev-parse origin/master)
+
+if [ "$LOCAL" != "$REMOTE" ]; then
+    # Local branch differs from the remote branch; an update is needed
+    print_important_notice
+else
+    echo -e "${GREEN}The local master branch is up to date; no update needed.${NC}"
+fi
+
 llm_api="local"
 device_id="0"
 runtime_backend="default"
@@ -109,8 +143,6 @@ if ! [[ $gpu_id1 =~ ^[0-9]+$ ]] || ! [[ $gpu_id2 =~ ^[0-9]+$ ]]; then
     exit 1
 fi
 
-# echo "GPUID1=${gpu_id1}" >> .env
-# echo "GPUID2=${gpu_id2}" >> .env
 update_or_append_to_env "GPUID1" "$gpu_id1"
 update_or_append_to_env "GPUID2" "$gpu_id2"

@@ -142,7 +174,6 @@ else
     OCR_USE_GPU="False"
 fi
 echo "OCR_USE_GPU=$OCR_USE_GPU because $compute_capability >= 7.5"
-# echo "OCR_USE_GPU=$OCR_USE_GPU" >> .env
 update_or_append_to_env "OCR_USE_GPU" "$OCR_USE_GPU"
 
 # Use nvidia-smi to get the GPU memory size (in MiB)
@@ -154,7 +185,28 @@ echo "******************** IMPORTANT ********************"
 echo "===================================================="
 echo ""
 
-if [ "$GPU1_MEMORY_SIZE" -lt 8000 ]; then # less than 8GB of VRAM
+# Default backend and model_size_num is not 0
+if [ "$runtime_backend" = "default" ] && [ "$model_size_num" -ne 0 ]; then
+    if [ -z "$gpu_series" ]; then # not an Nvidia 30- or 40-series GPU
+        echo "Your GPU model $gpu_model needs an Nvidia RTX 30- or 40-series card for the default FasterTransformer backend; the backend will be switched automatically:"
+        # With at least 24GB of VRAM and compute capability >= 8.6, the vllm backend can be used
+        if [ "$GPU1_MEMORY_SIZE" -ge 24000 ] && [ $(echo "$compute_capability >= 8.6" | bc) -eq 1 ]; then
+            echo "Based on the matching rules, automatically switched to the vllm backend (recommended)"
+            runtime_backend="vllm"
+        else
+            # Automatically switch to the huggingface backend
+            echo "Based on the matching rules, automatically switched to the huggingface backend"
+            runtime_backend="hf"
+        fi
+    fi
+fi
+
+if [ "$GPU1_MEMORY_SIZE" -lt 4000 ]; then # less than 4GB of VRAM
+    echo "Your current VRAM is $GPU1_MEMORY_SIZE MiB, which is not enough to deploy this project; consider upgrading to a GTX 1050Ti or better."
+    exit 1
+elif [ "$model_size_num" -eq 0 ]; then # model size is 0B, meaning the OpenAI API is used and 4GB of VRAM is enough
+    echo "Your current VRAM is $GPU1_MEMORY_SIZE MiB; you can use the online OpenAI API"
+elif [ "$GPU1_MEMORY_SIZE" -lt 8000 ]; then # less than 8GB of VRAM
     # With less than 8GB of VRAM, only the online OpenAI API is recommended
     echo "Your current VRAM is $GPU1_MEMORY_SIZE MiB; only the online OpenAI API is recommended"
     if [ "$model_size_num" -gt 0 ]; then # model size greater than 0B
@@ -229,7 +281,6 @@ elif [ "$GPU1_MEMORY_SIZE" -gt 25000 ]; then # more than 24GB of VRAM
     OFFCUT_TOKEN=0
 fi
 
-# echo "OFFCUT_TOKEN=$OFFCUT_TOKEN" >> .env
 update_or_append_to_env "OFFCUT_TOKEN" "$OFFCUT_TOKEN"
 
 if [ $llm_api = 'cloud' ]; then
@@ -287,15 +338,6 @@ echo "conv_template is set to [$conv_template]"
 echo "tensor_parallel is set to [$tensor_parallel]"
 echo "gpu_memory_utilization is set to [$gpu_memory_utilization]"
 
-# Write the environment variables to the .env file
-#echo "LLM_API=${llm_api}" >> .env
-#echo "DEVICE_ID=$device_id" >> .env
-#echo "RUNTIME_BACKEND=$runtime_backend" >> .env
-#echo "MODEL_NAME=$model_name" >> .env
-#echo "CONV_TEMPLATE=$conv_template" >> .env
-#echo "TP=$tensor_parallel" >> .env
-#echo "GPU_MEM_UTILI=$gpu_memory_utilization" >> .env
 update_or_append_to_env "LLM_API" "$llm_api"
 update_or_append_to_env "DEVICE_ID" "$device_id"
 update_or_append_to_env "RUNTIME_BACKEND" "$runtime_backend"
@@ -403,22 +445,26 @@ if [[ -f "$user_file" ]]; then
     read -p "Do you want to use the previous host: $host? (yes/no) Press Enter for yes: " use_previous
     use_previous=${use_previous:-yes}
     if [[ $use_previous != "yes" && $use_previous != "" ]]; then
-        read -p "Are you running the code on a cloud server or on your local machine? (cloud/local) " answer
+        read -p "Are you running the code on a remote server or on your local machine? (remote/local) " answer
         if [[ $answer == "local" || $answer == "本地" ]]; then
             host="localhost"
         else
-            read -p "Please enter the server IP address (e.g. 10.234.10.144): " host
+            read -p "Please enter the server's public IP address (e.g. 10.234.10.144): " host
+            echo "The remote server IP is set to $host. After QAnything starts, the local front-end service (open [http://$user_ip:5052/qanything/] in a browser) will access the backend service at [http://$host:8777] remotely."
+            sleep 5
         fi
         # Save the new configuration to the user file
         echo "$host" > "$user_file"
     fi
 else
     # If the user file does not exist, prompt the user and save the configuration
-    read -p "Are you running the code on a cloud server or on your local machine? (cloud/local) " answer
+    read -p "Are you running the code on a remote server or on your local machine? (remote/local) " answer
     if [[ $answer == "local" || $answer == "本地" ]]; then
         host="localhost"
     else
-        read -p "Please enter the server IP address (e.g. 10.234.10.144): " host
+        read -p "Please enter the server's public IP address (e.g. 10.234.10.144): " host
+        echo "The remote server IP is set to $host. After QAnything starts, the local front-end service (open [http://$user_ip:5052/qanything/] in a browser) will access the backend service at [http://$host:8777] remotely."
+        sleep 5
     fi
     # Save the configuration to the user file
     echo "$host" > "$user_file"
scripts/run_for_cloud_option.sh

Lines changed: 59 additions & 27 deletions
@@ -15,6 +15,26 @@ update_or_append_to_env() {
     fi
 }
 
+function check_log_errors() {
+    local log_file=$1 # path to the log file to inspect
+
+    # Check whether the log file exists
+    if [[ ! -f "$log_file" ]]; then
+        echo "The specified log file does not exist: $log_file"
+        return 1
+    fi
+
+    # Use grep to look for "core dumped" or "Error"
+    # -C 5 prints 5 lines of context around each match
+    local pattern="core dumped|Error|error"
+    if grep -E -C 5 "$pattern" "$log_file"; then
+        echo "Errors detected; see the output above."
+        exit 1
+    else
+        echo "No obvious errors detected in $log_file. Please inspect $log_file manually for more information."
+    fi
+}
+
 script_name=$(basename "$0")
 
 usage() {
@@ -127,8 +147,6 @@ CUDA_VISIBLE_DEVICES=$gpuid1 nohup /opt/tritonserver/bin/tritonserver --model-st
 
 update_or_append_to_env "EMBED_PORT" "9001"
 update_or_append_to_env "RERANK_PORT" "9001"
-# echo "RERANK_PORT=9001" >> /workspace/qanything_local/.env
-# echo "EMBED_PORT=9001" >> /workspace/qanything_local/.env
 
 cd /workspace/qanything_local || exit
 nohup python3 -u qanything_kernel/dependent_server/rerank_for_local_serve/rerank_server.py > /workspace/qanything_local/logs/debug_logs/rerank_server.log 2>&1 &
@@ -144,8 +162,6 @@ echo "The qanything backend service is ready! (4/8)"
 echo "qanything后端服务已就绪! (4/8)"
 
 
-timeout_time=300 # npm download timeout: 300s; triton_server startup timeout: 600s
-
 env_file="/workspace/qanything_local/front_end/.env.production"
 user_file="/workspace/qanything_local/user.config"
 user_ip=$(cat "$user_file")
@@ -165,11 +181,11 @@ cd /workspace/qanything_local/front_end || exit
 # Install dependencies
 echo "Waiting for [npm run install](5/8)"
 npm config set registry https://registry.npmmirror.com
-timeout $timeout_time npm install
+timeout 180 npm install
 if [ $? -eq 0 ]; then
     echo "[npm run install] Installed successfully(5/8)"
 elif [ $? -eq 124 ]; then
-    echo "npm install timed out; this is likely a network issue, please configure an npm proxy."
+    echo "npm install timed out (180 seconds); this is likely a network issue, please configure an npm proxy."
     exit 1
 else
     echo "Failed to install npm dependencies."
@@ -178,9 +194,12 @@ fi
 
 # Build the front-end project
 echo "Waiting for [npm run build](6/8)"
-npm run build
+timeout 180 npm run build
 if [ $? -eq 0 ]; then
     echo "[npm run build] build successfully(6/8)"
+elif [ $? -eq 124 ]; then
+    echo "npm run build timed out (180 seconds); see the output above."
+    exit 1
 else
     echo "Failed to build the front end."
     exit 1
@@ -190,7 +209,7 @@ fi
 nohup npm run serve 1>/workspace/qanything_local/logs/debug_logs/npm_server.log 2>&1 &
 
 # Watch the front-end page service
-tail -f npm_server.log &
+tail -f /workspace/qanything_local/logs/debug_logs/npm_server.log &
 
 front_end_start_time=$(date +%s)
@@ -212,41 +231,54 @@ done
 echo "The front-end service is ready!...(7/8)"
 echo "前端服务已就绪!...(7/8)"
 
-current_time=$(date +%s)
-elapsed=$((current_time - start_time)) # elapsed time (seconds)
-echo "Time elapsed: ${elapsed} seconds."
-echo "已耗时: ${elapsed} 秒."
+embed_rerank_log_file="/workspace/qanything_local/logs/debug_logs/embed_rerank_tritonserver.log"
+
+tail -f $embed_rerank_log_file & # stream the log file in the background
+tail_pid=$! # PID of the background tail command
 
+now_time=$(date +%s)
 while true; do
+    current_time=$(date +%s)
+    elapsed_time=$((current_time - now_time))
+
     if [ "$runtime_backend" = "default" ]; then
         response_embed_rerank=$(curl -s -w "%{http_code}" http://localhost:9000/v2/health/ready -o /dev/null)
         echo "health response_embed_rerank = $response_embed_rerank"
 
+        # Check for timeout
+        if [ $elapsed_time -ge 60 ]; then
+            kill $tail_pid # stop the background tail
+            echo "Starting the embedding and rerank service timed out; automatically checking $embed_rerank_log_file for errors..."
+
+            check_log_errors $embed_rerank_log_file
+
+            exit 1
+        fi
+
         if [ "$response_embed_rerank" -eq 200 ]; then
-            echo "The llm service is ready!, now you can use the qanything service. (8/8)"
-            echo "LLM 服务已准备就绪!现在您可以使用qanything服务。(8/8)"
+            kill $tail_pid # stop the background tail
+            echo "The embedding and rerank service is ready!, now you can use the qanything service. (8/8)"
+            echo "Embedding 和 Rerank 服务已准备就绪!现在您可以使用qanything服务。(8/8)"
             break
-        else
-            echo "The llm service is starting up, it can be long... you have time to make a coffee :)"
-            echo "LLM 服务正在启动,可能需要一段时间...你有时间去冲杯咖啡 :)"
-
-            current_time=$(date +%s)
-            elapsed_time=$((current_time - start_time))
-
-            # Check for timeout
-            if [ $elapsed_time -ge $((timeout_time * 2)) ]; then
-                echo "Starting the LLM service timed out; check logs/debug_logs/embed_rerank_tritonserver.log under the project root for more information."
-                exit 1
-            fi
-            sleep 5
         fi
+
+        echo "The embedding and rerank service is starting up, it can be long... you have time to make a coffee :)"
+        echo "Embedding and Rerank 服务正在启动,可能需要一段时间...你有时间去冲杯咖啡 :)"
+        sleep 10
+
     else
         # The cloud version only supports the default runtime backend
         echo "runtime_backend only support default in cloud version."
         exit 1
     fi
 done
 
+echo "Checking the log files for errors..."
+# Call the function with each log file path
+check_log_errors "/workspace/qanything_local/logs/debug_logs/rerank_server.log"
+check_log_errors "/workspace/qanything_local/logs/debug_logs/ocr_server.log"
+check_log_errors "/workspace/qanything_local/logs/debug_logs/sanic_api.log"
+
 current_time=$(date +%s)
 elapsed=$((current_time - start_time)) # elapsed time (seconds)
 echo "Time elapsed: ${elapsed} seconds."
