update readme and fixed bug in ngpu (#2451)

* update readme and fixed ngpu bug * update png in readme * update readme and FT web ttsText
PaddlePaddle · Sep 26, 2022 · e656225 · e656225
1 parent b058a2d
commit e656225
Show file tree

Hide file tree

Showing 18 changed files with 95 additions and 635 deletions.
diff --git a/demos/speech_web/README.md b/demos/speech_web/README.md
@@ -28,14 +28,15 @@ Paddle Speech Demo 是一个以 PaddleSpeech 的语音交互功能为主体开
 
 运行效果：
 
- ![效果](https://user-images.githubusercontent.com/30135920/191188766-12e7ca15-f7b4-45f8-9da5-0c0b0bbe5fcb.png)
+ ![效果](https://user-images.githubusercontent.com/30135920/192155349-9ef93d20-730b-413d-8d50-412fedf11d4b.png)
 
 
 
 ## 基础环境安装
 
 ### 后端环境安装
 ```bash 
+# 需要先安装 PaddleSpeech
 cd speech_server
 pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
 cd ../
@@ -44,6 +45,8 @@ cd ../
 ### 前端环境安装
 前端依赖 `node.js` ，需要提前安装，确保 `npm` 可用，`npm` 测试版本 `8.3.1`，建议下载[官网](https://nodejs.org/en/)稳定版的 `node.js`
 
+如果因为网络问题无法下载依赖库，可以参考 FAQ 部分的 `npm / yarn 配置淘宝镜像源`
+
 ```bash
 # 进入前端目录
 cd web_client
@@ -70,7 +73,7 @@ mkdir -p source/model
 cd source/model
 # 下载IE模型
 wget https://bj.bcebos.com/paddlenlp/applications/speech-cmd-analysis/finetune/model_state.pdparams
-cd ../../
+cd ../../../
 
 ```
 #### 启动后端服务
@@ -84,6 +87,10 @@ python main.py --port 8010
 
 ### 启动 `vc.py` 后端服务
 
+参照下面的步骤自行配置项目所需环境。
+
+Aistudio 在线体验小样本合成后端功能：[【PaddleSpeech进阶】PaddleSpeech小样本合成方案体验](https://aistudio.baidu.com/aistudio/projectdetail/4573549?sUid=2470186&shared=1&ts=1664174385948)
+
 #### 下载相关模型和音频
 
 ```bash
@@ -172,8 +179,19 @@ cd web_client
 yarn dev --port 8011
 ```
 
-默认配置下，前端中配置的后台地址信息是 localhost，确保后端服务器和打开页面的游览器在同一台机器上，不在一台机器的配置方式见下方的 FAQ：【后端如果部署在其它机器或者别的端口如何修改】
+默认配置下，前端配置的后台地址信息是 `localhost`，请确保后端服务器和打开页面的浏览器在同一台机器上；不在同一台机器上的配置方式见下方的 FAQ：【后端如果部署在其它机器或者别的端口如何修改】
+
+#### 关于前端的一些说明
+
+为了方便后期的维护，这里并没有给出打包好的 HTML 文件，而是 Vue3 的项目，使用 `yarn dev --port 8011` 的方式启动测试，方便大家debug，相当于是启动了一个前端服务器。
+
+比如我们在本机启动这个前端服务（运行 `yarn dev --port 8011`），就可以在浏览器中通过 `http://localhost:8011` 访问前端页面。
+
+如果我们在其它服务器上（例如：`*.*.*.*`）启动这个前端服务（运行 `yarn dev --port 8011`），就可以在浏览器中通过 `http://*.*.*.*:8011` 访问前端页面。
 
+那前端跟后端是什么关系呢？两者是独立的，只要前端能够通过代理访问到后端的接口，那就没有问题。你可以在 A 机器上部署后端服务，然后在 B 机器上部署前端服务。我们在 `./web_client/vite.config.js` 中将 `/api` 映射到 `http://localhost:8010`，你可以把它配置成任意你想要访问的后端地址。
+
+当通过 `*.*.*.*` 这类 IP 地址形式的网址访问前端页面时，由于浏览器的安全限制，会禁止录音，需要重新配置浏览器的安全策略，可以参考下面 FAQ 部分：【前端以IP地址的形式访问，无法录音】
 
 
 ## FAQ 
@@ -210,12 +228,24 @@ ASR_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/onlineStream',  // Stream ASR 接
 TTS_SOCKET_RECORD: 'ws://localhost:8010/ws/tts/online', // Stream TTS 接口
 ```
 
-#### Q：后端以IP地址的形式，前端无法录音
+#### Q：前端以IP地址的形式访问，无法录音
 
 A：这里主要是浏览器安全策略的限制，需要配置浏览器后重启。浏览器修改配置可参考[使用js-audio-recorder报浏览器不支持getUserMedia](https://blog.csdn.net/YRY_LIKE_YOU/article/details/113745273)
 
 chrome设置地址: chrome://flags/#unsafely-treat-insecure-origin-as-secure
 
+#### Q: npm / yarn 配置淘宝镜像源
+
+A: 配置淘宝镜像源，详细可以参考 [【yarn npm 设置淘宝镜像】](https://www.jianshu.com/p/f6f43e8f9d6b)
+
+```bash
+# npm 配置淘宝镜像源
+npm config set registry https://registry.npmmirror.com
+
+# yarn 配置淘宝镜像源
+yarn config set registry https://registry.npmmirror.com
+```
+
 ## 参考资料
 
 vue实现录音参考资料：https://blog.csdn.net/qq_41619796/article/details/107865602#t1

diff --git a/demos/speech_web/speech_server/src/ernie_sat.py b/demos/speech_web/speech_server/src/ernie_sat.py
@@ -1,5 +1,6 @@
 import os
 
+from .util import get_ngpu
 from .util import MAIN_ROOT
 from .util import run_cmd
 
@@ -171,6 +172,7 @@ def get_cmd(self,
                 output_name: str,
                 source_lang: str,
                 target_lang: str):
+        ngpu = get_ngpu()
         cmd = f"""
             FLAGS_allocator_strategy=naive_best_fit \
             FLAGS_fraction_of_gpu_memory_to_use=0.01 \
@@ -189,7 +191,8 @@ def get_cmd(self,
                 --voc_config={voc_config} \
                 --voc_ckpt={voc_ckpt} \
                 --voc_stat={voc_stat} \
-                --output_name={output_name}
+                --output_name={output_name} \
+                --ngpu={ngpu}
         """
 
         return cmd
diff --git a/demos/speech_web/speech_server/src/finetune.py b/demos/speech_web/speech_server/src/finetune.py
@@ -1,5 +1,6 @@
 import os
 
+from .util import get_ngpu
 from .util import MAIN_ROOT
 from .util import run_cmd
 
@@ -38,7 +39,7 @@ def finetune(self, input_dir, exp_dir='temp', epoch=100):
         dump_dir = os.path.join(exp_dir, 'dump')
         output_dir = os.path.join(exp_dir, 'exp')
         lang = "zh"
-        ngpu = 1
+        ngpu = get_ngpu()
 
         cmd = f"""
             # check oov
@@ -91,7 +92,7 @@ def synthesize(self, text, wav_name, out_wav_dir, exp_dir='temp'):
         output_dir = os.path.join(exp_dir, 'exp')
         text_path = os.path.join(exp_dir, 'sentences.txt')
         lang = "zh"
-        ngpu = 1
+        ngpu = get_ngpu()
 
         model_path = f"{output_dir}/checkpoints"
         ckpt = find_max_ckpt(model_path)
@@ -117,7 +118,8 @@ def synthesize(self, text, wav_name, out_wav_dir, exp_dir='temp'):
                 --output_dir={out_wav_dir} \
                 --phones_dict={dump_dir}/phone_id_map.txt \
                 --speaker_dict={dump_dir}/speaker_id_map.txt \
-                --spk_id=0 
+                --spk_id=0 \
+                --ngpu={ngpu}
         """
 
         out_path = os.path.join(out_wav_dir, f"{wav_name}.wav")

diff --git a/demos/speech_web/speech_server/src/ge2e_clone.py b/demos/speech_web/speech_server/src/ge2e_clone.py
@@ -1,6 +1,7 @@
 import os
 import shutil
 
+from .util import get_ngpu
 from .util import MAIN_ROOT
 from .util import run_cmd
 
@@ -30,11 +31,12 @@ def vc(self, text, input_wav, out_wav):
         ref_audio_dir = os.path.realpath("tmp_dir/ge2e")
         if os.path.exists(ref_audio_dir):
             shutil.rmtree(ref_audio_dir)
-        else:
-            os.makedirs(ref_audio_dir, exist_ok=True)
-            shutil.copy(input_wav, ref_audio_dir)
+
+        os.makedirs(ref_audio_dir, exist_ok=True)
+        shutil.copy(input_wav, ref_audio_dir)
 
         output_dir = os.path.dirname(out_wav)
+        ngpu = get_ngpu()
 
         cmd = f"""
             python3 {self.BIN_DIR}/voice_cloning.py \
@@ -50,7 +52,8 @@ def vc(self, text, input_wav, out_wav):
                     --text="{text}" \
                     --input-dir={ref_audio_dir} \
                     --output-dir={output_dir} \
-                    --phones-dict={self.phones_dict}
+                    --phones-dict={self.phones_dict} \
+                    --ngpu={ngpu}
         """
 
         output_name = os.path.join(output_dir, full_file_name)

diff --git a/demos/speech_web/speech_server/src/tdnn_clone.py b/demos/speech_web/speech_server/src/tdnn_clone.py
@@ -1,6 +1,7 @@
 import os
 import shutil
 
+from .util import get_ngpu
 from .util import MAIN_ROOT
 from .util import run_cmd
 
@@ -27,11 +28,11 @@ def vc(self, text, input_wav, out_wav):
         ref_audio_dir = os.path.realpath("tmp_dir/tdnn")
         if os.path.exists(ref_audio_dir):
             shutil.rmtree(ref_audio_dir)
-        else:
-            os.makedirs(ref_audio_dir, exist_ok=True)
-            shutil.copy(input_wav, ref_audio_dir)
+        os.makedirs(ref_audio_dir, exist_ok=True)
+        shutil.copy(input_wav, ref_audio_dir)
 
         output_dir = os.path.dirname(out_wav)
+        ngpu = get_ngpu()
 
         cmd = f"""
             python3 {self.BIN_DIR}/voice_cloning.py \
@@ -47,7 +48,8 @@ def vc(self, text, input_wav, out_wav):
                     --input-dir={ref_audio_dir} \
                     --output-dir={output_dir} \
                     --phones-dict={self.phones_dict} \
-                    --use_ecapa=True
+                    --use_ecapa=True \
+                    --ngpu={ngpu}
         """
 
         output_name = os.path.join(output_dir, full_file_name)

diff --git a/demos/speech_web/speech_server/src/util.py b/demos/speech_web/speech_server/src/util.py
@@ -2,10 +2,19 @@
 import random
 import subprocess
 
+import paddle
+
 NOW_FILE_PATH = os.path.dirname(__file__)
 MAIN_ROOT = os.path.realpath(os.path.join(NOW_FILE_PATH, "../../../../"))
 
 
+def get_ngpu():
+    if paddle.device.get_device() == "cpu":
+        return 0
+    else:
+        return 1
+
+
 def randName(n=5):
     return "".join(random.sample('zyxwvutsrqponmlkjihgfedcba', n))
 

diff --git a/demos/speech_web/speech_server/vc.py b/demos/speech_web/speech_server/vc.py
@@ -281,15 +281,18 @@ async def VcCloneG2P(base: VcBaseText):
             if base.func == 'ge2e':
                 wavName = base.wavName
                 wavPath = os.path.join(VC_OUT_PATH, wavName)
-                vc_model.vc(
+                wavPath = vc_model.vc(
                     text=base.text, input_wav=base.wavPath, out_wav=wavPath)
             else:
                 wavName = base.wavName
                 wavPath = os.path.join(VC_OUT_PATH, wavName)
-                vc_model_tdnn.vc(
+                wavPath = vc_model_tdnn.vc(
                     text=base.text, input_wav=base.wavPath, out_wav=wavPath)
-            res = {"wavName": wavName, "wavPath": wavPath}
-            return SuccessRequest(result=res)
+            if wavPath:
+                res = {"wavName": wavName, "wavPath": wavPath}
+                return SuccessRequest(result=res)
+            else:
+                return ErrorRequest(message="克隆失败，检查克隆脚本是否有效")
         except Exception as e:
             print(e)
             return ErrorRequest(message="克隆失败，合成过程报错")

diff --git a/demos/speech_web/web_client/src/components/Experience.vue b/demos/speech_web/web_client/src/components/Experience.vue
@@ -47,7 +47,7 @@ import FineTuneT from './SubMenu/FineTune/FineTune.vue'
             <el-tab-pane label="小数据微调" key="7">
             <FineTuneT></FineTuneT>
             </el-tab-pane>
-            <el-tab-pane label="ENIRE SAT" key="8">
+            <el-tab-pane label="ENIRE-SAT" key="8">
             <ENIRE_SATT></ENIRE_SATT>
             </el-tab-pane>
           </el-tabs>

diff --git a/demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/RealTime.vue b/demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/RealTime.vue
@@ -58,9 +58,6 @@ export default {
     mounted () {
         this.wsUrl = apiURL.ASR_SOCKET_RECORD
         this.ws = new WebSocket(this.wsUrl)
-        if(this.ws.readyState === this.ws.CONNECTING){
-            this.$message.success("实时识别 Websocket 连接成功")
-        }
         var _that = this
         this.ws.addEventListener('message', function (event) {
                 var temp = JSON.parse(event.data);
@@ -78,7 +75,7 @@ export default {
             // 检查 websocket 状态
             // debugger
             if(this.ws.readyState != this.ws.OPEN){
-                this.$message.error("websocket 链接失败，请检查链接地址是否正确")
+                this.$message.error("websocket 链接失败，请检查 Websocket 后端服务是否正确开启")
                 return
             }