Skip to content

Commit

Permalink
Merge pull request #2138 from zh794390558/demos
Browse files Browse the repository at this point in the history
[demos] more cli for speech demos
  • Loading branch information
Jackwaterveg authored Jul 13, 2022
2 parents 16f075e + 74245cc commit ae7a73b
Show file tree
Hide file tree
Showing 21 changed files with 159 additions and 13 deletions.
Empty file modified demos/custom_streaming_asr/setup_docker.sh
100644 → 100755
Empty file.
Empty file modified demos/keyword_spotting/run.sh
100644 → 100755
Empty file.
Empty file modified demos/speaker_verification/run.sh
100644 → 100755
Empty file.
18 changes: 17 additions & 1 deletion demos/speech_recognition/run.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
#!/bin/bash
# Demo of the `paddlespeech` command-line interface for speech recognition (ASR).

# Download the sample recordings zh.wav and en.wav into the current directory.
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav

# asr: recognize zh.wav with the default options
paddlespeech asr --input ./zh.wav


# asr + punc: pipe the ASR output into the text command's punc task
paddlespeech asr --input ./zh.wav | paddlespeech text --task punc


# asr help: show the options of the asr sub-command
paddlespeech asr --help


# english asr: recognize en.wav with the transformer_librispeech model
paddlespeech asr --lang en --model transformer_librispeech --input ./en.wav

# model stats: query the stats command for the asr task
paddlespeech stats --task asr


# paddlespeech help: show the top-level help
paddlespeech --help
Empty file modified demos/speech_server/asr_client.sh
100644 → 100755
Empty file.
Empty file modified demos/speech_server/cls_client.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion demos/speech_server/server.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash

paddlespeech_server start --config_file ./conf/application.yaml
paddlespeech_server start --config_file ./conf/application.yaml &> server.log &
10 changes: 10 additions & 0 deletions demos/speech_server/sid_client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# Speaker-identification (sid) client demo: fetch two sample recordings, then
# call the vector service of the local speech server on them.

audio_base_url=https://paddlespeech.bj.bcebos.com/vector/audio
enroll_wav=85236145389.wav
test_wav=123456789.wav

# Connection options shared by every client call below.
server_opts=(--server_ip 127.0.0.1 --port 8090)

# Download the sample audio into the current directory.
for wav in "${enroll_wav}" "${test_wav}"; do
  wget -c "${audio_base_url}/${wav}"
done

# sid extract
paddlespeech_client vector "${server_opts[@]}" --task spk --input "./${enroll_wav}"

# sid score
paddlespeech_client vector "${server_opts[@]}" --task score \
  --enroll "./${enroll_wav}" --test "./${test_wav}"
4 changes: 4 additions & 0 deletions demos/speech_server/text_client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Text client demo: send one sentence to the text service of the local speech server.

# Sentence passed to the client as its --input value.
input_text=今天的天气真好啊你下午有空吗我想约你一起去吃饭

paddlespeech_client text \
  --server_ip 127.0.0.1 \
  --port 8090 \
  --input "${input_text}"
Empty file modified demos/speech_server/tts_client.sh
100644 → 100755
Empty file.
File renamed without changes.
Empty file modified demos/streaming_asr_server/run.sh
100644 → 100755
Empty file.
7 changes: 3 additions & 4 deletions demos/streaming_asr_server/server.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
export CUDA_VISIBLE_DEVICE=0,1,2,3
export CUDA_VISIBLE_DEVICE=0,1,2,3
#export CUDA_VISIBLE_DEVICES=0,1,2,3

# nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
# nohup python3 local/punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
paddlespeech_server start --config_file conf/punc_application.yaml &> punc.log &

# nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
# nohup python3 local/streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml &> streaming_asr.log &

2 changes: 1 addition & 1 deletion demos/streaming_asr_server/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wa

# read the wav and call streaming and punc service
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8290 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav

4 changes: 2 additions & 2 deletions demos/streaming_tts_server/test_client.sh → demos/streaming_tts_server/client.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

# http client test
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.http.wav

# websocket client test
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
# paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol websocket --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8192 --protocol websocket --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.ws.wav
103 changes: 103 additions & 0 deletions demos/streaming_tts_server/conf/tts_online_ws_application.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# This is the parameter configuration file for the streaming TTS server
# (websocket variant).
#
# Layout: the top-level keys configure the server itself; each engine named in
# `engine_list` reads its settings from the mapping of the same name below, so
# the engine settings must stay nested under `tts_online` / `tts_online-onnx`.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8192

# The task format in the engine_list is: <speech task>_<engine type>
# engine_list choices = ['tts_online', 'tts_online-onnx']; the inference speed of tts_online-onnx is faster than tts_online.
# protocol choices = ['websocket', 'http']
protocol: 'websocket'
engine_list: ['tts_online-onnx']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online:
    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
    # fastspeech2_cnndecoder_csmsc supports streaming am inference.
    am: 'fastspeech2_csmsc'
    am_config:
    am_ckpt:
    am_stat:
    phones_dict:
    tones_dict:
    speaker_dict:
    spk_id: 0

    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference.
    voc: 'mb_melgan_csmsc'
    voc_config:
    voc_ckpt:
    voc_stat:

    # others
    lang: 'zh'
    device: 'cpu' # set 'gpu:id' or 'cpu'
    # am_block and am_pad are only used by the fastspeech2_cnndecoder_csmsc model for streaming am inference;
    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio.
    am_block: 72
    am_pad: 12
    # voc_block and voc_pad are used by the voc model for streaming voc inference.
    # When the voc model is mb_melgan_csmsc and voc_pad is set to 14, streaming synthetic audio is the same as non-streaming synthetic audio; the minimum value of pad can be set to 7, and streaming synthetic audio still sounds normal.
    # When the voc model is hifigan_csmsc and voc_pad is set to 19, streaming synthetic audio is the same as non-streaming synthetic audio; with voc_pad set to 14, streaming synthetic audio sounds normal.
    voc_block: 36
    voc_pad: 14



#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx:
    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
    # fastspeech2_cnndecoder_csmsc_onnx supports streaming am inference.
    am: 'fastspeech2_cnndecoder_csmsc_onnx'
    # am_ckpt is a list: if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
    am_ckpt: # list
    am_stat:
    phones_dict:
    tones_dict:
    speaker_dict:
    spk_id: 0
    am_sample_rate: 24000
    am_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference.
    voc: 'hifigan_csmsc_onnx'
    voc_ckpt:
    voc_sample_rate: 24000
    voc_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # others
    lang: 'zh'
    # am_block and am_pad are only used by the fastspeech2_cnndecoder_csmsc_onnx model for streaming am inference;
    # when am_pad is set to 12, streaming synthetic audio is the same as non-streaming synthetic audio.
    am_block: 72
    am_pad: 12
    # voc_block and voc_pad are used by the voc model for streaming voc inference.
    # When the voc model is mb_melgan_csmsc_onnx and voc_pad is set to 14, streaming synthetic audio is the same as non-streaming synthetic audio; the minimum value of pad can be set to 7, and streaming synthetic audio still sounds normal.
    # When the voc model is hifigan_csmsc_onnx and voc_pad is set to 19, streaming synthetic audio is the same as non-streaming synthetic audio; with voc_pad set to 14, streaming synthetic audio sounds normal.
    voc_block: 36
    voc_pad: 14
    # voc_upsample should be the same as n_shift in the voc config.
    voc_upsample: 300

10 changes: 10 additions & 0 deletions demos/streaming_tts_server/server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
# Start the streaming TTS servers in the background, one per protocol.

# Launch one server in the background.
#   $1 - path to the server config file
#   $2 - log file receiving both stdout and stderr
launch_server() {
  paddlespeech_server start --config_file "$1" > "$2" 2>&1 &
}

# http server
launch_server ./conf/tts_online_application.yaml tts.http.log

# websocket server
launch_server ./conf/tts_online_ws_application.yaml tts.ws.log


3 changes: 0 additions & 3 deletions demos/streaming_tts_server/start_server.sh

This file was deleted.

8 changes: 7 additions & 1 deletion demos/text_to_speech/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,10 @@
paddlespeech tts --input 今天的天气不错啊

# Batch process
echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts

# Text Frontend
paddlespeech tts --input 今天是2022/10/29,最低温度是-3℃.



1 change: 1 addition & 0 deletions paddlespeech/server/bin/paddlespeech_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ def __call__(self,
logger.info(f"the input audio: {input}")
handler = VectorHttpHandler(server_ip=server_ip, port=port)
res = handler.run(input, audio_format, sample_rate)
logger.info(f"The spk embedding is: {res}")
return res
elif task == "score":
from paddlespeech.server.utils.audio_handler import VectorScoreHttpHandler
Expand Down

0 comments on commit ae7a73b

Please sign in to comment.