Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[demos] more cli for speech demos #2138

Merged
merged 2 commits into from
Jul 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file modified demos/custom_streaming_asr/setup_docker.sh
100644 → 100755
Empty file.
Empty file modified demos/keyword_spotting/run.sh
100644 → 100755
Empty file.
Empty file modified demos/speaker_verification/run.sh
100644 → 100755
Empty file.
18 changes: 17 additions & 1 deletion demos/speech_recognition/run.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
#!/bin/bash

wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav

# asr
paddlespeech asr --input ./zh.wav


# asr + punc
paddlespeech asr --input ./zh.wav | paddlespeech text --task punc


# asr help
paddlespeech asr --help


# english asr
paddlespeech asr --lang en --model transformer_librispeech --input ./en.wav

# model stats
paddlespeech stats --task asr


# paddlespeech help
paddlespeech --help
Empty file modified demos/speech_server/asr_client.sh
100644 → 100755
Empty file.
Empty file modified demos/speech_server/cls_client.sh
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion demos/speech_server/server.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash

paddlespeech_server start --config_file ./conf/application.yaml
paddlespeech_server start --config_file ./conf/application.yaml &> server.log &
10 changes: 10 additions & 0 deletions demos/speech_server/sid_client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

wget -c https://paddlespeech.bj.bcebos.com/vector/audio/85236145389.wav
wget -c https://paddlespeech.bj.bcebos.com/vector/audio/123456789.wav

# sid extract
paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task spk --input ./85236145389.wav

# sid score
paddlespeech_client vector --server_ip 127.0.0.1 --port 8090 --task score --enroll ./85236145389.wav --test ./123456789.wav
4 changes: 4 additions & 0 deletions demos/speech_server/text_client.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash


paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input 今天的天气真好啊你下午有空吗我想约你一起去吃饭
Empty file modified demos/speech_server/tts_client.sh
100644 → 100755
Empty file.
Empty file modified demos/streaming_asr_server/run.sh
100644 → 100755
Empty file.
7 changes: 3 additions & 4 deletions demos/streaming_asr_server/server.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
export CUDA_VISIBLE_DEVICE=0,1,2,3
export CUDA_VISIBLE_DEVICE=0,1,2,3
#export CUDA_VISIBLE_DEVICE=0,1,2,3

# nohup python3 punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
# nohup python3 local/punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
paddlespeech_server start --config_file conf/punc_application.yaml &> punc.log &

# nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
# nohup python3 local/streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml &> streaming_asr.log &

2 changes: 1 addition & 1 deletion demos/streaming_asr_server/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wa

# read the wav and call streaming and punc service
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8290 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav

4 changes: 2 additions & 2 deletions demos/streaming_tts_server/test_client.sh → demos/streaming_tts_server/client.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

# http client test
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.http.wav

# websocket client test
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
# paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol websocket --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.wav
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8192 --protocol websocket --input "您好,欢迎使用百度飞桨语音合成服务。" --output output.ws.wav
103 changes: 103 additions & 0 deletions demos/streaming_tts_server/conf/tts_online_ws_application.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# This is the parameter configuration file for streaming tts server.

#################################################################################
# SERVER SETTING #
#################################################################################
host: 0.0.0.0
port: 8192

# The task format in the engin_list is: <speech task>_<engine type>
# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
# protocol choices = ['websocket', 'http']
protocol: 'websocket'
engine_list: ['tts_online-onnx']


#################################################################################
# ENGINE CONFIG #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online:
# am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']
# fastspeech2_cnndecoder_csmsc support streaming am infer.
am: 'fastspeech2_csmsc'
am_config:
am_ckpt:
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0

# voc (vocoder) choices=['mb_melgan_csmsc, hifigan_csmsc']
# Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
voc: 'mb_melgan_csmsc'
voc_config:
voc_ckpt:
voc_stat:

# others
lang: 'zh'
device: 'cpu' # set 'gpu:id' or 'cpu'
# am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
# when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
am_block: 72
am_pad: 12
# voc_pad and voc_block voc model to streaming voc infer,
# when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
# when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
voc_block: 36
voc_pad: 14



#################################################################################
# ENGINE CONFIG #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx:
# am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
# fastspeech2_cnndecoder_csmsc_onnx support streaming am infer.
am: 'fastspeech2_cnndecoder_csmsc_onnx'
# am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
# if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
am_ckpt: # list
am_stat:
phones_dict:
tones_dict:
speaker_dict:
spk_id: 0
am_sample_rate: 24000
am_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 4

# voc (vocoder) choices=['mb_melgan_csmsc_onnx, hifigan_csmsc_onnx']
# Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
voc: 'hifigan_csmsc_onnx'
voc_ckpt:
voc_sample_rate: 24000
voc_sess_conf:
device: "cpu" # set 'gpu:id' or 'cpu'
use_trt: False
cpu_threads: 4

# others
lang: 'zh'
# am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
# when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
am_block: 72
am_pad: 12
# voc_pad and voc_block voc model to streaming voc infer,
# when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
# when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
voc_block: 36
voc_pad: 14
# voc_upsample should be same as n_shift on voc config.
voc_upsample: 300

10 changes: 10 additions & 0 deletions demos/streaming_tts_server/server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# http server
paddlespeech_server start --config_file ./conf/tts_online_application.yaml &> tts.http.log &


# websocket server
paddlespeech_server start --config_file ./conf/tts_online_ws_application.yaml &> tts.ws.log &


3 changes: 0 additions & 3 deletions demos/streaming_tts_server/start_server.sh

This file was deleted.

8 changes: 7 additions & 1 deletion demos/text_to_speech/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,10 @@
paddlespeech tts --input 今天的天气不错啊

# Batch process
echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts

# Text Frontend
paddlespeech tts --input 今天是2022/10/29,最低温度是-3℃.



1 change: 1 addition & 0 deletions paddlespeech/server/bin/paddlespeech_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ def __call__(self,
logger.info(f"the input audio: {input}")
handler = VectorHttpHandler(server_ip=server_ip, port=port)
res = handler.run(input, audio_format, sample_rate)
logger.info(f"The spk embedding is: {res}")
return res
elif task == "score":
from paddlespeech.server.utils.audio_handler import VectorScoreHttpHandler
Expand Down