diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh index 0850a72b1..b0be657a7 100755 --- a/.github/scripts/test-dart.sh +++ b/.github/scripts/test-dart.sh @@ -66,7 +66,9 @@ echo '----------streaming zipformer ctc----------' rm -rf sherpa-onnx-* echo '----------streaming zipformer transducer----------' +./run-zipformer-transducer-itn.sh ./run-zipformer-transducer.sh +rm -f itn* rm -rf sherpa-onnx-* echo '----------streaming NeMo transducer----------' diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh index 395c67c83..845162542 100755 --- a/.github/scripts/test-dot-net.sh +++ b/.github/scripts/test-dot-net.sh @@ -2,7 +2,13 @@ cd dotnet-examples/ -cd ./offline-decode-files +cd ./online-decode-files +./run-transducer-itn.sh +./run-zipformer2-ctc.sh +./run-transducer.sh +./run-paraformer.sh + +cd ../offline-decode-files ./run-paraformer-itn.sh ./run-telespeech-ctc.sh ./run-nemo-ctc.sh @@ -27,11 +33,6 @@ cd ../streaming-hlg-decoding/ cd ../spoken-language-identification ./run.sh -cd ../online-decode-files -./run-zipformer2-ctc.sh -./run-transducer.sh -./run-paraformer.sh - cd ../offline-tts ./run-aishell3.sh ./run-piper.sh diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index a6c4345a8..5ff89d30e 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -70,6 +70,13 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/s tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +rm -f itn* + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav + +node test_asr_streaming_transducer_itn.js + node test_asr_streaming_transducer.js rm -rf 
sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 @@ -120,6 +127,8 @@ rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 node ./test_asr_non_streaming_paraformer.js +rm -f itn* + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index 2098bb166..8428c1df5 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -15,6 +15,8 @@ curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/s ls -lh tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 + +rm -f itn* curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst node ./test-offline-paraformer-itn.js @@ -57,7 +59,15 @@ rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + +rm -f itn* +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + +node ./test-online-transducer-itn.js + node ./test-online-transducer.js + rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 diff --git a/.github/workflows/build-wheels-aarch64.yaml 
b/.github/workflows/build-wheels-aarch64.yaml index 4bc5b79c2..d9d042324 100644 --- a/.github/workflows/build-wheels-aarch64.yaml +++ b/.github/workflows/build-wheels-aarch64.yaml @@ -2,6 +2,8 @@ name: build-wheels-aarch64 on: push: + branches: + - wheel tags: - 'v[0-9]+.[0-9]+.[0-9]+*' workflow_dispatch: diff --git a/.github/workflows/build-wheels-armv7l.yaml b/.github/workflows/build-wheels-armv7l.yaml index 6b7d74460..a2a2a49aa 100644 --- a/.github/workflows/build-wheels-armv7l.yaml +++ b/.github/workflows/build-wheels-armv7l.yaml @@ -2,6 +2,8 @@ name: build-wheels-armv7l on: push: + branches: + - wheel tags: - 'v[0-9]+.[0-9]+.[0-9]+*' workflow_dispatch: diff --git a/.github/workflows/build-wheels-linux.yaml b/.github/workflows/build-wheels-linux.yaml index 426545622..9d94f1c3a 100644 --- a/.github/workflows/build-wheels-linux.yaml +++ b/.github/workflows/build-wheels-linux.yaml @@ -2,6 +2,8 @@ name: build-wheels-linux on: push: + branches: + - wheel tags: - 'v[0-9]+.[0-9]+.[0-9]+*' workflow_dispatch: diff --git a/.github/workflows/build-wheels-macos-arm64.yaml b/.github/workflows/build-wheels-macos-arm64.yaml index bc02ce38f..5883ba483 100644 --- a/.github/workflows/build-wheels-macos-arm64.yaml +++ b/.github/workflows/build-wheels-macos-arm64.yaml @@ -2,6 +2,8 @@ name: build-wheels-macos-arm64 on: push: + branches: + - wheel tags: - 'v[0-9]+.[0-9]+.[0-9]+*' workflow_dispatch: @@ -84,7 +86,7 @@ jobs: run: | opts='--break-system-packages' v=${{ matrix.python-version }} - if [[ $v == cp38 || $v == cp39 ]]; then + if [[ $v == cp37 || $v == cp38 || $v == cp39 ]]; then opts='' fi diff --git a/.github/workflows/build-wheels-macos-x64.yaml b/.github/workflows/build-wheels-macos-x64.yaml index 8ad21d0ed..df4d8049e 100644 --- a/.github/workflows/build-wheels-macos-x64.yaml +++ b/.github/workflows/build-wheels-macos-x64.yaml @@ -101,7 +101,7 @@ jobs: run: | opts='--break-system-packages' v=${{ matrix.python-version }} - if [[ $v == cp38 || $v == cp39 ]]; then + if [[ $v 
== cp37 || $v == cp38 || $v == cp39 ]]; then opts='' fi diff --git a/.github/workflows/build-wheels-win32.yaml b/.github/workflows/build-wheels-win32.yaml index ab3d32b13..b2dbd157d 100644 --- a/.github/workflows/build-wheels-win32.yaml +++ b/.github/workflows/build-wheels-win32.yaml @@ -2,6 +2,8 @@ name: build-wheels-win32 on: push: + branches: + - wheel tags: - 'v[0-9]+.[0-9]+.[0-9]+*' workflow_dispatch: diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index 30fc6a827..14d22bd2e 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -173,6 +173,9 @@ jobs: shell: bash run: | cd ./java-api-examples + ./run-inverse-text-normalization-transducer.sh + rm -rf sherpa-onnx-streaming-* + ./run-streaming-decode-file-ctc.sh # Delete model files to save space rm -rf sherpa-onnx-streaming-* diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index 1ec654418..6f25a0139 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -187,6 +187,9 @@ jobs: ./run-transducer.sh rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 + ./run-transducer-itn.sh + rm -rf sherpa-onnx-streaming-* + echo "Test paraformer" ./run-paraformer.sh rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en diff --git a/CMakeLists.txt b/CMakeLists.txt index 974dd1c8f..dedf52767 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,8 @@ project(sherpa-onnx) # Remember to update # ./nodejs-addon-examples # ./dart-api-examples/ -set(SHERPA_ONNX_VERSION "1.9.30") +# ./sherpa-onnx/flutter/CHANGELOG.md +set(SHERPA_ONNX_VERSION "1.10.0") # Disable warning about # diff --git a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index fa2829635..8c96623fc 100644 --- a/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ 
b/android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -196,6 +196,9 @@ class MainActivity : AppCompatActivity() { // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // for a list of available models val type = 0 + var ruleFsts : String? + ruleFsts = null + Log.i(TAG, "Select model type $type") val config = OnlineRecognizerConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), @@ -205,6 +208,10 @@ class MainActivity : AppCompatActivity() { enableEndpoint = true, ) + if (ruleFsts != null) { + config.ruleFsts = ruleFsts + } + recognizer = OnlineRecognizer( assetManager = application.assets, config = config, diff --git a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index 596d03e09..d3ad9aa94 100644 --- a/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -194,6 +194,8 @@ class MainActivity : AppCompatActivity() { // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // for a list of available models val firstType = 9 + val firstRuleFsts: String? + firstRuleFsts = null Log.i(TAG, "Select model type $firstType for the first pass") val config = OnlineRecognizerConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), @@ -201,6 +203,9 @@ class MainActivity : AppCompatActivity() { endpointConfig = getEndpointConfig(), enableEndpoint = true, ) + if (firstRuleFsts != null) { + config.ruleFsts = firstRuleFsts; + } onlineRecognizer = OnlineRecognizer( assetManager = application.assets, @@ -213,6 +218,8 @@ class MainActivity : AppCompatActivity() { // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // for a list of available models val secondType = 0 + var secondRuleFsts: String? 
+ secondRuleFsts = null Log.i(TAG, "Select model type $secondType for the second pass") val config = OfflineRecognizerConfig( @@ -220,6 +227,10 @@ class MainActivity : AppCompatActivity() { modelConfig = getOfflineModelConfig(type = secondType)!!, ) + if (secondRuleFsts != null) { + config.ruleFsts = secondRuleFsts + } + offlineRecognizer = OfflineRecognizer( assetManager = application.assets, config = config, diff --git a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index e221553c2..fd7d60280 100644 --- a/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -200,12 +200,17 @@ class MainActivity : AppCompatActivity() { // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html // for a list of available models val asrModelType = 0 + val asrRuleFsts: String? + asrRuleFsts = null Log.i(TAG, "Select model type ${asrModelType} for ASR") val config = OfflineRecognizerConfig( featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), modelConfig = getOfflineModelConfig(type = asrModelType)!!, ) + if (asrRuleFsts != null) { + config.ruleFsts = asrRuleFsts; + } offlineRecognizer = OfflineRecognizer( assetManager = application.assets, diff --git a/cmake/kaldi-decoder.cmake b/cmake/kaldi-decoder.cmake index aa937b3e4..02e62e44f 100644 --- a/cmake/kaldi-decoder.cmake +++ b/cmake/kaldi-decoder.cmake @@ -63,23 +63,15 @@ function(download_kaldi_decoder) kaldi-decoder-core kaldifst_core fst + fstfar DESTINATION ..) - if(SHERPA_ONNX_ENABLE_TTS) - install(TARGETS - fstfar - DESTINATION ..) 
- endif() else() install(TARGETS kaldi-decoder-core kaldifst_core fst + fstfar DESTINATION lib) - if(SHERPA_ONNX_ENABLE_TTS) - install(TARGETS - fstfar - DESTINATION lib) - endif() endif() if(WIN32 AND BUILD_SHARED_LIBS) @@ -87,12 +79,8 @@ function(download_kaldi_decoder) kaldi-decoder-core kaldifst_core fst + fstfar DESTINATION bin) - if(SHERPA_ONNX_ENABLE_TTS) - install(TARGETS - fstfar - DESTINATION bin) - endif() endif() endfunction() diff --git a/dart-api-examples/non-streaming-asr/pubspec.yaml b/dart-api-examples/non-streaming-asr/pubspec.yaml index 9253f105d..81280c349 100644 --- a/dart-api-examples/non-streaming-asr/pubspec.yaml +++ b/dart-api-examples/non-streaming-asr/pubspec.yaml @@ -10,7 +10,7 @@ environment: # Add regular dependencies here. dependencies: - sherpa_onnx: ^1.9.30 + sherpa_onnx: ^1.10.0 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/streaming-asr/bin/zipformer-transducer.dart b/dart-api-examples/streaming-asr/bin/zipformer-transducer.dart index 438af31e5..3e642d00f 100644 --- a/dart-api-examples/streaming-asr/bin/zipformer-transducer.dart +++ b/dart-api-examples/streaming-asr/bin/zipformer-transducer.dart @@ -15,6 +15,7 @@ void main(List arguments) async { ..addOption('decoder', help: 'Path to decoder model') ..addOption('joiner', help: 'Path to joiner model') ..addOption('tokens', help: 'Path to tokens.txt') + ..addOption('rule-fsts', help: 'Path to rule fsts', defaultsTo: '') ..addOption('input-wav', help: 'Path to input.wav to transcribe'); final res = parser.parse(arguments); @@ -31,6 +32,7 @@ void main(List arguments) async { final decoder = res['decoder'] as String; final joiner = res['joiner'] as String; final tokens = res['tokens'] as String; + final ruleFsts = res['rule-fsts'] as String; final inputWav = res['input-wav'] as String; final transducer = sherpa_onnx.OnlineTransducerModelConfig( @@ -45,7 +47,10 @@ void main(List arguments) async { debug: true, numThreads: 1, ); - final config = 
sherpa_onnx.OnlineRecognizerConfig(model: modelConfig); + final config = sherpa_onnx.OnlineRecognizerConfig( + model: modelConfig, + ruleFsts: ruleFsts, + ); final recognizer = sherpa_onnx.OnlineRecognizer(config); final waveData = sherpa_onnx.readWave(inputWav); diff --git a/dart-api-examples/streaming-asr/pubspec.yaml b/dart-api-examples/streaming-asr/pubspec.yaml index a740b371c..5305be304 100644 --- a/dart-api-examples/streaming-asr/pubspec.yaml +++ b/dart-api-examples/streaming-asr/pubspec.yaml @@ -11,7 +11,7 @@ environment: # Add regular dependencies here. dependencies: - sherpa_onnx: ^1.9.30 + sherpa_onnx: ^1.10.0 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/streaming-asr/run-zipformer-transducer-itn.sh b/dart-api-examples/streaming-asr/run-zipformer-transducer-itn.sh new file mode 100755 index 000000000..2169f71db --- /dev/null +++ b/dart-api-examples/streaming-asr/run-zipformer-transducer-itn.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -ex + +dart pub get + +if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +fi + +if [ ! -f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! 
-f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +dart run \ + ./bin/zipformer-transducer.dart \ + --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ + --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ + --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ + --rule-fsts ./itn_zh_number.fst \ + --input-wav ./itn-zh-number.wav diff --git a/dart-api-examples/tts/pubspec.yaml b/dart-api-examples/tts/pubspec.yaml index fed0bf4e6..c3f1d9119 100644 --- a/dart-api-examples/tts/pubspec.yaml +++ b/dart-api-examples/tts/pubspec.yaml @@ -8,7 +8,7 @@ environment: # Add regular dependencies here. dependencies: - sherpa_onnx: ^1.9.30 + sherpa_onnx: ^1.10.0 path: ^1.9.0 args: ^2.5.0 diff --git a/dart-api-examples/vad/pubspec.yaml b/dart-api-examples/vad/pubspec.yaml index e0d6f6dce..8194273fb 100644 --- a/dart-api-examples/vad/pubspec.yaml +++ b/dart-api-examples/vad/pubspec.yaml @@ -9,7 +9,7 @@ environment: sdk: ^3.4.0 dependencies: - sherpa_onnx: ^1.9.30 + sherpa_onnx: ^1.10.0 path: ^1.9.0 args: ^2.5.0 diff --git a/dotnet-examples/online-decode-files/Program.cs b/dotnet-examples/online-decode-files/Program.cs index 5103dc010..3e8ee93e8 100644 --- a/dotnet-examples/online-decode-files/Program.cs +++ b/dotnet-examples/online-decode-files/Program.cs @@ -85,6 +85,10 @@ larger than this value after something that is not blank has been decoded. 
Used [Option("hotwords-score", Required = false, Default = 1.5F, HelpText = "hotwords score")] public float HotwordsScore { get; set; } + [Option("rule-fsts", Required = false, Default = "", + HelpText = "If not empty, path to rule fst for inverse text normalization")] + public string RuleFsts { get; set; } + [Option("files", Required = true, HelpText = "Audio files for decoding")] public IEnumerable Files { get; set; } @@ -189,6 +193,7 @@ private static void Run(Options options) config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength; config.HotwordsFile = options.HotwordsFile; config.HotwordsScore = options.HotwordsScore; + config.RuleFsts = options.RuleFsts; OnlineRecognizer recognizer = new OnlineRecognizer(config); diff --git a/dotnet-examples/online-decode-files/run-transducer-itn.sh b/dotnet-examples/online-decode-files/run-transducer-itn.sh new file mode 100755 index 000000000..17c595789 --- /dev/null +++ b/dotnet-examples/online-decode-files/run-transducer-itn.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english +# to download the model files + +set -ex +if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +fi + +if [ ! -f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! 
-f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +dotnet run -c Release \ + --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ + --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ + --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ + --decoding-method greedy_search \ + --rule-fsts ./itn_zh_number.fst --files ./itn-zh-number.wav diff --git a/go-api-examples/streaming-decode-files/main.go b/go-api-examples/streaming-decode-files/main.go index 5ec2c7cbb..d96b53336 100644 --- a/go-api-examples/streaming-decode-files/main.go +++ b/go-api-examples/streaming-decode-files/main.go @@ -30,6 +30,8 @@ func main() { flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use") flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search") flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") + flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") + flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") flag.Parse() diff --git a/go-api-examples/streaming-decode-files/run-transducer-itn.sh b/go-api-examples/streaming-decode-files/run-transducer-itn.sh new file mode 100755 index 000000000..47bb13a71 --- /dev/null +++ b/go-api-examples/streaming-decode-files/run-transducer-itn.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +set -ex + +if [ !
-f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +fi + +if [ ! -f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! -f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +go mod tidy +go build + +./streaming-decode-files \ + --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ + --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ + --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ + --model-type zipformer \ + --rule-fsts ./itn_zh_number.fst \ + --debug 0 \ + ./itn-zh-number.wav diff --git a/java-api-examples/InverseTextNormalizationStreamingTransducer.java b/java-api-examples/InverseTextNormalizationStreamingTransducer.java new file mode 100644 index 000000000..c3cc325d4 --- /dev/null +++ b/java-api-examples/InverseTextNormalizationStreamingTransducer.java @@ -0,0 +1,68 @@ +// Copyright 2024 Xiaomi Corporation + +// This file shows how to use a streaming transducer +// to decode files with inverse text normalization. 
+import com.k2fsa.sherpa.onnx.*; + +public class InverseTextNormalizationStreamingTransducer { + public static void main(String[] args) { + // please refer to + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english + // to download model files + String encoder = + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"; + String decoder = + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"; + String joiner = + "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"; + String tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"; + + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav + String waveFilename = "./itn-zh-number.wav"; + + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + String ruleFsts = "./itn_zh_number.fst"; + + WaveReader reader = new WaveReader(waveFilename); + + OnlineTransducerModelConfig transducer = + OnlineTransducerModelConfig.builder() + .setEncoder(encoder) + .setDecoder(decoder) + .setJoiner(joiner) + .build(); + + OnlineModelConfig modelConfig = + OnlineModelConfig.builder() + .setTransducer(transducer) + .setTokens(tokens) + .setNumThreads(1) + .setDebug(true) + .build(); + + OnlineRecognizerConfig config = + OnlineRecognizerConfig.builder() + .setOnlineModelConfig(modelConfig) + .setDecodingMethod("greedy_search") + .setRuleFsts(ruleFsts) + .build(); + + OnlineRecognizer recognizer = new OnlineRecognizer(config); + OnlineStream stream = recognizer.createStream(); + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); + + float[] tailPaddings = new float[(int) (0.8 * reader.getSampleRate())]; + stream.acceptWaveform(tailPaddings, reader.getSampleRate()); + + 
while (recognizer.isReady(stream)) { + recognizer.decode(stream); + } + + String text = recognizer.getResult(stream).getText(); + + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); + + stream.release(); + recognizer.release(); + } +} diff --git a/java-api-examples/run-inverse-text-normalization-transducer.sh b/java-api-examples/run-inverse-text-normalization-transducer.sh new file mode 100755 index 000000000..509d71f07 --- /dev/null +++ b/java-api-examples/run-inverse-text-normalization-transducer.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash + +set -ex + +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then + mkdir -p ../build + pushd ../build + cmake \ + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_ONNX_ENABLE_JNI=ON \ + .. + + make -j4 + ls -lh lib + popd +fi + +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then + pushd ../sherpa-onnx/java-api + make + popd +fi + +if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 +fi + +if [ ! -f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +fi + +if [ ! 
-f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst +fi + +java \ + -Djava.library.path=$PWD/../build/lib \ + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ + InverseTextNormalizationStreamingTransducer.java diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh index a96c09748..5b58620e1 100755 --- a/kotlin-api-examples/run.sh +++ b/kotlin-api-examples/run.sh @@ -203,7 +203,7 @@ function testOfflineAsr() { java -Djava.library.path=../build/lib -jar $out_filename } -function testInverseTextNormalizationAsr() { +function testInverseTextNormalizationOfflineAsr() { if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 @@ -218,9 +218,9 @@ function testInverseTextNormalizationAsr() { curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst fi - out_filename=test_offline_asr.jar + out_filename=test_itn_offline_asr.jar kotlinc-jvm -include-runtime -d $out_filename \ - test_itn_asr.kt \ + test_itn_offline_asr.kt \ FeatureConfig.kt \ OfflineRecognizer.kt \ OfflineStream.kt \ @@ -231,6 +231,34 @@ function testInverseTextNormalizationAsr() { java -Djava.library.path=../build/lib -jar $out_filename } +function testInverseTextNormalizationOnlineAsr() { + if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + fi + + if [ ! 
-f ./itn-zh-number.wav ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav + fi + + if [ ! -f ./itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + + out_filename=test_itn_online_asr.jar + kotlinc-jvm -include-runtime -d $out_filename \ + test_itn_online_asr.kt \ + FeatureConfig.kt \ + OnlineRecognizer.kt \ + OnlineStream.kt \ + WaveReader.kt \ + faked-asset-manager.kt + + ls -lh $out_filename + java -Djava.library.path=../build/lib -jar $out_filename +} + function testPunctuation() { if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 @@ -257,4 +285,5 @@ testAudioTagging testSpokenLanguageIdentification testOfflineAsr testPunctuation -testInverseTextNormalizationAsr +testInverseTextNormalizationOfflineAsr +testInverseTextNormalizationOnlineAsr diff --git a/kotlin-api-examples/test_itn_asr.kt b/kotlin-api-examples/test_itn_offline_asr.kt similarity index 100% rename from kotlin-api-examples/test_itn_asr.kt rename to kotlin-api-examples/test_itn_offline_asr.kt diff --git a/kotlin-api-examples/test_itn_online_asr.kt b/kotlin-api-examples/test_itn_online_asr.kt new file mode 100644 index 000000000..27b6024b2 --- /dev/null +++ b/kotlin-api-examples/test_itn_online_asr.kt @@ -0,0 +1,41 @@ +package com.k2fsa.sherpa.onnx + +fun main() { + test() +} + +fun test() { + val recognizer = createOnlineRecognizer() + val waveFilename = "./itn-zh-number.wav"; + + val objArray = WaveReader.readWaveFromFile( + filename = waveFilename, + ) + val samples: FloatArray = objArray[0] as FloatArray + val sampleRate: Int = objArray[1] as Int + + val stream = recognizer.createStream() + stream.acceptWaveform(samples, sampleRate=sampleRate) + while 
(recognizer.isReady(stream)) { + recognizer.decode(stream) + } + + val result = recognizer.getResult(stream).text + println(result) + + stream.release() + recognizer.release() +} + +fun createOnlineRecognizer(): OnlineRecognizer { + val config = OnlineRecognizerConfig( + featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80), + modelConfig = getModelConfig(8)!!, + ) + + config.ruleFsts = "./itn_zh_number.fst" + println(config) + + return OnlineRecognizer(config = config) +} + diff --git a/nodejs-addon-examples/package.json b/nodejs-addon-examples/package.json index 28d846a11..9c277ef90 100644 --- a/nodejs-addon-examples/package.json +++ b/nodejs-addon-examples/package.json @@ -1,5 +1,5 @@ { "dependencies": { - "sherpa-onnx-node": "^1.9.30" + "sherpa-onnx-node": "^1.10.0" } } diff --git a/nodejs-addon-examples/test_asr_streaming_transducer_itn.js b/nodejs-addon-examples/test_asr_streaming_transducer_itn.js new file mode 100644 index 000000000..b8dfb6cb4 --- /dev/null +++ b/nodejs-addon-examples/test_asr_streaming_transducer_itn.js @@ -0,0 +1,59 @@ +// Copyright (c) 2024 Xiaomi Corporation +const sherpa_onnx = require('sherpa-onnx-node'); + +// Please download test files from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models +const config = { + 'featConfig': { + 'sampleRate': 16000, + 'featureDim': 80, + }, + 'modelConfig': { + 'transducer': { + 'encoder': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx', + 'decoder': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx', + 'joiner': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx', + }, + 'tokens': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', + 'numThreads': 2, + 'provider': 'cpu', + 'debug': 1, + }, + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + ruleFsts: './itn_zh_number.fst', +}; + +// 
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +const waveFilename = './itn-zh-number.wav'; + +const recognizer = new sherpa_onnx.OnlineRecognizer(config); +console.log('Started') +let start = Date.now(); +const stream = recognizer.createStream(); +const wave = sherpa_onnx.readWave(waveFilename); +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples}); + +const tailPadding = new Float32Array(wave.sampleRate * 0.4); +stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate}); + +while (recognizer.isReady(stream)) { + recognizer.decode(stream); +} +const result = recognizer.getResult(stream) +let stop = Date.now(); +console.log('Done') + +const elapsed_seconds = (stop - start) / 1000; +const duration = wave.samples.length / wave.sampleRate; +const real_time_factor = elapsed_seconds / duration; +console.log('Wave duration', duration.toFixed(3), 'seconds') +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') +console.log( + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`, + real_time_factor.toFixed(3)) +console.log(waveFilename) +console.log('result\n', result) diff --git a/nodejs-addon-examples/test_asr_streaming_transducer_microphone_itn.js b/nodejs-addon-examples/test_asr_streaming_transducer_microphone_itn.js new file mode 100644 index 000000000..34807d10c --- /dev/null +++ b/nodejs-addon-examples/test_asr_streaming_transducer_microphone_itn.js @@ -0,0 +1,88 @@ +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) +// +const portAudio = require('naudiodon2'); +// console.log(portAudio.getDevices()); + +const sherpa_onnx = require('sherpa-onnx-node'); + +function createOnlineRecognizer() { + const config = { + 'featConfig': { + 'sampleRate': 16000, + 'featureDim': 80, + }, + 'modelConfig': { + 'transducer': { + 'encoder': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx', + 'decoder': +
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx', + 'joiner': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx', + }, + 'tokens': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', + 'numThreads': 2, + 'provider': 'cpu', + 'debug': 1, + }, + 'decodingMethod': 'greedy_search', + 'maxActivePaths': 4, + 'enableEndpoint': true, + 'rule1MinTrailingSilence': 2.4, + 'rule2MinTrailingSilence': 1.2, + 'rule3MinUtteranceLength': 20, + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + ruleFsts: './itn_zh_number.fst', + }; + + return new sherpa_onnx.OnlineRecognizer(config); +} + +const recognizer = createOnlineRecognizer(); +const stream = recognizer.createStream(); + +let lastText = ''; +let segmentIndex = 0; + +const ai = new portAudio.AudioIO({ + inOptions: { + channelCount: 1, + closeOnError: true, // Close the stream if an audio error is detected, if + // set false then just log the error + deviceId: -1, // Use -1 or omit the deviceId to select the default device + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: recognizer.config.featConfig.sampleRate + } +}); + +const display = new sherpa_onnx.Display(50); + +ai.on('data', data => { + const samples = new Float32Array(data.buffer); + + stream.acceptWaveform( + {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples}); + + while (recognizer.isReady(stream)) { + recognizer.decode(stream); + } + + const isEndpoint = recognizer.isEndpoint(stream); + const text = recognizer.getResult(stream).text.toLowerCase(); + + if (text.length > 0 && lastText != text) { + lastText = text; + display.print(segmentIndex, lastText); + } + if (isEndpoint) { + if (text.length > 0) { + lastText = text; + segmentIndex += 1; + } + recognizer.reset(stream) + } +}); + +ai.start(); +console.log('Started! 
Please speak') diff --git a/nodejs-examples/test-online-transducer-itn.js b/nodejs-examples/test-online-transducer-itn.js new file mode 100644 index 000000000..9bc5360a2 --- /dev/null +++ b/nodejs-examples/test-online-transducer-itn.js @@ -0,0 +1,131 @@ +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) +// +const fs = require('fs'); +const {Readable} = require('stream'); +const wav = require('wav'); + +const sherpa_onnx = require('sherpa-onnx'); + +function createOnlineRecognizer() { + let onlineTransducerModelConfig = { + encoder: + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx', + decoder: + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx', + joiner: + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx', + }; + + let onlineParaformerModelConfig = { + encoder: '', + decoder: '', + }; + + let onlineZipformer2CtcModelConfig = { + model: '', + }; + + let onlineModelConfig = { + transducer: onlineTransducerModelConfig, + paraformer: onlineParaformerModelConfig, + zipformer2Ctc: onlineZipformer2CtcModelConfig, + tokens: + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', + numThreads: 1, + provider: 'cpu', + debug: 1, + modelType: 'zipformer', + }; + + let featureConfig = { + sampleRate: 16000, + featureDim: 80, + }; + + let recognizerConfig = { + featConfig: featureConfig, + modelConfig: onlineModelConfig, + decodingMethod: 'greedy_search', + maxActivePaths: 4, + enableEndpoint: 1, + rule1MinTrailingSilence: 2.4, + rule2MinTrailingSilence: 1.2, + rule3MinUtteranceLength: 20, + hotwordsFile: '', + hotwordsScore: 1.5, + ctcFstDecoderConfig: { + graph: '', + maxActive: 3000, + }, + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + ruleFsts: './itn_zh_number.fst', + }; + + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); +} + +const recognizer = 
createOnlineRecognizer(); +const stream = recognizer.createStream(); + +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav +const waveFilename = './itn-zh-number.wav'; + +const reader = new wav.Reader(); +const readable = new Readable().wrap(reader); + +function decode(samples) { + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples); + + while (recognizer.isReady(stream)) { + recognizer.decode(stream); + } + const text = recognizer.getResult(stream).text; + console.log(text); +} + +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { + if (sampleRate != recognizer.config.featConfig.sampleRate) { + throw new Error(`Only support sampleRate ${ + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`); + } + + if (audioFormat != 1) { + throw new Error(`Only support PCM format. Given ${audioFormat}`); + } + + if (channels != 1) { + throw new Error(`Only a single channel. Given ${channels}`); + } + + if (bitDepth != 16) { + throw new Error(`Only support 16-bit samples. 
Given ${bitDepth}`); + } +}); + +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) + .pipe(reader) + .on('finish', function(err) { + // tail padding + const floatSamples = + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5); + decode(floatSamples); + stream.free(); + recognizer.free(); + }); + +readable.on('readable', function() { + let chunk; + while ((chunk = readable.read()) != null) { + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); + + const floatSamples = new Float32Array(int16Samples.length); + + for (let i = 0; i < floatSamples.length; i++) { + floatSamples[i] = int16Samples[i] / 32768.0; + } + + decode(floatSamples); + } +}); diff --git a/scripts/apk/build-apk-asr-2pass.sh.in b/scripts/apk/build-apk-asr-2pass.sh.in index 24c2cd3ea..4cd5761a8 100644 --- a/scripts/apk/build-apk-asr-2pass.sh.in +++ b/scripts/apk/build-apk-asr-2pass.sh.in @@ -71,6 +71,17 @@ git checkout . pushd android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx sed -i.bak s/"firstType = 9/firstType = $type1/" ./MainActivity.kt sed -i.bak s/"secondType = 0/secondType = $type2/" ./MainActivity.kt + +{% if first.rule_fsts %} + rule_fsts={{ first.rule_fsts }} + sed -i.bak s%"firstRuleFsts = null"%"firstRuleFsts = \"$rule_fsts\""% ./MainActivity.kt +{% endif %} + +{% if second.rule_fsts %} + rule_fsts={{ second.rule_fsts }} + sed -i.bak s%"secondRuleFsts = null"%"secondRuleFsts = \"$rule_fsts\""% ./MainActivity.kt +{% endif %} + git diff popd diff --git a/scripts/apk/build-apk-asr.sh.in b/scripts/apk/build-apk-asr.sh.in index 468959f08..d2169203a 100644 --- a/scripts/apk/build-apk-asr.sh.in +++ b/scripts/apk/build-apk-asr.sh.in @@ -54,6 +54,12 @@ popd git checkout . 
pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt + +{% if model.rule_fsts %} + rule_fsts={{ model.rule_fsts }} + sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt +{% endif %} + git diff popd @@ -84,6 +90,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do done rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name +rm -rf ./android/SherpaOnnx/app/src/main/assets/*.fst {% endfor %} git checkout . diff --git a/scripts/apk/build-apk-vad-asr.sh.in b/scripts/apk/build-apk-vad-asr.sh.in index eb79eab5e..b1c159ece 100644 --- a/scripts/apk/build-apk-vad-asr.sh.in +++ b/scripts/apk/build-apk-vad-asr.sh.in @@ -56,6 +56,12 @@ popd git checkout . pushd android/SherpaOnnxVadAsr/app/src/main/java/com/k2fsa/sherpa/onnx sed -i.bak s/"asrModelType = 0/asrModelType = $type/" ./MainActivity.kt + +{% if model.rule_fsts %} + rule_fsts={{ model.rule_fsts }} + sed -i.bak s%"asrRuleFsts = null"%"asrRuleFsts = \"$rule_fsts\""% ./MainActivity.kt +{% endif %} + git diff popd diff --git a/scripts/apk/generate-asr-2pass-apk-script.py b/scripts/apk/generate-asr-2pass-apk-script.py index 85da8ada2..fc86b5193 100755 --- a/scripts/apk/generate-asr-2pass-apk-script.py +++ b/scripts/apk/generate-asr-2pass-apk-script.py @@ -41,6 +41,7 @@ class Model: # cmd is used to remove extra file from the model directory cmd: str = "" + rule_fsts: str = "" def get_2nd_models(): @@ -70,7 +71,11 @@ def get_2nd_models(): idx=0, lang="zh", short_name="paraformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -fv README.md @@ -87,7 +92,11 @@ def get_2nd_models(): idx=4, lang="zh", short_name="zipformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! 
-f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -rfv test_wavs @@ -117,7 +126,11 @@ def get_1st_models(): idx=8, lang="bilingual_zh_en", short_name="zipformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -fv decoder-epoch-99-avg-1.int8.onnx rm -fv encoder-epoch-99-avg-1.onnx @@ -160,7 +173,11 @@ def get_1st_models(): idx=3, lang="zh", short_name="zipformer2", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx @@ -202,7 +219,11 @@ def get_1st_models(): idx=9, lang="zh", short_name="small_zipformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -fv encoder-epoch-99-avg-1.onnx rm -fv decoder-epoch-99-avg-1.int8.onnx diff --git a/scripts/apk/generate-asr-apk-script.py b/scripts/apk/generate-asr-apk-script.py index 8684877cf..05a22a921 100755 --- a/scripts/apk/generate-asr-apk-script.py +++ b/scripts/apk/generate-asr-apk-script.py @@ -42,6 +42,8 @@ class Model: # cmd is used to remove extra file from the model directory cmd: str = "" + rule_fsts: str = "" + def get_models(): models = [ @@ -50,7 +52,11 @@ def get_models(): idx=8, lang="bilingual_zh_en", short_name="zipformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! 
-f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -fv decoder-epoch-99-avg-1.int8.onnx rm -fv encoder-epoch-99-avg-1.onnx @@ -93,7 +99,11 @@ def get_models(): idx=3, lang="zh", short_name="zipformer2", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx @@ -135,7 +145,11 @@ def get_models(): idx=9, lang="zh", short_name="small_zipformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -fv encoder-epoch-99-avg-1.onnx rm -fv decoder-epoch-99-avg-1.int8.onnx diff --git a/scripts/apk/generate-vad-asr-apk-script.py b/scripts/apk/generate-vad-asr-apk-script.py index ca38fa3fb..61188ca7f 100755 --- a/scripts/apk/generate-vad-asr-apk-script.py +++ b/scripts/apk/generate-vad-asr-apk-script.py @@ -42,6 +42,8 @@ class Model: # cmd is used to remove extra file from the model directory cmd: str = "" + rule_fsts: str = "" + # See get_2nd_models() in ./generate-asr-2pass-apk-script.py def get_models(): @@ -71,7 +73,11 @@ def get_models(): idx=0, lang="zh", short_name="paraformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -v README.md @@ -88,7 +94,11 @@ def get_models(): idx=4, lang="zh", short_name="zipformer", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! 
-f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -rfv test_wavs @@ -171,7 +181,11 @@ def get_models(): idx=11, lang="zh", short_name="telespeech", + rule_fsts="itn_zh_number.fst", cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi pushd $model_name rm -rfv test_wavs diff --git a/scripts/dotnet/OnlineRecognizerConfig.cs b/scripts/dotnet/OnlineRecognizerConfig.cs index 6ba6f5b64..dc58fe844 100644 --- a/scripts/dotnet/OnlineRecognizerConfig.cs +++ b/scripts/dotnet/OnlineRecognizerConfig.cs @@ -26,6 +26,8 @@ public OnlineRecognizerConfig() HotwordsFile = ""; HotwordsScore = 1.5F; CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig(); + RuleFsts = ""; + RuleFars = ""; } public FeatureConfig FeatConfig; public OnlineModelConfig ModelConfig; @@ -64,5 +66,11 @@ public OnlineRecognizerConfig() public float HotwordsScore; public OnlineCtcFstDecoderConfig CtcFstDecoderConfig; + + [MarshalAs(UnmanagedType.LPStr)] + public string RuleFsts; + + [MarshalAs(UnmanagedType.LPStr)] + public string RuleFars; } } diff --git a/scripts/go/_internal/streaming-decode-files/run-transducer-itn.sh b/scripts/go/_internal/streaming-decode-files/run-transducer-itn.sh new file mode 120000 index 000000000..0e1f525a5 --- /dev/null +++ b/scripts/go/_internal/streaming-decode-files/run-transducer-itn.sh @@ -0,0 +1 @@ +../../../../go-api-examples/streaming-decode-files/run-transducer-itn.sh \ No newline at end of file diff --git a/scripts/go/release.sh b/scripts/go/release.sh index 77d663f1b..d46eb1cf7 100755 --- a/scripts/go/release.sh +++ b/scripts/go/release.sh @@ -79,8 +79,8 @@ function osx() { mkdir t cd t - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl - unzip 
./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_x86_64.whl cp -v sherpa_onnx/lib/*.dylib $dst/ @@ -93,8 +93,8 @@ function osx() { mkdir t cd t - wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl - unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_arm64.whl + wget -q https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_arm64.whl + unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp39-cp39-macosx_11_0_arm64.whl cp -v sherpa_onnx/lib/*.dylib $dst/ @@ -126,7 +126,6 @@ function windows() { unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst cd .. rm -rf t @@ -139,7 +138,6 @@ function windows() { unzip ./sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll $dst - cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib $dst cd .. rm -rf t diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index 437f4f328..898c0c21c 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -127,7 +127,11 @@ type OnlineRecognizerConfig struct { Rule1MinTrailingSilence float32 Rule2MinTrailingSilence float32 Rule3MinUtteranceLength float32 + HotwordsFile string + HotwordsScore float32 CtcFstDecoderConfig OnlineCtcFstDecoderConfig + RuleFsts string + RuleFars string } // It contains the recognition result for a online stream. 
@@ -204,6 +208,17 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence) c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength) + c.hotwords_file = C.CString(config.HotwordsFile) + defer C.free(unsafe.Pointer(c.hotwords_file)) + + c.hotwords_score = C.float(config.HotwordsScore) + + c.rule_fsts = C.CString(config.RuleFsts) + defer C.free(unsafe.Pointer(c.rule_fsts)) + + c.rule_fars = C.CString(config.RuleFars) + defer C.free(unsafe.Pointer(c.rule_fars)) + c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph) defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph)) c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive) diff --git a/scripts/node-addon-api/src/streaming-asr.cc b/scripts/node-addon-api/src/streaming-asr.cc index 59312a230..81482c824 100644 --- a/scripts/node-addon-api/src/streaming-asr.cc +++ b/scripts/node-addon-api/src/streaming-asr.cc @@ -189,6 +189,8 @@ static Napi::External CreateOnlineRecognizerWrapper( rule3MinUtteranceLength); SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile); SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore); + SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts); + SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars); c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(o); @@ -246,6 +248,14 @@ static Napi::External CreateOnlineRecognizerWrapper( delete[] c.hotwords_file; } + if (c.rule_fsts) { + delete[] c.rule_fsts; + } + + if (c.rule_fars) { + delete[] c.rule_fars; + } + if (c.ctc_fst_decoder_config.graph) { delete[] c.ctc_fst_decoder_config.graph; } diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 01e2191c3..2d0118833 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -110,6 +110,9 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( recognizer_config.ctc_fst_decoder_config.max_active = 
SHERPA_ONNX_OR(config->ctc_fst_decoder_config.max_active, 3000); + recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); + recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); + if (config->model_config.debug) { SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); } diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 0229f8059..e9637ae7c 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -144,6 +144,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { float hotwords_score; SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config; + const char *rule_fsts; + const char *rule_fars; } SherpaOnnxOnlineRecognizerConfig; SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt index 6edb82402..bac0499a8 100644 --- a/sherpa-onnx/csrc/CMakeLists.txt +++ b/sherpa-onnx/csrc/CMakeLists.txt @@ -190,9 +190,10 @@ if(NOT BUILD_SHARED_LIBS AND APPLE) target_link_libraries(sherpa-onnx-core "-framework Foundation") endif() +target_link_libraries(sherpa-onnx-core fstfar fst) + if(SHERPA_ONNX_ENABLE_TTS) target_link_libraries(sherpa-onnx-core piper_phonemize) - target_link_libraries(sherpa-onnx-core fstfar fst) target_link_libraries(sherpa-onnx-core cppjieba) endif() diff --git a/sherpa-onnx/csrc/offline-recognizer-impl.cc b/sherpa-onnx/csrc/offline-recognizer-impl.cc index 546d0f9bf..80a6766ce 100644 --- a/sherpa-onnx/csrc/offline-recognizer-impl.cc +++ b/sherpa-onnx/csrc/offline-recognizer-impl.cc @@ -425,9 +425,6 @@ std::string OfflineRecognizerImpl::ApplyInverseTextNormalization( if (!itn_list_.empty()) { for (const auto &tn : itn_list_) { text = tn->Normalize(text); - if (config_.model_config.debug) { - SHERPA_ONNX_LOGE("After inverse text normalization: %s", text.c_str()); - } } } diff --git a/sherpa-onnx/csrc/online-recognizer-impl.cc b/sherpa-onnx/csrc/online-recognizer-impl.cc index 
89d172f97..2784ad24c 100644 --- a/sherpa-onnx/csrc/online-recognizer-impl.cc +++ b/sherpa-onnx/csrc/online-recognizer-impl.cc @@ -4,6 +4,8 @@ #include "sherpa-onnx/csrc/online-recognizer-impl.h" +#include + #if __ANDROID_API__ >= 9 #include @@ -186,9 +188,6 @@ std::string OnlineRecognizerImpl::ApplyInverseTextNormalization( if (!itn_list_.empty()) { for (const auto &tn : itn_list_) { text = tn->Normalize(text); - if (config_.model_config.debug) { - SHERPA_ONNX_LOGE("After inverse text normalization: %s", text.c_str()); - } } } diff --git a/sherpa-onnx/flutter/CHANGELOG.md b/sherpa-onnx/flutter/CHANGELOG.md index 5f912155b..f7524ea9d 100644 --- a/sherpa-onnx/flutter/CHANGELOG.md +++ b/sherpa-onnx/flutter/CHANGELOG.md @@ -1,3 +1,7 @@ +## 1.10.0 + +* Add inverse text normalization + ## 1.9.30 * Add TTS diff --git a/sherpa-onnx/flutter/lib/src/online_recognizer.dart b/sherpa-onnx/flutter/lib/src/online_recognizer.dart index bee1f2683..49ca3d2e8 100644 --- a/sherpa-onnx/flutter/lib/src/online_recognizer.dart +++ b/sherpa-onnx/flutter/lib/src/online_recognizer.dart @@ -111,11 +111,13 @@ class OnlineRecognizerConfig { this.hotwordsFile = '', this.hotwordsScore = 1.5, this.ctcFstDecoderConfig = const OnlineCtcFstDecoderConfig(), + this.ruleFsts = '', + this.ruleFars = '', }); @override String toString() { - return 'OnlineRecognizerConfig(feat: $feat, model: $model, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, enableEndpoint: $enableEndpoint, rule1MinTrailingSilence: $rule1MinTrailingSilence, rule2MinTrailingSilence: $rule2MinTrailingSilence, rule3MinUtteranceLength: $rule3MinUtteranceLength, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ctcFstDecoderConfig: $ctcFstDecoderConfig)'; + return 'OnlineRecognizerConfig(feat: $feat, model: $model, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, enableEndpoint: $enableEndpoint, rule1MinTrailingSilence: $rule1MinTrailingSilence, rule2MinTrailingSilence: $rule2MinTrailingSilence, 
rule3MinUtteranceLength: $rule3MinUtteranceLength, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ctcFstDecoderConfig: $ctcFstDecoderConfig, ruleFsts: $ruleFsts, ruleFars: $ruleFars)'; } final FeatureConfig feat; @@ -137,6 +139,8 @@ class OnlineRecognizerConfig { final double hotwordsScore; final OnlineCtcFstDecoderConfig ctcFstDecoderConfig; + final String ruleFsts; + final String ruleFars; } class OnlineRecognizerResult { @@ -201,9 +205,13 @@ class OnlineRecognizer { c.ref.ctcFstDecoderConfig.graph = config.ctcFstDecoderConfig.graph.toNativeUtf8(); c.ref.ctcFstDecoderConfig.maxActive = config.ctcFstDecoderConfig.maxActive; + c.ref.ruleFsts = config.ruleFsts.toNativeUtf8(); + c.ref.ruleFars = config.ruleFars.toNativeUtf8(); final ptr = SherpaOnnxBindings.createOnlineRecognizer?.call(c) ?? nullptr; + calloc.free(c.ref.ruleFars); + calloc.free(c.ref.ruleFsts); calloc.free(c.ref.ctcFstDecoderConfig.graph); calloc.free(c.ref.hotwordsFile); calloc.free(c.ref.decodingMethod); diff --git a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart index 70d9572e7..cd3e7781d 100644 --- a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart +++ b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart @@ -205,6 +205,9 @@ final class SherpaOnnxOnlineRecognizerConfig extends Struct { external double hotwordsScore; external SherpaOnnxOnlineCtcFstDecoderConfig ctcFstDecoderConfig; + + external Pointer<Utf8> ruleFsts; + external Pointer<Utf8> ruleFars; } final class SherpaOnnxSileroVadModelConfig extends Struct { diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java index af4b76d4d..cb9afd2e1 100644 --- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java +++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OnlineRecognizerConfig.java @@ -15,6 +15,8 @@ public class OnlineRecognizerConfig { 
private final int maxActivePaths; private final String hotwordsFile; private final float hotwordsScore; + private final String ruleFsts; + private final String ruleFars; private OnlineRecognizerConfig(Builder builder) { this.featConfig = builder.featConfig; @@ -27,6 +29,8 @@ private OnlineRecognizerConfig(Builder builder) { this.maxActivePaths = builder.maxActivePaths; this.hotwordsFile = builder.hotwordsFile; this.hotwordsScore = builder.hotwordsScore; + this.ruleFsts = builder.ruleFsts; + this.ruleFars = builder.ruleFars; } public static Builder builder() { @@ -48,6 +52,8 @@ public static class Builder { private int maxActivePaths = 4; private String hotwordsFile = ""; private float hotwordsScore = 1.5f; + private String ruleFsts = ""; + private String ruleFars = ""; public OnlineRecognizerConfig build() { return new OnlineRecognizerConfig(this); @@ -102,5 +108,15 @@ public Builder setHotwordsScore(float hotwordsScore) { this.hotwordsScore = hotwordsScore; return this; } + + public Builder setRuleFsts(String ruleFsts) { + this.ruleFsts = ruleFsts; + return this; + } + + public Builder setRuleFars(String ruleFars) { + this.ruleFars = ruleFars; + return this; + } } } diff --git a/sherpa-onnx/jni/online-recognizer.cc b/sherpa-onnx/jni/online-recognizer.cc index e8044526e..d8acd0fed 100644 --- a/sherpa-onnx/jni/online-recognizer.cc +++ b/sherpa-onnx/jni/online-recognizer.cc @@ -37,6 +37,18 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { fid = env->GetFieldID(cls, "hotwordsScore", "F"); ans.hotwords_score = env->GetFloatField(config, fid); + fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.rule_fsts = p; + env->ReleaseStringUTFChars(s, p); + + fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;"); + s = (jstring)env->GetObjectField(config, fid); + p = env->GetStringUTFChars(s, nullptr); + ans.rule_fars = p; + 
env->ReleaseStringUTFChars(s, p); + //---------- feat config ---------- fid = env->GetFieldID(cls, "featConfig", "Lcom/k2fsa/sherpa/onnx/FeatureConfig;"); diff --git a/sherpa-onnx/kotlin-api/OnlineRecognizer.kt b/sherpa-onnx/kotlin-api/OnlineRecognizer.kt index 93a21e07e..de47a5ebd 100644 --- a/sherpa-onnx/kotlin-api/OnlineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OnlineRecognizer.kt @@ -69,6 +69,8 @@ data class OnlineRecognizerConfig( var maxActivePaths: Int = 4, var hotwordsFile: String = "", var hotwordsScore: Float = 1.5f, + var ruleFsts: String = "", + var ruleFars: String = "", ) data class OnlineRecognizerResult( diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index 24082a827..432abcb61 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -135,7 +135,9 @@ func sherpaOnnxOnlineRecognizerConfig( maxActivePaths: Int = 4, hotwordsFile: String = "", hotwordsScore: Float = 1.5, - ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig() + ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(), + ruleFsts: String = "", + ruleFars: String = "" ) -> SherpaOnnxOnlineRecognizerConfig { return SherpaOnnxOnlineRecognizerConfig( feat_config: featConfig, @@ -148,7 +150,9 @@ func sherpaOnnxOnlineRecognizerConfig( rule3_min_utterance_length: rule3MinUtteranceLength, hotwords_file: toCPointer(hotwordsFile), hotwords_score: hotwordsScore, - ctc_fst_decoder_config: ctcFstDecoderConfig + ctc_fst_decoder_config: ctcFstDecoderConfig, + rule_fsts: toCPointer(ruleFsts), + rule_fars: toCPointer(ruleFars) ) } diff --git a/wasm/asr/CMakeLists.txt b/wasm/asr/CMakeLists.txt index b46fe39a1..2a6dd13f6 100644 --- a/wasm/asr/CMakeLists.txt +++ b/wasm/asr/CMakeLists.txt @@ -40,6 +40,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp string(APPEND MY_FLAGS "--preload-file 
${CMAKE_CURRENT_SOURCE_DIR}/assets@. ") string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ") +message(STATUS "MY_FLAGS: ${MY_FLAGS}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}") diff --git a/wasm/asr/sherpa-onnx-asr.js b/wasm/asr/sherpa-onnx-asr.js index 2179fd87d..3341a093c 100644 --- a/wasm/asr/sherpa-onnx-asr.js +++ b/wasm/asr/sherpa-onnx-asr.js @@ -239,7 +239,7 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig( config.ctcFstDecoderConfig, Module) - const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len; + const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 2 * 4; const ptr = Module._malloc(len); let offset = 0; @@ -251,7 +251,10 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1; const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; - const bufferLen = decodingMethodLen + hotwordsFileLen; + const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1; + const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1; + const bufferLen = + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen + ruleFarsFileLen; const buffer = Module._malloc(bufferLen); offset = 0; @@ -259,6 +262,13 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { offset += decodingMethodLen; Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); + offset += hotwordsFileLen; + + Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen); + offset += ruleFstsFileLen; + + Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsFileLen); + offset += ruleFarsFileLen; offset = feat.len + 
model.len; Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method @@ -286,6 +296,16 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { offset += 4; Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset); + offset += ctcFstDecoder.len; + + Module.setValue( + ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*'); + offset += 4; + + Module.setValue( + ptr + offset, + buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen, 'i8*'); + offset += 4; return { buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, @@ -363,7 +383,9 @@ function createOnlineRecognizer(Module, myConfig) { ctcFstDecoderConfig: { graph: '', maxActive: 3000, - } + }, + ruleFsts: '', + ruleFars: '', }; if (myConfig) { recognizerConfig = myConfig; diff --git a/wasm/asr/sherpa-onnx-wasm-main-asr.cc b/wasm/asr/sherpa-onnx-wasm-main-asr.cc index de0cf1430..07e5736de 100644 --- a/wasm/asr/sherpa-onnx-wasm-main-asr.cc +++ b/wasm/asr/sherpa-onnx-wasm-main-asr.cc @@ -26,7 +26,7 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) == sizeof(SherpaOnnxFeatureConfig) + sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 + - sizeof(SherpaOnnxOnlineCtcFstDecoderConfig), + sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 2 * 4, ""); void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { @@ -71,6 +71,8 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { config->rule3_min_utterance_length); fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score); + fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts); + fprintf(stdout, "rule_fars: %s\n", config->rule_fars); fprintf(stdout, "----------ctc fst decoder config----------\n"); fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph); diff --git a/wasm/kws/CMakeLists.txt b/wasm/kws/CMakeLists.txt index f083892cc..dfa6f7743 100644 --- 
a/wasm/kws/CMakeLists.txt +++ b/wasm/kws/CMakeLists.txt @@ -31,6 +31,7 @@ string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ") string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ") + message(STATUS "MY_FLAGS: ${MY_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") @@ -51,4 +52,4 @@ install( "$/sherpa-onnx-wasm-kws-main.data" DESTINATION bin/wasm -) \ No newline at end of file +) diff --git a/wasm/tts/CMakeLists.txt b/wasm/tts/CMakeLists.txt index 15fe8dd34..618a98a1d 100644 --- a/wasm/tts/CMakeLists.txt +++ b/wasm/tts/CMakeLists.txt @@ -31,6 +31,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ") string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ") +message(STATUS "MY_FLAGS: ${MY_FLAGS}") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")