From a2522e29fe3e7ff8c99e66c62faa7783c669d86f Mon Sep 17 00:00:00 2001 From: David Scripka Date: Tue, 7 Nov 2023 06:36:59 -0500 Subject: [PATCH 1/5] Updated CLI args --- examples/capture_activations.py | 35 ++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/examples/capture_activations.py b/examples/capture_activations.py index fae900e..8f8e80d 100644 --- a/examples/capture_activations.py +++ b/examples/capture_activations.py @@ -68,10 +68,26 @@ default=False, required=False ) +parser=argparse.ArgumentParser() +parser.add_argument( + "--chunk_size", + help="How much audio (in number of 16khz samples) to predict on at once", + type=int, + default=1280, + required=False +) +parser.add_argument( + "--model_path", + help="The path of a specific model to load", + type=str, + default="", + required=False +) parser.add_argument( - "--model", - help="The model to use for openWakeWord, leave blank to use all available models", + "--inference_framework", + help="The inference framework to use (either 'onnx' or 'tflite'", type=str, + default='tflite', required=False ) parser.add_argument( @@ -87,25 +103,26 @@ FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 16000 -CHUNK = 1280 +CHUNK = args.chunk_size audio = pyaudio.PyAudio() mic_stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) # Load pre-trained openwakeword models -if args.model: +if args.model_path: model_paths = openwakeword.get_pretrained_model_paths() for path in model_paths: - if args.model in path: + if args.model_path in path: model_path = path if model_path: owwModel = Model( - wakeword_model_paths=[model_path], + wakeword_models=[model_path], enable_speex_noise_suppression=args.noise_suppression, - vad_threshold = args.vad_threshold - ) + vad_threshold = args.vad_threshold, + inference_framework=args.inference_framework + ) else: - print(f'Could not find model \"{args.model}\"') + print(f'Could not find model 
\"{args.model_path}\"') exit() else: owwModel = Model( From 8376848be5db4574145bab10c0312ccf1aebd6e2 Mon Sep 17 00:00:00 2001 From: David Scripka Date: Wed, 8 Nov 2023 07:48:56 -0500 Subject: [PATCH 2/5] Basic web streaming example [skip ci] --- examples/web/README.md | 15 ++++ examples/web/streaming_client.html | 112 +++++++++++++++++++++++++++++ examples/web/streaming_server.py | 108 ++++++++++++++++++++++++++++ 3 files changed, 235 insertions(+) create mode 100644 examples/web/README.md create mode 100644 examples/web/streaming_client.html create mode 100644 examples/web/streaming_server.py diff --git a/examples/web/README.md b/examples/web/README.md new file mode 100644 index 0000000..2f9c21b --- /dev/null +++ b/examples/web/README.md @@ -0,0 +1,15 @@ +# Examples + +This folder contains examples of using openWakeWord with web applications. + +## Websocket Streaming + +As openWakeWord does not have a native Javascript port, using it within a web browswer is best accomplished with websocket streaming of the audio data from the browser to a simple Python application. To install the requirements for this example: + +``` +pip install aiohttp +``` + +The `streaming_client.html` page shows a simple implementation of audio capture and streamimng from a microphone and streaming in a browser, and the `streaming_server.py` file is the corresponding websocket server that passes the audio into openWakeWord. + +To run the example, execute `python streaming_server.py` (add the `--help` argument to see options) and navigate to `localhost:9000` in your browser. \ No newline at end of file diff --git a/examples/web/streaming_client.html b/examples/web/streaming_client.html new file mode 100644 index 0000000..3bf5005 --- /dev/null +++ b/examples/web/streaming_client.html @@ -0,0 +1,112 @@ + + + + + + + Websocket Microphone Streaming + + +

Streaming Audio to openWakeWord Using Websockets

+ + + + + \ No newline at end of file diff --git a/examples/web/streaming_server.py b/examples/web/streaming_server.py new file mode 100644 index 0000000..c386bbe --- /dev/null +++ b/examples/web/streaming_server.py @@ -0,0 +1,108 @@ +# Copyright 2023 David Scripka. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +####################################################################################### + +# This example scripts runs openWakeWord in a simple web server receiving audio +# from a web page using websockets. 
+ +####################################################################################### + +# Imports +import aiohttp +from aiohttp import web +import numpy as np +from openwakeword import Model +import resampy +import argparse + +# Define websocket handler +async def websocket_handler(request): + ws = web.WebSocketResponse() + await ws.prepare(request) + + # Start listening for websocket messages + async for msg in ws: + # Get the sample rate of the microphone from the browser + if msg.type == aiohttp.WSMsgType.TEXT: + sample_rate = int(msg.data) + elif msg.type == aiohttp.WSMsgType.ERROR: + print(f"WebSocket error: {ws.exception()}") + else: + # Get audio data from websocket + audio_bytes = msg.data + + # Add extra bytes of silence if needed + if len(msg.data) % 2 == 1: + audio_bytes += (b'\x00') + + # Convert audio to correct format and sample rate + data = np.frombuffer(audio_bytes, dtype=np.int16) + if sample_rate != 16000: + data = resampy.resample(data, sample_rate, 16000) + + # Get openWakeWord predictions and set to browser client + predictions = owwModel.predict(data) + + activations = [] + for key in predictions: + if predictions[key] >= 0.5: + activations.append(key) + + if activations != []: + await ws.send_str(str(activations)) + + return ws + +# Define static file handler +async def static_file_handler(request): + return web.FileResponse('./streaming_client.html') + +app = web.Application() +app.add_routes([web.get('/ws', websocket_handler), web.get('/', static_file_handler)]) + +if __name__ == '__main__': + # Parse CLI arguments + parser=argparse.ArgumentParser() + parser.add_argument( + "--chunk_size", + help="How much audio (in number of samples) to predict on at once", + type=int, + default=1280, + required=False + ) + parser.add_argument( + "--model_path", + help="The path of a specific model to load", + type=str, + default="", + required=False + ) + parser.add_argument( + "--inference_framework", + help="The inference framework to use (either 
'onnx' or 'tflite'", + type=str, + default='tflite', + required=False + ) + args=parser.parse_args() + + # Load openWakeWord models + if args.model_path != "": + owwModel = Model(wakeword_models=[args.model_path], inference_framework=args.inference_framework) + else: + owwModel = Model(inference_framework=args.inference_framework) + + # Start webapp + web.run_app(app, host='localhost', port=9000) \ No newline at end of file From 58ec0943806b5907b2a3218eb0935b1cdf1c92b3 Mon Sep 17 00:00:00 2001 From: David Scripka Date: Thu, 9 Nov 2023 08:36:43 -0500 Subject: [PATCH 3/5] Added table for detections and styling to websocket example --- examples/web/streaming_client.html | 216 ++++++++++++++++++++--------- examples/web/streaming_server.py | 6 +- 2 files changed, 155 insertions(+), 67 deletions(-) diff --git a/examples/web/streaming_client.html b/examples/web/streaming_client.html index 3bf5005..44c98d8 100644 --- a/examples/web/streaming_client.html +++ b/examples/web/streaming_client.html @@ -1,31 +1,127 @@ - Websocket Microphone Streaming +

Streaming Audio to openWakeWord Using Websockets

- + + + + + + + + + + + +
WakewordDetected
\ No newline at end of file diff --git a/examples/web/streaming_server.py b/examples/web/streaming_server.py index c386bbe..449d251 100644 --- a/examples/web/streaming_server.py +++ b/examples/web/streaming_server.py @@ -26,12 +26,16 @@ from openwakeword import Model import resampy import argparse +import json # Define websocket handler async def websocket_handler(request): ws = web.WebSocketResponse() await ws.prepare(request) + # Send loaded models + await ws.send_str(json.dumps({"loaded_models": list(owwModel.models.keys())})) + # Start listening for websocket messages async for msg in ws: # Get the sample rate of the microphone from the browser @@ -61,7 +65,7 @@ async def websocket_handler(request): activations.append(key) if activations != []: - await ws.send_str(str(activations)) + await ws.send_str(json.dumps({"activations": activations})) return ws From a46d1e1f5199211dcb4f5aa2459499f295f3dd5c Mon Sep 17 00:00:00 2001 From: David Scripka Date: Thu, 9 Nov 2023 20:14:17 -0500 Subject: [PATCH 4/5] Updated readmes for new examples [skip ci] --- README.md | 7 ++++++- examples/web/README.md | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f93e2d4..97784f8 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,9 @@ openWakeWord is an open-source wakeword library that can be used to create voice # Updates +**2023/11/09** +- Added example scripts under `examples/web` that demonstrate streaming audio from a web application into openWakeWord. + **2023/10/11** - Significant improvements to the process of [training new models](#training-new-models), including an example Google Colab notebook demonstrating how to train a basic wake word model in <1 hour. @@ -240,9 +243,11 @@ Future release road maps may have non-english support. 
In particular, [Mycroft.A **Can openWakeWord be run in a browser with javascript?** - While the ONNX runtime [does support javascript](https://onnxruntime.ai/docs/get-started/with-javascript.html), much of the other functionality required for openWakeWord models would need to be ported. This is not currently on the roadmap, but please open an issue/start a discussion if this feature is of particular interest. +- As a potential work-around for some applications, the example scripts in `examples/web` demonstrate how audio can be captured in a browser and streamed via websockets into openWakeWord running in a Python backend server. +- Other potential options could include projects like `pyodide` (see [here](https://github.com/pyodide/pyodide/issues/4220) for a related issue). **Is there a C++ version of openWakeWord?** -- While the ONNX runtime [also has a C++ API](https://onnxruntime.ai/docs/get-started/with-cpp.html), there isn't an official C++ implementation of the full openWakeWord library. However, [@synesthesiam](https://github.com/synesthesiam) has created a [C++ version](https://github.com/rhasspy/openWakeWord-cpp) of openWakeWord with basic functionality implemented. +- While the ONNX runtime [also has a C++ API](https://onnxruntime.ai/docs/get-started/with-cpp.html), there isn't an official C++ implementation of the full openWakeWord library. However, [@synesthesiam](https://github.com/synesthesiam) has created a [C++ version of openWakeWord](https://github.com/rhasspy/openWakeWord-cpp) with basic functionality implemented. **Why are there three separate models instead of just one?** - Separating the models was an intentional choice to provide flexibility and optimize the efficiency of the end-to-end prediction process. For example, with separate melspectrogram, embedding, and prediction models, each one can operate on different size inputs of audio to optimize overall latency and share computations between models.
It certainly is possible to make a combined model with all of the steps integrated, though, if that was a requirement of a particular use case. diff --git a/examples/web/README.md b/examples/web/README.md index 2f9c21b..bd4e970 100644 --- a/examples/web/README.md +++ b/examples/web/README.md @@ -8,8 +8,14 @@ As openWakeWord does not have a native Javascript port, using it within a web br ``` pip install aiohttp +pip install resampy ``` The `streaming_client.html` page shows a simple implementation of audio capture and streamimng from a microphone and streaming in a browser, and the `streaming_server.py` file is the corresponding websocket server that passes the audio into openWakeWord. -To run the example, execute `python streaming_server.py` (add the `--help` argument to see options) and navigate to `localhost:9000` in your browser. \ No newline at end of file +To run the example, execute `python streaming_server.py` (add the `--help` argument to see options) and navigate to `localhost:9000` in your browser. + +Note that this example is illustrative only, and integration of this approach with other web applications may have different requirements. In particular, some key considerations: + +- This example captures PCM audio from the web browser and streams full 16-bit integer representations of ~250 ms audio chunks over the websocket connection. In practice, bandwidth-efficient streams of compressed audio may be more suitable for some applications. +- The browser captures audio at the native sampling rate of the capture device, which can require re-sampling prior to passing the audio data to openWakeWord. This example uses the `resampy` library, which has a good balance between performance and quality, but other resampling approaches that optimize different aspects may be more suitable for some applications.
\ No newline at end of file From b85645ed68575d2e6535a8060da0454888a7c38a Mon Sep 17 00:00:00 2001 From: David Scripka Date: Thu, 9 Nov 2023 20:27:28 -0500 Subject: [PATCH 5/5] Added missing attribution [skip ci] --- examples/web/streaming_client.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/web/streaming_client.html b/examples/web/streaming_client.html index 44c98d8..c2df273 100644 --- a/examples/web/streaming_client.html +++ b/examples/web/streaming_client.html @@ -121,7 +121,8 @@

Streaming Audio to openWakeWord Using Websockets

} }; - // Create microphone capture stream + // Create microphone capture stream for 16-bit PCM audio data + // Code based on the excellent tutorial by Ragy Morkos: https://medium.com/@ragymorkos/gettineg-monochannel-16-bit-signed-integer-pcm-audio-samples-from-the-microphone-in-the-browser-8d4abf81164d navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia ||