diff --git a/README.md b/README.md index c5c7872..7c462ae 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ To use RealtimeSTT with GPU support via CUDA please follow these steps: 4. **Install PyTorch with CUDA support**: ```bash pip uninstall torch - pip install torch==2.0.1+cu118 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118 + pip install torch==2.2.2+cu118 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu118 ``` ## Quick Start diff --git a/RealtimeSTT/audio_recorder.py b/RealtimeSTT/audio_recorder.py index 98d19e6..137543f 100644 --- a/RealtimeSTT/audio_recorder.py +++ b/RealtimeSTT/audio_recorder.py @@ -27,6 +27,7 @@ """ import torch.multiprocessing as mp +import torch from typing import List, Union import faster_whisper import collections @@ -36,10 +37,10 @@ import threading import webrtcvad import itertools +import platform import pyaudio import logging import struct -import torch import halo import time import os @@ -65,6 +66,10 @@ BUFFER_SIZE = 512 INT16_MAX_ABS_VALUE = 32768.0 +INIT_HANDLE_BUFFER_OVERFLOW = False +if platform.system() != 'Darwin': + INIT_HANDLE_BUFFER_OVERFLOW = True + class AudioToTextRecorder: """ @@ -126,7 +131,8 @@ def __init__(self, on_wakeword_detection_start=None, on_wakeword_detection_end=None, on_recorded_chunk=None, - debug_mode=False + debug_mode=False, + handle_buffer_overflow: bool = INIT_HANDLE_BUFFER_OVERFLOW, ): """ Initializes an audio recorder and transcription @@ -253,12 +259,13 @@ def __init__(self, with the recorded audio chunk as its argument. - debug_mode (bool, default=False): If set to True, the system will print additional debug information to the console. + - handle_buffer_overflow (bool, default=True, except on macOS where it is False): If set to True, the system + will log a warning and discard old audio chunks when the audio queue size exceeds the allowed latency limit. Raises: Exception: Errors related to initializing transcription model, wake word detection, or audio recording. 
""" - self.language = language self.compute_type = compute_type self.input_device_index = input_device_index @@ -297,6 +304,7 @@ def __init__(self, on_realtime_transcription_stabilized ) self.debug_mode = debug_mode + self.handle_buffer_overflow = handle_buffer_overflow self.allowed_latency_limit = ALLOWED_LATENCY_LIMIT self.level = level @@ -988,20 +996,20 @@ def _recording_worker(self): if self.on_recorded_chunk: self.on_recorded_chunk(data) - # Handle queue overflow - queue_overflow_logged = False - - while (self.audio_queue.qsize() > - self.allowed_latency_limit): - - if not queue_overflow_logged: - logging.warning("Audio queue size exceeds latency " - "limit. Current size: " + if self.handle_buffer_overflow: + # Handle queue overflow + if (self.audio_queue.qsize() > + self.allowed_latency_limit): + logging.warning("Audio queue size exceeds " + "latency limit. Current size: " f"{self.audio_queue.qsize()}. " "Discarding old audio chunks." ) - queue_overflow_logged = True - data = self.audio_queue.get() + + while (self.audio_queue.qsize() > + self.allowed_latency_limit): + + data = self.audio_queue.get() except BrokenPipeError: print("BrokenPipeError _recording_worker") diff --git a/requirements.txt b/requirements.txt index 784761c..c94dfd8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ faster-whisper==1.0.1 pvporcupine==1.9.5 webrtcvad==2.0.10 halo==0.0.31 -torch==2.1.2 -torchaudio==2.1.2 \ No newline at end of file +torch==2.2.2 +torchaudio==2.2.2 \ No newline at end of file