From ae7b7d086922a9d9cb36dd368371f3227ee51d68 Mon Sep 17 00:00:00 2001 From: "dingfeng.wong" Date: Tue, 22 Jul 2025 22:21:06 +0800 Subject: [PATCH] a --- README.md | 22 ++++++++++++++++++++++ realtimesst.log | 31 +++++++++++++++++++++++++++++++ src/tooling/stt_cli.py | 23 ++++++++++++++++++++--- 3 files changed, 73 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 21791b3..0eabb38 100644 --- a/README.md +++ b/README.md @@ -288,6 +288,22 @@ The following wake words are supported: - porcupine - terminator +### Wake Word Engines + +Two wake word engines are supported: + +- **openwakeword** - Open source, free to use, good accuracy +- **pvporcupine** (default) - Picovoice's Porcupine engine, highly optimized + +Choose the engine based on your requirements: +```bash +# Use OpenWakeWord +tooling stt listen --wakeword-backend openwakeword + +# Use Porcupine (default) for better performance +tooling stt listen --wakeword-backend pvporcupine +``` + ### Available Models | Model | Speed | Accuracy | Memory | Use Case | @@ -311,6 +327,7 @@ Options: --save-to-file PATH Save transcriptions to a file --sensitivity FLOAT Wake word sensitivity (0.0 to 1.0) [default: 0.6] --device TEXT Device to use (auto, cuda, cpu) [default: auto] + --wakeword-backend TEXT Wake word backend (pvporcupine, openwakeword) [default: pvporcupine] --verbose Show verbose output and configuration --help Show this message and exit ``` @@ -342,6 +359,11 @@ tooling stt test --duration 5 --model tiny tooling stt listen --language es --sensitivity 0.8 --wake-word "hey google" ``` +**Use a different wake word engine:** +```bash +tooling stt listen --wakeword-backend pvporcupine --wake-word alexa +``` + ### How it Works 1. 
**Initialization**: Loads the selected Whisper model and sets up audio processing diff --git a/realtimesst.log b/realtimesst.log index 8c54f41..c7f30d7 100644 --- a/realtimesst.log +++ b/realtimesst.log @@ -15,3 +15,34 @@ NoneType: None 2025-07-22 22:11:01.946 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' 2025-07-22 22:11:01.947 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start 2025-07-22 22:11:01.981 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'wakeword' +2025-07-22 22:13:18.955 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread +2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 0 +2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - INFO - State changed from 'wakeword' to 'inactive' +2025-07-22 22:13:18.962 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process +2025-07-22 22:13:19.621 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process +2025-07-22 22:13:19.622 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread +2025-07-22 22:15:33.358 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT +2025-07-22 22:15:33.369 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512 +2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3 +2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully +2025-07-22 22:15:34.053 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully +2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker +2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start +2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - Main transcription model 
ready +2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully +2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - recording started +2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'recording' +2025-07-22 22:15:40.784 - RealTimeSTT: realtimestt - INFO - recording stopped +2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - Setting listen time +2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 49152 +2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive' +2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing' +2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started +2025-07-22 22:15:40.901 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-22 22:15:41.002 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1 +2025-07-22 22:15:41.030 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive' +2025-07-22 22:15:41.040 - RealTimeSTT: realtimestt - DEBUG - Model tiny completed transcription in 0.16 seconds +2025-07-22 22:15:41.041 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread +2025-07-22 22:15:41.047 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process +2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process +2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread diff --git a/src/tooling/stt_cli.py b/src/tooling/stt_cli.py index d1c3eee..92a1520 100644 --- a/src/tooling/stt_cli.py +++ b/src/tooling/stt_cli.py @@ -112,6 +112,7 @@ if RUMPS_AVAILABLE: 
self.realtime = True self.sensitivity = 0.6 self.device = "auto" + self.wakeword_backend = "pvporcupine" self.save_to_file = None # Menu setup @@ -229,6 +230,7 @@ if RUMPS_AVAILABLE: recorder_config = { "model": self.model, "wake_words": self.wake_word, + "wakeword_backend": self.wakeword_backend, "wake_words_sensitivity": self.sensitivity, "device": self.device, "on_recording_start": self.on_recording_start, @@ -422,9 +424,9 @@ def listen_cmd( default="auto", help="Device to use (auto, cuda, cpu)" ), - wakeword_engine: str = typer.Option( - default="openwakeword", - help="Wake word engine to use (openwakeword, pvporcupine)" + wakeword_backend: str = typer.Option( + default="pvporcupine", + help="Wake word backend to use (pvporcupine, openwakeword)" ), verbose: bool = typer.Option( default=False, @@ -452,6 +454,13 @@ def listen_cmd( console.print(f"Valid options: {', '.join(valid_wake_words)}") raise typer.Exit(1) + # Validate wakeword backend + valid_backends = ["pvporcupine", "openwakeword"] + if wakeword_backend.lower() not in valid_backends: + console.print(f"[bold red]❌ Invalid wakeword backend: {wakeword_backend}[/bold red]") + console.print(f"Valid options: {', '.join(valid_backends)}") + raise typer.Exit(1) + # Determine device if device == "auto": try: @@ -479,6 +488,7 @@ def listen_cmd( config_table.add_column("Value", style="green") config_table.add_row("Wake Word", wake_word) + config_table.add_row("Wakeword Backend", wakeword_backend) config_table.add_row("Model", model) config_table.add_row("Language", language if language else "Auto-detect") config_table.add_row("Device", device) @@ -534,6 +544,7 @@ def listen_cmd( recorder_config = { "model": model, "wake_words": wake_word, + "wakeword_backend": wakeword_backend, "wake_words_sensitivity": sensitivity, "device": device, "on_recording_start": on_recording_start, @@ -738,12 +749,18 @@ def info_cmd(): console.print(f"\n[bold cyan]Available Models:[/bold cyan]") console.print(", ".join(models)) + # 
Available wakeword backends + backends = ["pvporcupine", "openwakeword"] + console.print(f"\n[bold cyan]Available Wakeword Backends:[/bold cyan]") + console.print(", ".join(backends)) + # Usage examples console.print(f"\n[bold cyan]Usage Examples:[/bold cyan]") examples = [ "tooling stt listen # Use jarvis wake word with base model", "tooling stt listen --wake-word alexa # Use alexa wake word", "tooling stt listen --model tiny # Use faster tiny model", + "tooling stt listen --wakeword-engine pvporcupine # Use pvporcupine engine", "tooling stt test --duration 5 # Test for 5 seconds", "tooling stt listen --save-to-file transcripts.txt # Save to file", "tooling stt statusbar # Launch status bar app"