From ae7b7d086922a9d9cb36dd368371f3227ee51d68 Mon Sep 17 00:00:00 2001
From: "dingfeng.wong" <dingfeng.wong@okg.com>
Date: Tue, 22 Jul 2025 22:21:06 +0800
Subject: [PATCH] Add --wakeword-backend option to STT CLI and document it

---
 README.md              | 22 ++++++++++++++++++++++
 realtimesst.log        | 31 +++++++++++++++++++++++++++++++
 src/tooling/stt_cli.py | 23 ++++++++++++++++++++---
 3 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 21791b3..0eabb38 100644
--- a/README.md
+++ b/README.md
@@ -288,6 +288,22 @@ The following wake words are supported:
 - porcupine
 - terminator
 
+### Wake Word Engines
+
+Two wake word engines are supported (selected with `--wakeword-backend`):
+
+- **openwakeword** - Open source, free to use, good accuracy
+- **pvporcupine** (default) - Picovoice's Porcupine engine, highly optimized
+
+Choose the engine based on your requirements:
+```bash
+# Use OpenWakeWord
+tooling stt listen --wakeword-backend openwakeword
+
+# Use Porcupine (default) for better performance
+tooling stt listen --wakeword-backend pvporcupine
+```
+
 ### Available Models
 
 | Model | Speed | Accuracy | Memory | Use Case |
@@ -311,6 +327,7 @@ Options:
   --save-to-file PATH    Save transcriptions to a file
   --sensitivity FLOAT    Wake word sensitivity (0.0 to 1.0) [default: 0.6]
   --device TEXT          Device to use (auto, cuda, cpu) [default: auto]
+  --wakeword-backend TEXT Wake word backend (pvporcupine, openwakeword) [default: pvporcupine]
   --verbose              Show verbose output and configuration
   --help                 Show this message and exit
 ```
@@ -342,6 +359,11 @@ tooling stt test --duration 5 --model tiny
 tooling stt listen --language es --sensitivity 0.8 --wake-word "hey google"
 ```
 
+**Use a different wake word engine:**
+```bash
+tooling stt listen --wakeword-backend openwakeword --wake-word alexa
+```
+
 ### How it Works
 
 1. **Initialization**: Loads the selected Whisper model and sets up audio processing
diff --git a/realtimesst.log b/realtimesst.log
index 8c54f41..c7f30d7 100644
--- a/realtimesst.log
+++ b/realtimesst.log
@@ -15,3 +15,34 @@ NoneType: None
 2025-07-22 22:11:01.946 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening'
 2025-07-22 22:11:01.947 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start
 2025-07-22 22:11:01.981 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'wakeword'
+2025-07-22 22:13:18.955 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread
+2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 0
+2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - INFO - State changed from 'wakeword' to 'inactive'
+2025-07-22 22:13:18.962 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process
+2025-07-22 22:13:19.621 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process
+2025-07-22 22:13:19.622 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread
+2025-07-22 22:15:33.358 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT
+2025-07-22 22:15:33.369 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512
+2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3
+2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully
+2025-07-22 22:15:34.053 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully
+2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker
+2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start
+2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - Main transcription model ready
+2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully
+2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - recording started
+2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'recording'
+2025-07-22 22:15:40.784 - RealTimeSTT: realtimestt - INFO - recording stopped
+2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - Setting listen time
+2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 49152
+2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive'
+2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing'
+2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started
+2025-07-22 22:15:40.901 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1
+2025-07-22 22:15:41.002 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1
+2025-07-22 22:15:41.030 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive'
+2025-07-22 22:15:41.040 - RealTimeSTT: realtimestt - DEBUG - Model tiny completed transcription in 0.16 seconds
+2025-07-22 22:15:41.041 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread
+2025-07-22 22:15:41.047 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process
+2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process
+2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread
diff --git a/src/tooling/stt_cli.py b/src/tooling/stt_cli.py
index d1c3eee..92a1520 100644
--- a/src/tooling/stt_cli.py
+++ b/src/tooling/stt_cli.py
@@ -112,6 +112,7 @@ if RUMPS_AVAILABLE:
             self.realtime = True
             self.sensitivity = 0.6
             self.device = "auto"
+            self.wakeword_backend = "pvporcupine"
             self.save_to_file = None
             
             # Menu setup
@@ -229,6 +230,7 @@ if RUMPS_AVAILABLE:
                 recorder_config = {
                     "model": self.model,
                     "wake_words": self.wake_word,
+                    "wakeword_backend": self.wakeword_backend,
                     "wake_words_sensitivity": self.sensitivity,
                     "device": self.device,
                     "on_recording_start": self.on_recording_start,
@@ -422,9 +424,9 @@ def listen_cmd(
         default="auto",
         help="Device to use (auto, cuda, cpu)"
     ),
-    wakeword_engine: str = typer.Option(
-        default="openwakeword",
-        help="Wake word engine to use (openwakeword, pvporcupine)"
+    wakeword_backend: str = typer.Option(
+        default="pvporcupine",
+        help="Wake word backend to use (pvporcupine, openwakeword)"
     ),
     verbose: bool = typer.Option(
         default=False,
@@ -452,6 +454,13 @@ def listen_cmd(
         console.print(f"Valid options: {', '.join(valid_wake_words)}")
         raise typer.Exit(1)
     
+    # Validate wakeword backend
+    valid_backends = ["pvporcupine", "openwakeword"]
+    if wakeword_backend.lower() not in valid_backends:
+        console.print(f"[bold red]❌ Invalid wakeword backend: {wakeword_backend}[/bold red]")
+        console.print(f"Valid options: {', '.join(valid_backends)}")
+        raise typer.Exit(1)
+    
     # Determine device
     if device == "auto":
         try:
@@ -479,6 +488,7 @@ def listen_cmd(
         config_table.add_column("Value", style="green")
         
         config_table.add_row("Wake Word", wake_word)
+        config_table.add_row("Wakeword Backend", wakeword_backend)
         config_table.add_row("Model", model)
         config_table.add_row("Language", language if language else "Auto-detect")
         config_table.add_row("Device", device)
@@ -534,6 +544,7 @@ def listen_cmd(
         recorder_config = {
             "model": model,
             "wake_words": wake_word,
+            "wakeword_backend": wakeword_backend,
             "wake_words_sensitivity": sensitivity,
             "device": device,
             "on_recording_start": on_recording_start,
@@ -738,12 +749,18 @@ def info_cmd():
     console.print(f"\n[bold cyan]Available Models:[/bold cyan]")
     console.print(", ".join(models))
     
+    # Available wakeword backends
+    backends = ["pvporcupine", "openwakeword"]
+    console.print(f"\n[bold cyan]Available Wakeword Backends:[/bold cyan]")
+    console.print(", ".join(backends))
+    
     # Usage examples
     console.print(f"\n[bold cyan]Usage Examples:[/bold cyan]")
     examples = [
         "tooling stt listen                    # Use jarvis wake word with base model",
         "tooling stt listen --wake-word alexa  # Use alexa wake word",
         "tooling stt listen --model tiny       # Use faster tiny model",
+        "tooling stt listen --wakeword-backend pvporcupine  # Use pvporcupine backend",
         "tooling stt test --duration 5         # Test for 5 seconds",
         "tooling stt listen --save-to-file transcripts.txt  # Save to file",
         "tooling stt statusbar                 # Launch status bar app"