This commit is contained in:
dingfeng.wong
2025-07-22 22:21:06 +08:00
parent 15657df63f
commit ae7b7d0869
3 changed files with 73 additions and 3 deletions
+22
View File
@@ -288,6 +288,22 @@ The following wake words are supported:
- porcupine - porcupine
- terminator - terminator
### Wake Word Engines
Two wake word engines are supported:
- **openwakeword** - Open source, free to use, good accuracy
- **pvporcupine** (default) - Picovoice's Porcupine engine, highly optimized
Choose the engine based on your requirements:
```bash
# Use OpenWakeWord
tooling stt listen --wakeword-backend openwakeword
# Use Porcupine (default) for better performance
tooling stt listen --wakeword-backend pvporcupine
```
### Available Models ### Available Models
| Model | Speed | Accuracy | Memory | Use Case | | Model | Speed | Accuracy | Memory | Use Case |
@@ -311,6 +327,7 @@ Options:
--save-to-file PATH Save transcriptions to a file --save-to-file PATH Save transcriptions to a file
--sensitivity FLOAT Wake word sensitivity (0.0 to 1.0) [default: 0.6] --sensitivity FLOAT Wake word sensitivity (0.0 to 1.0) [default: 0.6]
--device TEXT Device to use (auto, cuda, cpu) [default: auto] --device TEXT Device to use (auto, cuda, cpu) [default: auto]
--wakeword-backend TEXT Wake word backend (pvporcupine, openwakeword) [default: pvporcupine]
--verbose Show verbose output and configuration --verbose Show verbose output and configuration
--help Show this message and exit --help Show this message and exit
``` ```
@@ -342,6 +359,11 @@ tooling stt test --duration 5 --model tiny
tooling stt listen --language es --sensitivity 0.8 --wake-word "hey google" tooling stt listen --language es --sensitivity 0.8 --wake-word "hey google"
``` ```
**Use a different wake word engine:**
```bash
tooling stt listen --wakeword-backend pvporcupine --wake-word alexa
```
### How it Works ### How it Works
1. **Initialization**: Loads the selected Whisper model and sets up audio processing 1. **Initialization**: Loads the selected Whisper model and sets up audio processing
+31
View File
@@ -15,3 +15,34 @@ NoneType: None
2025-07-22 22:11:01.946 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening' 2025-07-22 22:11:01.946 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening'
2025-07-22 22:11:01.947 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start 2025-07-22 22:11:01.947 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start
2025-07-22 22:11:01.981 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'wakeword' 2025-07-22 22:11:01.981 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'wakeword'
2025-07-22 22:13:18.955 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread
2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 0
2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - INFO - State changed from 'wakeword' to 'inactive'
2025-07-22 22:13:18.962 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process
2025-07-22 22:13:19.621 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process
2025-07-22 22:13:19.622 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread
2025-07-22 22:15:33.358 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT
2025-07-22 22:15:33.369 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512
2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3
2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully
2025-07-22 22:15:34.053 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully
2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker
2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start
2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - Main transcription model ready
2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully
2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - recording started
2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'recording'
2025-07-22 22:15:40.784 - RealTimeSTT: realtimestt - INFO - recording stopped
2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - Setting listen time
2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 49152
2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive'
2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing'
2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started
2025-07-22 22:15:40.901 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1
2025-07-22 22:15:41.002 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1
2025-07-22 22:15:41.030 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive'
2025-07-22 22:15:41.040 - RealTimeSTT: realtimestt - DEBUG - Model tiny completed transcription in 0.16 seconds
2025-07-22 22:15:41.041 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread
2025-07-22 22:15:41.047 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process
2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process
2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread
+20 -3
View File
@@ -112,6 +112,7 @@ if RUMPS_AVAILABLE:
self.realtime = True self.realtime = True
self.sensitivity = 0.6 self.sensitivity = 0.6
self.device = "auto" self.device = "auto"
self.wakeword_backend = "pvporcupine"
self.save_to_file = None self.save_to_file = None
# Menu setup # Menu setup
@@ -229,6 +230,7 @@ if RUMPS_AVAILABLE:
recorder_config = { recorder_config = {
"model": self.model, "model": self.model,
"wake_words": self.wake_word, "wake_words": self.wake_word,
"wakeword_backend": self.wakeword_backend,
"wake_words_sensitivity": self.sensitivity, "wake_words_sensitivity": self.sensitivity,
"device": self.device, "device": self.device,
"on_recording_start": self.on_recording_start, "on_recording_start": self.on_recording_start,
@@ -422,9 +424,9 @@ def listen_cmd(
default="auto", default="auto",
help="Device to use (auto, cuda, cpu)" help="Device to use (auto, cuda, cpu)"
), ),
wakeword_engine: str = typer.Option( wakeword_backend: str = typer.Option(
default="openwakeword", default="pvporcupine",
help="Wake word engine to use (openwakeword, pvporcupine)" help="Wake word backend to use (pvporcupine, openwakeword)"
), ),
verbose: bool = typer.Option( verbose: bool = typer.Option(
default=False, default=False,
@@ -452,6 +454,13 @@ def listen_cmd(
console.print(f"Valid options: {', '.join(valid_wake_words)}") console.print(f"Valid options: {', '.join(valid_wake_words)}")
raise typer.Exit(1) raise typer.Exit(1)
# Validate wakeword backend
valid_backends = ["pvporcupine", "openwakeword"]
if wakeword_backend.lower() not in valid_backends:
console.print(f"[bold red]❌ Invalid wakeword backend: {wakeword_backend}[/bold red]")
console.print(f"Valid options: {', '.join(valid_backends)}")
raise typer.Exit(1)
# Determine device # Determine device
if device == "auto": if device == "auto":
try: try:
@@ -479,6 +488,7 @@ def listen_cmd(
config_table.add_column("Value", style="green") config_table.add_column("Value", style="green")
config_table.add_row("Wake Word", wake_word) config_table.add_row("Wake Word", wake_word)
config_table.add_row("Wakeword Backend", wakeword_backend)
config_table.add_row("Model", model) config_table.add_row("Model", model)
config_table.add_row("Language", language if language else "Auto-detect") config_table.add_row("Language", language if language else "Auto-detect")
config_table.add_row("Device", device) config_table.add_row("Device", device)
@@ -534,6 +544,7 @@ def listen_cmd(
recorder_config = { recorder_config = {
"model": model, "model": model,
"wake_words": wake_word, "wake_words": wake_word,
"wakeword_backend": wakeword_backend,
"wake_words_sensitivity": sensitivity, "wake_words_sensitivity": sensitivity,
"device": device, "device": device,
"on_recording_start": on_recording_start, "on_recording_start": on_recording_start,
@@ -738,12 +749,18 @@ def info_cmd():
console.print(f"\n[bold cyan]Available Models:[/bold cyan]") console.print(f"\n[bold cyan]Available Models:[/bold cyan]")
console.print(", ".join(models)) console.print(", ".join(models))
# Available wakeword backends
backends = ["pvporcupine", "openwakeword"]
console.print(f"\n[bold cyan]Available Wakeword Backends:[/bold cyan]")
console.print(", ".join(backends))
# Usage examples # Usage examples
console.print(f"\n[bold cyan]Usage Examples:[/bold cyan]") console.print(f"\n[bold cyan]Usage Examples:[/bold cyan]")
examples = [ examples = [
"tooling stt listen # Use jarvis wake word with base model", "tooling stt listen # Use jarvis wake word with base model",
"tooling stt listen --wake-word alexa # Use alexa wake word", "tooling stt listen --wake-word alexa # Use alexa wake word",
"tooling stt listen --model tiny # Use faster tiny model", "tooling stt listen --model tiny # Use faster tiny model",
"tooling stt listen --wakeword-engine pvporcupine # Use pvporcupine engine",
"tooling stt test --duration 5 # Test for 5 seconds", "tooling stt test --duration 5 # Test for 5 seconds",
"tooling stt listen --save-to-file transcripts.txt # Save to file", "tooling stt listen --save-to-file transcripts.txt # Save to file",
"tooling stt statusbar # Launch status bar app" "tooling stt statusbar # Launch status bar app"