a
This commit is contained in:
@@ -288,6 +288,22 @@ The following wake words are supported:
|
|||||||
- porcupine
|
- porcupine
|
||||||
- terminator
|
- terminator
|
||||||
|
|
||||||
|
### Wake Word Engines
|
||||||
|
|
||||||
|
Two wake word engines are supported:
|
||||||
|
|
||||||
|
- **openwakeword** - Open source, free to use, good accuracy
|
||||||
|
- **pvporcupine** (default) - Picovoice's Porcupine engine, highly optimized
|
||||||
|
|
||||||
|
Choose the engine based on your requirements:
|
||||||
|
```bash
|
||||||
|
# Use OpenWakeWord
|
||||||
|
tooling stt listen --wakeword-backend openwakeword
|
||||||
|
|
||||||
|
# Use Porcupine for better performance
|
||||||
|
tooling stt listen --wakeword-backend pvporcupine
|
||||||
|
```
|
||||||
|
|
||||||
### Available Models
|
### Available Models
|
||||||
|
|
||||||
| Model | Speed | Accuracy | Memory | Use Case |
|
| Model | Speed | Accuracy | Memory | Use Case |
|
||||||
@@ -311,6 +327,7 @@ Options:
|
|||||||
--save-to-file PATH Save transcriptions to a file
|
--save-to-file PATH Save transcriptions to a file
|
||||||
--sensitivity FLOAT Wake word sensitivity (0.0 to 1.0) [default: 0.6]
|
--sensitivity FLOAT Wake word sensitivity (0.0 to 1.0) [default: 0.6]
|
||||||
--device TEXT Device to use (auto, cuda, cpu) [default: auto]
|
--device TEXT Device to use (auto, cuda, cpu) [default: auto]
|
||||||
|
--wakeword-backend TEXT Wake word backend (pvporcupine, openwakeword) [default: pvporcupine]
|
||||||
--verbose Show verbose output and configuration
|
--verbose Show verbose output and configuration
|
||||||
--help Show this message and exit
|
--help Show this message and exit
|
||||||
```
|
```
|
||||||
@@ -342,6 +359,11 @@ tooling stt test --duration 5 --model tiny
|
|||||||
tooling stt listen --language es --sensitivity 0.8 --wake-word "hey google"
|
tooling stt listen --language es --sensitivity 0.8 --wake-word "hey google"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Use different wake word engine:**
|
||||||
|
```bash
|
||||||
|
tooling stt listen --wakeword-backend pvporcupine --wake-word alexa
|
||||||
|
```
|
||||||
|
|
||||||
### How it Works
|
### How it Works
|
||||||
|
|
||||||
1. **Initialization**: Loads the selected Whisper model and sets up audio processing
|
1. **Initialization**: Loads the selected Whisper model and sets up audio processing
|
||||||
|
|||||||
@@ -15,3 +15,34 @@ NoneType: None
|
|||||||
2025-07-22 22:11:01.946 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening'
|
2025-07-22 22:11:01.946 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'listening'
|
||||||
2025-07-22 22:11:01.947 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start
|
2025-07-22 22:11:01.947 - RealTimeSTT: realtimestt - DEBUG - Waiting for recording start
|
||||||
2025-07-22 22:11:01.981 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'wakeword'
|
2025-07-22 22:11:01.981 - RealTimeSTT: realtimestt - INFO - State changed from 'listening' to 'wakeword'
|
||||||
|
2025-07-22 22:13:18.955 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread
|
||||||
|
2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 0
|
||||||
|
2025-07-22 22:13:18.956 - RealTimeSTT: realtimestt - INFO - State changed from 'wakeword' to 'inactive'
|
||||||
|
2025-07-22 22:13:18.962 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process
|
||||||
|
2025-07-22 22:13:19.621 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process
|
||||||
|
2025-07-22 22:13:19.622 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread
|
||||||
|
2025-07-22 22:15:33.358 - RealTimeSTT: realtimestt - INFO - Starting RealTimeSTT
|
||||||
|
2025-07-22 22:15:33.369 - RealTimeSTT: realtimestt - INFO - Initializing audio recording (creating pyAudio input stream, sample rate: 16000 buffer size: 512
|
||||||
|
2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - INFO - Initializing WebRTC voice with Sensitivity 3
|
||||||
|
2025-07-22 22:15:33.377 - RealTimeSTT: realtimestt - DEBUG - WebRTC VAD voice activity detection engine initialized successfully
|
||||||
|
2025-07-22 22:15:34.053 - RealTimeSTT: realtimestt - DEBUG - Silero VAD voice activity detection engine initialized successfully
|
||||||
|
2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Starting realtime worker
|
||||||
|
2025-07-22 22:15:34.054 - RealTimeSTT: realtimestt - DEBUG - Waiting for main transcription model to start
|
||||||
|
2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - Main transcription model ready
|
||||||
|
2025-07-22 22:15:37.769 - RealTimeSTT: realtimestt - DEBUG - RealtimeSTT initialization completed successfully
|
||||||
|
2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - recording started
|
||||||
|
2025-07-22 22:15:37.771 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'recording'
|
||||||
|
2025-07-22 22:15:40.784 - RealTimeSTT: realtimestt - INFO - recording stopped
|
||||||
|
2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - Setting listen time
|
||||||
|
2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - DEBUG - No samples removed, final audio length: 49152
|
||||||
|
2025-07-22 22:15:40.785 - RealTimeSTT: realtimestt - INFO - State changed from 'recording' to 'inactive'
|
||||||
|
2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - INFO - State changed from 'inactive' to 'transcribing'
|
||||||
|
2025-07-22 22:15:40.882 - RealTimeSTT: realtimestt - DEBUG - Adding transcription request, no early transcription started
|
||||||
|
2025-07-22 22:15:40.901 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1
|
||||||
|
2025-07-22 22:15:41.002 - RealTimeSTT: realtimestt - DEBUG - Receive from parent_transcription_pipe after sendiung transcription request, transcribe_count: 1
|
||||||
|
2025-07-22 22:15:41.030 - RealTimeSTT: realtimestt - INFO - State changed from 'transcribing' to 'inactive'
|
||||||
|
2025-07-22 22:15:41.040 - RealTimeSTT: realtimestt - DEBUG - Model tiny completed transcription in 0.16 seconds
|
||||||
|
2025-07-22 22:15:41.041 - RealTimeSTT: realtimestt - DEBUG - Finishing recording thread
|
||||||
|
2025-07-22 22:15:41.047 - RealTimeSTT: realtimestt - DEBUG - Terminating reader process
|
||||||
|
2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Terminating transcription process
|
||||||
|
2025-07-22 22:15:41.781 - RealTimeSTT: realtimestt - DEBUG - Finishing realtime thread
|
||||||
|
|||||||
+20
-3
@@ -112,6 +112,7 @@ if RUMPS_AVAILABLE:
|
|||||||
self.realtime = True
|
self.realtime = True
|
||||||
self.sensitivity = 0.6
|
self.sensitivity = 0.6
|
||||||
self.device = "auto"
|
self.device = "auto"
|
||||||
|
self.wakeword_backend = "pvporcupine"
|
||||||
self.save_to_file = None
|
self.save_to_file = None
|
||||||
|
|
||||||
# Menu setup
|
# Menu setup
|
||||||
@@ -229,6 +230,7 @@ if RUMPS_AVAILABLE:
|
|||||||
recorder_config = {
|
recorder_config = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"wake_words": self.wake_word,
|
"wake_words": self.wake_word,
|
||||||
|
"wakeword_backend": self.wakeword_backend,
|
||||||
"wake_words_sensitivity": self.sensitivity,
|
"wake_words_sensitivity": self.sensitivity,
|
||||||
"device": self.device,
|
"device": self.device,
|
||||||
"on_recording_start": self.on_recording_start,
|
"on_recording_start": self.on_recording_start,
|
||||||
@@ -422,9 +424,9 @@ def listen_cmd(
|
|||||||
default="auto",
|
default="auto",
|
||||||
help="Device to use (auto, cuda, cpu)"
|
help="Device to use (auto, cuda, cpu)"
|
||||||
),
|
),
|
||||||
wakeword_engine: str = typer.Option(
|
wakeword_backend: str = typer.Option(
|
||||||
default="openwakeword",
|
default="pvporcupine",
|
||||||
help="Wake word engine to use (openwakeword, pvporcupine)"
|
help="Wake word backend to use (pvporcupine, openwakeword)"
|
||||||
),
|
),
|
||||||
verbose: bool = typer.Option(
|
verbose: bool = typer.Option(
|
||||||
default=False,
|
default=False,
|
||||||
@@ -452,6 +454,13 @@ def listen_cmd(
|
|||||||
console.print(f"Valid options: {', '.join(valid_wake_words)}")
|
console.print(f"Valid options: {', '.join(valid_wake_words)}")
|
||||||
raise typer.Exit(1)
|
raise typer.Exit(1)
|
||||||
|
|
||||||
|
# Validate wakeword backend
|
||||||
|
valid_backends = ["pvporcupine", "openwakeword"]
|
||||||
|
if wakeword_backend.lower() not in valid_backends:
|
||||||
|
console.print(f"[bold red]❌ Invalid wakeword backend: {wakeword_backend}[/bold red]")
|
||||||
|
console.print(f"Valid options: {', '.join(valid_backends)}")
|
||||||
|
raise typer.Exit(1)
|
||||||
|
|
||||||
# Determine device
|
# Determine device
|
||||||
if device == "auto":
|
if device == "auto":
|
||||||
try:
|
try:
|
||||||
@@ -479,6 +488,7 @@ def listen_cmd(
|
|||||||
config_table.add_column("Value", style="green")
|
config_table.add_column("Value", style="green")
|
||||||
|
|
||||||
config_table.add_row("Wake Word", wake_word)
|
config_table.add_row("Wake Word", wake_word)
|
||||||
|
config_table.add_row("Wakeword Backend", wakeword_backend)
|
||||||
config_table.add_row("Model", model)
|
config_table.add_row("Model", model)
|
||||||
config_table.add_row("Language", language if language else "Auto-detect")
|
config_table.add_row("Language", language if language else "Auto-detect")
|
||||||
config_table.add_row("Device", device)
|
config_table.add_row("Device", device)
|
||||||
@@ -534,6 +544,7 @@ def listen_cmd(
|
|||||||
recorder_config = {
|
recorder_config = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"wake_words": wake_word,
|
"wake_words": wake_word,
|
||||||
|
"wakeword_backend": wakeword_backend,
|
||||||
"wake_words_sensitivity": sensitivity,
|
"wake_words_sensitivity": sensitivity,
|
||||||
"device": device,
|
"device": device,
|
||||||
"on_recording_start": on_recording_start,
|
"on_recording_start": on_recording_start,
|
||||||
@@ -738,12 +749,18 @@ def info_cmd():
|
|||||||
console.print(f"\n[bold cyan]Available Models:[/bold cyan]")
|
console.print(f"\n[bold cyan]Available Models:[/bold cyan]")
|
||||||
console.print(", ".join(models))
|
console.print(", ".join(models))
|
||||||
|
|
||||||
|
# Available wakeword backends
|
||||||
|
backends = ["pvporcupine", "openwakeword"]
|
||||||
|
console.print(f"\n[bold cyan]Available Wakeword Backends:[/bold cyan]")
|
||||||
|
console.print(", ".join(backends))
|
||||||
|
|
||||||
# Usage examples
|
# Usage examples
|
||||||
console.print(f"\n[bold cyan]Usage Examples:[/bold cyan]")
|
console.print(f"\n[bold cyan]Usage Examples:[/bold cyan]")
|
||||||
examples = [
|
examples = [
|
||||||
"tooling stt listen # Use jarvis wake word with base model",
|
"tooling stt listen # Use jarvis wake word with base model",
|
||||||
"tooling stt listen --wake-word alexa # Use alexa wake word",
|
"tooling stt listen --wake-word alexa # Use alexa wake word",
|
||||||
"tooling stt listen --model tiny # Use faster tiny model",
|
"tooling stt listen --model tiny # Use faster tiny model",
|
||||||
|
"tooling stt listen --wakeword-engine pvporcupine # Use pvporcupine engine",
|
||||||
"tooling stt test --duration 5 # Test for 5 seconds",
|
"tooling stt test --duration 5 # Test for 5 seconds",
|
||||||
"tooling stt listen --save-to-file transcripts.txt # Save to file",
|
"tooling stt listen --save-to-file transcripts.txt # Save to file",
|
||||||
"tooling stt statusbar # Launch status bar app"
|
"tooling stt statusbar # Launch status bar app"
|
||||||
|
|||||||
Reference in New Issue
Block a user