From dcb3f9d368b2b6d7a0cfe1d4279e0e1cdec33ada Mon Sep 17 00:00:00 2001 From: "dingfeng.wong" Date: Tue, 22 Jul 2025 22:02:48 +0800 Subject: [PATCH] stt --- README.md | 215 ++++++++++++++++++++ pyproject.toml | 1 + requirements.txt | 114 ++++++++++- src/tooling/cli.py | 4 + src/tooling/stt_cli.py | 450 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 775 insertions(+), 9 deletions(-) create mode 100644 src/tooling/stt_cli.py diff --git a/README.md b/README.md index 1fd0f72..21791b3 100644 --- a/README.md +++ b/README.md @@ -170,6 +170,221 @@ ocr-screenshot --screenshot-method interactive --save-image ocr-screenshot --screenshot-method mss --monitor-number 2 ``` +## Speech-to-Text (STT) Tool + +A real-time speech-to-text tool using RealtimeSTT with wake word activation. Features the "jarvis" wake word by default and supports live transcription with various output options. + +### Features + +- 🎙️ **Real-time transcription** - Live speech-to-text conversion +- 🎯 **Wake word activation** - Multiple wake words including "jarvis" +- ⚡ **GPU acceleration** - CUDA support for faster processing +- 🔄 **Live display** - Real-time transcription preview +- 💾 **File output** - Save transcriptions to text files +- 🎛️ **Multiple models** - Choose from tiny to large Whisper models +- 🌍 **Multi-language** - Support for multiple languages +- 🧪 **Test mode** - Test functionality without wake words + +### Installation + +The STT dependencies are included in the base installation: +```bash +pip install . 
+``` + +For optimal performance with GPU acceleration: +```bash +# For CUDA 11.8 +pip install torch==2.5.1+cu118 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu118 + +# For CUDA 12.X +pip install torch==2.5.1+cu121 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121 +``` + +### Usage + +#### Basic Commands + +Start STT with jarvis wake word: +```bash +tooling stt listen +``` + +Test STT without wake words: +```bash +tooling stt test +``` + +Show system information: +```bash +tooling stt info +``` + +#### Wake Word Options + +Use different wake words: +```bash +# Use alexa wake word +tooling stt listen --wake-word alexa + +# Use hey google wake word +tooling stt listen --wake-word "hey google" + +# Use computer wake word +tooling stt listen --wake-word computer +``` + +#### Model Selection + +Choose different Whisper models for speed vs accuracy: +```bash +# Fastest (tiny model) +tooling stt listen --model tiny + +# Balanced (base model, default) +tooling stt listen --model base + +# Best accuracy (large model) +tooling stt listen --model large-v2 +``` + +#### Advanced Features + +Save transcriptions to file: +```bash +tooling stt listen --save-to-file transcripts.txt +``` + +Disable real-time display for better performance: +```bash +tooling stt listen --no-realtime +``` + +Set custom sensitivity and language: +```bash +tooling stt listen --sensitivity 0.8 --language en --verbose +``` + +Force CPU usage: +```bash +tooling stt listen --device cpu +``` + +### Available Wake Words + +The following wake words are supported: +- **jarvis** (default) +- alexa +- americano +- blueberry +- bumblebee +- computer +- grapefruits +- grasshopper +- hey google +- hey siri +- ok google +- picovoice +- porcupine +- terminator + +### Available Models + +| Model | Speed | Accuracy | Parameters | Use Case | +|-------|-------|----------|--------|----------| +| **tiny** | ⚡⚡⚡ | ⭐⭐ | 39M | Testing, low-power devices | +| **base** | ⚡⚡ | ⭐⭐⭐ | 74M | 
Balanced (default) | +| **small** | ⚡ | ⭐⭐⭐⭐ | 244M | Better accuracy | +| **medium** | 🐌 | ⭐⭐⭐⭐⭐ | 769M | High accuracy | +| **large-v2** | 🐌🐌 | ⭐⭐⭐⭐⭐ | 1550M | Best accuracy | + +### Command Line Options + +```bash +tooling stt listen [OPTIONS] + +Options: + --wake-word TEXT Wake word to activate recording [default: jarvis] + --model TEXT Whisper model (tiny, base, small, medium, large-v2) [default: base] + --language TEXT Language code for transcription (empty for auto-detection) + --realtime/--no-realtime Enable real-time transcription display [default: realtime] + --save-to-file PATH Save transcriptions to a file + --sensitivity FLOAT Wake word sensitivity (0.0 to 1.0) [default: 0.6] + --device TEXT Device to use (auto, cuda, cpu) [default: auto] + --verbose Show verbose output and configuration + --help Show this message and exit +``` + +### Examples + +**Basic usage with jarvis:** +```bash +tooling stt listen +``` + +**Fast transcription with tiny model:** +```bash +tooling stt listen --model tiny --wake-word computer +``` + +**High accuracy with file output:** +```bash +tooling stt listen --model large-v2 --save-to-file meeting_notes.txt --verbose +``` + +**Quick test without wake words:** +```bash +tooling stt test --duration 5 --model tiny +``` + +**Custom language and sensitivity:** +```bash +tooling stt listen --language es --sensitivity 0.8 --wake-word "hey google" +``` + +### How it Works + +1. **Initialization**: Loads the selected Whisper model and sets up audio processing +2. **Wake Word Detection**: Listens for the specified wake word using Porcupine or OpenWakeWord +3. **Voice Activity Detection**: Uses WebRTC VAD and Silero VAD for accurate speech detection +4. **Real-time Transcription**: Processes audio chunks in real-time (optional) +5. **Final Transcription**: Generates high-quality final transcription when speech ends +6. 
**Output**: Displays results and optionally saves to file + +### Performance Tips + +- **GPU**: Use CUDA for 3-5x faster transcription +- **Model**: Use `tiny` or `base` for real-time applications +- **Sensitivity**: Adjust wake word sensitivity based on environment noise +- **Device**: Set `--device cpu` if experiencing GPU memory issues +- **Real-time**: Use `--no-realtime` to disable the live preview for better final transcription performance + +### Troubleshooting + +**No microphone detected:** +```bash +# Check audio devices +tooling stt info +``` + +**CUDA not available:** +```bash +# Install CUDA-enabled PyTorch +pip install torch==2.5.1+cu121 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu121 +``` + +**Wake word not detected:** +```bash +# Increase sensitivity +tooling stt listen --sensitivity 0.8 --verbose +``` + +**Poor transcription quality:** +```bash +# Use larger model +tooling stt listen --model large-v2 +``` + ## Development Guide ### How to Add New Packages diff --git a/pyproject.toml b/pyproject.toml index 30b8d22..4062c5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ screenshot-all = [ [project.scripts] ocr-screenshot = "tooling.cli:cli_main" +tooling = "tooling.cli:cli_main" [build-system] requires = ["hatchling"] diff --git a/requirements.txt b/requirements.txt index 391a651..49e88ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,34 +2,63 @@ # uv pip compile pyproject.toml -o requirements.txt anyascii==0.3.3 # via python-doctr +av==15.0.0 + # via faster-whisper certifi==2025.7.14 # via requests +cffi==1.17.1 + # via soundfile charset-normalizer==3.4.2 # via requests click==8.2.1 # via typer +colorama==0.4.6 + # via + # halo + # log-symbols +coloredlogs==15.0.1 + # via onnxruntime +ctranslate2==4.6.0 + # via faster-whisper defusedxml==0.7.1 # via python-doctr +enum34==1.1.10 + # via pvporcupine +faster-whisper==1.1.1 + # via realtimestt filelock==3.18.0 # via # huggingface-hub # torch +flatbuffers==25.2.10 + # via 
onnxruntime fsspec==2025.7.0 # via # huggingface-hub # torch h5py==3.14.0 # via python-doctr +halo==0.0.31 + # via realtimestt hf-xet==1.1.5 # via huggingface-hub huggingface-hub==0.33.4 - # via python-doctr + # via + # faster-whisper + # python-doctr + # tokenizers +humanfriendly==10.0 + # via coloredlogs idna==3.10 # via requests jinja2==3.1.6 # via torch +joblib==1.5.1 + # via scikit-learn langdetect==1.0.9 # via python-doctr +log-symbols==0.0.14 + # via halo markdown-it-py==3.0.0 # via rich markupsafe==3.0.2 @@ -42,30 +71,55 @@ networkx==3.5 # via torch numpy==2.3.1 # via + # ctranslate2 # h5py # onnx + # onnxruntime # opencv-python + # pvporcupine # python-doctr + # scikit-learn # scipy # shapely + # soundfile # torchvision onnx==1.18.0 # via python-doctr +onnxruntime==1.22.1 + # via + # faster-whisper + # openwakeword opencv-python==4.11.0.86 # via python-doctr +openwakeword==0.6.0 + # via realtimestt packaging==25.0 - # via huggingface-hub + # via + # huggingface-hub + # onnxruntime pillow==11.3.0 # via # tooling (pyproject.toml) # python-doctr # torchvision protobuf==6.31.1 - # via onnx + # via + # onnx + # onnxruntime +pvporcupine==1.9.5 + # via realtimestt +pyaudio==0.2.14 + # via realtimestt pyclipper==1.3.0.post6 # via python-doctr +pycparser==2.22 + # via cffi pygments==2.19.2 # via rich +pyobjc-core==11.1 + # via pyobjc-framework-cocoa +pyobjc-framework-cocoa==11.1 + # via rumps pypdfium2==4.30.0 # via python-doctr pyperclip==1.9.0 @@ -73,34 +127,70 @@ pyperclip==1.9.0 python-doctr==1.0.0 # via tooling (pyproject.toml) pyyaml==6.0.2 - # via huggingface-hub + # via + # ctranslate2 + # huggingface-hub rapidfuzz==3.13.0 # via python-doctr +realtimestt==0.3.104 + # via tooling (pyproject.toml) requests==2.32.4 - # via huggingface-hub + # via + # huggingface-hub + # openwakeword rich==14.0.0 # via # tooling (pyproject.toml) # typer -scipy==1.16.0 - # via python-doctr +rumps==0.4.0 + # via tooling (pyproject.toml) +scikit-learn==1.7.1 + # via openwakeword 
+scipy==1.15.2 + # via + # openwakeword + # python-doctr + # realtimestt + # scikit-learn +setuptools==80.9.0 + # via ctranslate2 shapely==2.1.1 # via python-doctr shellingham==1.5.4 # via typer six==1.17.0 - # via langdetect + # via + # halo + # langdetect +soundfile==0.13.1 + # via realtimestt +spinners==0.0.24 + # via halo sympy==1.14.0 - # via torch + # via + # onnxruntime + # torch +termcolor==3.1.0 + # via halo +threadpoolctl==3.6.0 + # via scikit-learn +tokenizers==0.21.2 + # via faster-whisper torch==2.7.1 # via # python-doctr + # realtimestt + # torchaudio # torchvision +torchaudio==2.7.1 + # via realtimestt torchvision==0.22.1 # via python-doctr tqdm==4.67.1 # via + # faster-whisper # huggingface-hub + # openwakeword # python-doctr typer==0.16.0 # via tooling (pyproject.toml) @@ -114,3 +204,9 @@ urllib3==2.5.0 # via requests validators==0.35.0 # via python-doctr +webrtcvad-wheels==2.0.14 + # via realtimestt +websocket-client==1.8.0 + # via realtimestt +websockets==15.0.1 + # via realtimestt diff --git a/src/tooling/cli.py b/src/tooling/cli.py index cf6fb0a..990dcc7 100644 --- a/src/tooling/cli.py +++ b/src/tooling/cli.py @@ -9,6 +9,7 @@ import typer from rich.console import Console from .ocr_cli import ocr_app +from .stt_cli import stt_app # Create main app app = typer.Typer( @@ -22,6 +23,9 @@ console = Console() # Add OCR subcommand app.add_typer(ocr_app, name="ocr", help="OCR screenshot tools") +# Add STT subcommand +app.add_typer(stt_app, name="stt", help="Speech-to-text tools with wake word activation") + @app.command() def version(): """Show version information.""" diff --git a/src/tooling/stt_cli.py b/src/tooling/stt_cli.py new file mode 100644 index 0000000..def935a --- /dev/null +++ b/src/tooling/stt_cli.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python3 +""" +Speech-to-Text CLI Tool + +A command-line tool that provides real-time speech-to-text transcription +using RealtimeSTT with wake word activation and various output options. 
+""" + +import datetime +import os +import tempfile +from pathlib import Path +from typing import Optional, Callable +import threading +import time + +import typer +from rich.console import Console +from rich.panel import Panel +from rich.progress import Progress, SpinnerColumn, TextColumn +from rich.live import Live +from rich.text import Text +from rich.table import Table + +# Create STT app that can be imported as a subcommand +stt_app = typer.Typer( + name="stt", + help="Real-time speech-to-text with wake word activation", + rich_markup_mode="rich" +) + +console = Console() + +# Global variables for managing the recorder +_recorder = None +_recording_active = False +_transcription_buffer = [] + + +class TranscriptionDisplay: + """Handle live display of transcriptions.""" + + def __init__(self, show_realtime: bool = True): + self.show_realtime = show_realtime + self.realtime_text = "" + self.final_text = "" + self.status = "Initializing..." + + def create_display(self) -> Table: + """Create the display table.""" + table = Table.grid(padding=1) + table.add_column(style="cyan", no_wrap=False) + + # Status + table.add_row(f"[bold blue]Status:[/bold blue] {self.status}") + table.add_row("") + + # Realtime transcription + if self.show_realtime and self.realtime_text: + table.add_row("[bold yellow]πŸŽ™οΈ Live transcription:[/bold yellow]") + table.add_row(f"[dim]{self.realtime_text}[/dim]") + table.add_row("") + + # Final transcription + if self.final_text: + table.add_row("[bold green]βœ… Final transcription:[/bold green]") + table.add_row(self.final_text) + table.add_row("") + + return table + + def update_status(self, status: str): + """Update the status.""" + self.status = status + + def update_realtime(self, text: str): + """Update realtime transcription.""" + self.realtime_text = text + + def add_final(self, text: str): + """Add final transcription.""" + if text.strip(): + timestamp = datetime.datetime.now().strftime("%H:%M:%S") + self.final_text += 
f"[{timestamp}] {text}\n" + + +@stt_app.command("listen") +def listen_cmd( + wake_word: str = typer.Option( + default="jarvis", + help="Wake word to activate recording (jarvis, alexa, hey google, etc.)" + ), + model: str = typer.Option( + default="base", + help="Whisper model to use (tiny, base, small, medium, large-v2)" + ), + language: str = typer.Option( + default="", + help="Language code for transcription (empty for auto-detection)" + ), + realtime: bool = typer.Option( + default=True, + help="Enable real-time transcription display" + ), + save_to_file: Optional[Path] = typer.Option( + default=None, + help="Save transcriptions to a file" + ), + sensitivity: float = typer.Option( + default=0.6, + help="Wake word sensitivity (0.0 to 1.0)" + ), + device: str = typer.Option( + default="auto", + help="Device to use (auto, cuda, cpu)" + ), + verbose: bool = typer.Option( + default=False, + help="Show verbose output and configuration" + ) +): + """Start real-time speech-to-text with wake word activation.""" + + try: + from RealtimeSTT import AudioToTextRecorder + except ImportError: + console.print("[bold red]❌ RealtimeSTT not installed.[/bold red]") + console.print("Install with: [bold]pip install RealtimeSTT[/bold]") + raise typer.Exit(1) + + # Validate wake word + valid_wake_words = [ + "alexa", "americano", "blueberry", "bumblebee", "computer", + "grapefruits", "grasshopper", "hey google", "hey siri", "jarvis", + "ok google", "picovoice", "porcupine", "terminator" + ] + + if wake_word.lower() not in valid_wake_words: + console.print(f"[bold red]❌ Invalid wake word: {wake_word}[/bold red]") + console.print(f"Valid options: {', '.join(valid_wake_words)}") + raise typer.Exit(1) + + # Determine device + if device == "auto": + try: + import torch + device = "cuda" if torch.cuda.is_available() else "cpu" + except ImportError: + device = "cpu" + + # Create transcription display + display = TranscriptionDisplay(show_realtime=realtime) + + # File output setup + 
output_file = None + if save_to_file: + save_to_file.parent.mkdir(parents=True, exist_ok=True) + output_file = open(save_to_file, 'a', encoding='utf-8') + timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + output_file.write(f"\n=== STT Session Started: {timestamp} ===\n") + output_file.flush() + + # Show configuration if verbose + if verbose: + config_table = Table(title="STT Configuration") + config_table.add_column("Setting", style="cyan") + config_table.add_column("Value", style="green") + + config_table.add_row("Wake Word", wake_word) + config_table.add_row("Model", model) + config_table.add_row("Language", language if language else "Auto-detect") + config_table.add_row("Device", device) + config_table.add_row("Realtime Display", str(realtime)) + config_table.add_row("Sensitivity", str(sensitivity)) + if save_to_file: + config_table.add_row("Output File", str(save_to_file)) + + console.print(config_table) + console.print() + + # Callback functions + def on_realtime_transcription(text: str): + """Handle real-time transcription updates.""" + if realtime: + display.update_realtime(text) + + def on_transcription_complete(text: str): + """Handle completed transcriptions.""" + if text.strip(): + display.add_final(text) + + # Save to file if specified + if output_file: + timestamp = datetime.datetime.now().strftime("%H:%M:%S") + output_file.write(f"[{timestamp}] {text}\n") + output_file.flush() + + def on_recording_start(): + """Called when recording starts.""" + display.update_status("πŸŽ™οΈ Recording... 
(speak now)") + + def on_recording_stop(): + """Called when recording stops.""" + display.update_status("⏸️ Processing transcription...") + + def on_wakeword_detected(): + """Called when wake word is detected.""" + display.update_status(f"🎯 Wake word '{wake_word}' detected!") + + def on_wakeword_timeout(): + """Called when wake word times out.""" + display.update_status(f"⏰ Waiting for wake word '{wake_word}'...") + + def on_wakeword_detection_start(): + """Called when starting to listen for wake words.""" + display.update_status(f"πŸ‘‚ Listening for wake word '{wake_word}'...") + + try: + display.update_status("πŸ”§ Initializing STT engine...") + + # Configure recorder parameters + recorder_config = { + "model": model, + "wake_words": wake_word, + "wake_words_sensitivity": sensitivity, + "device": device, + "on_recording_start": on_recording_start, + "on_recording_stop": on_recording_stop, + "on_wakeword_detected": on_wakeword_detected, + "on_wakeword_timeout": on_wakeword_timeout, + "on_wakeword_detection_start": on_wakeword_detection_start, + } + + if language: + recorder_config["language"] = language + + if realtime: + recorder_config.update({ + "enable_realtime_transcription": True, + "on_realtime_transcription_update": on_realtime_transcription, + }) + + # Initialize recorder + recorder = AudioToTextRecorder(**recorder_config) + + # Show initial instructions + console.print(Panel( + f"[bold]Speech-to-Text Ready![/bold]\n\n" + f"β€’ Say '[bold cyan]{wake_word}[/bold cyan]' to activate recording\n" + f"β€’ Speak clearly after activation\n" + f"β€’ Press [bold red]Ctrl+C[/bold red] to stop\n" + f"β€’ Model: [bold]{model}[/bold] | Device: [bold]{device}[/bold]", + title="🎀 STT Instructions", + border_style="green" + )) + + # Start live display + with Live(display.create_display(), refresh_per_second=10, console=console) as live: + try: + while True: + # Get transcription (this will wait for wake word and then record) + text = recorder.text() + if text: + 
on_transcription_complete(text) + live.update(display.create_display()) + + # Small delay to prevent high CPU usage + time.sleep(0.1) + + except KeyboardInterrupt: + display.update_status("πŸ›‘ Stopping STT...") + live.update(display.create_display()) + raise + + except KeyboardInterrupt: + console.print("\n[bold yellow]⚠️ STT stopped by user.[/bold yellow]") + except Exception as e: + console.print(f"\n[bold red]❌ STT error: {e}[/bold red]") + if verbose: + import traceback + console.print(f"[dim]{traceback.format_exc()}[/dim]") + raise typer.Exit(1) + finally: + # Cleanup + if 'recorder' in locals(): + try: + recorder.shutdown() + except: + pass + + if output_file: + timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + output_file.write(f"=== STT Session Ended: {timestamp} ===\n\n") + output_file.close() + console.print(f"\n[green]πŸ’Ύ Transcriptions saved to: {save_to_file}[/green]") + + +@stt_app.command("test") +def test_cmd( + duration: int = typer.Option( + default=10, + help="Test duration in seconds" + ), + model: str = typer.Option( + default="tiny", + help="Whisper model to use for testing" + ) +): + """Test STT functionality without wake words.""" + + try: + from RealtimeSTT import AudioToTextRecorder + except ImportError: + console.print("[bold red]❌ RealtimeSTT not installed.[/bold red]") + console.print("Install with: [bold]pip install RealtimeSTT[/bold]") + raise typer.Exit(1) + + console.print(Panel( + f"[bold]STT Test Mode[/bold]\n\n" + f"β€’ Duration: [bold]{duration}[/bold] seconds\n" + f"β€’ Model: [bold]{model}[/bold]\n" + f"β€’ No wake word required\n" + f"β€’ Start speaking when you see 'Recording...'", + title="πŸ§ͺ Test Configuration", + border_style="blue" + )) + + try: + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + console=console, + ) as progress: + + init_task = progress.add_task("[cyan]Initializing STT engine...", total=None) + + recorder = AudioToTextRecorder( + 
model=model, + wake_words="", # No wake words for test + ) + + progress.update(init_task, description="[green]βœ“ STT engine ready") + progress.stop() + + console.print(f"\n[bold green]πŸŽ™οΈ Recording for {duration} seconds...[/bold green]") + console.print("[yellow]Start speaking now![/yellow]") + + # Manual recording for test + recorder.start() + + # Show countdown + for remaining in range(duration, 0, -1): + console.print(f"\r[bold cyan]⏰ {remaining} seconds remaining...[/bold cyan]", end="") + time.sleep(1) + + console.print(f"\r[bold blue]⏸️ Processing transcription...[/bold blue]") + + recorder.stop() + text = recorder.text() + + if text: + console.print("\n[bold green]βœ… Test completed successfully![/bold green]") + console.print(Panel( + text, + title="πŸ“ Transcribed Text", + border_style="green" + )) + else: + console.print("\n[bold yellow]⚠️ No speech detected during test.[/bold yellow]") + console.print("[dim]Try speaking louder or check your microphone.[/dim]") + + except KeyboardInterrupt: + console.print("\n[bold yellow]⚠️ Test cancelled by user.[/bold yellow]") + except Exception as e: + console.print(f"\n[bold red]❌ Test failed: {e}[/bold red]") + raise typer.Exit(1) + finally: + if 'recorder' in locals(): + try: + recorder.shutdown() + except: + pass + + +@stt_app.command("info") +def info_cmd(): + """Show STT system information and available options.""" + + console.print(Panel( + "[bold blue]STT System Information[/bold blue]", + border_style="blue" + )) + + # Check RealtimeSTT installation + try: + from RealtimeSTT import AudioToTextRecorder + console.print("[green]βœ… RealtimeSTT installed[/green]") + + # Check CUDA availability + try: + import torch + cuda_available = torch.cuda.is_available() + if cuda_available: + console.print(f"[green]βœ… CUDA available (GPU: {torch.cuda.get_device_name()})[/green]") + else: + console.print("[yellow]⚠️ CUDA not available (CPU only)[/yellow]") + except ImportError: + console.print("[yellow]⚠️ PyTorch 
not available[/yellow]") + + except ImportError: + console.print("[red]❌ RealtimeSTT not installed[/red]") + console.print("Install with: [bold]pip install RealtimeSTT[/bold]") + + # Available wake words + wake_words = [ + "alexa", "americano", "blueberry", "bumblebee", "computer", + "grapefruits", "grasshopper", "hey google", "hey siri", "jarvis", + "ok google", "picovoice", "porcupine", "terminator" + ] + + console.print(f"\n[bold cyan]Available Wake Words:[/bold cyan]") + console.print(", ".join(wake_words)) + + # Available models + models = ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2"] + console.print(f"\n[bold cyan]Available Models:[/bold cyan]") + console.print(", ".join(models)) + + # Usage examples + console.print(f"\n[bold cyan]Usage Examples:[/bold cyan]") + examples = [ + "tooling stt listen # Use jarvis wake word with base model", + "tooling stt listen --wake-word alexa # Use alexa wake word", + "tooling stt listen --model tiny # Use faster tiny model", + "tooling stt test --duration 5 # Test for 5 seconds", + "tooling stt listen --save-to-file transcripts.txt # Save to file" + ] + + for example in examples: + console.print(f" [dim]${example}[/dim]") + + +# For backward compatibility when run directly +def cli_main(): + """Entry point for the STT CLI script when run directly.""" + stt_app() + + +if __name__ == "__main__": + stt_app() \ No newline at end of file