diff --git a/src/tooling/cli.py b/src/tooling/cli.py
index b4d1ab7..cf6fb0a 100644
--- a/src/tooling/cli.py
+++ b/src/tooling/cli.py
@@ -1,231 +1,35 @@
 #!/usr/bin/env python3
 """
-OCR Screenshot CLI Tool
+Main CLI for tooling package
 
-A command-line tool that takes a region screenshot on macOS,
-performs OCR using DocTR, and copies the result to clipboard.
+A command-line interface that provides various tools including OCR screenshot functionality.
 """
 
-import datetime
-import os
-import tempfile
-from pathlib import Path
-from typing import Optional
-
 import typer
 from rich.console import Console
-from rich.panel import Panel
-from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich.syntax import Syntax
 
-from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
+from .ocr_cli import ocr_app
 
+# Create main app
 app = typer.Typer(
-    name="ocr-screenshot",
-    help="Take a region screenshot, perform OCR, and copy result to clipboard",
+    name="tooling",
+    help="A collection of command-line tools for productivity",
     rich_markup_mode="rich"
 )
 
 console = Console()
 
+# Add OCR subcommand
+app.add_typer(ocr_app, name="ocr", help="OCR screenshot tools")
 
 @app.command()
-def main(
-    lang: str = typer.Option(
-        default="eng",
-        help="Language code for OCR (default: eng)"
-    ),
-    save_image: bool = typer.Option(
-        default=False,
-        help="Save the screenshot image instead of deleting it"
-    ),
-    output_dir: Path = typer.Option(
-        default=Path.home() / "Desktop",
-        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
-    ),
-    verbose: bool = typer.Option(
-        default=False,
-        help="Show verbose output"
-    ),
-    annotate: bool = typer.Option(
-        default=False,
-        help="Create an annotated version of the image showing detected text regions"
-    ),
-    show_words: bool = typer.Option(
-        default=True,
-        help="Show word-level bounding boxes in annotation (default: True)"
-    ),
-    show_lines: bool = typer.Option(
-        default=False,
-        help="Show line-level bounding boxes in annotation"
-    ),
-    show_blocks: bool = typer.Option(
-        default=False,
-        help="Show block-level bounding boxes in annotation"
-    ),
-    show_text: bool = typer.Option(
-        default=False,
-        help="Overlay detected text on the annotated image"
-    ),
-    screenshot_method: str = typer.Option(
-        default="auto",
-        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
-    ),
-    monitor_number: int = typer.Option(
-        default=0,
-        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
-    )
-):
-    """Take a region screenshot, perform OCR, and copy result to clipboard."""
-
-    # Create screenshot path
-    if save_image:
-        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
-        os.close(temp_fd)
-        screenshot_path = Path(temp_path)
-
-    try:
-        # Step 1: Take screenshot
-        if verbose:
-            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
-
-            # Show method-specific instructions
-            if screenshot_method == "macos":
-                console.print(Panel(
-                    "[bold]macOS Screenshot Instructions:[/bold]\n"
-                    "• Drag to select a region\n"
-                    "• Press [bold]Space[/bold] to capture entire window\n"
-                    "• Press [bold]Escape[/bold] to cancel",
-                    title="Screenshot Controls",
-                    border_style="blue"
-                ))
-            elif screenshot_method == "interactive":
-                console.print(Panel(
-                    "[bold]Interactive Screenshot Instructions:[/bold]\n"
-                    "• Follow the prompts to select region corners\n"
-                    "• Position mouse and press ENTER at each corner",
-                    title="Screenshot Controls",
-                    border_style="green"
-                ))
-            elif screenshot_method in ["mss", "pillow", "pyautogui", "pyscreenshot"]:
-                console.print(Panel(
-                    f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
-                    "• Full screen capture (region selection not supported in CLI yet)\n"
-                    "• Use --screenshot-method interactive for region selection",
-                    title="Screenshot Info",
-                    border_style="yellow"
-                ))
-        else:
-            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")
-
-        if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
-            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
-            if screenshot_method == "auto":
-                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
-                console.print(" pip install mss pyautogui pyscreenshot")
-            raise typer.Exit(1)
-
-        if verbose:
-            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")
-
-        # Step 2: Perform OCR (with optional annotation)
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            console=console,
-            transient=True
-        ) as progress:
-            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
-
-            if annotate:
-                # Create annotation output path
-                annotation_path = None
-                if save_image:
-                    base_name = screenshot_path.stem
-                    annotation_path = output_dir / f"{base_name}_annotated.png"
-
-                extracted_text, annotated_image_path = perform_ocr_with_annotation(
-                    str(screenshot_path),
-                    lang,
-                    create_annotated=True,
-                    annotation_output_path=str(annotation_path) if annotation_path else None,
-                    show_words=show_words,
-                    show_lines=show_lines,
-                    show_blocks=show_blocks,
-                    show_text=show_text
-                )
-
-                if annotated_image_path and verbose:
-                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
-            else:
-                extracted_text = perform_ocr(str(screenshot_path), lang)
-
-            progress.update(task, description="[green]✓ OCR complete")
-
-        if not extracted_text:
-            console.print("[bold red]❌ No text found in the image.[/bold red]")
-            raise typer.Exit(1)
-
-        # Step 3: Copy to clipboard
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            console=console,
-            transient=True
-        ) as progress:
-            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
-            if copy_to_clipboard(extracted_text):
-                progress.update(task, description="[green]✓ Copied to clipboard")
-            else:
-                progress.update(task, description="[red]✗ Failed to copy")
-                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
-                console.print("\n[bold]Extracted text:[/bold]")
-                from rich.text import Text
-                plain_text = Text(extracted_text)
-                console.print(Panel(plain_text, border_style="yellow"))
-                raise typer.Exit(1)
-
-        # Success message
-        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
-        if annotate:
-            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
-        console.print(success_msg)
-
-        if verbose:
-            console.print("\n[bold]Extracted text:[/bold]")
-            # Use syntax highlighting if it looks like code
-            if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
-                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
-                console.print(Panel(syntax, border_style="green"))
-            else:
-                # Treat as plain text to prevent Rich markup interpretation
-                from rich.text import Text
-                plain_text = Text(extracted_text)
-                console.print(Panel(plain_text, border_style="green"))
-
-    except KeyboardInterrupt:
-        console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
-        raise typer.Exit(1)
-    except Exception as e:
-        console.print(f"[bold red]❌ Unexpected error: {e}[/bold red]")
-        raise typer.Exit(1)
-    finally:
-        # Clean up temporary file if not saving
-        if not save_image and screenshot_path.exists():
-            try:
-                screenshot_path.unlink()
-            except Exception:
-                pass  # Ignore cleanup errors
-
+def version():
+    """Show version information."""
+    console.print("[bold green]tooling[/bold green] [blue]v0.1.0[/blue]")
 
 def cli_main():
     """Entry point for the CLI script."""
     app()
 
-
 if __name__ == "__main__":
-    app()
\ No newline at end of file
+    app()
diff --git a/src/tooling/ocr_cli.py b/src/tooling/ocr_cli.py
new file mode 100644
index 0000000..ccc6ffe
--- /dev/null
+++ b/src/tooling/ocr_cli.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+"""
+OCR Screenshot CLI Tool
+
+A command-line tool that takes a screenshot (several capture methods are
+supported), performs OCR using DocTR, and copies the result to clipboard.
+"""
+
+import datetime
+import os
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+import typer
+from rich.console import Console
+from rich.markup import escape
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
+from rich.syntax import Syntax
+
+from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
+
+# Create OCR app that can be imported as a subcommand
+ocr_app = typer.Typer(
+    name="ocr",
+    help="Take a region screenshot, perform OCR, and copy result to clipboard",
+    rich_markup_mode="rich"
+)
+
+console = Console()
+
+
+@ocr_app.command("screenshot")
+def screenshot_cmd(
+    lang: str = typer.Option(
+        default="eng",
+        help="Language code for OCR (default: eng)"
+    ),
+    save_image: bool = typer.Option(
+        default=False,
+        help="Save the screenshot image instead of deleting it"
+    ),
+    output_dir: Path = typer.Option(
+        default=Path.home() / "Desktop",
+        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
+    ),
+    verbose: bool = typer.Option(
+        default=False,
+        help="Show verbose output"
+    ),
+    annotate: bool = typer.Option(
+        default=False,
+        help="Create an annotated version of the image showing detected text regions"
+    ),
+    show_words: bool = typer.Option(
+        default=True,
+        help="Show word-level bounding boxes in annotation (default: True)"
+    ),
+    show_lines: bool = typer.Option(
+        default=False,
+        help="Show line-level bounding boxes in annotation"
+    ),
+    show_blocks: bool = typer.Option(
+        default=False,
+        help="Show block-level bounding boxes in annotation"
+    ),
+    show_text: bool = typer.Option(
+        default=False,
+        help="Overlay detected text on the annotated image"
+    ),
+    screenshot_method: str = typer.Option(
+        default="auto",
+        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
+    ),
+    monitor_number: int = typer.Option(
+        default=0,
+        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
+    )
+):
+    """Take a region screenshot, perform OCR, and copy result to clipboard."""
+
+    # Create screenshot path
+    if save_image:
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
+        output_dir.mkdir(parents=True, exist_ok=True)
+    else:
+        # mkstemp returns an already-open descriptor; only the path is needed,
+        # so close it before the path is handed to the screenshot backend.
+        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
+        os.close(temp_fd)
+        screenshot_path = Path(temp_path)
+
+    try:
+        # Step 1: Take screenshot
+        if verbose:
+            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
+
+            # Show method-specific instructions
+            if screenshot_method == "macos":
+                console.print(Panel(
+                    "[bold]macOS Screenshot Instructions:[/bold]\n"
+                    "• Drag to select a region\n"
+                    "• Press [bold]Space[/bold] to capture entire window\n"
+                    "• Press [bold]Escape[/bold] to cancel",
+                    title="Screenshot Controls",
+                    border_style="blue"
+                ))
+            elif screenshot_method == "interactive":
+                console.print(Panel(
+                    "[bold]Interactive Screenshot Instructions:[/bold]\n"
+                    "• Follow the prompts to select region corners\n"
+                    "• Position mouse and press ENTER at each corner",
+                    title="Screenshot Controls",
+                    border_style="green"
+                ))
+            elif screenshot_method in ["mss", "pillow", "pyautogui", "pyscreenshot"]:
+                console.print(Panel(
+                    f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
+                    "• Full screen capture (region selection not supported in CLI yet)\n"
+                    "• Use --screenshot-method interactive for region selection",
+                    title="Screenshot Info",
+                    border_style="yellow"
+                ))
+        else:
+            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")
+
+        if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
+            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
+            if screenshot_method == "auto":
+                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
+                console.print(" pip install mss pyautogui pyscreenshot")
+            raise typer.Exit(1)
+
+        if verbose:
+            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")
+
+        # Step 2: Perform OCR (with optional annotation)
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True
+        ) as progress:
+            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
+
+            if annotate:
+                # Create annotation output path
+                annotation_path = None
+                if save_image:
+                    base_name = screenshot_path.stem
+                    annotation_path = output_dir / f"{base_name}_annotated.png"
+
+                extracted_text, annotated_image_path = perform_ocr_with_annotation(
+                    str(screenshot_path),
+                    lang,
+                    create_annotated=True,
+                    annotation_output_path=str(annotation_path) if annotation_path else None,
+                    show_words=show_words,
+                    show_lines=show_lines,
+                    show_blocks=show_blocks,
+                    show_text=show_text
+                )
+
+                if annotated_image_path and verbose:
+                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
+            else:
+                extracted_text = perform_ocr(str(screenshot_path), lang)
+
+            progress.update(task, description="[green]✓ OCR complete")
+
+        if not extracted_text:
+            console.print("[bold red]❌ No text found in the image.[/bold red]")
+            raise typer.Exit(1)
+
+        # Step 3: Copy to clipboard
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True
+        ) as progress:
+            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
+            if copy_to_clipboard(extracted_text):
+                progress.update(task, description="[green]✓ Copied to clipboard")
+            else:
+                progress.update(task, description="[red]✗ Failed to copy")
+                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
+                console.print("\n[bold]Extracted text:[/bold]")
+                from rich.text import Text
+                plain_text = Text(extracted_text)
+                console.print(Panel(plain_text, border_style="yellow"))
+                raise typer.Exit(1)
+
+        # Success message
+        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
+        if annotate:
+            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
+        console.print(success_msg)
+
+        if verbose:
+            console.print("\n[bold]Extracted text:[/bold]")
+            # Use syntax highlighting if it looks like code
+            if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
+                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
+                console.print(Panel(syntax, border_style="green"))
+            else:
+                # Treat as plain text to prevent Rich markup interpretation
+                from rich.text import Text
+                plain_text = Text(extracted_text)
+                console.print(Panel(plain_text, border_style="green"))
+
+    except KeyboardInterrupt:
+        console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
+        raise typer.Exit(1)
+    except typer.Exit:
+        # typer.Exit derives from RuntimeError, so without this clause the
+        # generic handler below would report every deliberate exit above
+        # as "Unexpected error: 1".
+        raise
+    except Exception as e:
+        # Escape the message so brackets in it are not parsed as Rich markup.
+        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
+        raise typer.Exit(1)
+    finally:
+        # Clean up temporary file if not saving
+        if not save_image and screenshot_path.exists():
+            try:
+                screenshot_path.unlink()
+            except Exception:
+                pass  # Ignore cleanup errors
+
+
+# For backward compatibility when run directly
+def cli_main():
+    """Entry point for the OCR CLI script when run directly."""
+    ocr_app()
+
+
+if __name__ == "__main__":
+    ocr_app()