t

2025-07-22 21:57:29 +08:00
parent 43b90e0d65
commit 0726aa60ed
2 changed files with 245 additions and 208 deletions
@@ -1,231 +1,35 @@
 #!/usr/bin/env python3
 """
-OCR Screenshot CLI Tool
+Main CLI for tooling package
-A command-line tool that takes a region screenshot on macOS,
+A command-line interface that provides various tools including OCR screenshot functionality.
 The OCR tool takes a region screenshot, performs OCR using DocTR, and copies the result to clipboard.
 """
 import datetime
 import os
 import tempfile
 from pathlib import Path
 from typing import Optional
 import typer
 from rich.console import Console
 from rich.panel import Panel
 from rich.progress import Progress, SpinnerColumn, TextColumn
 from rich.syntax import Syntax
-from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
+from .ocr_cli import ocr_app
 # Create main app
 app = typer.Typer(
-    name="ocr-screenshot",
+    name="tooling",
-    help="Take a region screenshot, perform OCR, and copy result to clipboard",
+    help="A collection of command-line tools for productivity",
    rich_markup_mode="rich"
 )
 console = Console()
 # Add OCR subcommand
 app.add_typer(ocr_app, name="ocr", help="OCR screenshot tools")
@app.command()
-def main(
+def version():
-    lang: str = typer.Option(
+    """Show version information."""
-        default="eng",
+    console.print("[bold green]tooling[/bold green] [blue]v0.1.0[/blue]")
        help="Language code for OCR (default: eng)"
    ),
    save_image: bool = typer.Option(
        default=False,
        help="Save the screenshot image instead of deleting it"
    ),
    output_dir: Path = typer.Option(
        default=Path.home() / "Desktop",
        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
    ),
    verbose: bool = typer.Option(
        default=False,
        help="Show verbose output"
    ),
    annotate: bool = typer.Option(
        default=False,
        help="Create an annotated version of the image showing detected text regions"
    ),
    show_words: bool = typer.Option(
        default=True,
        help="Show word-level bounding boxes in annotation (default: True)"
    ),
    show_lines: bool = typer.Option(
        default=False,
        help="Show line-level bounding boxes in annotation"
    ),
    show_blocks: bool = typer.Option(
        default=False,
        help="Show block-level bounding boxes in annotation"
    ),
    show_text: bool = typer.Option(
        default=False,
        help="Overlay detected text on the annotated image"
    ),
    screenshot_method: str = typer.Option(
        default="auto",
        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
    ),
    monitor_number: int = typer.Option(
        default=0,
        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
    )
 ):
    """Take a region screenshot, perform OCR, and copy result to clipboard."""
    # Create screenshot path
    if save_image:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
        os.close(temp_fd)
        screenshot_path = Path(temp_path)
    try:
        # Step 1: Take screenshot
        if verbose:
            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
            # Show method-specific instructions
            if screenshot_method == "macos":
                console.print(Panel(
                    "[bold]macOS Screenshot Instructions:[/bold]\n"
                    "• Drag to select a region\n"
                    "• Press [bold]Space[/bold] to capture entire window\n"
                    "• Press [bold]Escape[/bold] to cancel",
                    title="Screenshot Controls",
                    border_style="blue"
                ))
            elif screenshot_method == "interactive":
                console.print(Panel(
                    "[bold]Interactive Screenshot Instructions:[/bold]\n"
                    "• Follow the prompts to select region corners\n"
                    "• Position mouse and press ENTER at each corner",
                    title="Screenshot Controls",
                    border_style="green"
                ))
            elif screenshot_method in ["mss", "pillow", "pyautogui", "pyscreenshot"]:
                console.print(Panel(
                    f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
                    "• Full screen capture (region selection not supported in CLI yet)\n"
                    "• Use --screenshot-method interactive for region selection",
                    title="Screenshot Info",
                    border_style="yellow"
                ))
        else:
            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")
        if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
            if screenshot_method == "auto":
                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
                console.print("   pip install mss pyautogui pyscreenshot")
            raise typer.Exit(1)
        if verbose:
            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")
        # Step 2: Perform OCR (with optional annotation)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
            if annotate:
                # Create annotation output path
                annotation_path = None
                if save_image:
                    base_name = screenshot_path.stem
                    annotation_path = output_dir / f"{base_name}_annotated.png"
                extracted_text, annotated_image_path = perform_ocr_with_annotation(
                    str(screenshot_path), 
                    lang,
                    create_annotated=True,
                    annotation_output_path=str(annotation_path) if annotation_path else None,
                    show_words=show_words,
                    show_lines=show_lines,
                    show_blocks=show_blocks,
                    show_text=show_text
                )
                if annotated_image_path and verbose:
                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
            else:
                extracted_text = perform_ocr(str(screenshot_path), lang)
            progress.update(task, description="[green]✓ OCR complete")
        if not extracted_text:
            console.print("[bold red]❌ No text found in the image.[/bold red]")
            raise typer.Exit(1)
        # Step 3: Copy to clipboard
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
            if copy_to_clipboard(extracted_text):
                progress.update(task, description="[green]✓ Copied to clipboard")
            else:
                progress.update(task, description="[red]✗ Failed to copy")
                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
                console.print("\n[bold]Extracted text:[/bold]")
                from rich.text import Text
                plain_text = Text(extracted_text)
                console.print(Panel(plain_text, border_style="yellow"))
                raise typer.Exit(1)
        # Success message
        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
        if annotate:
            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
        console.print(success_msg)
        if verbose:
            console.print("\n[bold]Extracted text:[/bold]")
            # Use syntax highlighting if it looks like code
            if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
                console.print(Panel(syntax, border_style="green"))
            else:
                # Treat as plain text to prevent Rich markup interpretation
                from rich.text import Text
                plain_text = Text(extracted_text)
                console.print(Panel(plain_text, border_style="green"))
    except KeyboardInterrupt:
        console.print("\n[bold yellow]⚠️  Operation cancelled by user.[/bold yellow]")
        raise typer.Exit(1)
    except Exception as e:
        console.print(f"[bold red]❌ Unexpected error: {e}[/bold red]")
        raise typer.Exit(1)
    finally:
        # Clean up temporary file if not saving
        if not save_image and screenshot_path.exists():
            try:
                screenshot_path.unlink()
            except Exception:
                pass  # Ignore cleanup errors
 def cli_main() -> None:
    """Entry point for the CLI script.

    Runs the top-level Typer application ``app``, which exposes the
    ``version`` command and the ``ocr`` sub-application registered above.
    """
    app()
 # Run the top-level CLI when this module is executed directly rather than imported.
 if __name__ == "__main__":
    app()
@@ -0,0 +1,233 @@
 #!/usr/bin/env python3
 """
 OCR Screenshot CLI Tool
 A command-line tool that takes a region screenshot (macOS or cross-platform backends),
 performs OCR using DocTR, and copies the result to clipboard.
 """
 import datetime
 import os
 import tempfile
 from pathlib import Path
 from typing import Optional
 import typer
 from rich.console import Console
 from rich.panel import Panel
 from rich.progress import Progress, SpinnerColumn, TextColumn
 from rich.syntax import Syntax
 from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
 # Typer application holding the OCR commands. It is exposed as a module-level
 # object so another Typer app can import it and mount it as a subcommand group.
 ocr_app = typer.Typer(
    name="ocr",
    help="Take a region screenshot, perform OCR, and copy result to clipboard",
    rich_markup_mode="rich"
 )
 # Shared Rich console used for all output printed by this module.
 console = Console()
# Substrings whose presence (case-insensitively) makes the extracted text be
# rendered with syntax highlighting instead of as plain text.
_CODE_HINTS = ('def ', 'function', 'class ', 'import ', 'from ', '{}', '[]')


def _show_capture_instructions(method: str) -> None:
    """Print the verbose banner and, for known methods, a panel of usage hints."""
    console.print(f"\n[bold blue]📸 Taking screenshot using method: {method}[/bold blue]")
    if method == "macos":
        console.print(Panel(
            "[bold]macOS Screenshot Instructions:[/bold]\n"
            "• Drag to select a region\n"
            "• Press [bold]Space[/bold] to capture entire window\n"
            "• Press [bold]Escape[/bold] to cancel",
            title="Screenshot Controls",
            border_style="blue"
        ))
    elif method == "interactive":
        console.print(Panel(
            "[bold]Interactive Screenshot Instructions:[/bold]\n"
            "• Follow the prompts to select region corners\n"
            "• Position mouse and press ENTER at each corner",
            title="Screenshot Controls",
            border_style="green"
        ))
    elif method in ("mss", "pillow", "pyautogui", "pyscreenshot"):
        console.print(Panel(
            f"[bold]{method.upper()} Screenshot:[/bold]\n"
            "• Full screen capture (region selection not supported in CLI yet)\n"
            "• Use --screenshot-method interactive for region selection",
            title="Screenshot Info",
            border_style="yellow"
        ))


def _spinner() -> Progress:
    """Build the transient spinner shown while the OCR and clipboard steps run."""
    return Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
        transient=True
    )


def _plain_text_panel(text: str, border_style: str) -> Panel:
    """Wrap *text* in a panel as literal text so Rich does not parse it as markup."""
    from rich.text import Text  # kept as a local import, as in the original code

    return Panel(Text(text), border_style=border_style)


def _looks_like_code(text: str) -> bool:
    """Return True when *text* contains any of the code-like hint substrings."""
    lowered = text.lower()
    return any(hint in lowered for hint in _CODE_HINTS)


# NOTE: the docstring of ``screenshot_cmd`` is used by Typer as the command's
# help text, so it is intentionally kept to the original single line; the
# details live in comments instead.
#
# Flow: choose the screenshot path (a timestamped file in ``output_dir`` when
# ``save_image`` is set, otherwise a temporary file) -> capture -> OCR
# (optionally producing an annotated image) -> copy to clipboard -> report.
# Every failure path exits with status 1 via ``typer.Exit``; the temporary
# screenshot is removed in ``finally`` when it is not being kept.
@ocr_app.command("screenshot")
def screenshot_cmd(
    lang: str = typer.Option(
        default="eng",
        help="Language code for OCR (default: eng)"
    ),
    save_image: bool = typer.Option(
        default=False,
        help="Save the screenshot image instead of deleting it"
    ),
    output_dir: Path = typer.Option(
        default=Path.home() / "Desktop",
        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
    ),
    verbose: bool = typer.Option(
        default=False,
        help="Show verbose output"
    ),
    annotate: bool = typer.Option(
        default=False,
        help="Create an annotated version of the image showing detected text regions"
    ),
    show_words: bool = typer.Option(
        default=True,
        help="Show word-level bounding boxes in annotation (default: True)"
    ),
    show_lines: bool = typer.Option(
        default=False,
        help="Show line-level bounding boxes in annotation"
    ),
    show_blocks: bool = typer.Option(
        default=False,
        help="Show block-level bounding boxes in annotation"
    ),
    show_text: bool = typer.Option(
        default=False,
        help="Overlay detected text on the annotated image"
    ),
    screenshot_method: str = typer.Option(
        default="auto",
        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
    ),
    monitor_number: int = typer.Option(
        default=0,
        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
    )
):
    """Take a region screenshot, perform OCR, and copy result to clipboard."""
    from rich.markup import escape  # local import: only needed on the error path

    # Decide where the screenshot is written.
    if save_image:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        # mkstemp returns an open descriptor; close it so the capture backend
        # can write to the path itself.
        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
        os.close(temp_fd)
        screenshot_path = Path(temp_path)

    try:
        # Step 1: Take screenshot
        if verbose:
            _show_capture_instructions(screenshot_method)
        else:
            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")

        captured = take_region_screenshot_cross_platform(
            str(screenshot_path),
            method=screenshot_method,
            monitor_number=monitor_number
        )
        if not captured:
            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
            if screenshot_method == "auto":
                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
                console.print("   pip install mss pyautogui pyscreenshot")
            raise typer.Exit(1)

        if verbose:
            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")

        # Step 2: Perform OCR (with optional annotation)
        with _spinner() as progress:
            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)

            if annotate:
                # An explicit annotation path is only chosen when the
                # screenshot itself is kept; otherwise None is passed through.
                annotation_path = None
                if save_image:
                    annotation_path = output_dir / f"{screenshot_path.stem}_annotated.png"

                extracted_text, annotated_image_path = perform_ocr_with_annotation(
                    str(screenshot_path),
                    lang,
                    create_annotated=True,
                    annotation_output_path=str(annotation_path) if annotation_path else None,
                    show_words=show_words,
                    show_lines=show_lines,
                    show_blocks=show_blocks,
                    show_text=show_text
                )
                if annotated_image_path and verbose:
                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
            else:
                extracted_text = perform_ocr(str(screenshot_path), lang)

            progress.update(task, description="[green]✓ OCR complete")

        if not extracted_text:
            console.print("[bold red]❌ No text found in the image.[/bold red]")
            raise typer.Exit(1)

        # Step 3: Copy to clipboard
        with _spinner() as progress:
            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)

            if copy_to_clipboard(extracted_text):
                progress.update(task, description="[green]✓ Copied to clipboard")
            else:
                progress.update(task, description="[red]✗ Failed to copy")
                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
                # Still show the text so the OCR result is not lost.
                console.print("\n[bold]Extracted text:[/bold]")
                console.print(_plain_text_panel(extracted_text, "yellow"))
                raise typer.Exit(1)

        # Success message
        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
        if annotate:
            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
        console.print(success_msg)

        if verbose:
            console.print("\n[bold]Extracted text:[/bold]")
            if _looks_like_code(extracted_text):
                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
                console.print(Panel(syntax, border_style="green"))
            else:
                # Treat as plain text to prevent Rich markup interpretation
                console.print(_plain_text_panel(extracted_text, "green"))

    except typer.Exit:
        # typer.Exit derives from RuntimeError, so without this clause the
        # generic handler below would catch our own deliberate exits and print
        # a misleading "Unexpected error" line after the real message.
        raise
    except KeyboardInterrupt:
        console.print("\n[bold yellow]⚠️  Operation cancelled by user.[/bold yellow]")
        raise typer.Exit(1)
    except Exception as e:
        # Escape the message: exception text may contain "[...]" sequences that
        # Rich would otherwise try to parse as markup.
        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
        raise typer.Exit(1) from e
    finally:
        # Clean up temporary file if not saving (best effort).
        if not save_image and screenshot_path.exists():
            try:
                screenshot_path.unlink()
            except OSError:
                pass  # Ignore cleanup errors
 # For backward compatibility when run directly: lets the OCR commands be
 # launched on their own, without going through a parent application.
 def cli_main() -> None:
    """Entry point for the OCR CLI script when run directly.

    Runs the ``ocr_app`` Typer application defined in this module.
    """
    ocr_app()
 # Run the OCR app when this module is executed directly rather than imported.
 if __name__ == "__main__":
    ocr_app()