t

2025-07-22 21:57:29 +08:00
parent 43b90e0d65
commit 0726aa60ed
2 changed files with 245 additions and 208 deletions
@@ -1,231 +1,35 @@
 #!/usr/bin/env python3
 """
-OCR Screenshot CLI Tool
+Main CLI for tooling package

-A command-line tool that takes a region screenshot on macOS,
-performs OCR using DocTR, and copies the result to clipboard.
+A command-line interface that provides various tools including OCR screenshot functionality.
 """

-import datetime
-import os
-import tempfile
-from pathlib import Path
-from typing import Optional
-
 import typer
 from rich.console import Console
-from rich.panel import Panel
-from rich.progress import Progress, SpinnerColumn, TextColumn
-from rich.syntax import Syntax

-from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
+from .ocr_cli import ocr_app

+# Create main app
 app = typer.Typer(
-    name="ocr-screenshot",
-    help="Take a region screenshot, perform OCR, and copy result to clipboard",
+    name="tooling",
+    help="A collection of command-line tools for productivity",
    rich_markup_mode="rich"
 )

 console = Console()

+# Add OCR subcommand
+app.add_typer(ocr_app, name="ocr", help="OCR screenshot tools")

@app.command()
-def main(
-    lang: str = typer.Option(
-        default="eng",
-        help="Language code for OCR (default: eng)"
-    ),
-    save_image: bool = typer.Option(
-        default=False,
-        help="Save the screenshot image instead of deleting it"
-    ),
-    output_dir: Path = typer.Option(
-        default=Path.home() / "Desktop",
-        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
-    ),
-    verbose: bool = typer.Option(
-        default=False,
-        help="Show verbose output"
-    ),
-    annotate: bool = typer.Option(
-        default=False,
-        help="Create an annotated version of the image showing detected text regions"
-    ),
-    show_words: bool = typer.Option(
-        default=True,
-        help="Show word-level bounding boxes in annotation (default: True)"
-    ),
-    show_lines: bool = typer.Option(
-        default=False,
-        help="Show line-level bounding boxes in annotation"
-    ),
-    show_blocks: bool = typer.Option(
-        default=False,
-        help="Show block-level bounding boxes in annotation"
-    ),
-    show_text: bool = typer.Option(
-        default=False,
-        help="Overlay detected text on the annotated image"
-    ),
-    screenshot_method: str = typer.Option(
-        default="auto",
-        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
-    ),
-    monitor_number: int = typer.Option(
-        default=0,
-        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
-    )
-):
-    """Take a region screenshot, perform OCR, and copy result to clipboard."""
-    
-    # Create screenshot path
-    if save_image:
-        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
-        output_dir.mkdir(parents=True, exist_ok=True)
-    else:
-        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
-        os.close(temp_fd)
-        screenshot_path = Path(temp_path)
-    
-    try:
-        # Step 1: Take screenshot
-        if verbose:
-            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
-            
-            # Show method-specific instructions
-            if screenshot_method == "macos":
-                console.print(Panel(
-                    "[bold]macOS Screenshot Instructions:[/bold]\n"
-                    "• Drag to select a region\n"
-                    "• Press [bold]Space[/bold] to capture entire window\n"
-                    "• Press [bold]Escape[/bold] to cancel",
-                    title="Screenshot Controls",
-                    border_style="blue"
-                ))
-            elif screenshot_method == "interactive":
-                console.print(Panel(
-                    "[bold]Interactive Screenshot Instructions:[/bold]\n"
-                    "• Follow the prompts to select region corners\n"
-                    "• Position mouse and press ENTER at each corner",
-                    title="Screenshot Controls",
-                    border_style="green"
-                ))
-            elif screenshot_method in ["mss", "pillow", "pyautogui", "pyscreenshot"]:
-                console.print(Panel(
-                    f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
-                    "• Full screen capture (region selection not supported in CLI yet)\n"
-                    "• Use --screenshot-method interactive for region selection",
-                    title="Screenshot Info",
-                    border_style="yellow"
-                ))
-        else:
-            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")
-        
-        if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
-            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
-            if screenshot_method == "auto":
-                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
-                console.print("   pip install mss pyautogui pyscreenshot")
-            raise typer.Exit(1)
-        
-        if verbose:
-            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")
-        
-        # Step 2: Perform OCR (with optional annotation)
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            console=console,
-            transient=True
-        ) as progress:
-            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
-            
-            if annotate:
-                # Create annotation output path
-                annotation_path = None
-                if save_image:
-                    base_name = screenshot_path.stem
-                    annotation_path = output_dir / f"{base_name}_annotated.png"
-                
-                extracted_text, annotated_image_path = perform_ocr_with_annotation(
-                    str(screenshot_path), 
-                    lang,
-                    create_annotated=True,
-                    annotation_output_path=str(annotation_path) if annotation_path else None,
-                    show_words=show_words,
-                    show_lines=show_lines,
-                    show_blocks=show_blocks,
-                    show_text=show_text
-                )
-                
-                if annotated_image_path and verbose:
-                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
-            else:
-                extracted_text = perform_ocr(str(screenshot_path), lang)
-                
-            progress.update(task, description="[green]✓ OCR complete")
-        
-        if not extracted_text:
-            console.print("[bold red]❌ No text found in the image.[/bold red]")
-            raise typer.Exit(1)
-        
-        # Step 3: Copy to clipboard
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            console=console,
-            transient=True
-        ) as progress:
-            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
-            if copy_to_clipboard(extracted_text):
-                progress.update(task, description="[green]✓ Copied to clipboard")
-            else:
-                progress.update(task, description="[red]✗ Failed to copy")
-                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
-                console.print("\n[bold]Extracted text:[/bold]")
-                from rich.text import Text
-                plain_text = Text(extracted_text)
-                console.print(Panel(plain_text, border_style="yellow"))
-                raise typer.Exit(1)
-        
-        # Success message
-        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
-        if annotate:
-            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
-        console.print(success_msg)
-        
-        if verbose:
-            console.print("\n[bold]Extracted text:[/bold]")
-            # Use syntax highlighting if it looks like code
-            if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
-                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
-                console.print(Panel(syntax, border_style="green"))
-            else:
-                # Treat as plain text to prevent Rich markup interpretation
-                from rich.text import Text
-                plain_text = Text(extracted_text)
-                console.print(Panel(plain_text, border_style="green"))
-        
-    except KeyboardInterrupt:
-        console.print("\n[bold yellow]⚠️  Operation cancelled by user.[/bold yellow]")
-        raise typer.Exit(1)
-    except Exception as e:
-        console.print(f"[bold red]❌ Unexpected error: {e}[/bold red]")
-        raise typer.Exit(1)
-    finally:
-        # Clean up temporary file if not saving
-        if not save_image and screenshot_path.exists():
-            try:
-                screenshot_path.unlink()
-            except Exception:
-                pass  # Ignore cleanup errors
-
+def version():  # NOTE(review): version string below is hard-coded — confirm it tracks the package metadata
+    """Show version information."""
+    console.print("[bold green]tooling[/bold green] [blue]v0.1.0[/blue]")

 def cli_main():
    """Entry point for the CLI script."""
    app()

-
 if __name__ == "__main__":
-    app() 
+    app()
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+"""
+OCR Screenshot CLI Tool
+
+A command-line tool that takes a screenshot (macOS-native or via cross-platform
+capture backends), performs OCR using DocTR, and copies the result to clipboard.
+"""
+
+import datetime
+import os
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+import typer
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
+from rich.syntax import Syntax
+
+from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
+
+# OCR sub-application: exported as ocr_app so a parent Typer app can mount it with add_typer()
+ocr_app = typer.Typer(
+    name="ocr",
+    help="Take a region screenshot, perform OCR, and copy result to clipboard",
+    rich_markup_mode="rich"
+)
+
+console = Console()  # module-level Rich console used for all output in this module
+
+
+@ocr_app.command("screenshot")
+def screenshot_cmd(
+    lang: str = typer.Option(
+        default="eng",
+        help="Language code for OCR (default: eng)"
+    ),
+    save_image: bool = typer.Option(
+        default=False,
+        help="Save the screenshot image instead of deleting it"
+    ),
+    output_dir: Path = typer.Option(
+        default=Path.home() / "Desktop",
+        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
+    ),
+    verbose: bool = typer.Option(
+        default=False,
+        help="Show verbose output"
+    ),
+    annotate: bool = typer.Option(
+        default=False,
+        help="Create an annotated version of the image showing detected text regions"
+    ),
+    show_words: bool = typer.Option(
+        default=True,
+        help="Show word-level bounding boxes in annotation (default: True)"
+    ),
+    show_lines: bool = typer.Option(
+        default=False,
+        help="Show line-level bounding boxes in annotation"
+    ),
+    show_blocks: bool = typer.Option(
+        default=False,
+        help="Show block-level bounding boxes in annotation"
+    ),
+    show_text: bool = typer.Option(
+        default=False,
+        help="Overlay detected text on the annotated image"
+    ),
+    screenshot_method: str = typer.Option(
+        default="auto",
+        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
+    ),
+    monitor_number: int = typer.Option(
+        default=0,
+        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
+    )
+) -> None:
+    """Take a region screenshot, perform OCR, and copy result to clipboard."""
+    # NOTE: Typer shows the docstring above as --help text, so extra notes live here.
+    # Every failure path exits with status 1; error text is escaped before Rich renders it.
+    from rich.markup import escape
+    from rich.text import Text
+
+    # Create screenshot path
+    if save_image:
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
+        output_dir.mkdir(parents=True, exist_ok=True)
+    else:
+        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
+        os.close(temp_fd)
+        screenshot_path = Path(temp_path)
+
+    try:
+        # Step 1: Take screenshot
+        if verbose:
+            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
+            # Show method-specific instructions
+            if screenshot_method == "macos":
+                console.print(Panel(
+                    "[bold]macOS Screenshot Instructions:[/bold]\n"
+                    "• Drag to select a region\n"
+                    "• Press [bold]Space[/bold] to capture entire window\n"
+                    "• Press [bold]Escape[/bold] to cancel",
+                    title="Screenshot Controls",
+                    border_style="blue"
+                ))
+            elif screenshot_method == "interactive":
+                console.print(Panel(
+                    "[bold]Interactive Screenshot Instructions:[/bold]\n"
+                    "• Follow the prompts to select region corners\n"
+                    "• Position mouse and press ENTER at each corner",
+                    title="Screenshot Controls",
+                    border_style="green"
+                ))
+            elif screenshot_method in ["mss", "pillow", "pyautogui", "pyscreenshot"]:
+                console.print(Panel(
+                    f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
+                    "• Full screen capture (region selection not supported in CLI yet)\n"
+                    "• Use --screenshot-method interactive for region selection",
+                    title="Screenshot Info",
+                    border_style="yellow"
+                ))
+        else:
+            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")
+
+        if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
+            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
+            if screenshot_method == "auto":
+                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
+                console.print("   pip install mss pyautogui pyscreenshot")
+            raise typer.Exit(1)
+
+        if verbose:
+            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")
+
+        # Step 2: Perform OCR (with optional annotation)
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True
+        ) as progress:
+            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
+            if annotate:
+                # Create annotation output path
+                annotation_path = None
+                if save_image:
+                    base_name = screenshot_path.stem
+                    annotation_path = output_dir / f"{base_name}_annotated.png"
+
+                extracted_text, annotated_image_path = perform_ocr_with_annotation(
+                    str(screenshot_path),
+                    lang,
+                    create_annotated=True,
+                    annotation_output_path=str(annotation_path) if annotation_path else None,
+                    show_words=show_words,
+                    show_lines=show_lines,
+                    show_blocks=show_blocks,
+                    show_text=show_text
+                )
+                if annotated_image_path and verbose:
+                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
+            else:
+                extracted_text = perform_ocr(str(screenshot_path), lang)
+            progress.update(task, description="[green]✓ OCR complete")
+
+        if not extracted_text:
+            console.print("[bold red]❌ No text found in the image.[/bold red]")
+            raise typer.Exit(1)
+
+        # Step 3: Copy to clipboard
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True
+        ) as progress:
+            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
+            if copy_to_clipboard(extracted_text):
+                progress.update(task, description="[green]✓ Copied to clipboard")
+            else:
+                progress.update(task, description="[red]✗ Failed to copy")
+                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
+                console.print("\n[bold]Extracted text:[/bold]")
+                plain_text = Text(extracted_text)
+                console.print(Panel(plain_text, border_style="yellow"))
+                raise typer.Exit(1)
+
+        # Success message
+        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
+        if annotate:
+            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
+        console.print(success_msg)
+
+        if verbose:
+            console.print("\n[bold]Extracted text:[/bold]")
+            # Use syntax highlighting if it looks like code
+            if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
+                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
+                console.print(Panel(syntax, border_style="green"))
+            else:
+                # Treat as plain text to prevent Rich markup interpretation
+                plain_text = Text(extracted_text)
+                console.print(Panel(plain_text, border_style="green"))
+
+    except typer.Exit:
+        raise  # deliberate exit raised above (a RuntimeError subclass); not an "unexpected error"
+    except KeyboardInterrupt:
+        console.print("\n[bold yellow]⚠️  Operation cancelled by user.[/bold yellow]")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
+        raise typer.Exit(1)
+    finally:
+        # Clean up temporary file if not saving
+        if not save_image and screenshot_path.exists():
+            try:
+                screenshot_path.unlink()
+            except OSError:
+                pass  # Ignore cleanup errors
+
+
+# Kept for backward compatibility: a standalone entry point that runs only the OCR app
+def cli_main():
+    """Run the OCR Typer app on its own (the OCR command group, not a parent CLI)."""
+    ocr_app()
+
+
+if __name__ == "__main__":  # NOTE(review): the relative import above likely requires `python -m` execution — confirm
+    ocr_app()