This commit is contained in:
dingfeng.wong
2025-07-22 21:57:29 +08:00
parent 43b90e0d65
commit 0726aa60ed
2 changed files with 245 additions and 208 deletions
+12 -208
View File
@@ -1,231 +1,35 @@
#!/usr/bin/env python3
"""
Main CLI for the tooling package

A command-line interface that provides various tools, including the OCR
screenshot tool, which takes a region screenshot, performs OCR using DocTR,
and copies the result to the clipboard.
"""
import datetime
import os
import tempfile
from pathlib import Path
from typing import Optional
import typer
from rich.console import Console
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.syntax import Syntax
from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
from .ocr_cli import ocr_app
# Create the top-level Typer application; tool groups are mounted on it below.
# Each keyword is passed exactly once -- repeating `name=`/`help=` in a single
# call is a SyntaxError ("keyword argument repeated").
app = typer.Typer(
    name="tooling",
    help="A collection of command-line tools for productivity",
    rich_markup_mode="rich",
)

# Shared Rich console used for all terminal output in this module.
console = Console()

# Add OCR subcommand group, reachable under the `ocr` name.
app.add_typer(ocr_app, name="ocr", help="OCR screenshot tools")
@app.command()
def main(
    lang: str = typer.Option(
        default="eng",
        help="Language code for OCR (default: eng)"
    ),
    save_image: bool = typer.Option(
        default=False,
        help="Save the screenshot image instead of deleting it"
    ),
    output_dir: Path = typer.Option(
        default=Path.home() / "Desktop",
        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
    ),
    verbose: bool = typer.Option(
        default=False,
        help="Show verbose output"
    ),
    annotate: bool = typer.Option(
        default=False,
        help="Create an annotated version of the image showing detected text regions"
    ),
    show_words: bool = typer.Option(
        default=True,
        help="Show word-level bounding boxes in annotation (default: True)"
    ),
    show_lines: bool = typer.Option(
        default=False,
        help="Show line-level bounding boxes in annotation"
    ),
    show_blocks: bool = typer.Option(
        default=False,
        help="Show block-level bounding boxes in annotation"
    ),
    show_text: bool = typer.Option(
        default=False,
        help="Overlay detected text on the annotated image"
    ),
    screenshot_method: str = typer.Option(
        default="auto",
        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
    ),
    monitor_number: int = typer.Option(
        default=0,
        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
    )
):
    """Take a region screenshot, perform OCR, and copy result to clipboard."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # extended notes are kept in comments.
    #
    # Flow: capture a screenshot -> run OCR (optionally producing an annotated
    # image) -> copy the text to the clipboard. The command exits with status 1
    # when the capture fails, no text is found, the clipboard copy fails, the
    # user presses Ctrl-C, or an unexpected exception occurs.
    from rich.markup import escape
    from rich.text import Text

    # Decide where the screenshot lives: a timestamped file in output_dir when
    # the user wants to keep it, otherwise a temp file removed in `finally`.
    if save_image:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
        # Only the path is needed; close the descriptor so it is not leaked.
        os.close(temp_fd)
        screenshot_path = Path(temp_path)

    try:
        # Step 1: Take screenshot
        if verbose:
            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
            # Show method-specific instructions
            if screenshot_method == "macos":
                console.print(Panel(
                    "[bold]macOS Screenshot Instructions:[/bold]\n"
                    "• Drag to select a region\n"
                    "• Press [bold]Space[/bold] to capture entire window\n"
                    "• Press [bold]Escape[/bold] to cancel",
                    title="Screenshot Controls",
                    border_style="blue"
                ))
            elif screenshot_method == "interactive":
                console.print(Panel(
                    "[bold]Interactive Screenshot Instructions:[/bold]\n"
                    "• Follow the prompts to select region corners\n"
                    "• Position mouse and press ENTER at each corner",
                    title="Screenshot Controls",
                    border_style="green"
                ))
            elif screenshot_method in ["mss", "pillow", "pyautogui", "pyscreenshot"]:
                console.print(Panel(
                    f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
                    "• Full screen capture (region selection not supported in CLI yet)\n"
                    "• Use --screenshot-method interactive for region selection",
                    title="Screenshot Info",
                    border_style="yellow"
                ))
        else:
            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")

        if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
            if screenshot_method == "auto":
                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
                console.print(" pip install mss pyautogui pyscreenshot")
            raise typer.Exit(1)

        if verbose:
            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")

        # Step 2: Perform OCR (with optional annotation)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
            if annotate:
                # Only choose an explicit annotation path when the screenshot
                # itself is being kept; otherwise let the OCR helper decide.
                annotation_path = None
                if save_image:
                    base_name = screenshot_path.stem
                    annotation_path = output_dir / f"{base_name}_annotated.png"
                extracted_text, annotated_image_path = perform_ocr_with_annotation(
                    str(screenshot_path),
                    lang,
                    create_annotated=True,
                    annotation_output_path=str(annotation_path) if annotation_path else None,
                    show_words=show_words,
                    show_lines=show_lines,
                    show_blocks=show_blocks,
                    show_text=show_text
                )
                if annotated_image_path and verbose:
                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
            else:
                extracted_text = perform_ocr(str(screenshot_path), lang)
            progress.update(task, description="[green]✓ OCR complete")

        if not extracted_text:
            console.print("[bold red]❌ No text found in the image.[/bold red]")
            raise typer.Exit(1)

        # Step 3: Copy to clipboard
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
            if copy_to_clipboard(extracted_text):
                progress.update(task, description="[green]✓ Copied to clipboard")
            else:
                progress.update(task, description="[red]✗ Failed to copy")
                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
                # Still show the text so the OCR result is not lost.
                console.print("\n[bold]Extracted text:[/bold]")
                console.print(Panel(Text(extracted_text), border_style="yellow"))
                raise typer.Exit(1)

        # Success message
        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
        if annotate:
            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
        console.print(success_msg)

        if verbose:
            console.print("\n[bold]Extracted text:[/bold]")
            # Use syntax highlighting if it looks like code
            lowered = extracted_text.lower()
            if any(keyword in lowered for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
                console.print(Panel(syntax, border_style="green"))
            else:
                # Treat as plain text to prevent Rich markup interpretation
                console.print(Panel(Text(extracted_text), border_style="green"))
    except typer.Exit:
        # typer.Exit derives from RuntimeError, so without this clause the
        # deliberate exits above would fall into the generic handler below and
        # be reported a second time as an "Unexpected error".
        raise
    except KeyboardInterrupt:
        console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
        raise typer.Exit(1)
    except Exception as e:
        # Escape the message so brackets in it are not parsed as Rich markup.
        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
        raise typer.Exit(1)
    finally:
        # Clean up temporary file if not saving
        if not save_image and screenshot_path.exists():
            try:
                screenshot_path.unlink()
            except OSError:
                pass  # Best-effort cleanup; a leftover temp file is harmless
# Registered on the main app so it is reachable as the `version` sub-command;
# without the decorator nothing exposes or calls this function.
@app.command()
def version():
    """Show version information."""
    console.print("[bold green]tooling[/bold green] [blue]v0.1.0[/blue]")
def cli_main():
    """Entry point for the CLI script."""
    # Delegate straight to the Typer application object.
    app()
# Run the CLI exactly once, and only when executed as a script (never on import).
if __name__ == "__main__":
    app()
+233
View File
@@ -0,0 +1,233 @@
#!/usr/bin/env python3
"""
OCR Screenshot CLI Tool
A command-line tool that takes a region screenshot (macOS-native or cross-platform capture methods),
performs OCR using DocTR, and copies the result to the clipboard.
"""
import datetime
import os
import tempfile
from pathlib import Path
from typing import Optional
import typer
from rich.console import Console
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.syntax import Syntax
from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
# Typer application holding the OCR commands. It is importable so another
# Typer app can mount it as a sub-command group.
ocr_app = typer.Typer(
    name="ocr",
    help="Take a region screenshot, perform OCR, and copy result to clipboard",
    rich_markup_mode="rich",
)

# Rich console used for every message printed by this module.
console = Console()
# Substrings whose presence suggests the OCR output is source code.
_CODE_HINTS = ('def ', 'function', 'class ', 'import ', 'from ', '{}', '[]')


def _print_capture_instructions(method):
    """Print the usage panel for capture *method*; print nothing if it has none."""
    if method == "macos":
        console.print(Panel(
            "[bold]macOS Screenshot Instructions:[/bold]\n"
            "• Drag to select a region\n"
            "• Press [bold]Space[/bold] to capture entire window\n"
            "• Press [bold]Escape[/bold] to cancel",
            title="Screenshot Controls",
            border_style="blue"
        ))
    elif method == "interactive":
        console.print(Panel(
            "[bold]Interactive Screenshot Instructions:[/bold]\n"
            "• Follow the prompts to select region corners\n"
            "• Position mouse and press ENTER at each corner",
            title="Screenshot Controls",
            border_style="green"
        ))
    elif method in ("mss", "pillow", "pyautogui", "pyscreenshot"):
        console.print(Panel(
            f"[bold]{method.upper()} Screenshot:[/bold]\n"
            "• Full screen capture (region selection not supported in CLI yet)\n"
            "• Use --screenshot-method interactive for region selection",
            title="Screenshot Info",
            border_style="yellow"
        ))


def _extracted_text_panel(text, border_style, detect_code=False):
    """Wrap OCR text in a Panel without letting Rich parse it as markup.

    With detect_code=True, text containing any of _CODE_HINTS is rendered with
    Python syntax highlighting instead of as plain text.
    """
    from rich.text import Text

    if detect_code:
        lowered = text.lower()
        if any(hint in lowered for hint in _CODE_HINTS):
            syntax = Syntax(text, "python", theme="monokai", line_numbers=False)
            return Panel(syntax, border_style=border_style)
    return Panel(Text(text), border_style=border_style)


@ocr_app.command("screenshot")
def screenshot_cmd(
    lang: str = typer.Option(
        default="eng",
        help="Language code for OCR (default: eng)"
    ),
    save_image: bool = typer.Option(
        default=False,
        help="Save the screenshot image instead of deleting it"
    ),
    output_dir: Path = typer.Option(
        default=Path.home() / "Desktop",
        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
    ),
    verbose: bool = typer.Option(
        default=False,
        help="Show verbose output"
    ),
    annotate: bool = typer.Option(
        default=False,
        help="Create an annotated version of the image showing detected text regions"
    ),
    show_words: bool = typer.Option(
        default=True,
        help="Show word-level bounding boxes in annotation (default: True)"
    ),
    show_lines: bool = typer.Option(
        default=False,
        help="Show line-level bounding boxes in annotation"
    ),
    show_blocks: bool = typer.Option(
        default=False,
        help="Show block-level bounding boxes in annotation"
    ),
    show_text: bool = typer.Option(
        default=False,
        help="Overlay detected text on the annotated image"
    ),
    screenshot_method: str = typer.Option(
        default="auto",
        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
    ),
    monitor_number: int = typer.Option(
        default=0,
        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
    )
):
    """Take a region screenshot, perform OCR, and copy result to clipboard."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # extended notes are kept in comments.
    #
    # Flow: capture a screenshot -> run OCR (optionally producing an annotated
    # image) -> copy the text to the clipboard. The command exits with status 1
    # when the capture fails, no text is found, the clipboard copy fails, the
    # user presses Ctrl-C, or an unexpected exception occurs.
    from rich.markup import escape

    # Keep the screenshot as a timestamped file in output_dir when requested;
    # otherwise use a temp file that the `finally` clause removes.
    if save_image:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
        # Only the path is needed; close the descriptor so it is not leaked.
        os.close(temp_fd)
        screenshot_path = Path(temp_path)

    try:
        # Step 1: Take screenshot
        if verbose:
            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
            _print_capture_instructions(screenshot_method)
        else:
            console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")

        captured = take_region_screenshot_cross_platform(
            str(screenshot_path),
            method=screenshot_method,
            monitor_number=monitor_number,
        )
        if not captured:
            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
            if screenshot_method == "auto":
                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
                console.print(" pip install mss pyautogui pyscreenshot")
            raise typer.Exit(1)

        if verbose:
            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")

        # Step 2: Perform OCR (with optional annotation)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
            if annotate:
                # Only choose an explicit annotation path when the screenshot
                # itself is being kept; otherwise let the OCR helper decide.
                annotation_path = None
                if save_image:
                    annotation_path = output_dir / f"{screenshot_path.stem}_annotated.png"
                extracted_text, annotated_image_path = perform_ocr_with_annotation(
                    str(screenshot_path),
                    lang,
                    create_annotated=True,
                    annotation_output_path=str(annotation_path) if annotation_path else None,
                    show_words=show_words,
                    show_lines=show_lines,
                    show_blocks=show_blocks,
                    show_text=show_text
                )
                if annotated_image_path and verbose:
                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
            else:
                extracted_text = perform_ocr(str(screenshot_path), lang)
            progress.update(task, description="[green]✓ OCR complete")

        if not extracted_text:
            console.print("[bold red]❌ No text found in the image.[/bold red]")
            raise typer.Exit(1)

        # Step 3: Copy to clipboard
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
            if copy_to_clipboard(extracted_text):
                progress.update(task, description="[green]✓ Copied to clipboard")
            else:
                progress.update(task, description="[red]✗ Failed to copy")
                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
                # Still show the text so the OCR result is not lost.
                console.print("\n[bold]Extracted text:[/bold]")
                console.print(_extracted_text_panel(extracted_text, "yellow"))
                raise typer.Exit(1)

        # Success message
        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
        if annotate:
            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
        console.print(success_msg)

        if verbose:
            console.print("\n[bold]Extracted text:[/bold]")
            # Syntax-highlight when the text looks like code, plain otherwise.
            console.print(_extracted_text_panel(extracted_text, "green", detect_code=True))
    except typer.Exit:
        # typer.Exit derives from RuntimeError, so without this clause the
        # deliberate exits above would fall into the generic handler below and
        # be reported a second time as an "Unexpected error".
        raise
    except KeyboardInterrupt:
        console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
        raise typer.Exit(1)
    except Exception as e:
        # Escape the message so brackets in it are not parsed as Rich markup.
        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
        raise typer.Exit(1)
    finally:
        # Clean up temporary file if not saving
        if not save_image and screenshot_path.exists():
            try:
                screenshot_path.unlink()
            except OSError:
                pass  # Best-effort cleanup; a leftover temp file is harmless
# Kept for backward compatibility: lets the OCR tool be launched on its own.
def cli_main():
    """Entry point for the OCR CLI script when run directly."""
    # Delegate straight to the OCR Typer application object.
    ocr_app()
# Script execution goes through the same entry point as the installed command.
if __name__ == "__main__":
    cli_main()