t
This commit is contained in:
+11
-207
@@ -1,231 +1,35 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
OCR Screenshot CLI Tool
|
Main CLI for tooling package
|
||||||
|
|
||||||
A command-line tool that takes a region screenshot on macOS,
|
A command-line interface that provides various tools including OCR screenshot functionality.
|
||||||
performs OCR using DocTR, and copies the result to clipboard.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import datetime
|
|
||||||
import os
|
|
||||||
import tempfile
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
from rich.panel import Panel
|
|
||||||
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
||||||
from rich.syntax import Syntax
|
|
||||||
|
|
||||||
from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
|
from .ocr_cli import ocr_app
|
||||||
|
|
||||||
|
# Create main app
|
||||||
app = typer.Typer(
|
app = typer.Typer(
|
||||||
name="ocr-screenshot",
|
name="tooling",
|
||||||
help="Take a region screenshot, perform OCR, and copy result to clipboard",
|
help="A collection of command-line tools for productivity",
|
||||||
rich_markup_mode="rich"
|
rich_markup_mode="rich"
|
||||||
)
|
)
|
||||||
|
|
||||||
console = Console()
|
console = Console()
|
||||||
|
|
||||||
|
# Add OCR subcommand
|
||||||
|
app.add_typer(ocr_app, name="ocr", help="OCR screenshot tools")
|
||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
def main(
|
def version():
|
||||||
lang: str = typer.Option(
|
"""Show version information."""
|
||||||
default="eng",
|
console.print("[bold green]tooling[/bold green] [blue]v0.1.0[/blue]")
|
||||||
help="Language code for OCR (default: eng)"
|
|
||||||
),
|
|
||||||
save_image: bool = typer.Option(
|
|
||||||
default=False,
|
|
||||||
help="Save the screenshot image instead of deleting it"
|
|
||||||
),
|
|
||||||
output_dir: Path = typer.Option(
|
|
||||||
default=Path.home() / "Desktop",
|
|
||||||
help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
|
|
||||||
),
|
|
||||||
verbose: bool = typer.Option(
|
|
||||||
default=False,
|
|
||||||
help="Show verbose output"
|
|
||||||
),
|
|
||||||
annotate: bool = typer.Option(
|
|
||||||
default=False,
|
|
||||||
help="Create an annotated version of the image showing detected text regions"
|
|
||||||
),
|
|
||||||
show_words: bool = typer.Option(
|
|
||||||
default=True,
|
|
||||||
help="Show word-level bounding boxes in annotation (default: True)"
|
|
||||||
),
|
|
||||||
show_lines: bool = typer.Option(
|
|
||||||
default=False,
|
|
||||||
help="Show line-level bounding boxes in annotation"
|
|
||||||
),
|
|
||||||
show_blocks: bool = typer.Option(
|
|
||||||
default=False,
|
|
||||||
help="Show block-level bounding boxes in annotation"
|
|
||||||
),
|
|
||||||
show_text: bool = typer.Option(
|
|
||||||
default=False,
|
|
||||||
help="Overlay detected text on the annotated image"
|
|
||||||
),
|
|
||||||
screenshot_method: str = typer.Option(
|
|
||||||
default="auto",
|
|
||||||
help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
|
|
||||||
),
|
|
||||||
monitor_number: int = typer.Option(
|
|
||||||
default=0,
|
|
||||||
help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
|
|
||||||
)
|
|
||||||
):
|
|
||||||
"""Take a region screenshot, perform OCR, and copy result to clipboard."""
|
|
||||||
|
|
||||||
# Create screenshot path
|
|
||||||
if save_image:
|
|
||||||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
||||||
screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
|
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
else:
|
|
||||||
temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
|
|
||||||
os.close(temp_fd)
|
|
||||||
screenshot_path = Path(temp_path)
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Step 1: Take screenshot
|
|
||||||
if verbose:
|
|
||||||
console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")
|
|
||||||
|
|
||||||
# Show method-specific instructions
|
|
||||||
if screenshot_method == "macos":
|
|
||||||
console.print(Panel(
|
|
||||||
"[bold]macOS Screenshot Instructions:[/bold]\n"
|
|
||||||
"• Drag to select a region\n"
|
|
||||||
"• Press [bold]Space[/bold] to capture entire window\n"
|
|
||||||
"• Press [bold]Escape[/bold] to cancel",
|
|
||||||
title="Screenshot Controls",
|
|
||||||
border_style="blue"
|
|
||||||
))
|
|
||||||
elif screenshot_method == "interactive":
|
|
||||||
console.print(Panel(
|
|
||||||
"[bold]Interactive Screenshot Instructions:[/bold]\n"
|
|
||||||
"• Follow the prompts to select region corners\n"
|
|
||||||
"• Position mouse and press ENTER at each corner",
|
|
||||||
title="Screenshot Controls",
|
|
||||||
border_style="green"
|
|
||||||
))
|
|
||||||
elif screenshot_method in ["mss", "pillow", "pyautogui", "pyscreenshot"]:
|
|
||||||
console.print(Panel(
|
|
||||||
f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
|
|
||||||
"• Full screen capture (region selection not supported in CLI yet)\n"
|
|
||||||
"• Use --screenshot-method interactive for region selection",
|
|
||||||
title="Screenshot Info",
|
|
||||||
border_style="yellow"
|
|
||||||
))
|
|
||||||
else:
|
|
||||||
console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")
|
|
||||||
|
|
||||||
if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
|
|
||||||
console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
|
|
||||||
if screenshot_method == "auto":
|
|
||||||
console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
|
|
||||||
console.print(" pip install mss pyautogui pyscreenshot")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")
|
|
||||||
|
|
||||||
# Step 2: Perform OCR (with optional annotation)
|
|
||||||
with Progress(
|
|
||||||
SpinnerColumn(),
|
|
||||||
TextColumn("[progress.description]{task.description}"),
|
|
||||||
console=console,
|
|
||||||
transient=True
|
|
||||||
) as progress:
|
|
||||||
task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
|
|
||||||
|
|
||||||
if annotate:
|
|
||||||
# Create annotation output path
|
|
||||||
annotation_path = None
|
|
||||||
if save_image:
|
|
||||||
base_name = screenshot_path.stem
|
|
||||||
annotation_path = output_dir / f"{base_name}_annotated.png"
|
|
||||||
|
|
||||||
extracted_text, annotated_image_path = perform_ocr_with_annotation(
|
|
||||||
str(screenshot_path),
|
|
||||||
lang,
|
|
||||||
create_annotated=True,
|
|
||||||
annotation_output_path=str(annotation_path) if annotation_path else None,
|
|
||||||
show_words=show_words,
|
|
||||||
show_lines=show_lines,
|
|
||||||
show_blocks=show_blocks,
|
|
||||||
show_text=show_text
|
|
||||||
)
|
|
||||||
|
|
||||||
if annotated_image_path and verbose:
|
|
||||||
console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
|
|
||||||
else:
|
|
||||||
extracted_text = perform_ocr(str(screenshot_path), lang)
|
|
||||||
|
|
||||||
progress.update(task, description="[green]✓ OCR complete")
|
|
||||||
|
|
||||||
if not extracted_text:
|
|
||||||
console.print("[bold red]❌ No text found in the image.[/bold red]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
# Step 3: Copy to clipboard
|
|
||||||
with Progress(
|
|
||||||
SpinnerColumn(),
|
|
||||||
TextColumn("[progress.description]{task.description}"),
|
|
||||||
console=console,
|
|
||||||
transient=True
|
|
||||||
) as progress:
|
|
||||||
task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
|
|
||||||
if copy_to_clipboard(extracted_text):
|
|
||||||
progress.update(task, description="[green]✓ Copied to clipboard")
|
|
||||||
else:
|
|
||||||
progress.update(task, description="[red]✗ Failed to copy")
|
|
||||||
console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
|
|
||||||
console.print("\n[bold]Extracted text:[/bold]")
|
|
||||||
from rich.text import Text
|
|
||||||
plain_text = Text(extracted_text)
|
|
||||||
console.print(Panel(plain_text, border_style="yellow"))
|
|
||||||
raise typer.Exit(1)
|
|
||||||
|
|
||||||
# Success message
|
|
||||||
success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
|
|
||||||
if annotate:
|
|
||||||
success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
|
|
||||||
console.print(success_msg)
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
console.print("\n[bold]Extracted text:[/bold]")
|
|
||||||
# Use syntax highlighting if it looks like code
|
|
||||||
if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
|
|
||||||
syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
|
|
||||||
console.print(Panel(syntax, border_style="green"))
|
|
||||||
else:
|
|
||||||
# Treat as plain text to prevent Rich markup interpretation
|
|
||||||
from rich.text import Text
|
|
||||||
plain_text = Text(extracted_text)
|
|
||||||
console.print(Panel(plain_text, border_style="green"))
|
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
except Exception as e:
|
|
||||||
console.print(f"[bold red]❌ Unexpected error: {e}[/bold red]")
|
|
||||||
raise typer.Exit(1)
|
|
||||||
finally:
|
|
||||||
# Clean up temporary file if not saving
|
|
||||||
if not save_image and screenshot_path.exists():
|
|
||||||
try:
|
|
||||||
screenshot_path.unlink()
|
|
||||||
except Exception:
|
|
||||||
pass # Ignore cleanup errors
|
|
||||||
|
|
||||||
|
|
||||||
def cli_main():
|
def cli_main():
|
||||||
"""Entry point for the CLI script."""
|
"""Entry point for the CLI script."""
|
||||||
app()
|
app()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app()
|
app()
|
||||||
@@ -0,0 +1,233 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
OCR Screenshot CLI Tool
|
||||||
|
|
||||||
|
A command-line tool that takes a screenshot (interactive region selection or
full-screen capture, depending on the selected screenshot method),
performs OCR using DocTR, and copies the result to the clipboard.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.panel import Panel
|
||||||
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
||||||
|
from rich.syntax import Syntax
|
||||||
|
|
||||||
|
from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot, perform_ocr_with_annotation, take_region_screenshot_cross_platform
|
||||||
|
|
||||||
|
# Typer sub-application for the OCR commands; other CLIs can mount it as a
# subcommand group, and it can also be invoked on its own.
ocr_app = typer.Typer(
    name="ocr",
    help="Take a region screenshot, perform OCR, and copy result to clipboard",
    rich_markup_mode="rich",
)

# Rich console used by the commands in this module for terminal output.
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
def _print_screenshot_instructions(screenshot_method: str) -> None:
    """Print the on-screen hint that matches the selected screenshot method."""
    if screenshot_method == "macos":
        console.print(Panel(
            "[bold]macOS Screenshot Instructions:[/bold]\n"
            "• Drag to select a region\n"
            "• Press [bold]Space[/bold] to capture entire window\n"
            "• Press [bold]Escape[/bold] to cancel",
            title="Screenshot Controls",
            border_style="blue"
        ))
    elif screenshot_method == "interactive":
        console.print(Panel(
            "[bold]Interactive Screenshot Instructions:[/bold]\n"
            "• Follow the prompts to select region corners\n"
            "• Position mouse and press ENTER at each corner",
            title="Screenshot Controls",
            border_style="green"
        ))
    elif screenshot_method in ("mss", "pillow", "pyautogui", "pyscreenshot"):
        console.print(Panel(
            f"[bold]{screenshot_method.upper()} Screenshot:[/bold]\n"
            "• Full screen capture (region selection not supported in CLI yet)\n"
            "• Use --screenshot-method interactive for region selection",
            title="Screenshot Info",
            border_style="yellow"
        ))
    else:
        console.print(f"[bold blue]📸 Taking screenshot ({screenshot_method})...[/bold blue]")


@ocr_app.command("screenshot")
def screenshot_cmd(
    lang: str = typer.Option(
        default="eng",
        help="Language code for OCR (default: eng)"
    ),
    save_image: bool = typer.Option(
        default=False,
        help="Save the screenshot image instead of deleting it"
    ),
    output_dir: Path = typer.Option(
        default=Path.home() / "Desktop",
        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
    ),
    verbose: bool = typer.Option(
        default=False,
        help="Show verbose output"
    ),
    annotate: bool = typer.Option(
        default=False,
        help="Create an annotated version of the image showing detected text regions"
    ),
    show_words: bool = typer.Option(
        default=True,
        help="Show word-level bounding boxes in annotation (default: True)"
    ),
    show_lines: bool = typer.Option(
        default=False,
        help="Show line-level bounding boxes in annotation"
    ),
    show_blocks: bool = typer.Option(
        default=False,
        help="Show block-level bounding boxes in annotation"
    ),
    show_text: bool = typer.Option(
        default=False,
        help="Overlay detected text on the annotated image"
    ),
    screenshot_method: str = typer.Option(
        default="auto",
        help="Screenshot method to use: auto, mss, pyautogui, pillow, pyscreenshot, macos, interactive"
    ),
    monitor_number: int = typer.Option(
        default=0,
        help="Monitor number to capture (0=all monitors, 1+=specific monitor, only for MSS method)"
    )
):
    """Take a region screenshot, perform OCR, and copy result to clipboard."""
    # NOTE: the docstring above is rendered by Typer as the command's --help
    # text, so it is kept to a single user-facing sentence. Maintainer notes:
    #
    # Flow: (1) capture a screenshot to a file, (2) run OCR on it, optionally
    # producing an annotated image, (3) copy the recognised text to the
    # clipboard. Every failure path prints a message and exits with status 1
    # via typer.Exit. When --save-image is not given the screenshot lives in a
    # temporary file that is removed in the ``finally`` clause.

    # Function-scope imports: only this command needs them.
    from rich.markup import escape
    from rich.text import Text

    # Create screenshot path
    if save_image:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        # mkstemp returns an open descriptor; close it so the screenshot
        # backend can write to the path itself.
        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
        os.close(temp_fd)
        screenshot_path = Path(temp_path)

    try:
        # Step 1: Take screenshot
        if verbose:
            console.print(f"\n[bold blue]📸 Taking screenshot using method: {screenshot_method}[/bold blue]")

        # Show method-specific instructions
        _print_screenshot_instructions(screenshot_method)

        if not take_region_screenshot_cross_platform(str(screenshot_path), method=screenshot_method, monitor_number=monitor_number):
            console.print(f"[bold red]❌ Screenshot failed with method '{screenshot_method}'.[/bold red]")
            if screenshot_method == "auto":
                console.print("[yellow]💡 Try installing additional screenshot libraries:[/yellow]")
                console.print(" pip install mss pyautogui pyscreenshot")
            raise typer.Exit(1)

        if verbose:
            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")

        # Step 2: Perform OCR (with optional annotation)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)

            if annotate:
                # Only pick an explicit annotation path when the screenshot is
                # being kept; otherwise the OCR helper receives None.
                annotation_path = None
                if save_image:
                    base_name = screenshot_path.stem
                    annotation_path = output_dir / f"{base_name}_annotated.png"

                extracted_text, annotated_image_path = perform_ocr_with_annotation(
                    str(screenshot_path),
                    lang,
                    create_annotated=True,
                    annotation_output_path=str(annotation_path) if annotation_path else None,
                    show_words=show_words,
                    show_lines=show_lines,
                    show_blocks=show_blocks,
                    show_text=show_text
                )

                if annotated_image_path and verbose:
                    console.print(f"[green]✓ Annotated image saved to: {annotated_image_path}[/green]")
            else:
                extracted_text = perform_ocr(str(screenshot_path), lang)

            progress.update(task, description="[green]✓ OCR complete")

        if not extracted_text:
            console.print("[bold red]❌ No text found in the image.[/bold red]")
            raise typer.Exit(1)

        # Step 3: Copy to clipboard
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
            if copy_to_clipboard(extracted_text):
                progress.update(task, description="[green]✓ Copied to clipboard")
            else:
                progress.update(task, description="[red]✗ Failed to copy")
                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
                console.print("\n[bold]Extracted text:[/bold]")
                # Text() renders the OCR output literally, so brackets in it
                # are not parsed as Rich markup.
                console.print(Panel(Text(extracted_text), border_style="yellow"))
                raise typer.Exit(1)

        # Success message
        success_msg = "\n[bold green]✅ Text extracted and copied to clipboard![/bold green]"
        if annotate:
            success_msg += "\n[bold blue]📝 Annotated image created showing detected text regions.[/bold blue]"
        console.print(success_msg)

        if verbose:
            console.print("\n[bold]Extracted text:[/bold]")
            # Use syntax highlighting if it looks like code
            lowered = extracted_text.lower()
            code_markers = ('def ', 'function', 'class ', 'import ', 'from ', '{}', '[]')
            if any(marker in lowered for marker in code_markers):
                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
                console.print(Panel(syntax, border_style="green"))
            else:
                # Treat as plain text to prevent Rich markup interpretation
                console.print(Panel(Text(extracted_text), border_style="green"))

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the
        # deliberate exits above would fall into the generic handler below
        # and print a misleading "Unexpected error" line.
        raise
    except KeyboardInterrupt:
        console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
        raise typer.Exit(1)
    except Exception as e:
        # Escape the message so brackets in it cannot be parsed as markup
        # (a stray closing tag would raise inside this handler).
        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
        raise typer.Exit(1) from e
    finally:
        # Clean up temporary file if not saving
        if not save_image:
            try:
                screenshot_path.unlink()
            except OSError:
                pass  # Best effort: a missing or locked temp file is not fatal
|
||||||
|
|
||||||
|
|
||||||
|
# Kept so the OCR CLI can still be launched on its own (backward compatibility).
def cli_main():
    """Invoke the OCR Typer application; serves as the script entry point."""
    ocr_app()
|
||||||
|
|
||||||
|
|
||||||
|
# Running this module as a script goes through the same entry point as the
# installed console script.
if __name__ == "__main__":
    cli_main()
|
||||||
Reference in New Issue
Block a user