add ocr

2025-07-22 01:17:49 +08:00
commit 5f0d1d63de
12 changed files with 7165 additions and 0 deletions
@@ -0,0 +1,29 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
+# Python project specific ignores (a leading "/" anchors a pattern at the repository root, so absolute filesystem paths never match; use root-relative paths)
+/.venv/
+/__pycache__/
+/*.pyc
+/*.pyo
+/*.pyd
+/.pytest_cache/
+/.mypy_cache/
+/.ruff_cache/
+/dist/
+/build/
+/*.egg-info/
+/offline_packages/
+/.coverage
+/htmlcov/
+/.tox/
+/.env
+/.DS_Store
+/Thumbs.db
@@ -0,0 +1 @@
+3.11
@@ -0,0 +1 @@
+aghB8gTFIq7cp8wI
@@ -0,0 +1,81 @@
+
+# Tooling
+
+A collection of useful command-line tools.
+
+## OCR Screenshot Tool
+
+A CLI tool that takes region screenshots on macOS, performs OCR using Tesseract, and copies the result to clipboard.
+
+### Prerequisites
+
+- macOS (uses built-in `screencapture` command)
+- Tesseract OCR (install with `brew install tesseract`)
+
+### Usage
+
+Basic usage (takes screenshot, performs OCR, copies to clipboard):
+```bash
+uv run ocr-screenshot
+```
+
+With verbose output:
+```bash
+uv run ocr-screenshot --verbose
+```
+
+Save the screenshot image:
+```bash
+uv run ocr-screenshot --save-image
+```
+
+Specify OCR language (e.g., for Chinese):
+```bash
+uv run ocr-screenshot --lang chi_sim
+```
+
+### How it works
+
+1. **Screenshot**: Click and drag to select a region, or press Space to capture an entire window
+2. **OCR**: The selected region is processed with Tesseract OCR
+3. **Clipboard**: The extracted text is automatically copied to your clipboard
+
+## Development Guide
+
+### How to Add New Packages
+
+To add a new production dependency (e.g., 'requests'):
+```bash
+uv add requests
+```
+
+To add a new development dependency (e.g., 'ipdb'):
+```bash
+uv add --dev ipdb
+```
+
+After adding dependencies, always re-generate requirements.txt:
+```bash
+uv pip compile pyproject.toml -o requirements.txt
+```
+
+### How to Build Packages
+
+To build your project's distributable packages (.whl, .tar.gz):
+```bash
+python -m build
+```
+
+Or using the virtual environment directly:
+```bash
+./.venv/bin/python -m build
+```
+
+### Offline Build
+
+To build offline packages for deployment:
+```bash
+./dev_scripts/build_offline.sh
+```
+
+This creates `offline_packages/`, containing all dependencies and an `install.sh` script.
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+
+PROJECT_PATH="${1:-/Users/wongdingfeng/.config/tooling}"
+OFFLINE_PACKAGES_DIR="$PROJECT_PATH/offline_packages"
+
+source "$PROJECT_PATH/.venv/bin/activate"
+
+"$PROJECT_PATH/.venv/bin/pip" download -r "$PROJECT_PATH/requirements.txt" --dest "$OFFLINE_PACKAGES_DIR"
+uv build --wheel
+cp "$PROJECT_PATH/dist/"*.whl "$OFFLINE_PACKAGES_DIR/install.whl"
+
+cat << EOF > "$OFFLINE_PACKAGES_DIR/install.sh"
+pip install --no-index --find-links=./ install.whl
+EOF
@@ -0,0 +1,28 @@
+[project]
+name = "tooling"
+version = "0.1.0"
+description = "A collection of useful command-line tools."
+readme = "README.md"
+authors = [
+    { name = "dingfeng.wong", email = "dingfeng.wong@okg.com" }
+]
+requires-python = ">=3.11"
+dependencies = [
+    "pillow>=11.1.0",
+    "pyperclip>=1.9.0",
+    "pytesseract>=0.3.13",
+    "typer>=0.12.0",
+    "rich>=13.0.0",
+]
+
+[project.scripts]
+# Point at the Typer application object rather than the `main` function:
+# calling `main()` directly skips command-line parsing, so the typer.Option
+# placeholders would be passed in as the argument values.
+ocr-screenshot = "tooling.cli:app"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[dependency-groups]
+dev = [
+    "open-webui>=0.6.5",
+]
@@ -0,0 +1,2 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml -o requirements.txt
@@ -0,0 +1,2 @@
+def hello() -> str:
+    return "Hello from tooling!"
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+"""
+OCR Screenshot CLI Tool
+
+A command-line tool that takes a region screenshot on macOS,
+performs OCR using Tesseract, and copies the result to clipboard.
+"""
+
+import datetime
+import os
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+import typer
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import Progress, SpinnerColumn, TextColumn
+from rich.syntax import Syntax
+
+from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot
+
+app = typer.Typer(
+    name="ocr-screenshot",
+    help="Take a region screenshot, perform OCR, and copy result to clipboard",
+    rich_markup_mode="rich"
+)
+
+console = Console()
+
+
+@app.command()
+def main(
+    lang: str = typer.Option(
+        "eng",
+        "--lang",
+        help="Language code for OCR (default: eng)"
+    ),
+    save_image: bool = typer.Option(
+        False,
+        "--save-image",
+        help="Save the screenshot image instead of deleting it"
+    ),
+    output_dir: Path = typer.Option(
+        Path.home() / "Desktop",
+        "--output-dir",
+        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
+    ),
+    verbose: bool = typer.Option(
+        False,
+        "--verbose", "-v",
+        help="Show verbose output"
+    )
+):
+    """Take a region screenshot, perform OCR, and copy result to clipboard."""
+    
+    # Create screenshot path
+    if save_image:
+        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
+        output_dir.mkdir(parents=True, exist_ok=True)
+    else:
+        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
+        os.close(temp_fd)
+        screenshot_path = Path(temp_path)
+    
+    try:
+        # Step 1: Take screenshot
+        if verbose:
+            console.print("\n[bold blue]📸 Taking region screenshot...[/bold blue]")
+            console.print(Panel(
+                "[bold]Instructions:[/bold]\n"
+                "• Drag to select a region\n"
+                "• Press [bold]Space[/bold] to capture entire window\n"
+                "• Press [bold]Escape[/bold] to cancel",
+                title="Screenshot Controls",
+                border_style="blue"
+            ))
+        else:
+            console.print("[bold blue]📸 Taking screenshot...[/bold blue]")
+        
+        if not take_region_screenshot(str(screenshot_path)):
+            console.print("[bold red]❌ Screenshot cancelled or failed.[/bold red]")
+            raise typer.Exit(1)
+        
+        if verbose:
+            console.print(f"[green]✓ Screenshot saved to: {screenshot_path}[/green]")
+        
+        # Step 2: Perform OCR
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True
+        ) as progress:
+            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
+            extracted_text = perform_ocr(str(screenshot_path), lang)
+            progress.update(task, description="[green]✓ OCR complete")
+        
+        if not extracted_text:
+            console.print("[bold red]❌ No text found in the image.[/bold red]")
+            raise typer.Exit(1)
+        
+        # Step 3: Copy to clipboard
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[progress.description]{task.description}"),
+            console=console,
+            transient=True
+        ) as progress:
+            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
+            if copy_to_clipboard(extracted_text):
+                progress.update(task, description="[green]✓ Copied to clipboard")
+            else:
+                progress.update(task, description="[red]✗ Failed to copy")
+                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
+                console.print("\n[bold]Extracted text:[/bold]")
+                console.print(Panel(extracted_text, border_style="yellow"))
+                raise typer.Exit(1)
+        
+        # Success message
+        console.print("\n[bold green]✅ Text extracted and copied to clipboard![/bold green]")
+        
+        if verbose:
+            console.print("\n[bold]Extracted text:[/bold]")
+            # Use syntax highlighting if it looks like code
+            if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
+                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
+                console.print(Panel(syntax, border_style="green"))
+            else:
+                console.print(Panel(extracted_text, border_style="green"))
+        
+    except KeyboardInterrupt:
+        console.print("\n[bold yellow]⚠️  Operation cancelled by user.[/bold yellow]")
+        raise typer.Exit(1)
+    except Exception as e:
+        console.print(f"[bold red]❌ Unexpected error: {e}[/bold red]")
+        raise typer.Exit(1)
+    finally:
+        # Clean up temporary file if not saving
+        if not save_image and screenshot_path.exists():
+            try:
+                screenshot_path.unlink()
+            except Exception:
+                pass  # Ignore cleanup errors
+
+
+# Run the Typer application when this module is executed as a script.
+if __name__ == "__main__":
+    app() 
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""
+OCR Screenshot Core Functions
+
+Core functionality for taking screenshots, performing OCR, and clipboard operations.
+"""
+
+import os
+import subprocess
+
+import pytesseract
+import pyperclip
+from PIL import Image
+
+
+def take_region_screenshot(output_path: str) -> bool:
+    """
+    Take a region screenshot on macOS using the built-in screencapture command.
+    
+    Args:
+        output_path: Path where the screenshot will be saved
+        
+    Returns:
+        True if screenshot was taken successfully, False otherwise
+    """
+    try:
+        # Use screencapture with -s flag for selection mode and -i for interactive mode
+        result = subprocess.run([
+            'screencapture', 
+            '-s',  # Selection mode (drag to select region)
+            '-i',  # Interactive mode (space to select window, escape to cancel)
+            output_path
+        ], check=True, capture_output=True, text=True)
+        
+        # Check if file was actually created (user might have cancelled)
+        return os.path.exists(output_path) and os.path.getsize(output_path) > 0
+        
+    except subprocess.CalledProcessError as e:
+        print(f"Error taking screenshot: {e}")
+        return False
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+        return False
+
+
+def perform_ocr(image_path: str, lang: str = 'eng') -> str:
+    """
+    Perform OCR on the given image using Tesseract.
+    
+    Args:
+        image_path: Path to the image file
+        lang: Language code for OCR (default: 'eng')
+        
+    Returns:
+        Extracted text from the image
+    """
+    try:
+        # Open the image
+        image = Image.open(image_path)
+        
+        # Perform OCR
+        text = pytesseract.image_to_string(image, lang=lang)
+        
+        return text.strip()
+        
+    except Exception as e:
+        print(f"Error performing OCR: {e}")
+        return ""
+
+
+def copy_to_clipboard(text: str) -> bool:
+    """
+    Copy text to clipboard.
+    
+    Args:
+        text: Text to copy to clipboard
+        
+    Returns:
+        True if successful, False otherwise
+    """
+    try:
+        pyperclip.copy(text)
+        return True
+    except Exception as e:
+        print(f"Error copying to clipboard: {e}")
+        return False
+
+
+