add ocr
This commit is contained in:
+29
@@ -0,0 +1,29 @@
|
|||||||
|
# Python-generated files
|
||||||
|
__pycache__/
|
||||||
|
*.py[oc]
|
||||||
|
build/
|
||||||
|
dist/
|
||||||
|
wheels/
|
||||||
|
*.egg-info
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
.venv
|
||||||
|
# Python project specific ignores, anchored to the repository root.
# NOTE: a leading "/" in a .gitignore pattern is relative to the directory
# that contains this file, not to the filesystem root, so absolute machine
# paths (e.g. /Users/<name>/...) never match anything.
/.venv/
/__pycache__/
/*.pyc
/*.pyo
/*.pyd
/.pytest_cache/
/.mypy_cache/
/.ruff_cache/
/dist/
/build/
/*.egg-info/
/offline_packages/
/.coverage
/htmlcov/
/.tox/
/.env
/.DS_Store
/Thumbs.db
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
3.11
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
aghB8gTFIq7cp8wI
|
||||||
@@ -0,0 +1,81 @@
|
|||||||
|
|
||||||
|
# Tooling
|
||||||
|
|
||||||
|
A collection of useful command-line tools.
|
||||||
|
|
||||||
|
## OCR Screenshot Tool
|
||||||
|
|
||||||
|
A CLI tool that takes region screenshots on macOS, performs OCR using Tesseract, and copies the result to the clipboard.
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
- macOS (uses built-in `screencapture` command)
|
||||||
|
- Tesseract OCR (install with `brew install tesseract`)
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
Basic usage (takes screenshot, performs OCR, copies to clipboard):
|
||||||
|
```bash
|
||||||
|
uv run ocr-screenshot
|
||||||
|
```
|
||||||
|
|
||||||
|
With verbose output:
|
||||||
|
```bash
|
||||||
|
uv run ocr-screenshot --verbose
|
||||||
|
```
|
||||||
|
|
||||||
|
Save the screenshot image:
|
||||||
|
```bash
|
||||||
|
uv run ocr-screenshot --save-image
|
||||||
|
```
|
||||||
|
|
||||||
|
Specify OCR language (e.g., for Chinese):
|
||||||
|
```bash
|
||||||
|
uv run ocr-screenshot --lang chi_sim
|
||||||
|
```
|
||||||
|
|
||||||
|
### How it works
|
||||||
|
|
||||||
|
1. **Screenshot**: Click and drag to select a region, or press Space to capture an entire window
|
||||||
|
2. **OCR**: The selected region is processed with Tesseract OCR
|
||||||
|
3. **Clipboard**: The extracted text is automatically copied to your clipboard
|
||||||
|
|
||||||
|
## Development Guide
|
||||||
|
|
||||||
|
### How to Add New Packages
|
||||||
|
|
||||||
|
To add a new production dependency (e.g., 'requests'):
|
||||||
|
```bash
|
||||||
|
uv add requests
|
||||||
|
```
|
||||||
|
|
||||||
|
To add a new development dependency (e.g., 'ipdb'):
|
||||||
|
```bash
|
||||||
|
uv add --dev ipdb
|
||||||
|
```
|
||||||
|
|
||||||
|
After adding dependencies, always re-generate requirements.txt:
|
||||||
|
```bash
|
||||||
|
uv pip compile pyproject.toml -o requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### How to Build Packages
|
||||||
|
|
||||||
|
To build your project's distributable packages (.whl, .tar.gz):
|
||||||
|
```bash
|
||||||
|
python -m build
|
||||||
|
```
|
||||||
|
|
||||||
|
Or using the virtual environment directly:
|
||||||
|
```bash
|
||||||
|
./.venv/bin/python -m build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Offline Build
|
||||||
|
|
||||||
|
To build offline packages for deployment:
|
||||||
|
```bash
|
||||||
|
./dev_scripts/build_offline.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
This creates `offline_packages/`, containing all downloaded dependencies and an `install.sh` script.
|
||||||
Executable
+14
@@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Build an offline installation bundle for the project.
#
# Usage: build_offline.sh [PROJECT_PATH]
#
# Downloads every requirement listed in PROJECT_PATH/requirements.txt into
# PROJECT_PATH/offline_packages, builds the project's wheel, copies it next
# to the downloaded packages and writes an install.sh that installs the
# wheel without contacting a package index.

# Stop at the first failing step instead of producing a half-built bundle.
# (-u is deliberately not set: it is enabled only after the virtualenv
# activate script, which this script does not control, has been sourced.)
set -eo pipefail

PROJECT_PATH="${1:-/Users/wongdingfeng/.config/tooling}"
OFFLINE_PACKAGES_DIR="$PROJECT_PATH/offline_packages"

source "$PROJECT_PATH/.venv/bin/activate"
set -u

mkdir -p "$OFFLINE_PACKAGES_DIR"

"$PROJECT_PATH/.venv/bin/pip" download -r "$PROJECT_PATH/requirements.txt" --dest "$OFFLINE_PACKAGES_DIR"

# Build inside the project directory so the wheel lands in
# $PROJECT_PATH/dist regardless of where this script was invoked from.
(cd "$PROJECT_PATH" && uv build --wheel)

# Pick the most recently modified wheel; dist/ may still hold older builds.
latest_wheel=""
for candidate in "$PROJECT_PATH/dist/"*.whl; do
    [[ -e "$candidate" ]] || continue
    if [[ -z "$latest_wheel" || "$candidate" -nt "$latest_wheel" ]]; then
        latest_wheel="$candidate"
    fi
done

if [[ -z "$latest_wheel" ]]; then
    echo "error: no wheel found in $PROJECT_PATH/dist" >&2
    exit 1
fi

# Keep the wheel's real file name: pip parses name/version/tags from it and
# rejects a renamed file such as "install.whl" as an invalid wheel filename.
wheel_name="$(basename "$latest_wheel")"
cp "$latest_wheel" "$OFFLINE_PACKAGES_DIR/$wheel_name"

# The heredoc is intentionally unquoted so $wheel_name is expanded now and
# the generated script refers to the concrete wheel file.
cat << EOF > "$OFFLINE_PACKAGES_DIR/install.sh"
#!/usr/bin/env bash
pip install --no-index --find-links=./ "./$wheel_name"
EOF
chmod +x "$OFFLINE_PACKAGES_DIR/install.sh"
|
||||||
@@ -0,0 +1,28 @@
|
|||||||
|
[project]
|
||||||
|
name = "tooling"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "A collection of useful command-line tools."
|
||||||
|
readme = "README.md"
|
||||||
|
authors = [
|
||||||
|
{ name = "dingfeng.wong", email = "dingfeng.wong@okg.com" }
|
||||||
|
]
|
||||||
|
requires-python = ">=3.11"
|
||||||
|
dependencies = [
|
||||||
|
"pillow>=11.1.0",
|
||||||
|
"pyperclip>=1.9.0",
|
||||||
|
"pytesseract>=0.3.13",
|
||||||
|
"typer>=0.12.0",
|
||||||
|
"rich>=13.0.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
# Point the console script at the Typer application object, not at the
# decorated function: @app.command() returns the function unchanged, so
# calling it directly would skip command-line parsing and receive the
# typer.Option(...) placeholder objects as argument values.
ocr-screenshot = "tooling.cli:app"
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["hatchling"]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
|
[dependency-groups]
|
||||||
|
dev = [
|
||||||
|
"open-webui>=0.6.5",
|
||||||
|
]
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# This file was autogenerated by uv via the following command:
|
||||||
|
# uv pip compile pyproject.toml -o requirements.txt
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
def hello() -> str:
    """Return the package's fixed greeting string."""
    greeting = "Hello from tooling!"
    return greeting
|
||||||
@@ -0,0 +1,149 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
OCR Screenshot CLI Tool
|
||||||
|
|
||||||
|
A command-line tool that takes a region screenshot on macOS,
|
||||||
|
performs OCR using Tesseract, and copies the result to clipboard.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import typer
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.panel import Panel
|
||||||
|
from rich.progress import Progress, SpinnerColumn, TextColumn
|
||||||
|
from rich.syntax import Syntax
|
||||||
|
|
||||||
|
from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot
|
||||||
|
|
||||||
|
# Typer application object; the `main` command below is registered on it
# via the @app.command() decorator.
app = typer.Typer(
    name="ocr-screenshot",
    help="Take a region screenshot, perform OCR, and copy result to clipboard",
    rich_markup_mode="rich"
)

# Shared Rich console that `main` uses for its status and result output.
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def main(
    lang: str = typer.Option(
        "eng",
        "--lang",
        help="Language code for OCR (default: eng)"
    ),
    save_image: bool = typer.Option(
        False,
        "--save-image",
        help="Save the screenshot image instead of deleting it"
    ),
    output_dir: Path = typer.Option(
        Path.home() / "Desktop",
        "--output-dir",
        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)"
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose", "-v",
        help="Show verbose output"
    )
):
    """Take a region screenshot, perform OCR, and copy result to clipboard."""
    # The docstring above is kept to a single line on purpose: Typer shows it
    # as the command's help text.
    #
    # Flow: choose a target path -> take the screenshot -> OCR it -> copy the
    # text to the clipboard. Every failure prints a message and exits with
    # status 1. A temporary screenshot is removed on the way out unless
    # --save-image was given.

    # Local import: escapes text that did not come from this program (OCR
    # output, exception messages, paths) so Rich does not parse "[...]" in it
    # as console markup.
    from rich.markup import escape

    # Create screenshot path
    if save_image:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        # mkstemp returns an open descriptor; only the path is needed here,
        # so the descriptor is closed immediately.
        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
        os.close(temp_fd)
        screenshot_path = Path(temp_path)

    try:
        # Step 1: Take screenshot
        if verbose:
            console.print("\n[bold blue]📸 Taking region screenshot...[/bold blue]")
            console.print(Panel(
                "[bold]Instructions:[/bold]\n"
                "• Drag to select a region\n"
                "• Press [bold]Space[/bold] to capture entire window\n"
                "• Press [bold]Escape[/bold] to cancel",
                title="Screenshot Controls",
                border_style="blue"
            ))
        else:
            console.print("[bold blue]📸 Taking screenshot...[/bold blue]")

        if not take_region_screenshot(str(screenshot_path)):
            console.print("[bold red]❌ Screenshot cancelled or failed.[/bold red]")
            raise typer.Exit(1)

        if verbose:
            console.print(f"[green]✓ Screenshot saved to: {escape(str(screenshot_path))}[/green]")

        # Step 2: Perform OCR
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
            extracted_text = perform_ocr(str(screenshot_path), lang)
            progress.update(task, description="[green]✓ OCR complete")

        if not extracted_text:
            console.print("[bold red]❌ No text found in the image.[/bold red]")
            raise typer.Exit(1)

        # Step 3: Copy to clipboard
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True
        ) as progress:
            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
            if copy_to_clipboard(extracted_text):
                progress.update(task, description="[green]✓ Copied to clipboard")
            else:
                progress.update(task, description="[red]✗ Failed to copy")
                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
                # Still show the text so the OCR result is not lost.
                console.print("\n[bold]Extracted text:[/bold]")
                console.print(Panel(escape(extracted_text), border_style="yellow"))
                raise typer.Exit(1)

        # Success message
        console.print("\n[bold green]✅ Text extracted and copied to clipboard![/bold green]")

        if verbose:
            console.print("\n[bold]Extracted text:[/bold]")
            # Use syntax highlighting if it looks like code
            if any(keyword in extracted_text.lower() for keyword in ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']):
                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
                console.print(Panel(syntax, border_style="green"))
            else:
                console.print(Panel(escape(extracted_text), border_style="green"))

    except typer.Exit:
        # typer.Exit is an Exception subclass. Without this clause the
        # deliberate exits raised above would fall into the generic handler
        # below and print a second, misleading "Unexpected error" message.
        raise
    except KeyboardInterrupt:
        console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
        raise typer.Exit(1)
    except Exception as e:
        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
        raise typer.Exit(1)
    finally:
        # Clean up temporary file if not saving
        if not save_image and screenshot_path.exists():
            try:
                screenshot_path.unlink()
            except Exception:
                pass  # Best-effort cleanup: a leftover temp file must not mask the real outcome
|
||||||
|
|
||||||
|
|
||||||
|
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
OCR Screenshot Core Functions
|
||||||
|
|
||||||
|
Core functionality for taking screenshots, performing OCR, and clipboard operations.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
import pytesseract
|
||||||
|
import pyperclip
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
|
||||||
|
def take_region_screenshot(output_path: str) -> bool:
    """
    Capture a user-selected screen region with the `screencapture` command.

    Args:
        output_path: File path the screenshot should be written to

    Returns:
        True when the command succeeded and a non-empty file exists at
        output_path; False when the command failed, nothing was written,
        or any other error occurred (the error is printed)
    """
    command = [
        'screencapture',
        '-s',  # Selection mode (drag to select region)
        '-i',  # Interactive mode (space to select window, escape to cancel)
        output_path,
    ]

    try:
        subprocess.run(command, check=True, capture_output=True, text=True)

        # The command can finish without producing an image (for example
        # when the user cancels), so confirm a non-empty file is present.
        if not os.path.exists(output_path):
            return False
        return os.path.getsize(output_path) > 0
    except subprocess.CalledProcessError as e:
        print(f"Error taking screenshot: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error: {e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def perform_ocr(image_path: str, lang: str = 'eng') -> str:
    """
    Perform OCR on the given image using Tesseract.

    Args:
        image_path: Path to the image file
        lang: Language code for OCR (default: 'eng')

    Returns:
        Extracted text with leading/trailing whitespace stripped, or an
        empty string if opening the image or running OCR fails (the error
        is printed)
    """
    try:
        # Open the image inside a context manager so its file handle is
        # closed as soon as OCR finishes, instead of being left open.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image, lang=lang)

        return text.strip()

    except Exception as e:
        print(f"Error performing OCR: {e}")
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def copy_to_clipboard(text: str) -> bool:
    """
    Place the given text on the system clipboard via pyperclip.

    Args:
        text: Text to put on the clipboard

    Returns:
        True if the copy call completed, False if it raised (the error
        is printed)
    """
    try:
        pyperclip.copy(text)
    except Exception as e:
        print(f"Error copying to clipboard: {e}")
        return False
    else:
        return True
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user