add ocr
This commit is contained in:
+29
@@ -0,0 +1,29 @@
|
||||
# Python-generated files
|
||||
__pycache__/
|
||||
*.py[oc]
|
||||
build/
|
||||
dist/
|
||||
wheels/
|
||||
*.egg-info
|
||||
|
||||
# Virtual environments
|
||||
.venv
|
||||
# Python project specific ignores (paths are relative to the repository root;
# a leading "/" anchors the pattern to the directory containing this file)
/.venv/
/__pycache__/
/*.pyc
/*.pyo
/*.pyd
/.pytest_cache/
/.mypy_cache/
/.ruff_cache/
/dist/
/build/
/*.egg-info/
/offline_packages/
/.coverage
/htmlcov/
/.tox/
/.env
/.DS_Store
/Thumbs.db
|
||||
@@ -0,0 +1 @@
|
||||
3.11
|
||||
@@ -0,0 +1 @@
|
||||
aghB8gTFIq7cp8wI
|
||||
@@ -0,0 +1,81 @@
|
||||
|
||||
# Tooling
|
||||
|
||||
A collection of useful command-line tools.
|
||||
|
||||
## OCR Screenshot Tool
|
||||
|
||||
A CLI tool that takes region screenshots on macOS, performs OCR using Tesseract, and copies the result to clipboard.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- macOS (uses built-in `screencapture` command)
|
||||
- Tesseract OCR (install with `brew install tesseract`)
|
||||
|
||||
### Usage
|
||||
|
||||
Basic usage (takes screenshot, performs OCR, copies to clipboard):
|
||||
```bash
|
||||
uv run ocr-screenshot
|
||||
```
|
||||
|
||||
With verbose output:
|
||||
```bash
|
||||
uv run ocr-screenshot --verbose
|
||||
```
|
||||
|
||||
Save the screenshot image:
|
||||
```bash
|
||||
uv run ocr-screenshot --save-image
|
||||
```
|
||||
|
||||
Specify OCR language (e.g., for Chinese):
|
||||
```bash
|
||||
uv run ocr-screenshot --lang chi_sim
|
||||
```
|
||||
|
||||
### How it works
|
||||
|
||||
1. **Screenshot**: Click and drag to select a region, or press Space to capture an entire window
|
||||
2. **OCR**: The selected region is processed with Tesseract OCR
|
||||
3. **Clipboard**: The extracted text is automatically copied to your clipboard
|
||||
|
||||
## Development Guide
|
||||
|
||||
### How to Add New Packages
|
||||
|
||||
To add a new production dependency (e.g., 'requests'):
|
||||
```bash
|
||||
uv add requests
|
||||
```
|
||||
|
||||
To add a new development dependency (e.g., 'ipdb'):
|
||||
```bash
|
||||
uv add --dev ipdb
|
||||
```
|
||||
|
||||
After adding dependencies, always re-generate requirements.txt:
|
||||
```bash
|
||||
uv pip compile pyproject.toml -o requirements.txt
|
||||
```
|
||||
|
||||
### How to Build Packages
|
||||
|
||||
To build your project's distributable packages (.whl, .tar.gz):
|
||||
```bash
|
||||
python -m build
|
||||
```
|
||||
|
||||
Or using the virtual environment directly:
|
||||
```bash
|
||||
./.venv/bin/python -m build
|
||||
```
|
||||
|
||||
### Offline Build
|
||||
|
||||
To build offline packages for deployment:
|
||||
```bash
|
||||
./dev_scripts/build_offline.sh
|
||||
```
|
||||
|
||||
This creates `offline_packages/`, containing all dependencies and an `install.sh` script.
|
||||
Executable
+14
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash
#
# Build an offline installation bundle for this project.
#
# Usage: build_offline.sh [PROJECT_PATH]
#
#   PROJECT_PATH  Root of the project (the directory holding pyproject.toml,
#                 requirements.txt and .venv). Defaults to the parent of the
#                 directory this script lives in.
#
# Output: $PROJECT_PATH/offline_packages/ containing the downloaded
# dependencies, the project's wheel, and an install.sh that installs the
# wheel without touching the network.

# Stop at the first failing command instead of producing a partial bundle.
set -eo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_PATH="${1:-$(dirname "$SCRIPT_DIR")}"
OFFLINE_PACKAGES_DIR="$PROJECT_PATH/offline_packages"

mkdir -p "$OFFLINE_PACKAGES_DIR"

# shellcheck disable=SC1091
source "$PROJECT_PATH/.venv/bin/activate"

# Enabled only after sourcing: activate scripts are not guaranteed to be
# safe under nounset.
set -u

"$PROJECT_PATH/.venv/bin/pip" download -r "$PROJECT_PATH/requirements.txt" --dest "$OFFLINE_PACKAGES_DIR"

# Build from the project root so the wheel lands in $PROJECT_PATH/dist,
# regardless of the directory this script was invoked from.
(cd "$PROJECT_PATH" && uv build --wheel)

# Take the most recently built wheel and keep its real file name: pip refuses
# to install a wheel whose name does not follow the wheel naming convention.
WHEEL_PATH="$(ls -t "$PROJECT_PATH/dist/"*.whl | head -n 1)"
WHEEL_NAME="$(basename "$WHEEL_PATH")"
cp "$WHEEL_PATH" "$OFFLINE_PACKAGES_DIR/$WHEEL_NAME"

# The heredoc is unquoted on purpose: $WHEEL_NAME is expanded now, while the
# escaped \$ expressions are evaluated when install.sh itself runs.
cat << EOF > "$OFFLINE_PACKAGES_DIR/install.sh"
#!/usr/bin/env bash
set -euo pipefail
cd "\$(dirname "\${BASH_SOURCE[0]}")"
pip install --no-index --find-links=./ "./$WHEEL_NAME"
EOF
chmod +x "$OFFLINE_PACKAGES_DIR/install.sh"
|
||||
@@ -0,0 +1,28 @@
|
||||
[project]
|
||||
name = "tooling"
|
||||
version = "0.1.0"
|
||||
description = "A collection of useful command-line tools."
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
{ name = "dingfeng.wong", email = "dingfeng.wong@okg.com" }
|
||||
]
|
||||
requires-python = ">=3.11"
|
||||
dependencies = [
|
||||
"pillow>=11.1.0",
|
||||
"pyperclip>=1.9.0",
|
||||
"pytesseract>=0.3.13",
|
||||
"typer>=0.12.0",
|
||||
"rich>=13.0.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
ocr-screenshot = "tooling.cli:app"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"open-webui>=0.6.5",
|
||||
]
|
||||
@@ -0,0 +1,2 @@
|
||||
# This file was autogenerated by uv via the following command:
|
||||
# uv pip compile pyproject.toml -o requirements.txt
|
||||
@@ -0,0 +1,2 @@
|
||||
def hello() -> str:
    """Return the package's fixed greeting string."""
    return "Hello from tooling!"
|
||||
@@ -0,0 +1,149 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
OCR Screenshot CLI Tool
|
||||
|
||||
A command-line tool that takes a region screenshot on macOS,
|
||||
performs OCR using Tesseract, and copies the result to clipboard.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import typer
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
from rich.progress import Progress, SpinnerColumn, TextColumn
|
||||
from rich.syntax import Syntax
|
||||
|
||||
from .ocr_screenshot import copy_to_clipboard, perform_ocr, take_region_screenshot
|
||||
|
||||
# Typer application on which the `main` command in this module is registered.
app = typer.Typer(
    rich_markup_mode="rich",
    help="Take a region screenshot, perform OCR, and copy result to clipboard",
    name="ocr-screenshot",
)

# Single Rich console shared by all output in this module.
console = Console()
|
||||
|
||||
|
||||
# CLI command: screenshot -> OCR -> clipboard.
#
# Parameters (all supplied by Typer from the command line):
#   lang        Language code passed through to perform_ocr.
#   save_image  Keep the screenshot in `output_dir` instead of using a
#               temporary file that is deleted afterwards.
#   output_dir  Destination directory when `save_image` is set; created if
#               missing.
#   verbose     Print capture instructions, the saved path and the text.
#
# Exits with status 1 (typer.Exit) when the capture fails or is cancelled,
# when OCR yields no text, when the clipboard copy fails, or on any
# unexpected error.
#
# The docstring below is left untouched because Typer shows it as help text.
@app.command()
def main(
    lang: str = typer.Option(
        "eng",
        "--lang",
        help="Language code for OCR (default: eng)",
    ),
    save_image: bool = typer.Option(
        False,
        "--save-image",
        help="Save the screenshot image instead of deleting it",
    ),
    output_dir: Path = typer.Option(
        Path.home() / "Desktop",
        "--output-dir",
        help="Directory to save screenshot if --save-image is used (default: ~/Desktop)",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose", "-v",
        help="Show verbose output",
    ),
):
    """Take a region screenshot, perform OCR, and copy result to clipboard."""
    # Function-scope imports so this block brings its own helpers into scope.
    from rich.markup import escape
    from rich.text import Text

    # Create screenshot path
    if save_image:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        screenshot_path = output_dir / f"ocr_screenshot_{timestamp}.png"
        output_dir.mkdir(parents=True, exist_ok=True)
    else:
        # mkstemp creates the (empty) file; only its path is needed here, so
        # the descriptor is closed immediately.
        temp_fd, temp_path = tempfile.mkstemp(suffix='.png')
        os.close(temp_fd)
        screenshot_path = Path(temp_path)

    try:
        # Step 1: Take screenshot
        if verbose:
            console.print("\n[bold blue]📸 Taking region screenshot...[/bold blue]")
            console.print(Panel(
                "[bold]Instructions:[/bold]\n"
                "• Drag to select a region\n"
                "• Press [bold]Space[/bold] to capture entire window\n"
                "• Press [bold]Escape[/bold] to cancel",
                title="Screenshot Controls",
                border_style="blue",
            ))
        else:
            console.print("[bold blue]📸 Taking screenshot...[/bold blue]")

        if not take_region_screenshot(str(screenshot_path)):
            console.print("[bold red]❌ Screenshot cancelled or failed.[/bold red]")
            raise typer.Exit(1)

        if verbose:
            # The path is escaped so brackets in it are not read as markup.
            console.print(
                f"[green]✓ Screenshot saved to: {escape(str(screenshot_path))}[/green]"
            )

        # Step 2: Perform OCR
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True,
        ) as progress:
            task = progress.add_task("[bold cyan]🔍 Performing OCR...", total=None)
            extracted_text = perform_ocr(str(screenshot_path), lang)
            progress.update(task, description="[green]✓ OCR complete")

        if not extracted_text:
            console.print("[bold red]❌ No text found in the image.[/bold red]")
            raise typer.Exit(1)

        # Step 3: Copy to clipboard
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
            transient=True,
        ) as progress:
            task = progress.add_task("[bold cyan]📋 Copying to clipboard...", total=None)
            if copy_to_clipboard(extracted_text):
                progress.update(task, description="[green]✓ Copied to clipboard")
            else:
                progress.update(task, description="[red]✗ Failed to copy")
                console.print("[bold red]❌ Failed to copy to clipboard.[/bold red]")
                console.print("\n[bold]Extracted text:[/bold]")
                # Text() renders the OCR output literally; a plain str would
                # be parsed as Rich markup and "[...]" sequences mangled.
                console.print(Panel(Text(extracted_text), border_style="yellow"))
                raise typer.Exit(1)

        # Success message
        console.print("\n[bold green]✅ Text extracted and copied to clipboard![/bold green]")

        if verbose:
            console.print("\n[bold]Extracted text:[/bold]")
            # Use syntax highlighting if it looks like code
            lowered = extracted_text.lower()
            code_markers = ['def ', 'function', 'class ', 'import ', 'from ', '{}', '[]']
            if any(keyword in lowered for keyword in code_markers):
                syntax = Syntax(extracted_text, "python", theme="monokai", line_numbers=False)
                console.print(Panel(syntax, border_style="green"))
            else:
                console.print(Panel(Text(extracted_text), border_style="green"))

    except (typer.Exit, typer.Abort):
        # Deliberate exits raised above are RuntimeError subclasses; without
        # this clause the generic handler below would report them as
        # "Unexpected error".
        raise
    except KeyboardInterrupt:
        console.print("\n[bold yellow]⚠️ Operation cancelled by user.[/bold yellow]")
        raise typer.Exit(1)
    except Exception as e:
        console.print(f"[bold red]❌ Unexpected error: {escape(str(e))}[/bold red]")
        raise typer.Exit(1)
    finally:
        # Clean up temporary file if not saving
        if not save_image:
            try:
                screenshot_path.unlink(missing_ok=True)
            except OSError:
                pass  # Best-effort cleanup; a leftover temp file is harmless
|
||||
|
||||
|
||||
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
|
||||
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
OCR Screenshot Core Functions
|
||||
|
||||
Core functionality for taking screenshots, performing OCR, and clipboard operations.
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
import pytesseract
|
||||
import pyperclip
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def take_region_screenshot(output_path: str) -> bool:
    """
    Take a region screenshot on macOS using the built-in screencapture command.

    Args:
        output_path: Path where the screenshot will be saved

    Returns:
        True if a non-empty file exists at ``output_path`` after the command
        finishes, False if the command failed, could not be started, or left
        no (or an empty) file behind.
    """
    try:
        # -s: selection mode, -i: interactive mode.
        # NOTE(review): `-s` may restrict capture to mouse selection only and
        # so disable the Space-to-window toggle that `-i` provides - confirm
        # against `man screencapture`.
        subprocess.run(
            ['screencapture', '-s', '-i', output_path],
            check=True,
            capture_output=True,
            text=True,
        )

        # Check if file was actually created (user might have cancelled).
        # The size test also rejects an empty file that existed beforehand.
        return os.path.exists(output_path) and os.path.getsize(output_path) > 0

    except subprocess.CalledProcessError as e:
        print(f"Error taking screenshot: {e}")
        # Output is captured, so the command's own diagnostics would
        # otherwise be lost.
        detail = (e.stderr or "").strip()
        if detail:
            print(detail)
        return False
    except Exception as e:
        # Covers, among others, the command not being available at all.
        print(f"Unexpected error: {e}")
        return False
|
||||
|
||||
|
||||
def perform_ocr(image_path: str, lang: str = 'eng') -> str:
    """
    Perform OCR on the given image using Tesseract.

    Args:
        image_path: Path to the image file
        lang: Language code for OCR (default: 'eng')

    Returns:
        Extracted text with surrounding whitespace stripped, or an empty
        string if the image could not be opened or OCR failed (the error is
        printed rather than raised).
    """
    try:
        # The context manager closes the underlying file handle even when
        # OCR raises.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image, lang=lang)
        return text.strip()
    except Exception as e:
        print(f"Error performing OCR: {e}")
        return ""
|
||||
|
||||
|
||||
def copy_to_clipboard(text: str) -> bool:
    """
    Place ``text`` on the clipboard via pyperclip.

    Args:
        text: Text to copy to clipboard

    Returns:
        True when the copy call completed, False when it raised (the error
        is printed, not propagated).
    """
    try:
        pyperclip.copy(text)
    except Exception as e:
        print(f"Error copying to clipboard: {e}")
        return False
    return True
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user