4495a3cc62
- Add AGENTS.md with repo guidelines - Add lightrag-mcp: FastMCP server exposing insert_documents() + query_documents() to LLM agents via stdio transport, talks to LightRAG REST API - Add scripts/patch-vllm-cpu.py for CPU inference patching - Add .env.vllm for vLLM configuration - Update flake.nix with expanded dev shell - Update .env.lightrag - Remove CLAUDE.md (replaced by AGENTS.md)
52 lines
1.5 KiB
Python
52 lines
1.5 KiB
Python
#!/usr/bin/env python3
"""
Patch vllm's cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu.

The upstream CUDA build only activates the CPU platform on macOS or when
the version string contains "cpu" (source builds). This patch adds a third
condition: if VLLM_TARGET_DEVICE=cpu is set in the environment.

Run after every `uv pip install vllm` — idempotent.

Exit status: 0 when the file was patched, was already patched, or vllm is
not installed; 1 when the expected upstream snippet could not be found.
"""
import pathlib
import sys
from typing import Optional

# Virtualenv that holds the vllm install, relative to this script's parent dir.
VENV = pathlib.Path(__file__).parent.parent / "vllm" / ".venv"

# Python version directory tried first when locating site-packages.
DEFAULT_PYTHON_DIR = "python3.12"

# Substring whose presence means the file already honours the variable.
MARKER = "VLLM_TARGET_DEVICE"

# Upstream snippet the patch is appended to. It must match the installed file
# character for character, including indentation.
# NOTE(review): the indentation assumes the snippet sits inside the `try:` of
# `cpu_platform_plugin` (8 spaces) - confirm against the installed vllm; a
# mismatch is reported as "patch target not found" and exits 1.
ANCHOR = '''\
        if not is_cpu:
            import sys

            is_cpu = sys.platform.startswith("darwin")
            if is_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because the machine is MacOS."
                )'''

# Snippet appended after ANCHOR. `os` is imported locally (mirroring the
# upstream local `import sys`) so the check does not depend on the patched
# module having imported `os` itself; a NameError there would be swallowed by
# the surrounding `except Exception` and the patch would silently do nothing.
ADDITION = '''

        if not is_cpu:
            import os

            is_cpu = os.environ.get("VLLM_TARGET_DEVICE", "").lower() == "cpu"
            if is_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because VLLM_TARGET_DEVICE=cpu."
                )'''

# Snippet written by earlier revisions of this script (no local `import os`).
# Files carrying it are upgraded in place to ADDITION.
LEGACY_ADDITION = '''

        if not is_cpu:
            is_cpu = os.environ.get("VLLM_TARGET_DEVICE", "").lower() == "cpu"
            if is_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because VLLM_TARGET_DEVICE=cpu."
                )'''


def find_target(venv: pathlib.Path = VENV) -> pathlib.Path:
    """Return the path of vllm's ``platforms/__init__.py`` inside *venv*.

    The ``python3.12`` site-packages directory is preferred; if the file is
    not there, any ``lib/python3*`` directory of the venv is accepted. When
    nothing is found the preferred path is returned anyway so the caller can
    report it.
    """
    preferred = (
        venv / "lib" / DEFAULT_PYTHON_DIR / "site-packages"
        / "vllm" / "platforms" / "__init__.py"
    )
    if preferred.exists():
        return preferred
    candidates = sorted(
        venv.glob("lib/python3*/site-packages/vllm/platforms/__init__.py")
    )
    return candidates[0] if candidates else preferred


def patch_source(content: str) -> Optional[str]:
    """Return *content* with the VLLM_TARGET_DEVICE check added.

    Returns ``None`` when no change is needed (the file already mentions
    VLLM_TARGET_DEVICE and does not carry the legacy snippet).

    Raises:
        ValueError: the upstream snippet the patch attaches to is missing.
    """
    legacy = ANCHOR + LEGACY_ADDITION
    if legacy in content:
        # Upgrade a file patched by an earlier revision of this script.
        return content.replace(legacy, ANCHOR + ADDITION, 1)
    if MARKER in content:
        return None
    if ANCHOR not in content:
        raise ValueError("patch target not found")
    return content.replace(ANCHOR, ANCHOR + ADDITION, 1)


def main() -> int:
    """Patch the installed vllm in place and return the process exit status."""
    target = find_target()
    if not target.exists():
        print(f"vllm not installed at {target}, skipping patch")
        return 0

    # Explicit encoding: do not depend on the locale's default.
    content = target.read_text(encoding="utf-8")
    try:
        patched = patch_source(content)
    except ValueError:
        print(
            "ERROR: patch target not found — vllm version may have changed",
            file=sys.stderr,
        )
        return 1

    if patched is None:
        print("patch already applied")
        return 0

    target.write_text(patched, encoding="utf-8")
    print("patched cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu")
    return 0


if __name__ == "__main__":
    sys.exit(main())