Files
rags/scripts/patch-vllm-cpu.py
T
tomatocream 4495a3cc62 feat: add lightrag-mcp MCP server + agent tooling
- Add AGENTS.md with repo guidelines
- Add lightrag-mcp: FastMCP server exposing insert_documents() + query_documents()
  to LLM agents via stdio transport, talks to LightRAG REST API
- Add scripts/patch-vllm-cpu.py for CPU inference patching
- Add .env.vllm for vLLM configuration
- Update flake.nix with expanded dev shell
- Update .env.lightrag
- Remove CLAUDE.md (replaced by AGENTS.md)
2026-04-19 21:46:47 +08:00

52 lines
1.5 KiB
Python

#!/usr/bin/env python3
"""
Patch vllm's cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu.

The upstream CUDA build only activates the CPU platform on macOS or when
the version string contains "cpu" (source builds). This patch adds a third
condition: if VLLM_TARGET_DEVICE=cpu is set in the environment.

Run after every `uv pip install vllm` — idempotent.

Exit status: 0 when the patch was applied, was already present, or vllm is
not installed in the expected venv; 1 when the code to patch cannot be found.
"""
import pathlib
import re
import sys
from typing import Optional

# The venv is expected at <repo>/vllm/.venv, i.e. one directory above this
# script's own directory.
VENV = pathlib.Path(__file__).parent.parent / "vllm" / ".venv"

# Path of the module to patch, relative to a site-packages directory.
_MODULE_PARTS = ("site-packages", "vllm", "platforms", "__init__.py")

# Upstream statements (leading whitespace removed) after which the new check
# is inserted. They are matched line by line so that the surrounding
# indentation and blank lines between them do not have to be known here.
_ANCHOR_LINES = (
    "if not is_cpu:",
    "import sys",
    'is_cpu = sys.platform.startswith("darwin")',
    "if is_cpu:",
    "logger.debug(",
    '"Confirmed CPU platform is available because the machine is MacOS."',
    ")",
)

# Lines to insert, as (nesting depth relative to the anchor's first line, text).
# `os` is imported by the injected code itself, so the patch does not depend on
# the target module already having it in scope; the `_os` alias keeps the
# function-local import from shadowing any existing `os` binding there.
_INJECTED_LINES = (
    (0, "if not is_cpu:"),
    (1, "import os as _os"),
    (1, 'is_cpu = _os.environ.get("VLLM_TARGET_DEVICE", "").lower() == "cpu"'),
    (1, "if is_cpu:"),
    (2, "logger.debug("),
    (3, '"Confirmed CPU platform is available because VLLM_TARGET_DEVICE=cpu."'),
    (2, ")"),
)

# End of one anchor line, followed by any number of blank lines.
_LINE_BREAK = r"[ \t]*\n(?:[ \t]*\n)*"

# `outer` captures the indentation of the anchor's first line, `inner` the
# indentation one level deeper; together they give the file's indent step.
_ANCHOR_RE = re.compile(
    r"^(?P<outer>[ \t]*)"
    + re.escape(_ANCHOR_LINES[0])
    + _LINE_BREAK
    + r"(?P<inner>[ \t]*)"
    + re.escape(_ANCHOR_LINES[1])
    + "".join(_LINE_BREAK + r"[ \t]*" + re.escape(line) for line in _ANCHOR_LINES[2:]),
    re.MULTILINE,
)


def _locate_target(venv: pathlib.Path) -> pathlib.Path:
    """Return the path of vllm/platforms/__init__.py inside *venv*.

    The python3.12 location is preferred. If it does not exist, any
    ``lib/python3.*`` directory in the venv is accepted instead, so a venv
    built with another interpreter version is not mistaken for "vllm not
    installed". When nothing is found the python3.12 path is returned so the
    caller can report it.
    """
    default = venv.joinpath("lib", "python3.12", *_MODULE_PARTS)
    if default.exists():
        return default
    pattern = "lib/python3.*/" + "/".join(_MODULE_PARTS)
    candidates = sorted(venv.glob(pattern))
    return candidates[0] if candidates else default


def apply_patch(content: str) -> Optional[str]:
    """Return *content* with the VLLM_TARGET_DEVICE check inserted.

    The new block is placed directly after the first occurrence of the macOS
    check and is indented to match it. Returns ``None`` when the macOS check
    cannot be found. This function does not check whether the patch is already
    present; ``main`` does that before calling it.
    """
    match = _ANCHOR_RE.search(content)
    if match is None:
        return None

    outer = match.group("outer")
    inner = match.group("inner")
    # One indentation step as used by the target file; fall back to four
    # spaces if the matched text is not indented in a recognisable way.
    if inner.startswith(outer) and len(inner) > len(outer):
        step = inner[len(outer):]
    else:
        step = "    "

    addition = "\n".join(outer + step * depth + text for depth, text in _INJECTED_LINES)
    end = match.end()
    return content[:end] + "\n" + addition + content[end:]


def main() -> int:
    """Patch the installed vllm in place and return the process exit code."""
    target = _locate_target(VENV)
    if not target.exists():
        print(f"vllm not installed at {target}, skipping patch")
        return 0

    # Python source files are UTF-8 by default; do not depend on the locale.
    content = target.read_text(encoding="utf-8")
    if "VLLM_TARGET_DEVICE" in content:
        print("patch already applied")
        return 0

    patched = apply_patch(content)
    if patched is None:
        print("ERROR: patch target not found — vllm version may have changed", file=sys.stderr)
        return 1

    target.write_text(patched, encoding="utf-8")
    print("patched cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu")
    return 0


if __name__ == "__main__":
    sys.exit(main())