# rags/scripts/patch-vllm-cpu.py

#!/usr/bin/env python3
"""
Patch vllm's cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu.

The upstream CUDA build only activates the CPU platform on macOS or when
the version string contains "cpu" (source builds). This patch adds a third
condition: if VLLM_TARGET_DEVICE=cpu is set in the environment.

Run after every `uv pip install vllm` — idempotent.
"""
import pathlib
import sys

venv = pathlib.Path(__file__).parent.parent / "vllm" / ".venv"
target = venv / "lib" / "python3.12" / "site-packages" / "vllm" / "platforms" / "__init__.py"

if not target.exists():
    print(f"vllm not installed at {target}, skipping patch")
    sys.exit(0)

content = target.read_text()

if "VLLM_TARGET_DEVICE" in content:
    print("patch already applied")
    sys.exit(0)

old = '''\
        if not is_cpu:
            import sys

            is_cpu = sys.platform.startswith("darwin")
            if is_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because the machine is MacOS."
                )'''

new = old + '''

        if not is_cpu:
            is_cpu = os.environ.get("VLLM_TARGET_DEVICE", "").lower() == "cpu"
            if is_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because VLLM_TARGET_DEVICE=cpu."
                )'''

if old not in content:
    print("ERROR: patch target not found — vllm version may have changed", file=sys.stderr)
    sys.exit(1)

target.write_text(content.replace(old, new, 1))
print("patched cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu")