# vllm server configuration
# Used by: nix run .#vllm-start-llm and nix run .#vllm-start-embed
#
# One KEY=value assignment per line. Lines starting with '#' are comments,
# so each setting must sit on its own line to take effect.

# Force CPU backend — no CUDA/ROCm GPU on this machine
VLLM_TARGET_DEVICE=cpu

# Model and port for the LLM server (vllm-start-llm).
VLLM_LLM_MODEL=Qwen/Qwen3-0.6B
VLLM_LLM_PORT=8000
# Example extra arguments, disabled by default.
# NOTE(review): the value contains spaces; if this file is sourced by a shell,
# it would need quoting when uncommented — confirm how the launcher reads it.
# VLLM_LLM_EXTRA_ARGS=--dtype bfloat16 --max-model-len 4096

# Model and port for the embedding server (vllm-start-embed).
VLLM_EMBED_MODEL=Qwen/Qwen3-Embedding-0.6B
VLLM_EMBED_PORT=8001
# Example extra arguments, disabled by default (same quoting caveat as above).
# VLLM_EMBED_EXTRA_ARGS=--dtype bfloat16