2ffdfd2976
qwen3-embedding:8b needs >60s on first load. Set EMBEDDING_TIMEOUT=300 (worker executes at 2x = 600s) and TIMEOUT=600 for LLM calls. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
23 lines
432 B
Bash
23 lines
432 B
Bash
# LLM via Ollama
|
|
LLM_BINDING=ollama
|
|
LLM_MODEL=qwen2.5:7b
|
|
LLM_BINDING_HOST=http://localhost:11434
|
|
|
|
# Embeddings via Ollama
|
|
EMBEDDING_BINDING=ollama
|
|
EMBEDDING_MODEL=qwen3-embedding:8b
|
|
EMBEDDING_BINDING_HOST=http://localhost:11434
|
|
EMBEDDING_DIM=4096
|
|
|
|
# Storage (local files)
|
|
RAG_DIR=./rag_storage
|
|
|
|
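# Timeouts (in seconds) — increase for large local models.
# qwen3-embedding:8b needs more than 60s on first load; the worker executes at
# 2x EMBEDDING_TIMEOUT (600s with the value below). TIMEOUT applies to LLM calls.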
|
|
EMBEDDING_TIMEOUT=300
|
|
TIMEOUT=600
|
|
|
|
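# Server
# NOTE(review): HOST=0.0.0.0 normally means "listen on all network interfaces";
# use 127.0.0.1 if the service should only be reachable locally — confirm.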
|
|
HOST=0.0.0.0
|
|
PORT=9621
|
|
WORKERS=1
|