Compare commits
10 Commits
f0bd44cc14
...
4495a3cc62
| Author | SHA1 | Date | |
|---|---|---|---|
| 4495a3cc62 | |||
| c5dc2cf637 | |||
| deddc2b6c9 | |||
| c47a45af48 | |||
| 22e1e754b6 | |||
| 11168df880 | |||
| 73c3b20f0b | |||
| 2ffdfd2976 | |||
| 3a7249375c | |||
| 232f2a4bf0 |
+3
-4
@@ -6,9 +6,8 @@ NEO4J_PASSWORD=neo4j
|
||||
# LLM via Ollama (OpenAI-compatible)
|
||||
OPENAI_API_KEY=ollama
|
||||
OPENAI_BASE_URL=http://localhost:11434/v1
|
||||
MODEL_NAME=qwen2.5:7b
|
||||
MODEL_NAME=qwen3:0.6b
|
||||
|
||||
# Embeddings via Ollama
|
||||
# qwen3-embedding:0.6b dim=1024, nomic-embed-text dim=768, mxbai-embed-large dim=1024
|
||||
EMBEDDING_MODEL=nomic-embed-text
|
||||
EMBEDDING_DIM=768
|
||||
EMBEDDING_MODEL=qwen3-embedding:0.6b
|
||||
EMBEDDING_DIM=1024
|
||||
|
||||
+12
-7
@@ -1,17 +1,22 @@
|
||||
# LLM via Ollama
|
||||
LLM_BINDING=ollama
|
||||
LLM_MODEL=qwen2.5:7b
|
||||
LLM_BINDING_HOST=http://localhost:11434
|
||||
LLM_BINDING=openai
|
||||
LLM_MODEL=minimax/minimax-m2.7
|
||||
LLM_BINDING_HOST=https://openrouter.ai/api/v1
|
||||
# Placeholder: supply the real OpenRouter key outside version control, and rotate any key that was ever committed.
LLM_BINDING_API_KEY=your-openrouter-api-key-here
|
||||
|
||||
# Embeddings via Ollama
|
||||
# Embeddings via Ollama (port 11434)
|
||||
EMBEDDING_BINDING=ollama
|
||||
EMBEDDING_MODEL=nomic-embed-text
|
||||
EMBEDDING_MODEL=qwen3-embedding:4b
|
||||
EMBEDDING_BINDING_HOST=http://localhost:11434
|
||||
EMBEDDING_DIM=768
|
||||
EMBEDDING_API_KEY=
|
||||
EMBEDDING_DIM=2560
|
||||
|
||||
# Storage (local files)
|
||||
RAG_DIR=./rag_storage
|
||||
|
||||
# Timeouts (in seconds)
|
||||
EMBEDDING_TIMEOUT=60
|
||||
TIMEOUT=60
|
||||
|
||||
# Server
|
||||
HOST=0.0.0.0
|
||||
PORT=9621
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
# vllm server configuration
|
||||
# Used by: nix run .#vllm-start-llm and nix run .#vllm-start-embed
|
||||
|
||||
# Force CPU backend — no CUDA/ROCm GPU on this machine
|
||||
VLLM_TARGET_DEVICE=cpu
|
||||
|
||||
VLLM_LLM_MODEL=Qwen/Qwen3-0.6B
|
||||
VLLM_LLM_PORT=8000
|
||||
# VLLM_LLM_EXTRA_ARGS=--dtype bfloat16 --max-model-len 4096
|
||||
|
||||
VLLM_EMBED_MODEL=Qwen/Qwen3-Embedding-0.6B
|
||||
VLLM_EMBED_PORT=8001
|
||||
# VLLM_EMBED_EXTRA_ARGS=--dtype bfloat16
|
||||
@@ -3,3 +3,7 @@ data/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.env.*.local
|
||||
.direnv/
|
||||
rag_storage/
|
||||
inputs/
|
||||
*.log
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
# RAGS
|
||||
|
||||
Private learning tool. Ingest study materials → knowledge graph → query → export Anki flashcards.
|
||||
|
||||
Two systems:
|
||||
- **LightRAG** (`lightrag/`) — graph-based RAG server (primary interface)
|
||||
- **Graphiti** (`graphiti/`) — temporal knowledge graph library (Python library only, needs Neo4j)
|
||||
|
||||
## Quick Start
|
||||
|
||||
```sh
|
||||
# Ollama must be running first on :11434 with:
|
||||
# qwen3:0.6b (LLM)
|
||||
# qwen3-embedding:0.6b (embeddings)
|
||||
|
||||
# Start LightRAG only (LLM + embeddings handled externally by Ollama)
|
||||
nix run .#start
|
||||
# → http://localhost:9621/webui (React frontend)
|
||||
# → http://localhost:9621/docs (Swagger API)
|
||||
|
||||
# Graphiti needs Neo4j running first
|
||||
nix run .#neo4j-start # separate terminal
|
||||
nix develop .#graphiti
|
||||
```
|
||||
|
||||
**Always enter via `nix develop` from repo root** — never activate venvs directly. The shellHook sources `.env.lightrag` and sets `LD_LIBRARY_PATH`.
|
||||
|
||||
## Configuration
|
||||
|
||||
### `.env.lightrag`
|
||||
**Restart LightRAG after changes.**
|
||||
|
||||
| Var | Value |
|
||||
|-----|-------|
|
||||
| `LLM_BINDING` | `ollama` |
|
||||
| `LLM_MODEL` | `qwen3:0.6b` |
|
||||
| `LLM_BINDING_HOST` | `http://localhost:11434` |
|
||||
| `EMBEDDING_BINDING` | `ollama` |
|
||||
| `EMBEDDING_MODEL` | `qwen3-embedding:0.6b` |
|
||||
| `EMBEDDING_DIM` | `1024` |
|
||||
|
||||
Verify embedding works:
|
||||
```sh
|
||||
curl -s http://localhost:11434/api/embed \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"model":"qwen3-embedding:0.6b","input":"test"}'
|
||||
```
|
||||
|
||||
**Critical:** If `EMBEDDING_DIM` changes, delete `rag_storage/` before restarting — old vectors are incompatible.
|
||||
|
||||
## LightRAG Storage
|
||||
File-based by default (`JsonKVStorage`, `NanoVectorDBStorage`, `NetworkXStorage`). All data in `rag_storage/` (gitignored). Safe to delete to reset.
|
||||
|
||||
## Nix / NixOS Notes
|
||||
- `UV_PYTHON` pinned to nix-provided Python 3.12 (system has 3.14)
|
||||
- `LD_LIBRARY_PATH` set in shellHook for native wheels
|
||||
- LightRAG installs with `--extra api --extra offline-llm`
|
||||
- WebUI (React/Bun) built on first shell entry if `lightrag/lightrag/api/webui/` missing
|
||||
|
||||
## Known Issue: Pipeline Stuck
|
||||
|
||||
After config changes, the pipeline may report `busy: true` with pending async locks. Symptoms:
|
||||
- `GET /documents/pipeline_status` returns `busy: true`, `request_pending: true`
|
||||
- New inserts stay at `status: pending`
|
||||
|
||||
Fix: `POST /documents/cancel_pipeline`. If the pipeline stays stuck, stop the server, delete `rag_storage/` (this discards all indexed data), and restart.
|
||||
+8
-8
@@ -20,8 +20,8 @@ Both run fully local via Ollama. No cloud, no API keys.
|
||||
Install Ollama and pull the required models:
|
||||
|
||||
#+begin_src sh
|
||||
ollama pull qwen2.5:7b
|
||||
ollama pull nomic-embed-text
|
||||
ollama pull qwen3:0.6b
|
||||
ollama pull qwen3-embedding:0.6b
|
||||
#+end_src
|
||||
|
||||
Ollama must be running before starting either service.
|
||||
@@ -83,9 +83,9 @@ Data persists in =./data/neo4j/=. Web UI at =http://localhost:7474=.
|
||||
| Variable | Default | Notes |
|
||||
|----------------------+----------------------+--------------------------|
|
||||
| =LLM_BINDING= | =ollama= | |
|
||||
| =LLM_MODEL= | =qwen2.5:7b= | Change to any Ollama model |
|
||||
| =EMBEDDING_MODEL= | =nomic-embed-text= | |
|
||||
| =EMBEDDING_DIM= | =768= | Must match model |
|
||||
| =LLM_MODEL= | =qwen3:0.6b= | Change to any Ollama model |
|
||||
| =EMBEDDING_MODEL= | =qwen3-embedding:0.6b= | |
|
||||
| =EMBEDDING_DIM= | =1024= | Must match model |
|
||||
| =RAG_DIR= | =./rag_storage= | Where graph data lives |
|
||||
| =PORT= | =9621= | |
|
||||
|
||||
@@ -96,9 +96,9 @@ Data persists in =./data/neo4j/=. Web UI at =http://localhost:7474=.
|
||||
| =NEO4J_URI= | =bolt://localhost:7687= | |
|
||||
| =OPENAI_BASE_URL= | =http://localhost:11434/v1= | Ollama OpenAI-compatible API |
|
||||
| =OPENAI_API_KEY= | =ollama= | Dummy value, required by SDK |
|
||||
| =MODEL_NAME= | =qwen2.5:7b= | |
|
||||
| =EMBEDDING_MODEL= | =nomic-embed-text= | |
|
||||
| =EMBEDDING_DIM= | =768= | Must match model |
|
||||
| =MODEL_NAME= | =qwen3:0.6b= | |
|
||||
| =EMBEDDING_MODEL= | =qwen3-embedding:0.6b= | |
|
||||
| =EMBEDDING_DIM= | =1024= | Must match model |
|
||||
|
||||
* Structure
|
||||
|
||||
|
||||
@@ -3,11 +3,17 @@
|
||||
|
||||
inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
|
||||
outputs = { self, nixpkgs }:
|
||||
outputs =
|
||||
{ self, nixpkgs }:
|
||||
let
|
||||
system = "x86_64-linux";
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
|
||||
stdLibs = pkgs.lib.makeLibraryPath [
|
||||
pkgs.stdenv.cc.cc
|
||||
pkgs.zlib
|
||||
];
|
||||
|
||||
startNeo4j = pkgs.writeShellScript "start-neo4j" ''
|
||||
set -e
|
||||
: "''${RAGS_ROOT:=$PWD}"
|
||||
@@ -41,23 +47,86 @@
|
||||
${pkgs.neo4j}/bin/neo4j stop
|
||||
'';
|
||||
|
||||
in {
|
||||
startAll = pkgs.writeShellScript "start-all" ''
|
||||
set -e
|
||||
: "''${RAGS_ROOT:=$PWD}"
|
||||
|
||||
if [ -f "$RAGS_ROOT/.env.lightrag" ]; then
|
||||
set -a; source "$RAGS_ROOT/.env.lightrag"; set +a
|
||||
fi
|
||||
|
||||
LIGHTRAG_BIN="$RAGS_ROOT/lightrag/.venv/bin/lightrag-server"
|
||||
LOG_DIR="$RAGS_ROOT/logs"
|
||||
mkdir -p "$LOG_DIR"
|
||||
|
||||
LIGHTRAG_PID=""
|
||||
cleanup() {
|
||||
echo ""
|
||||
echo "Shutting down..."
|
||||
[ -n "$LIGHTRAG_PID" ] && kill "$LIGHTRAG_PID" 2>/dev/null || true
|
||||
wait 2>/dev/null || true
|
||||
}
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
echo "Starting LightRAG server..."
|
||||
"$LIGHTRAG_BIN" >> "$LOG_DIR/lightrag.log" 2>&1 &
|
||||
LIGHTRAG_PID=$!
|
||||
|
||||
wait_for() {
|
||||
local label=$1 url=$2 tries=0
|
||||
printf " Waiting for %s" "$label"
|
||||
while ! ${pkgs.curl}/bin/curl -so /dev/null --max-time 2 "$url" 2>/dev/null; do
|
||||
tries=$((tries+1))
|
||||
[ $tries -ge 300 ] && { echo " TIMEOUT — check logs/$label.log"; exit 1; }
|
||||
printf "."
|
||||
sleep 1
|
||||
done
|
||||
echo " ready"
|
||||
}
|
||||
|
||||
wait_for "lightrag" "http://localhost:9621/docs"
|
||||
|
||||
echo ""
|
||||
echo "All services up:"
|
||||
echo " LightRAG webui: http://localhost:9621/webui"
|
||||
echo " LightRAG API: http://localhost:9621/docs"
|
||||
echo " Ollama LLM: http://localhost:11434 (external)"
|
||||
echo " Ollama embed: http://localhost:11434/api/embed (external)"
|
||||
echo " logs: $LOG_DIR/"
|
||||
echo ""
|
||||
echo "Ctrl+C to stop everything."
|
||||
echo ""
|
||||
|
||||
tail -f "$LOG_DIR/lightrag.log"
|
||||
'';
|
||||
|
||||
in
|
||||
{
|
||||
devShells.${system} = {
|
||||
|
||||
lightrag = pkgs.mkShell {
|
||||
packages = [ pkgs.uv pkgs.python312 pkgs.curl ];
|
||||
packages = [
|
||||
pkgs.uv
|
||||
pkgs.python312
|
||||
pkgs.curl
|
||||
pkgs.bun
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
RAGS_ROOT="$PWD"
|
||||
export VIRTUAL_ENV="$RAGS_ROOT/lightrag/.venv"
|
||||
export UV_PROJECT_ENVIRONMENT="$VIRTUAL_ENV"
|
||||
export UV_PYTHON="${pkgs.python312}/bin/python3.12"
|
||||
export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath [ pkgs.stdenv.cc.cc pkgs.zlib ]}:$LD_LIBRARY_PATH"
|
||||
cd "$RAGS_ROOT/lightrag"
|
||||
export LD_LIBRARY_PATH="${stdLibs}:$LD_LIBRARY_PATH"
|
||||
|
||||
echo "Syncing lightrag venv..."
|
||||
uv sync --extra api --extra offline-llm --quiet
|
||||
source .venv/bin/activate
|
||||
(cd "$RAGS_ROOT/lightrag" && uv sync --extra api --extra offline-llm --quiet)
|
||||
source "$RAGS_ROOT/lightrag/.venv/bin/activate"
|
||||
|
||||
if [ ! -d "$RAGS_ROOT/lightrag/lightrag/api/webui" ]; then
|
||||
echo "Building frontend (first time, ~1min)..."
|
||||
(cd "$RAGS_ROOT/lightrag/lightrag_webui" && bun install --frozen-lockfile --silent && bun run build:bun)
|
||||
fi
|
||||
|
||||
if [ -f "$RAGS_ROOT/.env.lightrag" ]; then
|
||||
set -a; source "$RAGS_ROOT/.env.lightrag"; set +a
|
||||
@@ -65,22 +134,27 @@
|
||||
|
||||
echo ""
|
||||
echo "LightRAG shell ready."
|
||||
echo " start: lightrag-server"
|
||||
echo " config: $RAGS_ROOT/.env.lightrag"
|
||||
echo " needs: ollama with qwen2.5:7b + nomic-embed-text"
|
||||
echo " start server: lightrag-server"
|
||||
echo " start all: nix run .#start"
|
||||
echo " config: $RAGS_ROOT/.env.lightrag"
|
||||
echo ""
|
||||
'';
|
||||
};
|
||||
|
||||
graphiti = pkgs.mkShell {
|
||||
packages = [ pkgs.uv pkgs.python312 pkgs.neo4j pkgs.curl ];
|
||||
packages = [
|
||||
pkgs.uv
|
||||
pkgs.python312
|
||||
pkgs.neo4j
|
||||
pkgs.curl
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
RAGS_ROOT="$PWD"
|
||||
export VIRTUAL_ENV="$RAGS_ROOT/graphiti/.venv"
|
||||
export UV_PROJECT_ENVIRONMENT="$VIRTUAL_ENV"
|
||||
export UV_PYTHON="${pkgs.python312}/bin/python3.12"
|
||||
export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath [ pkgs.stdenv.cc.cc pkgs.zlib ]}:$LD_LIBRARY_PATH"
|
||||
export LD_LIBRARY_PATH="${stdLibs}:$LD_LIBRARY_PATH"
|
||||
cd "$RAGS_ROOT/graphiti"
|
||||
|
||||
echo "Syncing graphiti venv..."
|
||||
@@ -95,7 +169,6 @@
|
||||
echo "Graphiti shell ready."
|
||||
echo " neo4j: nix run .#neo4j-start (in another terminal, run first)"
|
||||
echo " config: $RAGS_ROOT/.env.graphiti"
|
||||
echo " needs: ollama with qwen2.5:7b + nomic-embed-text"
|
||||
echo ""
|
||||
'';
|
||||
};
|
||||
@@ -103,8 +176,18 @@
|
||||
};
|
||||
|
||||
apps.${system} = {
|
||||
neo4j-start = { type = "app"; program = "${startNeo4j}"; };
|
||||
neo4j-stop = { type = "app"; program = "${stopNeo4j}"; };
|
||||
start = {
|
||||
type = "app";
|
||||
program = "${startAll}";
|
||||
};
|
||||
neo4j-start = {
|
||||
type = "app";
|
||||
program = "${startNeo4j}";
|
||||
};
|
||||
neo4j-stop = {
|
||||
type = "app";
|
||||
program = "${stopNeo4j}";
|
||||
};
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
OPENAI_API_KEY=your-openai-api-key-here
|
||||
LIGHTRAG_WORKING_DIR=./lightrag_workspace
|
||||
LIGHTRAG_EMBEDDING_MODEL=text-embedding-3-small
|
||||
@@ -0,0 +1 @@
|
||||
3.10
|
||||
@@ -0,0 +1,57 @@
|
||||
import os
|
||||
import httpx
|
||||
from fastmcp import FastMCP
|
||||
|
||||
# Base URL of the LightRAG HTTP server; override with the LIGHTRAG_URL environment variable.
LIGHTRAG_URL = os.getenv("LIGHTRAG_URL", "http://localhost:9621")
|
||||
|
||||
# FastMCP server instance named "LightRAG"; the @mcp.tool functions in this module register on it.
mcp = FastMCP("LightRAG")
|
||||
|
||||
|
||||
@mcp.tool
async def insert_documents(documents: list[str]) -> str:
    """Submit text documents to LightRAG for indexing.

    Args:
        documents: Document strings to index; all of them are sent in a
            single request as the ``texts`` field.

    Returns:
        The ``track_id`` field of the server's JSON reply when present,
        otherwise its ``message`` field, otherwise ``"unknown"``.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    endpoint = f"{LIGHTRAG_URL}/documents/texts"
    async with httpx.AsyncClient(timeout=120.0) as http:
        response = await http.post(endpoint, json={"texts": documents})
        # Turn HTTP error statuses into exceptions before reading the body.
        response.raise_for_status()
        payload = response.json()

    if "track_id" in payload:
        return payload["track_id"]
    return payload.get("message", "unknown")
|
||||
|
||||
|
||||
@mcp.tool
async def query_documents(query: str, mode: str = "mix", top_k: int = 60) -> dict:
    """Retrieve context for a query from LightRAG.

    The request is sent with ``only_need_context`` set to ``True``.

    Args:
        query: The search query string.
        mode: Retrieval mode - "local", "global", "hybrid", "naive", "mix"
            (default: "mix").
        top_k: Number of top results to retrieve (default: 60).

    Returns:
        The decoded JSON body of the ``/query/data`` reply.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    request_body = {
        "query": query,
        "mode": mode,
        "only_need_context": True,
        "top_k": top_k,
    }
    async with httpx.AsyncClient(timeout=120.0) as http:
        response = await http.post(f"{LIGHTRAG_URL}/query/data", json=request_body)
        response.raise_for_status()
        return response.json()


# Start the MCP server when this file is executed as a script.
if __name__ == "__main__":
    mcp.run()
|
||||
@@ -0,0 +1,11 @@
|
||||
[project]
|
||||
name = "lightrag-mcp"
|
||||
version = "0.1.0"
|
||||
description = "MCP server exposing LightRAG document insertion and retrieval tools"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"fastmcp>=3.2.4",
|
||||
"httpx>=0.28.1",
|
||||
"lightrag-hku>=1.4.15",
|
||||
]
|
||||
@@ -0,0 +1,83 @@
|
||||
import asyncio
|
||||
import os
|
||||
from mcp import ClientSession, StdioServerParameters
|
||||
from mcp.client.stdio import stdio_client
|
||||
|
||||
|
||||
async def main():
    """Smoke-test the LightRAG MCP server over stdio.

    Launches ``main.py`` through ``uv run``, inserts three sample documents,
    then issues one ``query_documents`` call in each of the "mix", "local"
    and "global" modes and prints how many items came back.

    The server's project directory defaults to the original development
    checkout; set the ``LIGHTRAG_MCP_DIR`` environment variable to run the
    script from any other location.
    """
    # Function-scope import: json is only needed by this smoke test.
    import json

    server_dir = os.getenv("LIGHTRAG_MCP_DIR", "/home/df/projects/rags/lightrag-mcp")
    server = StdioServerParameters(
        command="uv",
        args=["run", "--directory", server_dir, "python", "main.py"],
    )

    async def query(session, arguments):
        """Call ``query_documents`` and return the "data" object of its JSON reply."""
        result = await session.call_tool("query_documents", arguments=arguments)
        return json.loads(result.content[0].text).get("data", {})

    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()

            print("--- INSERT ---")
            result = await session.call_tool(
                "insert_documents",
                arguments={
                    "documents": [
                        "Python is a high-level programming language known for its simplicity and readability.",
                        "JavaScript was created in 1995 by Brendan Eich at Netscape.",
                        "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
                    ]
                },
            )
            print(f"Insert result: {result.content[0].text[:200]}")

            print("\n--- QUERY (mix) ---")
            d = await query(
                session,
                {
                    "query": "Tell me about programming languages",
                    "mode": "mix",
                    "top_k": 60,
                },
            )
            print(f"Entities: {len(d.get('entities', []))}")
            print(f"Relationships: {len(d.get('relationships', []))}")
            print(f"Chunks: {len(d.get('chunks', []))}")
            for c in d.get("chunks", [])[:2]:
                print(f" - {c.get('content', '')[:100]}")

            print("\n--- QUERY (local) ---")
            d = await query(
                session,
                {"query": "What is Python?", "mode": "local", "top_k": 60},
            )
            print(f"Entities: {len(d.get('entities', []))}")
            print(f"Chunks: {len(d.get('chunks', []))}")

            print("\n--- QUERY (global) ---")
            d = await query(
                session,
                {
                    "query": "What topics are covered?",
                    "mode": "global",
                    "top_k": 60,
                },
            )
            print(f"Entities: {len(d.get('entities', []))}")
            print(f"Relationships: {len(d.get('relationships', []))}")

            print("\nDone!")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,77 @@
|
||||
import httpx
|
||||
import asyncio
|
||||
|
||||
|
||||
async def main():
    """Smoke-test a running LightRAG server over plain HTTP.

    Inserts five sample documents through ``/documents/texts`` and then posts
    one ``/query/data`` request in each of the "mix", "local" and "global"
    modes, printing the reply's top-level keys and the size of the returned
    lists.

    The server address defaults to ``http://localhost:9621`` and can be
    overridden with the ``LIGHTRAG_URL`` environment variable.
    """
    # Function-scope import: the file's top-level imports are httpx and asyncio only.
    import os

    base_url = os.getenv("LIGHTRAG_URL", "http://localhost:9621")

    def retrieval_payload(body):
        """Return the dict that holds the entities/relationships/chunks lists.

        NOTE(review): the lists appear to be nested under the reply's "data"
        key; confirm against the /query/data response schema. A reply that
        carries them at the top level is still handled.
        """
        nested = body.get("data")
        return nested if isinstance(nested, dict) else body

    async def run_query(client, question, mode):
        """POST one context-only query and return its retrieval payload."""
        r = await client.post(
            f"{base_url}/query/data",
            json={
                "query": question,
                "mode": mode,
                "only_need_context": True,
                "top_k": 60,
            },
        )
        r.raise_for_status()
        body = r.json()
        print(f"mode={mode} keys: {list(body.keys())}")
        return retrieval_payload(body)

    async with httpx.AsyncClient(timeout=120.0) as client:
        print("--- INSERT ---")
        docs = [
            "Python is a high-level programming language known for its simplicity and readability.",
            "JavaScript was created in 1995 by Brendan Eich at Netscape.",
            "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
            "LightRAG combines knowledge graph and vector retrieval for enhanced RAG applications.",
            "FastMCP is a framework for building MCP servers in Python.",
        ]
        r = await client.post(f"{base_url}/documents/texts", json={"texts": docs})
        r.raise_for_status()
        print(f"Inserted: {r.json()}")

        print("\n--- QUERY (mix mode) ---")
        result = await run_query(client, "Tell me about programming languages", "mix")
        if "chunks" in result:
            print(f" chunks: {len(result['chunks'])} returned")
            for c in result["chunks"][:2]:
                print(f" - {c.get('content', '')[:100]}...")

        print("\n--- QUERY (local mode) ---")
        result = await run_query(client, "What is Python?", "local")
        if "chunks" in result:
            print(f" chunks: {len(result['chunks'])} returned")

        print("\n--- QUERY (global mode) ---")
        result = await run_query(client, "What topics are covered?", "global")
        if "entities" in result:
            print(f" entities: {len(result['entities'])} returned")
        if "relationships" in result:
            print(f" relationships: {len(result['relationships'])} returned")

        print("\nDone!")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Generated
+3048
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,51 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Patch vllm's cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu.
|
||||
|
||||
The upstream CUDA build only activates the CPU platform on macOS or when
|
||||
the version string contains "cpu" (source builds). This patch adds a third
|
||||
condition: if VLLM_TARGET_DEVICE=cpu is set in the environment.
|
||||
|
||||
Run after every `uv pip install vllm` — idempotent.
|
||||
"""
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
# The vllm virtualenv lives next to this script's parent directory.
venv = pathlib.Path(__file__).parent.parent / "vllm" / ".venv"
target = venv / "lib" / "python3.12" / "site-packages" / "vllm" / "platforms" / "__init__.py"

# Upstream snippet the patch anchors on; it must match the installed file
# character for character, whitespace included.
# NOTE(review): the 8-space base indentation is assumed from the snippet
# sitting inside a function's try block — confirm against the installed vllm.
old = '''\
        if not is_cpu:
            import sys

            is_cpu = sys.platform.startswith("darwin")
            if is_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because the machine is MacOS."
                )'''

# The added check imports os itself, the same way the anchored snippet imports
# sys, so it does not depend on the patched module having os in scope.
new = old + '''

        if not is_cpu:
            import os

            is_cpu = os.environ.get("VLLM_TARGET_DEVICE", "").lower() == "cpu"
            if is_cpu:
                logger.debug(
                    "Confirmed CPU platform is available because VLLM_TARGET_DEVICE=cpu."
                )'''


def patch_source(content):
    """Return ``content`` with the VLLM_TARGET_DEVICE check added.

    Args:
        content: Full text of vllm's ``platforms/__init__.py``.

    Returns:
        The patched text, or ``None`` when ``content`` already mentions
        ``VLLM_TARGET_DEVICE`` (treated as "already patched").

    Raises:
        ValueError: If the anchor snippet ``old`` does not occur in ``content``.
    """
    if "VLLM_TARGET_DEVICE" in content:
        return None
    if old not in content:
        raise ValueError("patch target not found")
    # Only the first occurrence is extended.
    return content.replace(old, new, 1)


def main():
    """Patch the installed vllm in place and return the process exit code.

    Returns 0 when vllm is not installed, when the patch is already present,
    or after patching; returns 1 when the anchor snippet cannot be found.
    """
    if not target.exists():
        print(f"vllm not installed at {target}, skipping patch")
        return 0

    content = target.read_text(encoding="utf-8")
    try:
        patched = patch_source(content)
    except ValueError:
        print("ERROR: patch target not found — vllm version may have changed", file=sys.stderr)
        return 1

    if patched is None:
        print("patch already applied")
        return 0

    target.write_text(patched, encoding="utf-8")
    print("patched cpu_platform_plugin to respect VLLM_TARGET_DEVICE=cpu")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
Reference in New Issue
Block a user