feat: add lightrag-mcp MCP server + agent tooling

- Add AGENTS.md with repo guidelines
- Add lightrag-mcp: FastMCP server exposing insert_documents() + query_documents()
  to LLM agents via stdio transport, talks to LightRAG REST API
- Add scripts/patch-vllm-cpu.py for CPU inference patching
- Add .env.vllm for vLLM configuration
- Update flake.nix with expanded dev shell
- Update .env.lightrag
- Remove CLAUDE.md (replaced by AGENTS.md)
This commit is contained in:
2026-04-19 21:46:47 +08:00
parent c5dc2cf637
commit 4495a3cc62
14 changed files with 3510 additions and 100 deletions
+3
View File
@@ -0,0 +1,3 @@
OPENAI_API_KEY=your-openai-api-key-here
LIGHTRAG_WORKING_DIR=./lightrag_workspace
LIGHTRAG_EMBEDDING_MODEL=text-embedding-3-small
+1
View File
@@ -0,0 +1 @@
3.10
View File
+57
View File
@@ -0,0 +1,57 @@
import os
import httpx
from fastmcp import FastMCP
# Base URL of the LightRAG REST API. Read from the LIGHTRAG_URL environment
# variable, falling back to a local server on port 9621.
LIGHTRAG_URL = os.getenv("LIGHTRAG_URL", "http://localhost:9621")
# FastMCP server instance named "LightRAG"; the tool functions below are
# registered on it with the @mcp.tool decorator.
mcp = FastMCP("LightRAG")
@mcp.tool
async def insert_documents(documents: list[str]) -> str:
    """Send text documents to LightRAG so they can be indexed.

    Args:
        documents: Document strings to index. They are posted together in a
            single request; each string is treated as a separate document.

    Returns:
        The ``track_id`` field of the server's JSON reply when present,
        otherwise its ``message`` field, otherwise ``"unknown"``.
    """
    endpoint = f"{LIGHTRAG_URL}/documents/texts"
    body = {"texts": documents}
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(endpoint, json=body)
        # Turn 4xx/5xx replies into an exception before reading the body.
        response.raise_for_status()
        reply = response.json()
        fallback = reply.get("message", "unknown")
        return reply.get("track_id", fallback)
@mcp.tool
async def query_documents(query: str, mode: str = "mix", top_k: int = 60) -> dict:
    """Fetch retrieval context from LightRAG for a query, without LLM generation.

    Args:
        query: The search query string.
        mode: Retrieval mode, one of "local", "global", "hybrid", "naive" or
            "mix" (default: "mix").
        top_k: Number of top results to retrieve (default: 60).

    Returns:
        The server's parsed JSON reply, passed through unchanged: structured
        retrieval data including entities, relationships, and text chunks.
    """
    endpoint = f"{LIGHTRAG_URL}/query/data"
    body = {
        "query": query,
        "mode": mode,
        "only_need_context": True,
        "top_k": top_k,
    }
    async with httpx.AsyncClient(timeout=120.0) as client:
        response = await client.post(endpoint, json=body)
        # Turn 4xx/5xx replies into an exception before reading the body.
        response.raise_for_status()
        return response.json()
# Start the MCP server only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    mcp.run()
+11
View File
@@ -0,0 +1,11 @@
[project]
name = "lightrag-mcp"
version = "0.1.0"
description = "FastMCP server exposing LightRAG document insertion and retrieval to LLM agents"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"fastmcp>=3.2.4",
"httpx>=0.28.1",
"lightrag-hku>=1.4.15",
]
+83
View File
@@ -0,0 +1,83 @@
import asyncio
import os
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
def _tool_result_data(result):
    """Decode a ``query_documents`` tool result and return its ``data`` mapping.

    The tool result carries the server's JSON reply as text in its first
    content item. A missing or null ``data`` field yields an empty dict so the
    callers' ``.get(...)`` lookups stay safe.
    """
    import json  # function-scope import keeps this block self-contained

    return json.loads(result.content[0].text).get("data") or {}


async def main():
    """Smoke-test the LightRAG MCP server end to end over stdio.

    Launches ``main.py`` through ``uv run``, inserts three sample documents
    with the ``insert_documents`` tool, then calls ``query_documents`` in
    "mix", "local" and "global" modes and prints how many entities,
    relationships and chunks came back.

    The server project directory defaults to the original developer path and
    can be overridden with the ``LIGHTRAG_MCP_DIR`` environment variable, so
    the script also runs on other machines and checkouts.
    """
    project_dir = os.getenv(
        "LIGHTRAG_MCP_DIR", "/home/df/projects/rags/lightrag-mcp"
    )
    server = StdioServerParameters(
        command="uv",
        args=["run", "--directory", project_dir, "python", "main.py"],
    )

    async with stdio_client(server) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()

            print("--- INSERT ---")
            result = await session.call_tool(
                "insert_documents",
                arguments={
                    "documents": [
                        "Python is a high-level programming language known for its simplicity and readability.",
                        "JavaScript was created in 1995 by Brendan Eich at Netscape.",
                        "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
                    ]
                },
            )
            # Truncate so an unexpectedly long reply does not flood the terminal.
            print(f"Insert result: {result.content[0].text[:200]}")

            print("\n--- QUERY (mix) ---")
            result = await session.call_tool(
                "query_documents",
                arguments={
                    "query": "Tell me about programming languages",
                    "mode": "mix",
                    "top_k": 60,
                },
            )
            d = _tool_result_data(result)
            print(f"Entities: {len(d.get('entities', []))}")
            print(f"Relationships: {len(d.get('relationships', []))}")
            print(f"Chunks: {len(d.get('chunks', []))}")
            # Preview only the first two chunks, 100 characters each.
            for c in d.get("chunks", [])[:2]:
                print(f" - {c.get('content', '')[:100]}")

            print("\n--- QUERY (local) ---")
            result = await session.call_tool(
                "query_documents",
                arguments={"query": "What is Python?", "mode": "local", "top_k": 60},
            )
            d = _tool_result_data(result)
            print(f"Entities: {len(d.get('entities', []))}")
            print(f"Chunks: {len(d.get('chunks', []))}")

            print("\n--- QUERY (global) ---")
            result = await session.call_tool(
                "query_documents",
                arguments={
                    "query": "What topics are covered?",
                    "mode": "global",
                    "top_k": 60,
                },
            )
            d = _tool_result_data(result)
            print(f"Entities: {len(d.get('entities', []))}")
            print(f"Relationships: {len(d.get('relationships', []))}")

            print("\nDone!")
# Run the async smoke test only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
+77
View File
@@ -0,0 +1,77 @@
import httpx
import asyncio
def _retrieval_payload(result):
    """Return the mapping that holds entities/relationships/chunks.

    Accepts both reply shapes: the retrieval lists nested under a ``data``
    object, or placed directly at the top level of the reply.
    """
    inner = result.get("data")
    return inner if isinstance(inner, dict) else result


async def _post_query(client, base_url, query, mode):
    """POST one context-only retrieval request and return the parsed JSON reply."""
    r = await client.post(
        f"{base_url}/query/data",
        json={
            "query": query,
            "mode": mode,
            "only_need_context": True,
            "top_k": 60,
        },
    )
    r.raise_for_status()
    return r.json()


async def main():
    """Smoke-test the LightRAG REST API directly, without the MCP layer.

    Inserts five sample documents through ``/documents/texts``, then queries
    ``/query/data`` in "mix", "local" and "global" modes and prints the reply
    keys plus counts of the returned chunks, entities and relationships.

    The server address is read from the ``LIGHTRAG_URL`` environment variable
    and defaults to ``http://localhost:9621``.
    """
    import os  # function-scope import: the file's import block lacks ``os``

    base_url = os.getenv("LIGHTRAG_URL", "http://localhost:9621")

    async with httpx.AsyncClient(timeout=120.0) as client:
        print("--- INSERT ---")
        docs = [
            "Python is a high-level programming language known for its simplicity and readability.",
            "JavaScript was created in 1995 by Brendan Eich at Netscape.",
            "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
            "LightRAG combines knowledge graph and vector retrieval for enhanced RAG applications.",
            "FastMCP is a framework for building MCP servers in Python.",
        ]
        r = await client.post(f"{base_url}/documents/texts", json={"texts": docs})
        r.raise_for_status()
        print(f"Inserted: {r.json()}")

        print("\n--- QUERY (mix mode) ---")
        result = await _post_query(
            client, base_url, "Tell me about programming languages", "mix"
        )
        print(f"mode=mix keys: {list(result.keys())}")
        payload = _retrieval_payload(result)
        if "chunks" in payload:
            print(f" chunks: {len(payload['chunks'])} returned")
            # Preview only the first two chunks, 100 characters each.
            for c in payload["chunks"][:2]:
                print(f" - {c.get('content', '')[:100]}...")

        print("\n--- QUERY (local mode) ---")
        result = await _post_query(client, base_url, "What is Python?", "local")
        print(f"mode=local keys: {list(result.keys())}")
        payload = _retrieval_payload(result)
        if "chunks" in payload:
            print(f" chunks: {len(payload['chunks'])} returned")

        print("\n--- QUERY (global mode) ---")
        result = await _post_query(
            client, base_url, "What topics are covered?", "global"
        )
        print(f"mode=global keys: {list(result.keys())}")
        payload = _retrieval_payload(result)
        if "entities" in payload:
            print(f" entities: {len(payload['entities'])} returned")
        if "relationships" in payload:
            print(f" relationships: {len(payload['relationships'])} returned")

        print("\nDone!")
# Run the async smoke test only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
+3048
View File
File diff suppressed because it is too large Load Diff