feat: add lightrag-mcp MCP server + agent tooling

- Add AGENTS.md with repo guidelines
- Add lightrag-mcp: FastMCP server exposing insert_documents() and query_documents() to LLM agents via stdio transport; it talks to the LightRAG REST API
- Add scripts/patch-vllm-cpu.py for CPU inference patching
- Add .env.vllm for vLLM configuration
- Update flake.nix with expanded dev shell
- Update .env.lightrag
- Remove CLAUDE.md (replaced by AGENTS.md)
2026-04-19 21:46:47 +08:00
parent c5dc2cf637
commit 4495a3cc62
14 changed files with 3510 additions and 100 deletions
@@ -0,0 +1,3 @@
+OPENAI_API_KEY=your-openai-api-key-here
+LIGHTRAG_WORKING_DIR=./lightrag_workspace
+LIGHTRAG_EMBEDDING_MODEL=text-embedding-3-small
@@ -0,0 +1 @@
+3.10
@@ -0,0 +1,57 @@
+import os
+import httpx
+from fastmcp import FastMCP
+
+LIGHTRAG_URL = os.getenv("LIGHTRAG_URL", "http://localhost:9621")
+
+mcp = FastMCP("LightRAG")
+
+
+@mcp.tool
+async def insert_documents(documents: list[str]) -> str:
+    """Insert text documents into LightRAG for indexing.
+
+    Args:
+        documents: List of document strings to index. Each string is treated as a separate document.
+
+    Returns:
+        Tracking ID for the insertion operation.
+    """
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        r = await client.post(
+            f"{LIGHTRAG_URL}/documents/texts",
+            json={"texts": documents},
+        )
+        r.raise_for_status()
+        data = r.json()
+        return data.get("track_id", data.get("message", "unknown"))
+
+
+@mcp.tool
+async def query_documents(query: str, mode: str = "mix", top_k: int = 60) -> dict:
+    """Query LightRAG and retrieve relevant context without LLM generation.
+
+    Args:
+        query: The search query string.
+        mode: Retrieval mode - "local", "global", "hybrid", "naive", "mix" (default: "mix").
+        top_k: Number of top results to retrieve (default: 60).
+
+    Returns:
+        Structured retrieval data including entities, relationships, and text chunks.
+    """
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        r = await client.post(
+            f"{LIGHTRAG_URL}/query/data",
+            json={
+                "query": query,
+                "mode": mode,
+                "only_need_context": True,
+                "top_k": top_k,
+            },
+        )
+        r.raise_for_status()
+        return r.json()
+
+
+# Start the MCP server only when this file is executed directly, not on import.
+if __name__ == "__main__":
+    mcp.run()
@@ -0,0 +1,11 @@
+[project]
+name = "lightrag-mcp"
+version = "0.1.0"
+description = "MCP server exposing LightRAG document insertion and retrieval tools"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "fastmcp>=3.2.4",
+    "httpx>=0.28.1",
+    "lightrag-hku>=1.4.15",
+]
@@ -0,0 +1,83 @@
+import asyncio
+import os
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+
+
+async def main():
+    server = StdioServerParameters(
+        command="uv",
+        args=[
+            "run",
+            "--directory",
+            "/home/df/projects/rags/lightrag-mcp",
+            "python",
+            "main.py",
+        ],
+    )
+
+    async with stdio_client(server) as (read, write):
+        async with ClientSession(read, write) as session:
+            await session.initialize()
+
+            print("--- INSERT ---")
+            result = await session.call_tool(
+                "insert_documents",
+                arguments={
+                    "documents": [
+                        "Python is a high-level programming language known for its simplicity and readability.",
+                        "JavaScript was created in 1995 by Brendan Eich at Netscape.",
+                        "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
+                    ]
+                },
+            )
+            print(f"Insert result: {result.content[0].text[:200]}")
+
+            print("\n--- QUERY (mix) ---")
+            result = await session.call_tool(
+                "query_documents",
+                arguments={
+                    "query": "Tell me about programming languages",
+                    "mode": "mix",
+                    "top_k": 60,
+                },
+            )
+            import json
+
+            data = json.loads(result.content[0].text)
+            d = data.get("data", {})
+            print(f"Entities: {len(d.get('entities', []))}")
+            print(f"Relationships: {len(d.get('relationships', []))}")
+            print(f"Chunks: {len(d.get('chunks', []))}")
+            for c in d.get("chunks", [])[:2]:
+                print(f"  - {c.get('content', '')[:100]}")
+
+            print("\n--- QUERY (local) ---")
+            result = await session.call_tool(
+                "query_documents",
+                arguments={"query": "What is Python?", "mode": "local", "top_k": 60},
+            )
+            data = json.loads(result.content[0].text)
+            d = data.get("data", {})
+            print(f"Entities: {len(d.get('entities', []))}")
+            print(f"Chunks: {len(d.get('chunks', []))}")
+
+            print("\n--- QUERY (global) ---")
+            result = await session.call_tool(
+                "query_documents",
+                arguments={
+                    "query": "What topics are covered?",
+                    "mode": "global",
+                    "top_k": 60,
+                },
+            )
+            data = json.loads(result.content[0].text)
+            d = data.get("data", {})
+            print(f"Entities: {len(d.get('entities', []))}")
+            print(f"Relationships: {len(d.get('relationships', []))}")
+
+    print("\nDone!")
+
+
+# Run the MCP smoke test only when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,77 @@
+import httpx
+import asyncio
+
+
+async def main():
+    base_url = "http://localhost:9621"
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        print("--- INSERT ---")
+        docs = [
+            "Python is a high-level programming language known for its simplicity and readability.",
+            "JavaScript was created in 1995 by Brendan Eich at Netscape.",
+            "Machine learning is a subset of artificial intelligence that enables systems to learn from data.",
+            "LightRAG combines knowledge graph and vector retrieval for enhanced RAG applications.",
+            "FastMCP is a framework for building MCP servers in Python.",
+        ]
+        r = await client.post(f"{base_url}/documents/texts", json={"texts": docs})
+        r.raise_for_status()
+        print(f"Inserted: {r.json()}")
+
+        print("\n--- QUERY (mix mode) ---")
+        r = await client.post(
+            f"{base_url}/query/data",
+            json={
+                "query": "Tell me about programming languages",
+                "mode": "mix",
+                "only_need_context": True,
+                "top_k": 60,
+            },
+        )
+        r.raise_for_status()
+        result = r.json()
+        print(f"mode=mix keys: {list(result.keys())}")
+        if "chunks" in result:
+            print(f"  chunks: {len(result['chunks'])} returned")
+            for c in result["chunks"][:2]:
+                print(f"    - {c.get('content', '')[:100]}...")
+
+        print("\n--- QUERY (local mode) ---")
+        r = await client.post(
+            f"{base_url}/query/data",
+            json={
+                "query": "What is Python?",
+                "mode": "local",
+                "only_need_context": True,
+                "top_k": 60,
+            },
+        )
+        r.raise_for_status()
+        result = r.json()
+        print(f"mode=local keys: {list(result.keys())}")
+        if "chunks" in result:
+            print(f"  chunks: {len(result['chunks'])} returned")
+
+        print("\n--- QUERY (global mode) ---")
+        r = await client.post(
+            f"{base_url}/query/data",
+            json={
+                "query": "What topics are covered?",
+                "mode": "global",
+                "only_need_context": True,
+                "top_k": 60,
+            },
+        )
+        r.raise_for_status()
+        result = r.json()
+        print(f"mode=global keys: {list(result.keys())}")
+        if "entities" in result:
+            print(f"  entities: {len(result['entities'])} returned")
+        if "relationships" in result:
+            print(f"  relationships: {len(result['relationships'])} returned")
+
+    print("\nDone!")
+
+
+# Run the HTTP smoke test only when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(main())