LLM Coding Assistant

Purpose

End-to-end implementation of an LLM-powered coding assistant that uses MCP tools for file system access, RAG for codebase retrieval, and an agentic loop for multi-step code generation and refactoring. Synthesized from: MCP Protocol, Agentic Coding, Repo RAG for Code, LLM Code Generation, RAG Architecture, Agentic Loop.

Architecture

IDE / CLI interface
    │
    │  User request: "Add auth middleware to the FastAPI app"
    ▼
Agentic loop (Claude / GPT-4o)
    │
    ├── Plan: read relevant files → understand structure → write patch
    │
    ├─► MCP Tool calls:
    │     ├── search_code("middleware")     ← ripgrep search
    │     ├── read_file("src/main.py")      ← read current code
    │     ├── read_file("tests/test_api.py")
    │     └── run_tests("tests/")           ← verify current tests pass
    │
    ├─► RAG retrieval:
    │     ├── embed query "FastAPI auth middleware JWT"
    │     ├── vector search → top-k code chunks
    │     └── inject into context: relevant patterns from codebase + docs
    │
    ├─► LLM generates patch (structured output via instructor)
    │
    └─► Tool call: write_file("src/middleware/auth.py", code)
             + run_tests("tests/") ← verify tests still pass

Final response: diff + explanation to user

Component stack:

| Layer | Technology | Role |
| --- | --- | --- |
| LLM | Claude 3.5 / GPT-4o | Code generation, planning |
| Agent framework | MCP + custom loop | Tool orchestration |
| Code retrieval | FAISS + code embeddings | Codebase-aware context |
| Structured output | instructor + Pydantic | Type-safe LLM responses |
| Sandboxing | subprocess + path guards | Safe tool execution |

Examples

1. Code RAG index (rag/indexer.py):

import ast, pathlib
from sentence_transformers import SentenceTransformer
import faiss, numpy as np, json
 
# Code-specialised embedding model; must match the model name used in
# rag/retriever.py or query and index vectors live in different spaces.
MODEL = SentenceTransformer("jinaai/jina-embeddings-v2-base-code")
 
def chunk_python_file(path: pathlib.Path) -> list[dict]:
    """Split a Python file into one chunk per function/class definition.

    Walks the AST, so nested functions and methods each get their own
    chunk in addition to appearing inside their enclosing class chunk.

    Args:
        path: Path to a ``.py`` file.

    Returns:
        List of dicts with keys ``path``, ``name``, ``code`` (and
        ``lineno`` for definition chunks). Falls back to a single
        whole-file chunk when the file cannot be parsed or contains no
        definitions, so every file is still indexed.
    """
    # Python source is UTF-8 by default (PEP 3120); don't rely on locale.
    source = path.read_text(encoding="utf-8")
    fallback = [{"path": str(path), "code": source, "name": str(path.name)}]
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return fallback
    # Hoisted out of the loop: was recomputed for every definition.
    lines = source.splitlines()
    chunks = []
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            chunks.append({
                "path": str(path),
                "name": node.name,
                "code": "\n".join(lines[node.lineno - 1 : node.end_lineno]),
                "lineno": node.lineno,
            })
    return chunks or fallback
 
def build_index(repo_root: str, index_path: str):
    """Embed every function/class chunk under *repo_root* into a FAISS index.

    Writes two artifacts: ``{index_path}.faiss`` (the vector index) and
    ``{index_path}.meta.json`` (chunk metadata, aligned by row id).

    Args:
        repo_root: Directory to scan recursively for ``*.py`` files.
        index_path: Output path prefix for the two artifact files.

    Raises:
        ValueError: If no Python chunks were found (the old code crashed
            with an opaque IndexError on ``embeddings.shape[1]``).
    """
    # Directories whose contents should never be indexed.
    skip_dirs = {".venv", "node_modules", ".git", "__pycache__"}
    repo = pathlib.Path(repo_root)
    all_chunks = []
    for py_file in repo.rglob("*.py"):
        # Compare path components rather than substrings so the filter
        # works with Windows separators and matches exact directory
        # names (the old '"/.venv" in str(path)' check did neither).
        if skip_dirs.intersection(py_file.parts):
            continue
        all_chunks.extend(chunk_python_file(py_file))

    if not all_chunks:
        raise ValueError(f"No Python chunks found under {repo_root}")

    # Prefix each chunk with its location so the embedding sees context.
    texts = [f"# {c['path']}:{c['name']}\n{c['code']}" for c in all_chunks]
    embeddings = MODEL.encode(texts, normalize_embeddings=True, show_progress_bar=True)
    embeddings = np.array(embeddings, dtype="float32")

    # Inner product over L2-normalised vectors == cosine similarity.
    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(embeddings)

    faiss.write_index(index, f"{index_path}.faiss")
    with open(f"{index_path}.meta.json", "w") as f:
        json.dump(all_chunks, f)

    print(f"Indexed {len(all_chunks)} chunks from {repo_root}")

2. RAG retrieval (rag/retriever.py):

import faiss, json, numpy as np
from sentence_transformers import SentenceTransformer
 
class CodeRetriever:
    """Semantic search over a FAISS code index built by rag/indexer.py.

    Loads the index and its JSON metadata sidecar once at construction;
    each ``search`` call embeds the query and returns the nearest chunks.
    """

    def __init__(self, index_path: str):
        # Must be the same embedding model used at index-build time,
        # otherwise query and index vectors are incomparable.
        self.model = SentenceTransformer("jinaai/jina-embeddings-v2-base-code")
        self.index = faiss.read_index(f"{index_path}.faiss")
        with open(f"{index_path}.meta.json") as f:
            self.chunks = json.load(f)

    def search(self, query: str, top_k: int = 5) -> list[dict]:
        """Return up to *top_k* chunk dicts, each augmented with a 'score'.

        May return fewer than *top_k* results when the index is smaller
        than *top_k*.
        """
        emb = self.model.encode([query], normalize_embeddings=True)
        emb = np.array(emb, dtype="float32")
        scores, idxs = self.index.search(emb, top_k)
        return [
            {**self.chunks[i], "score": float(scores[0][j])}
            for j, i in enumerate(idxs[0])
            # FAISS pads missing results with -1; the old `i < len(...)`
            # guard let -1 through and silently returned chunks[-1].
            if 0 <= i < len(self.chunks)
        ]

3. Agentic loop with MCP tools (agent/loop.py):

import anthropic, json
from rag.retriever import CodeRetriever
from mcp_client import MCPClient   # wraps the MCP server we built
 
# Module-level singletons shared by every run_agent() call: one API
# client, one retriever over the pre-built codebase index, and one MCP
# client wrapping the local tool server.
client = anthropic.Anthropic()
retriever = CodeRetriever("/workspace/.mcp/index")
mcp = MCPClient("/workspace/.mcp/server.py")
 
# System prompt: read-before-write, test-after-write workflow.
SYSTEM = """You are an expert software engineer working inside a code repository.
You have access to tools to read/write files, run tests, and search code.
Before writing code, read the relevant files. After writing, run tests to verify correctness.
"""
 
def run_agent(user_request: str, max_turns: int = 10):
    """Run the agentic loop for a single user request.

    The request is augmented with top-k RAG hits from the codebase
    index, then the model is looped: each turn either finishes with a
    text answer or issues tool calls, whose results are fed back to the
    model as the next user turn.

    Args:
        user_request: Natural-language task description.
        max_turns: Hard cap on model round-trips to prevent runaway loops.

    Returns:
        The model's final text response, or a fallback string when the
        loop ends without one.
    """
    # Augment the request with semantically relevant code from the repo.
    rag_hits = retriever.search(user_request, top_k=4)
    rag_context = "\n\n".join(
        f"```python\n# {h['path']}:{h['name']}\n{h['code']}\n```" for h in rag_hits
    )

    messages = [{
        "role": "user",
        "content": f"{user_request}\n\nRelevant codebase context:\n{rag_context}"
    }]
    tools = mcp.list_tools_as_anthropic_schema()

    for _ in range(max_turns):
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=4096,
            system=SYSTEM,
            tools=tools,
            messages=messages,
        )
        messages.append({"role": "assistant", "content": response.content})

        # Finish on any non-tool stop reason (end_turn, max_tokens,
        # stop_sequence). The old `== "end_turn"` check fell through to
        # the tool-handling branch on e.g. max_tokens and appended an
        # empty tool_results message, which the API rejects.
        if response.stop_reason != "tool_use":
            for block in response.content:
                if block.type == "text":
                    return block.text
            return "Done"

        # Execute every requested tool and feed results back to the model.
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                try:
                    result = mcp.call_tool(block.name, block.input)
                    entry = {
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": result,
                    }
                except Exception as exc:
                    # Surface the failure to the model instead of
                    # crashing the loop; it can retry or adapt its plan.
                    entry = {
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": f"Tool error: {exc}",
                        "is_error": True,
                    }
                tool_results.append(entry)
        messages.append({"role": "user", "content": tool_results})

    return "Max turns reached"

4. CLI entrypoint (main.py):

import sys
from agent.loop import run_agent
 
if __name__ == "__main__":
    # Join all CLI words into one request; prompt interactively when
    # nothing (or only empty strings) was passed on the command line.
    joined = " ".join(sys.argv[1:])
    user_request = joined if joined else input("Request: ")
    print(run_agent(user_request))

Usage:

# Build codebase index
python -m rag.indexer --repo /workspace --output /workspace/.mcp/index
 
# Run assistant
python main.py "Add rate limiting middleware to the FastAPI app"
python main.py "Write a pytest test for the predict endpoint"
python main.py "Refactor UserService to use async/await throughout"

References