# LLM Grounding & Citations
An answer without a source is just an opinion. Ground your LLM to the documents it retrieved.
## What Is Grounding?
Grounding means forcing the LLM to:
- Only use retrieved context — no hallucinated facts
- Cite every claim — map each sentence to a source chunk
- Admit uncertainty — say “I don’t know” when context is insufficient
## System Prompt for Grounded Answers
The simplest form of grounding is a strict system prompt:
# System prompt that limits the model to the supplied context chunks and
# requires a [CHUNK_N] citation after every factual claim. The leading and
# trailing newlines are part of the prompt text.
GROUNDED_SYSTEM_PROMPT = (
    "\n"
    "You are a precise research assistant. You MUST follow these rules:\n"
    "1. Answer ONLY using the provided context chunks.\n"
    "2. If the answer is not in the context, say: "
    "\"I don't have enough information to answer this.\"\n"
    "3. Cite your sources using [CHUNK_N] notation after every factual claim.\n"
    "4. Do not combine information from different chunks into a single claim "
    "without citing both.\n"
    "5. Do not add background knowledge not present in the context.\n"
)
def grounded_answer(query: str, context_chunks: list[str]) -> str:
    """Answer ``query`` using only ``context_chunks``, with [CHUNK_N] citations.

    Each chunk is labelled ``[CHUNK_1]``, ``[CHUNK_2]``, ... (1-based, in list
    order) and sent to the chat model together with GROUNDED_SYSTEM_PROMPT.

    Args:
        query: The user's question.
        context_chunks: Retrieved text chunks the answer must be based on.

    Returns:
        The model's reply text. When there is no usable context (an empty
        list, or only blank chunks) the fixed sentence from rule 2 of the
        system prompt is returned without calling the API. An empty string
        is returned if the model reply carries no text content.
    """
    # Must stay in sync with the wording of rule 2 in GROUNDED_SYSTEM_PROMPT.
    no_answer = "I don't have enough information to answer this."

    # With nothing to ground on, rule 2 fixes the only valid reply, so the
    # API round trip is skipped.
    if not any(chunk.strip() for chunk in context_chunks):
        return no_answer

    from openai import OpenAI

    client = OpenAI()

    # Format context with chunk IDs
    formatted_context = "\n\n".join(
        f"[CHUNK_{i}]: {chunk}"
        for i, chunk in enumerate(context_chunks, start=1)
    )

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": GROUNDED_SYSTEM_PROMPT},
            {
                "role": "user",
                "content": f"Context:\n{formatted_context}\n\nQuestion: {query}",
            },
        ],
        temperature=0,
    )

    # message.content is optional in the SDK; coerce None to "" so the
    # declared ``-> str`` return type always holds.
    return response.choices[0].message.content or ""
# Example: the first two chunks answer the question; the third is unrelated.
question = "Who made FAISS and does it support GPU?"
chunks = [
    "FAISS was developed by Facebook AI Research in 2017.",
    "FAISS supports both CPU and GPU index types.",
    "The HNSW algorithm provides approximate nearest-neighbor search.",
]

answer = grounded_answer(question, chunks)
print(answer)
# "FAISS was developed by Facebook AI Research [CHUNK_1]. It supports both CPU and GPU [CHUNK_2]."

## Structured Citations with Pydantic
Return structured answers where every claim has a source:
from pydantic import BaseModel
from openai import OpenAI
import json
client = OpenAI()
class Citation(BaseModel):
    """One supporting reference attached to a grounded answer."""

    # Number of the cited chunk. Presumably the 1-based "[N]" label that
    # structured_grounded_answer puts in the prompt (the value is chosen by
    # the model and is not checked against the chunk list here).
    chunk_id: int
    quote: str  # relevant excerpt from the chunk
class GroundedAnswer(BaseModel):
    """Structured reply: the answer text, its citations, and a confidence score."""

    answer: str
    citations: list[Citation]
    # NOTE(review): the 0-1 range is only requested in the prompt text;
    # nothing in this model enforces it.
    confidence: float  # 0-1, how confident the model is
def structured_grounded_answer(query: str, chunks: list[str]) -> GroundedAnswer:
    """Answer ``query`` from ``chunks`` and return the reply as a GroundedAnswer.

    The chunks are numbered from 1 in list order, the model is asked for a
    JSON object (``response_format`` is ``json_object``), and the decoded
    reply is passed to ``GroundedAnswer`` as keyword arguments.
    """
    numbered_lines = [
        f"[{number}] {text}" for number, text in enumerate(chunks, start=1)
    ]
    formatted = "\n".join(numbered_lines)

    system_message = {
        "role": "system",
        "content": (
            "Answer the question using the numbered context chunks. "
            "Return JSON with fields: answer (string), "
            "citations (list of {chunk_id, quote}), confidence (0-1)."
        ),
    }
    user_message = {
        "role": "user",
        "content": f"Context:\n{formatted}\n\nQuestion: {query}",
    }

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[system_message, user_message],
        response_format={"type": "json_object"},
        temperature=0,
    )

    payload = json.loads(response.choices[0].message.content)
    return GroundedAnswer(**payload)
# Example: ask one question over two chunks and show the structured reply.
result = structured_grounded_answer(
    query="What does FAISS support?",
    chunks=[
        "FAISS supports GPU acceleration.",
        "FAISS handles billion-scale vector search.",
    ],
)

print(f"Answer: {result.answer}")
print(f"Confidence: {result.confidence}")
for c in result.citations:
    print(f" [Chunk {c.chunk_id}]: {c.quote}")

## Self-Verification Prompt
Ask the LLM to verify its own answer against the context:
def self_verify(query: str, answer: str, context_chunks: list[str]) -> dict:
    """Have the model fact-check ``answer`` against ``context_chunks``.

    Returns the model's JSON reply decoded with ``json.loads``. The prompt
    requests the keys ``supported``, ``unsupported_claims`` and
    ``explanation``; the reply is not validated against that shape.
    """
    fact_checker_instructions = (
        "You are a fact-checker. Given a question, an answer, and context chunks, "
        "determine if every claim in the answer is supported by the context. "
        "Return JSON: {supported: bool, unsupported_claims: list[str], explanation: str}"
    )

    joined_context = "\n\n".join(context_chunks)
    review_request = (
        f"Question: {query}\n\n"
        f"Answer: {answer}\n\n"
        f"Context:\n{joined_context}"
    )

    conversation = [
        {"role": "system", "content": fact_checker_instructions},
        {"role": "user", "content": review_request},
    ]
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
        response_format={"type": "json_object"},
        temperature=0,
    )

    verdict_json = response.choices[0].message.content
    return json.loads(verdict_json)