Mini Project: AI Customer Support Bot with RAG
Build a production customer support bot: ingests FAQ documents into ChromaDB, uses RAG to retrieve relevant answers, maintains conversation history, and falls back gracefully when the answer is not in the knowledge base. A complete FastAPI backend is included; StreamingResponse is imported so response streaming can be added as an extension.
Production RAG Customer Support Bot
import json
import os
import uuid
from typing import AsyncGenerator

import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel
# FastAPI application and OpenAI client. The OpenAI() constructor reads the
# OPENAI_API_KEY environment variable by default.
app = FastAPI(title="AI Customer Support API")
client = OpenAI()
# ─── VECTOR STORE SETUP ───────────────────────────────
# Embeddings are computed via the OpenAI API. Never hard-code the key in
# source; read it from the environment (fails fast at startup if unset).
embedding_fn = OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
# Persistent on-disk store so ingested FAQs survive process restarts.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(
    "support_knowledge",
    embedding_function=embedding_fn,
    metadata={"hnsw:space": "cosine"},  # cosine distance: lower = more similar
)
# ─── INGEST KNOWLEDGE BASE ────────────────────────────
def ingest_documents(documents: list[dict]) -> None:
    """Idempotently add FAQ documents to the vector store.

    Each document dict must have an "id" and "text"; "source" and
    "category" are optional metadata. Documents whose id already exists
    in the collection are skipped, so re-running ingestion is safe.
    """
    # include=[] returns only the ids — avoids fetching every stored
    # document and its metadata just to build the dedup set.
    existing_ids = set(collection.get(include=[])["ids"])
    new_docs = [d for d in documents if d["id"] not in existing_ids]
    if not new_docs:
        return
    collection.add(
        ids=[d["id"] for d in new_docs],
        documents=[d["text"] for d in new_docs],
        metadatas=[
            {"source": d.get("source", "faq"), "category": d.get("category", "general")}
            for d in new_docs
        ],
    )
    print(f"Ingested {len(new_docs)} new documents")
# Sample knowledge base — seeded at import time so the API answers from
# real content immediately. Ingestion is idempotent, so restarts are safe.
faq_docs = [
    {"id": "faq-001", "text": "To reset your password: go to login page → click 'Forgot Password' → enter email → check inbox for reset link.", "category": "account"},
    {"id": "faq-002", "text": "Subscription plans: Basic ($9/mo, 5 projects), Pro ($19/mo, unlimited projects), Enterprise ($99/mo, team features, SLA).", "category": "billing"},
    {"id": "faq-003", "text": "To cancel your subscription: Account Settings → Billing → Cancel Subscription. You keep access until end of billing period.", "category": "billing"},
    {"id": "faq-004", "text": "API rate limits: Free tier 100 req/min, Pro 1000 req/min, Enterprise unlimited. Rate limit headers in every response.", "category": "technical"},
    {"id": "faq-005", "text": "Data export: Account Settings → Data → Export. Downloads all your data as JSON/CSV within 24 hours.", "category": "account"},
]
ingest_documents(faq_docs)
# ─── SESSION STATE (in-memory; use Redis for production) ─
# Maps session_id -> ordered list of {"role": ..., "content": ...} messages.
sessions: dict[str, list[dict]] = {}
# ─── API MODELS ──────────────────────────────────────
class ChatRequest(BaseModel):
    """Incoming chat turn; leave session_id empty to start a new session."""
    # "" means "no session yet" — the endpoint generates a UUID in that case.
    session_id: str = ""
    message: str
class ChatResponse(BaseModel):
    """Answer payload; session_id lets the client continue the conversation."""
    session_id: str
    answer: str
    # Categories of the FAQ entries the answer was grounded in (empty when
    # nothing relevant was retrieved).
    sources: list[str]
# ─── RAG + CHAT ENDPOINT ──────────────────────────────
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Answer a support question using RAG over the FAQ collection.

    Retrieves the closest FAQ entries, grounds the LLM in them via the
    system prompt, and keeps per-session history so follow-ups have context.

    Raises:
        HTTPException: 400 on an empty message, 502 if the LLM call fails.
    """
    if not request.message.strip():
        raise HTTPException(status_code=400, detail="message must not be empty")

    session_id = request.session_id or str(uuid.uuid4())
    history = sessions.setdefault(session_id, [])

    # 1. Retrieve relevant documents
    results = collection.query(
        query_texts=[request.message],
        n_results=3,
        include=["documents", "metadatas", "distances"],
    )
    retrieved_docs = results["documents"][0]
    retrieved_meta = results["metadatas"][0]
    distances = results["distances"][0]

    # Filter: only use docs with cosine similarity > 0.3 (distance < 0.7);
    # anything further away is likely noise and would invite hallucination.
    relevant_docs = [
        (doc, meta)
        for doc, meta, dist in zip(retrieved_docs, retrieved_meta, distances)
        if dist < 0.7
    ]
    if not relevant_docs:
        context_str = "No specific documentation found."
        sources = []
    else:
        context_str = "\n\n".join(f"[FAQ - {m['category']}]\n{d}" for d, m in relevant_docs)
        # Deduplicate categories while preserving retrieval order.
        sources = list(dict.fromkeys(m["category"] for _, m in relevant_docs))

    # 2. Build augmented prompt
    system_prompt = f"""You are a helpful customer support agent.
Answer ONLY from the provided context. Say "I don't have that information, let me connect you to a human agent" if the answer isn't in the context.
Be concise, friendly, and professional.
Context:
{context_str}"""
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history[-10:])  # last 5 user/assistant exchanges
    messages.append({"role": "user", "content": request.message})

    # 3. Generate response — surface upstream failures as a gateway error
    # instead of an unhandled 500.
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini", messages=messages, temperature=0.3, max_tokens=300
        )
    except Exception as exc:  # OpenAI SDK raises its own exception hierarchy
        raise HTTPException(status_code=502, detail="LLM backend error") from exc
    answer = response.choices[0].message.content

    # 4. Update conversation history. `history` IS the list stored in
    # `sessions` (setdefault), so in-place mutation persists it — no need
    # to reassign. Trim to the last 50 messages so long-lived sessions
    # don't grow memory without bound.
    history.extend([
        {"role": "user", "content": request.message},
        {"role": "assistant", "content": answer},
    ])
    del history[:-50]
    return ChatResponse(session_id=session_id, answer=answer, sources=sources)
# uvicorn solution:module-7:app --reload --port 8000
Tip
Practice Mini Project AI Customer Support Bot with RAG in small, isolated examples before integrating into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
RAG = search + generate. Reduces hallucinations. Vector DB stores knowledge. LLM reasons over retrieved context.
Practice Task
Note
Practice Task — (1) Write a working example of Mini Project AI Customer Support Bot with RAG from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with Mini Project AI Customer Support Bot with RAG is skipping edge case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.