Mini Project: AI Customer Support Bot with RAG
Build a production customer support bot: ingests FAQ documents into ChromaDB, uses RAG to retrieve relevant answers, maintains conversation history, and falls back gracefully when the answer is not in the knowledge base. A complete FastAPI backend is included; StreamingResponse is imported so response streaming can be added as an extension.
Production RAG Customer Support Bot
import json
import os
import uuid
from typing import AsyncGenerator

import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel
# FastAPI application and OpenAI client. The OpenAI() constructor reads the
# OPENAI_API_KEY environment variable by default.
app = FastAPI(title="AI Customer Support API")
client = OpenAI()
# ─── VECTOR STORE SETUP ───────────────────────────────
# Embeddings are computed via the OpenAI API. Never hard-code the key in
# source; read it from the environment (fails fast at startup if unset).
embedding_fn = OpenAIEmbeddingFunction(
    api_key=os.environ["OPENAI_API_KEY"],
    model_name="text-embedding-3-small",
)
# Persistent on-disk store so ingested FAQs survive process restarts.
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection(
    "support_knowledge",
    embedding_function=embedding_fn,
    metadata={"hnsw:space": "cosine"},  # cosine distance: lower = more similar
)
# ─── INGEST KNOWLEDGE BASE ────────────────────────────
def ingest_documents(documents: list[dict]) -> None:
    """Idempotently add FAQ documents to the vector store.

    Each document dict must have an "id" and "text"; "source" and
    "category" are optional metadata. Documents whose id already exists
    in the collection are skipped, so re-running ingestion is safe.
    """
    # include=[] returns only the ids — avoids fetching every stored
    # document and its metadata just to build the dedup set.
    existing_ids = set(collection.get(include=[])["ids"])
    new_docs = [d for d in documents if d["id"] not in existing_ids]
    if not new_docs:
        return
    collection.add(
        ids=[d["id"] for d in new_docs],
        documents=[d["text"] for d in new_docs],
        metadatas=[
            {"source": d.get("source", "faq"), "category": d.get("category", "general")}
            for d in new_docs
        ],
    )
    print(f"Ingested {len(new_docs)} new documents")
# Sample knowledge base — seeded at import time so the API answers from
# real content immediately. Ingestion is idempotent, so restarts are safe.
faq_docs = [
    {"id": "faq-001", "text": "To reset your password: go to login page → click 'Forgot Password' → enter email → check inbox for reset link.", "category": "account"},
    {"id": "faq-002", "text": "Subscription plans: Basic ($9/mo, 5 projects), Pro ($19/mo, unlimited projects), Enterprise ($99/mo, team features, SLA).", "category": "billing"},
    {"id": "faq-003", "text": "To cancel your subscription: Account Settings → Billing → Cancel Subscription. You keep access until end of billing period.", "category": "billing"},
    {"id": "faq-004", "text": "API rate limits: Free tier 100 req/min, Pro 1000 req/min, Enterprise unlimited. Rate limit headers in every response.", "category": "technical"},
    {"id": "faq-005", "text": "Data export: Account Settings → Data → Export. Downloads all your data as JSON/CSV within 24 hours.", "category": "account"},
]
ingest_documents(faq_docs)
# ─── SESSION STATE (in-memory; use Redis for production) ─
# Maps session_id -> ordered list of {"role": ..., "content": ...} messages.
sessions: dict[str, list[dict]] = {}
# ─── API MODELS ──────────────────────────────────────
class ChatRequest(BaseModel):
    """Incoming chat turn; leave session_id empty to start a new session."""
    # "" means "no session yet" — the endpoint generates a UUID in that case.
    session_id: str = ""
    message: str
class ChatResponse(BaseModel):
    """Answer payload; session_id lets the client continue the conversation."""
    session_id: str
    answer: str
    # Categories of the FAQ entries the answer was grounded in (empty when
    # nothing relevant was retrieved).
    sources: list[str]
# ─── RAG + CHAT ENDPOINT ──────────────────────────────
@app.post("/chat", response_model=ChatResponse)
async def chat(request: ChatRequest):
    """Answer a support question using RAG over the FAQ collection.

    Retrieves the closest FAQ entries, grounds the LLM in them via the
    system prompt, and keeps per-session history so follow-ups have context.

    Raises:
        HTTPException: 400 on an empty message, 502 if the LLM call fails.
    """
    if not request.message.strip():
        raise HTTPException(status_code=400, detail="message must not be empty")

    session_id = request.session_id or str(uuid.uuid4())
    history = sessions.setdefault(session_id, [])

    # 1. Retrieve relevant documents
    results = collection.query(
        query_texts=[request.message],
        n_results=3,
        include=["documents", "metadatas", "distances"],
    )
    retrieved_docs = results["documents"][0]
    retrieved_meta = results["metadatas"][0]
    distances = results["distances"][0]

    # Filter: only use docs with cosine similarity > 0.3 (distance < 0.7);
    # anything further away is likely noise and would invite hallucination.
    relevant_docs = [
        (doc, meta)
        for doc, meta, dist in zip(retrieved_docs, retrieved_meta, distances)
        if dist < 0.7
    ]
    if not relevant_docs:
        context_str = "No specific documentation found."
        sources = []
    else:
        context_str = "\n\n".join(f"[FAQ - {m['category']}]\n{d}" for d, m in relevant_docs)
        # Deduplicate categories while preserving retrieval order.
        sources = list(dict.fromkeys(m["category"] for _, m in relevant_docs))

    # 2. Build augmented prompt
    system_prompt = f"""You are a helpful customer support agent.
Answer ONLY from the provided context. Say "I don't have that information, let me connect you to a human agent" if the answer isn't in the context.
Be concise, friendly, and professional.
Context:
{context_str}"""
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history[-10:])  # last 5 user/assistant exchanges
    messages.append({"role": "user", "content": request.message})

    # 3. Generate response — surface upstream failures as a gateway error
    # instead of an unhandled 500.
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini", messages=messages, temperature=0.3, max_tokens=300
        )
    except Exception as exc:  # OpenAI SDK raises its own exception hierarchy
        raise HTTPException(status_code=502, detail="LLM backend error") from exc
    answer = response.choices[0].message.content

    # 4. Update conversation history. `history` IS the list stored in
    # `sessions` (setdefault), so in-place mutation persists it — no need
    # to reassign. Trim to the last 50 messages so long-lived sessions
    # don't grow memory without bound.
    history.extend([
        {"role": "user", "content": request.message},
        {"role": "assistant", "content": answer},
    ])
    del history[:-50]
    return ChatResponse(session_id=session_id, answer=answer, sources=sources)
# uvicorn solution:module-7:app --reload --port 8000
Tip
Practice Mini Project AI Customer Support Bot with RAG in small, isolated examples before integrating into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
RAG = search + generate. Reduces hallucinations. Vector DB stores knowledge. LLM reasons over retrieved context.
Practice Task
Note
Practice Task — (1) Write a working example of Mini Project AI Customer Support Bot with RAG from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with Mini Project AI Customer Support Bot with RAG is skipping edge case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.