Mini Project: AI Image Generation App

Build a web application for AI image generation using Stable Diffusion XL, with a FastAPI backend and a Gradio frontend. Features: text-to-image, image-to-image, prompt enhancement with GPT-4o mini, and gallery storage.

60 min • By Priygop Team • Updated 2026

AI Image Generation FastAPI App

from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel, Field
from diffusers import DiffusionPipeline, StableDiffusionImg2ImgPipeline
import torch
import io
from PIL import Image
import base64
from openai import OpenAI
import uuid, os

app = FastAPI(title="AI Image Generation API")
# OpenAI() is constructed without arguments, so credentials are expected to
# come from the environment (presumably OPENAI_API_KEY; verify deployment).
openai_client = OpenAI()

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MODEL LOADING (on startup)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only a safe choice on GPU: on CPU many float16 kernels are
# missing or extremely slow, so fall back to the default float32 weights there.
_use_fp16 = device == "cuda"
txt2img_pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16 if _use_fp16 else torch.float32,
    variant="fp16" if _use_fp16 else None,
).to(device)
# Decode the VAE output in slices to lower peak memory use.
txt2img_pipe.enable_vae_slicing()
# Directory that /generate writes to and /gallery lists.
os.makedirs("generated_images", exist_ok=True)

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# PROMPT ENHANCEMENT
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def enhance_prompt(user_prompt: str) -> str:
    '''Use gpt-4o-mini to enrich a simple prompt for better Stable Diffusion output.

    The prompt is sent to the OpenAI chat-completions API together with a
    system message asking for extra detail about lighting, style and quality
    while keeping the subject unchanged.

    Args:
        user_prompt: The prompt text supplied by the caller.

    Returns:
        The enhanced prompt with surrounding whitespace removed. If the model
        returns no usable text, ``user_prompt`` is returned unchanged so the
        image pipeline never receives an empty prompt.

    Raises:
        Any exception raised by ``openai_client`` (network/API errors) is
        propagated to the caller.
    '''
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "system",
            "content": '''You are an expert at writing Stable Diffusion prompts.
Enhance the given prompt with descriptive details about lighting, style, quality.
Do NOT change the subject or concept. Output ONLY the enhanced prompt, nothing else.
Keep it under 75 words.'''
        }, {
            "role": "user",
            "content": f"Enhance this prompt: '{user_prompt}'"
        }],
        max_tokens=100, temperature=0.7,
    )
    # The SDK types `content` as optional, and `choices` could in principle be
    # empty; treat both as "no enhancement available" instead of crashing.
    choices = response.choices
    content = choices[0].message.content if choices else None
    enhanced = (content or "").strip()
    return enhanced or user_prompt

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# API ENDPOINTS
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class GenerationRequest(BaseModel):
    '''Request body accepted by ``POST /generate``.

    Validation happens before the handler runs, so out-of-range values are
    rejected as request errors instead of failing inside the pipeline.
    '''
    # Text describing the desired image (3-500 characters).
    prompt: str = Field(..., min_length=3, max_length=500)
    # Things the image should avoid; passed to the pipeline as negative_prompt.
    negative_prompt: str = "blurry, low quality, distorted, watermark, text"
    # Number of denoising steps (10-50).
    steps: int = Field(default=25, ge=10, le=50)
    # Classifier-free guidance strength (1.0-20.0).
    guidance_scale: float = Field(default=7.5, ge=1.0, le=20.0)
    # Output size in pixels. The SDXL pipeline rejects dimensions that are not
    # divisible by 8, so enforce that here rather than failing at inference.
    width: int = Field(default=1024, ge=512, le=1024, multiple_of=8)
    height: int = Field(default=1024, ge=512, le=1024, multiple_of=8)
    # When true, the prompt is rewritten by the prompt-enhancement step first.
    enhance_prompt: bool = True
    # Any negative value means "pick a random seed". The upper bound is the
    # largest value torch.Generator.manual_seed accepts (2**64 - 1).
    seed: int = Field(default=-1, le=0xFFFF_FFFF_FFFF_FFFF)

def _render_png(request: GenerationRequest, prompt: str, seed: int) -> bytes:
    '''Run the text-to-image pipeline once and return the image as PNG bytes.'''
    generator = torch.Generator(device).manual_seed(seed)
    image = txt2img_pipe(
        prompt=prompt,
        negative_prompt=request.negative_prompt,
        num_inference_steps=request.steps,
        guidance_scale=request.guidance_scale,
        width=request.width, height=request.height,
        generator=generator,
    ).images[0]
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return buffer.getvalue()


@app.post("/generate")
async def generate_image(request: GenerationRequest):
    '''Generate an image from a text prompt, store it, and return it as base64.

    Steps: optionally enhance the prompt, resolve the seed, run the diffusion
    pipeline, write the PNG into ``generated_images`` and return its contents.

    Returns:
        A dict with ``id``, ``enhanced_prompt`` (the prompt actually used,
        enhanced or not), ``image_base64``, ``saved_path`` and ``seed`` (the
        seed actually used, so a random run can be reproduced).

    Raises:
        HTTPException: status 500 with the error text if any step fails.
    '''
    # Function-scope imports: both are needed only here (asyncio is stdlib,
    # fastapi is already a dependency of this file).
    import asyncio
    from fastapi.concurrency import run_in_threadpool

    try:
        prompt = request.prompt
        if request.enhance_prompt:
            # Blocking network call: run it off the event loop so other
            # requests (e.g. /gallery) stay responsive.
            prompt = await run_in_threadpool(enhance_prompt, prompt)
            print(f"Enhanced prompt: {prompt}")

        # Negative seed means "random"; resolve it up front so it can be returned.
        seed = request.seed if request.seed >= 0 else torch.randint(0, 2**31, (1,)).item()

        # One pipeline instance is shared by all requests, so inference is
        # serialised. The lock is created lazily inside the running event loop.
        lock = getattr(app.state, "generation_lock", None)
        if lock is None:
            lock = app.state.generation_lock = asyncio.Lock()
        async with lock:
            # Inference is long-running and blocking: run it in a worker thread.
            png_bytes = await run_in_threadpool(_render_png, request, prompt, seed)

        # Full-length UUID: a truncated id could collide and silently
        # overwrite an earlier image.
        img_id = uuid.uuid4().hex
        img_path = f"generated_images/{img_id}.png"
        # The PNG is encoded once; the same bytes go to disk and to the response.
        with open(img_path, "wb") as out_file:
            out_file.write(png_bytes)
        img_b64 = base64.b64encode(png_bytes).decode()

        return {
            "id": img_id,
            "enhanced_prompt": prompt,
            "image_base64": img_b64,
            "saved_path": img_path,
            "seed": seed,
        }

    except HTTPException:
        # Already a well-formed HTTP error; do not rewrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/gallery")
async def get_gallery() -> dict:
    '''List generated images, newest first.

    Returns:
        A dict with ``count`` (number of ``.png`` files in
        ``generated_images``) and ``images`` (names of up to 20 of them,
        most recently modified first).
    '''
    # File names are random ids, so sorting by name says nothing about age;
    # order by modification time to actually return the latest images.
    with os.scandir("generated_images") as entries:
        stamped = [
            (entry.stat().st_mtime, entry.name)
            for entry in entries
            if entry.is_file() and entry.name.endswith(".png")
        ]
    stamped.sort(reverse=True)
    return {"count": len(stamped), "images": [name for _, name in stamped[:20]]}

# Run: uvicorn app:app --host 0.0.0.0 --port 8000 --workers 1

Tip

Practice the AI Image Generation App mini project in small, isolated examples before integrating it into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.

Diagram

Loading diagram…

Better prompts = better AI output. Structure, examples, and constraints matter.

Practice Task

Note

Practice Task — (1) Write a working version of the AI Image Generation App mini project from scratch without looking at your notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.

Quick Quiz

Common Mistake

Warning

A common mistake in the AI Image Generation App mini project is skipping edge-case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.

Topics in This Module

AI Image Generation FastAPI App

from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel, Field
from diffusers import DiffusionPipeline, StableDiffusionImg2ImgPipeline
import torch
import io
from PIL import Image
import base64
from openai import OpenAI
import uuid, os

app = FastAPI(title="AI Image Generation API")
# OpenAI() is constructed without arguments, so credentials are expected to
# come from the environment (presumably OPENAI_API_KEY; verify deployment).
openai_client = OpenAI()

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MODEL LOADING (on startup)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only a safe choice on GPU: on CPU many float16 kernels are
# missing or extremely slow, so fall back to the default float32 weights there.
_use_fp16 = device == "cuda"
txt2img_pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16 if _use_fp16 else torch.float32,
    variant="fp16" if _use_fp16 else None,
).to(device)
# Decode the VAE output in slices to lower peak memory use.
txt2img_pipe.enable_vae_slicing()
# Directory that /generate writes to and /gallery lists.
os.makedirs("generated_images", exist_ok=True)

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# PROMPT ENHANCEMENT
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def enhance_prompt(user_prompt: str) -> str:
    '''Use gpt-4o-mini to enrich a simple prompt for better Stable Diffusion output.

    The prompt is sent to the OpenAI chat-completions API together with a
    system message asking for extra detail about lighting, style and quality
    while keeping the subject unchanged.

    Args:
        user_prompt: The prompt text supplied by the caller.

    Returns:
        The enhanced prompt with surrounding whitespace removed. If the model
        returns no usable text, ``user_prompt`` is returned unchanged so the
        image pipeline never receives an empty prompt.

    Raises:
        Any exception raised by ``openai_client`` (network/API errors) is
        propagated to the caller.
    '''
    response = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{
            "role": "system",
            "content": '''You are an expert at writing Stable Diffusion prompts.
Enhance the given prompt with descriptive details about lighting, style, quality.
Do NOT change the subject or concept. Output ONLY the enhanced prompt, nothing else.
Keep it under 75 words.'''
        }, {
            "role": "user",
            "content": f"Enhance this prompt: '{user_prompt}'"
        }],
        max_tokens=100, temperature=0.7,
    )
    # The SDK types `content` as optional, and `choices` could in principle be
    # empty; treat both as "no enhancement available" instead of crashing.
    choices = response.choices
    content = choices[0].message.content if choices else None
    enhanced = (content or "").strip()
    return enhanced or user_prompt

# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# API ENDPOINTS
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class GenerationRequest(BaseModel):
    '''Request body accepted by ``POST /generate``.

    Validation happens before the handler runs, so out-of-range values are
    rejected as request errors instead of failing inside the pipeline.
    '''
    # Text describing the desired image (3-500 characters).
    prompt: str = Field(..., min_length=3, max_length=500)
    # Things the image should avoid; passed to the pipeline as negative_prompt.
    negative_prompt: str = "blurry, low quality, distorted, watermark, text"
    # Number of denoising steps (10-50).
    steps: int = Field(default=25, ge=10, le=50)
    # Classifier-free guidance strength (1.0-20.0).
    guidance_scale: float = Field(default=7.5, ge=1.0, le=20.0)
    # Output size in pixels. The SDXL pipeline rejects dimensions that are not
    # divisible by 8, so enforce that here rather than failing at inference.
    width: int = Field(default=1024, ge=512, le=1024, multiple_of=8)
    height: int = Field(default=1024, ge=512, le=1024, multiple_of=8)
    # When true, the prompt is rewritten by the prompt-enhancement step first.
    enhance_prompt: bool = True
    # Any negative value means "pick a random seed". The upper bound is the
    # largest value torch.Generator.manual_seed accepts (2**64 - 1).
    seed: int = Field(default=-1, le=0xFFFF_FFFF_FFFF_FFFF)

def _render_png(request: GenerationRequest, prompt: str, seed: int) -> bytes:
    '''Run the text-to-image pipeline once and return the image as PNG bytes.'''
    generator = torch.Generator(device).manual_seed(seed)
    image = txt2img_pipe(
        prompt=prompt,
        negative_prompt=request.negative_prompt,
        num_inference_steps=request.steps,
        guidance_scale=request.guidance_scale,
        width=request.width, height=request.height,
        generator=generator,
    ).images[0]
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return buffer.getvalue()


@app.post("/generate")
async def generate_image(request: GenerationRequest):
    '''Generate an image from a text prompt, store it, and return it as base64.

    Steps: optionally enhance the prompt, resolve the seed, run the diffusion
    pipeline, write the PNG into ``generated_images`` and return its contents.

    Returns:
        A dict with ``id``, ``enhanced_prompt`` (the prompt actually used,
        enhanced or not), ``image_base64``, ``saved_path`` and ``seed`` (the
        seed actually used, so a random run can be reproduced).

    Raises:
        HTTPException: status 500 with the error text if any step fails.
    '''
    # Function-scope imports: both are needed only here (asyncio is stdlib,
    # fastapi is already a dependency of this file).
    import asyncio
    from fastapi.concurrency import run_in_threadpool

    try:
        prompt = request.prompt
        if request.enhance_prompt:
            # Blocking network call: run it off the event loop so other
            # requests (e.g. /gallery) stay responsive.
            prompt = await run_in_threadpool(enhance_prompt, prompt)
            print(f"Enhanced prompt: {prompt}")

        # Negative seed means "random"; resolve it up front so it can be returned.
        seed = request.seed if request.seed >= 0 else torch.randint(0, 2**31, (1,)).item()

        # One pipeline instance is shared by all requests, so inference is
        # serialised. The lock is created lazily inside the running event loop.
        lock = getattr(app.state, "generation_lock", None)
        if lock is None:
            lock = app.state.generation_lock = asyncio.Lock()
        async with lock:
            # Inference is long-running and blocking: run it in a worker thread.
            png_bytes = await run_in_threadpool(_render_png, request, prompt, seed)

        # Full-length UUID: a truncated id could collide and silently
        # overwrite an earlier image.
        img_id = uuid.uuid4().hex
        img_path = f"generated_images/{img_id}.png"
        # The PNG is encoded once; the same bytes go to disk and to the response.
        with open(img_path, "wb") as out_file:
            out_file.write(png_bytes)
        img_b64 = base64.b64encode(png_bytes).decode()

        return {
            "id": img_id,
            "enhanced_prompt": prompt,
            "image_base64": img_b64,
            "saved_path": img_path,
            "seed": seed,
        }

    except HTTPException:
        # Already a well-formed HTTP error; do not rewrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

@app.get("/gallery")
async def get_gallery() -> dict:
    '''List generated images, newest first.

    Returns:
        A dict with ``count`` (number of ``.png`` files in
        ``generated_images``) and ``images`` (names of up to 20 of them,
        most recently modified first).
    '''
    # File names are random ids, so sorting by name says nothing about age;
    # order by modification time to actually return the latest images.
    with os.scandir("generated_images") as entries:
        stamped = [
            (entry.stat().st_mtime, entry.name)
            for entry in entries
            if entry.is_file() and entry.name.endswith(".png")
        ]
    stamped.sort(reverse=True)
    return {"count": len(stamped), "images": [name for _, name in stamped[:20]]}

# Run: uvicorn app:app --host 0.0.0.0 --port 8000 --workers 1

Tip

Diagram

Loading diagram…

Better prompts = better AI output. Structure, examples, and constraints matter.

Topics in This Module