Object Detection — YOLO Concepts
Classification answers 'WHAT is in this image?'. Detection answers 'WHAT and WHERE?'. YOLO (You Only Look Once) made real-time object detection practical by treating detection as a single regression problem — no separate region proposal step.
Object Detection with YOLOv8
# pip install ultralytics
from ultralytics import YOLO
import torch
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# YOLO v8 — State of the art real-time detection (2023)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Load pre-trained YOLO v8 (trained on COCO — 80 classes)
model = YOLO("yolov8n.pt") # n=nano (fastest), s/m/l/x = larger, more accurate
# ── Inference on an image ──────────────────────────────
results = model("https://ultralytics.com/images/bus.jpg")
for r in results:
    boxes = r.boxes                      # bounding-box objects
    for box in boxes:
        cls = int(box.cls[0])            # class index
        conf = float(box.conf[0])        # confidence score
        xyxy = box.xyxy[0].tolist()      # [x1, y1, x2, y2] pixel coordinates
        name = model.names[cls]          # class name
        print(f"Detected: {name:12s} | conf={conf:.2%} | box={[round(c) for c in xyxy]}")
# ── Fine-tune on custom dataset ───────────────────────
# Dataset format (YOLO format — one .txt per image):
# class_id cx cy width height (all normalized 0-1)
# 0 0.45 0.62 0.30 0.45
# 1 0.78 0.23 0.15 0.28
# data.yaml:
# path: ./custom_dataset
# train: images/train
# val: images/val
# names: {0: 'car', 1: 'pedestrian', 2: 'cyclist'}
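Annotation tools often export pixel-corner coordinates, so a conversion to YOLO's normalized center format is usually needed. A minimal sketch (the function name and the example box coordinates are illustrative, not part of any library):

```python
def xyxy_to_yolo(x1, y1, x2, y2, img_w, img_h):
    """Convert pixel corner coords [x1, y1, x2, y2] to YOLO's normalized (cx, cy, w, h)."""
    cx = (x1 + x2) / 2 / img_w   # box center x, as a fraction of image width
    cy = (y1 + y2) / 2 / img_h   # box center y, as a fraction of image height
    w = (x2 - x1) / img_w        # box width, normalized
    h = (y2 - y1) / img_h        # box height, normalized
    return cx, cy, w, h

# A hypothetical car occupying pixels (192, 158)-(384, 446) in a 640x640 image,
# written as one label line (class_id cx cy w h):
cx, cy, w, h = xyxy_to_yolo(192, 158, 384, 446, 640, 640)
print(f"0 {cx:.2f} {cy:.2f} {w:.2f} {h:.2f}")  # → 0 0.45 0.47 0.30 0.45
```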
results = model.train(
    data="data.yaml",
    epochs=100,
    imgsz=640,
    batch=16,
    device=0 if torch.cuda.is_available() else "cpu",
    project="runs/detect",
    name="yolo_custom_v1",
    patience=30,      # early stopping if no val improvement for 30 epochs
    save_period=10,   # save a checkpoint every 10 epochs
    augment=True,     # mosaic, copy-paste augmentation
)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# HOW YOLO WORKS — Key Concepts
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
yolo_concepts = {
    "Grid cells": "Divide the image into an SxS grid; each cell predicts B boxes.",
    "Anchor boxes": "Predefined box shapes matched to object shapes (YOLOv2-v7; YOLOv8 uses an anchor-free head).",
    "Bounding box regression": "Predict (dx, dy, dw, dh) offsets from a reference box.",
    "Objectness score": "Probability that a box contains any object (trained against IoU with ground truth).",
    "Class probability": "P(class | object) for each of the 80 COCO classes.",
    "IoU (Intersection over Union)": "Overlap metric; IoU > 0.5 typically counts as a correct detection.",
    "NMS (Non-Max Suppression)": "Remove duplicate detections of the same object.",
}
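IoU and NMS are simple enough to implement by hand, which makes the concepts above concrete. A self-contained sketch (plain Python lists, not the Ultralytics API):

```python
def iou(a, b):
    """IoU of two boxes in [x1, y1, x2, y2] format."""
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])   # intersection top-left
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])   # intersection bottom-right
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

def nms(boxes, scores, iou_thresh=0.5):
    """Greedy NMS: keep the highest-scoring box, drop neighbours that overlap it."""
    order = sorted(range(len(boxes)), key=lambda i: scores[i], reverse=True)
    keep = []
    while order:
        best = order.pop(0)
        keep.append(best)
        order = [i for i in order if iou(boxes[best], boxes[i]) < iou_thresh]
    return keep

boxes = [[0, 0, 100, 100], [10, 10, 110, 110], [200, 200, 300, 300]]
scores = [0.9, 0.8, 0.7]
print(nms(boxes, scores))  # → [0, 2]: box 1 overlaps box 0 (IoU ≈ 0.68) and is suppressed
```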
# Evaluation metrics:
detection_metrics = {
    "mAP@0.5": "mean Average Precision at IoU=0.5 threshold — main benchmark",
    "mAP@0.5:0.95": "averaged over IoU 0.5 to 0.95 — stricter, COCO standard",
    "Precision": "Of all detections, what fraction are correct?",
    "Recall": "Of all ground-truth objects, what fraction did we find?",
    "FPS": "Frames per second — real-time needs >24 FPS",
}
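Precision and recall fall out directly from counts of matched detections. A toy example with made-up numbers (5 predictions matched against 4 ground-truth boxes at IoU ≥ 0.5):

```python
tp, fp, fn = 3, 2, 1          # 3 correct detections, 2 spurious ones, 1 object missed
precision = tp / (tp + fp)    # 3/5 = 0.60 — how trustworthy are our detections?
recall = tp / (tp + fn)       # 3/4 = 0.75 — how many real objects did we find?
print(f"precision={precision:.2f}  recall={recall:.2f}")  # → precision=0.60  recall=0.75
```

Average Precision then summarizes the precision-recall curve obtained by sweeping the confidence threshold, and mAP averages that over classes.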
print("YOLO v8 variants (speed vs accuracy tradeoff):")
variants = {
    "yolov8n": {"params": "3.2M", "mAP": "37.3", "FPS_GPU": "1208"},
    "yolov8s": {"params": "11.2M", "mAP": "44.9", "FPS_GPU": "702"},
    "yolov8m": {"params": "25.9M", "mAP": "50.2", "FPS_GPU": "383"},
    "yolov8l": {"params": "43.7M", "mAP": "52.9", "FPS_GPU": "258"},
    "yolov8x": {"params": "68.2M", "mAP": "53.9", "FPS_GPU": "178"},
}
for name, stats in variants.items():
    print(f"  {name}: {stats['params']:6s} params | mAP={stats['mAP']} | {stats['FPS_GPU']} FPS")