CNN Architectures — LeNet to EfficientNet
CNN architecture design has evolved dramatically since LeNet (1998). Understanding WHY each architecture was invented — ResNet's residual connections to solve vanishing gradients, VGG's uniform 3×3 convolutions, EfficientNet's compound scaling — helps you choose the right backbone and design custom architectures.
Building CNN Architectures from Scratch
import torch
import torch.nn as nn
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# The classic CNN pattern: Conv → BN → ReLU → Pool
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def conv_bn_relu(in_c: int, out_c: int, k: int = 3, s: int = 1, p: int = 1) -> nn.Sequential:
    """Return the canonical Conv2d -> BatchNorm2d -> ReLU stack.

    Args:
        in_c: number of input channels.
        out_c: number of output channels.
        k: square kernel size (default 3).
        s: stride (default 1).
        p: zero-padding (default 1, which preserves spatial size for k=3, s=1).
    """
    layers = [
        # bias=False: the BatchNorm shift makes a conv bias redundant.
        nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False),
        nn.BatchNorm2d(out_c),
        # inplace ReLU avoids allocating a second activation tensor.
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESIDUAL BLOCK — the key innovation of ResNet (2015)
# Solves: vanishing gradient in 50+ layer networks
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class ResidualBlock(nn.Module):
    """Bottleneck residual block in the style of ResNet (He et al., 2015).

    The 1x1 -> 3x3 -> 1x1 bottleneck compresses channels before the
    expensive 3x3 convolution and expands them back by ``expansion``
    afterwards.  The identity shortcut gives gradients a direct path
    around the convolutions, which is what makes very deep stacks
    trainable.
    """

    expansion = 4  # output channels = planes * expansion

    def __init__(self, in_planes: int, planes: int, stride: int = 1):
        super().__init__()
        width = planes * self.expansion
        # Bottleneck path: 1x1 reduce, 3x3 (possibly strided), 1x1 expand.
        self.conv1 = nn.Conv2d(in_planes, planes, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, 3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, width, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(width)
        self.relu = nn.ReLU(inplace=True)
        # Projection shortcut only when spatial size or channel count changes;
        # otherwise the shortcut is a plain identity.
        if stride != 1 or in_planes != width:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, width, 1, stride=stride, bias=False),
                nn.BatchNorm2d(width),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the bottleneck path, add the (possibly projected) input, ReLU."""
        skip = self.shortcut(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y = y + skip  # residual addition — the gradient highway
        return self.relu(y)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# SIMPLE ConvNet — custom architecture for 10-class problem
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class CustomCNN(nn.Module):
    """Small VGG-style classifier: stacked conv blocks, global average
    pooling, and a two-layer MLP head.

    Because the adaptive pool collapses the feature map to 1x1, the head
    sees a fixed 256-dim vector regardless of input resolution.

    Args:
        n_classes: number of output classes (size of the final logits).
    """

    def __init__(self, n_classes: int = 10):
        super().__init__()
        # Three stages; each MaxPool halves the spatial resolution.
        stages = [
            conv_bn_relu(3, 32),
            conv_bn_relu(32, 64),
            nn.MaxPool2d(2, 2),
            conv_bn_relu(64, 128),
            conv_bn_relu(128, 128),
            nn.MaxPool2d(2, 2),
            conv_bn_relu(128, 256),
            nn.MaxPool2d(2, 2),
        ]
        self.features = nn.Sequential(*stages)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))  # -> [B, 256, 1, 1]
        self.classifier = nn.Sequential(
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Dropout(0.5),  # regularize the wide hidden layer
            nn.Linear(512, n_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape [B, n_classes]."""
        feats = self.features(x)
        pooled = self.global_pool(feats).flatten(1)  # [B, 256]
        return self.classifier(pooled)
# Smoke test: one forward pass plus a parameter count.
# (Fixed: stray prose "Tip" was fused onto the final print line, which
# made the script a syntax error.)
model = CustomCNN(n_classes=10)
x = torch.randn(4, 3, 224, 224)
print(f"Output: {model(x).shape}")  # torch.Size([4, 10])
params = sum(p.numel() for p in model.parameters())
print(f"Parameters: {params:,}")
Tip
Practice CNN Architectures LeNet to EfficientNet in small, isolated examples before integrating into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Technical diagram.
Practice Task
Note
Practice Task — (1) Write a working example of CNN Architectures LeNet to EfficientNet from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with CNN Architectures LeNet to EfficientNet is skipping edge case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.