CNN Architectures — LeNet to EfficientNet
CNN architecture design has evolved dramatically since LeNet (1998). Understanding WHY each architecture was invented — ResNet's residual connections to solve vanishing gradients, VGG's uniform 3×3 convolutions, EfficientNet's compound scaling — helps you choose the right backbone and design custom architectures.
Building CNN Architectures from Scratch
import torch
import torch.nn as nn
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# The classic CNN pattern: Conv → BN → ReLU → Pool
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
def conv_bn_relu(in_c: int, out_c: int, k: int = 3, s: int = 1, p: int = 1) -> nn.Sequential:
    """Return the canonical Conv2d -> BatchNorm2d -> ReLU stack.

    Args:
        in_c: number of input channels.
        out_c: number of output channels.
        k: square kernel size (default 3).
        s: stride (default 1).
        p: zero-padding (default 1, which preserves spatial size for k=3, s=1).
    """
    layers = [
        # bias=False: the BatchNorm shift makes a conv bias redundant.
        nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False),
        nn.BatchNorm2d(out_c),
        # inplace ReLU avoids allocating a second activation tensor.
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# RESIDUAL BLOCK — the key innovation of ResNet (2015)
# Solves: vanishing gradient in 50+ layer networks
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class ResidualBlock(nn.Module):
    """Bottleneck residual block in the style of ResNet (He et al., 2015).

    The 1x1 -> 3x3 -> 1x1 bottleneck compresses channels before the
    expensive 3x3 convolution and expands them back by ``expansion``
    afterwards.  The identity shortcut gives gradients a direct path
    around the convolutions, which is what makes very deep stacks
    trainable.
    """

    expansion = 4  # output channels = planes * expansion

    def __init__(self, in_planes: int, planes: int, stride: int = 1):
        super().__init__()
        width = planes * self.expansion
        # Bottleneck path: 1x1 reduce, 3x3 (possibly strided), 1x1 expand.
        self.conv1 = nn.Conv2d(in_planes, planes, 1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, 3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, width, 1, bias=False)
        self.bn3 = nn.BatchNorm2d(width)
        self.relu = nn.ReLU(inplace=True)
        # Projection shortcut only when spatial size or channel count changes;
        # otherwise the shortcut is a plain identity.
        if stride != 1 or in_planes != width:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, width, 1, stride=stride, bias=False),
                nn.BatchNorm2d(width),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the bottleneck path, add the (possibly projected) input, ReLU."""
        skip = self.shortcut(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        y = y + skip  # residual addition — the gradient highway
        return self.relu(y)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# SIMPLE ConvNet — custom architecture for 10-class problem
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class CustomCNN(nn.Module):
    """Small VGG-style classifier: stacked conv blocks, global average
    pooling, and a two-layer MLP head.

    Because the adaptive pool collapses the feature map to 1x1, the head
    sees a fixed 256-dim vector regardless of input resolution.

    Args:
        n_classes: number of output classes (size of the final logits).
    """

    def __init__(self, n_classes: int = 10):
        super().__init__()
        # Three stages; each MaxPool halves the spatial resolution.
        stages = [
            conv_bn_relu(3, 32),
            conv_bn_relu(32, 64),
            nn.MaxPool2d(2, 2),
            conv_bn_relu(64, 128),
            conv_bn_relu(128, 128),
            nn.MaxPool2d(2, 2),
            conv_bn_relu(128, 256),
            nn.MaxPool2d(2, 2),
        ]
        self.features = nn.Sequential(*stages)
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))  # -> [B, 256, 1, 1]
        self.classifier = nn.Sequential(
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Dropout(0.5),  # regularize the wide hidden layer
            nn.Linear(512, n_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape [B, n_classes]."""
        feats = self.features(x)
        pooled = self.global_pool(feats).flatten(1)  # [B, 256]
        return self.classifier(pooled)
# Smoke test: one forward pass plus a parameter count.
# (Fixed: stray prose "Tip" was fused onto the final print line, which
# made the script a syntax error.)
model = CustomCNN(n_classes=10)
x = torch.randn(4, 3, 224, 224)
print(f"Output: {model(x).shape}")  # torch.Size([4, 10])
params = sum(p.numel() for p in model.parameters())
print(f"Parameters: {params:,}")
Tip
Practice CNN Architectures LeNet to EfficientNet in small, isolated examples before integrating into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Technical diagram.
Practice Task
Note
Practice Task — (1) Write a working example of CNN Architectures LeNet to EfficientNet from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with CNN Architectures LeNet to EfficientNet is skipping edge case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.