Text Classification — Sentiment Analysis with BERT
Text classification assigns a label to a piece of text: sentiment (positive/negative), topic category, spam detection, intent recognition. Fine-tuning a pre-trained BERT-family model on a classification task typically reaches around 91-93% accuracy on SST-2 (DistilBERT and BERT-base, respectively) with only 3-5 epochs of training.
Binary Sentiment Analysis with DistilBERT
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# BERT-based Text Classifier
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class TextDataset(Dataset):
    """Dataset for text classification tasks."""
    def __init__(self, texts: list[str], labels: list[int], tokenizer, max_len: int = 128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx: int) -> dict:
        enc = self.tokenizer(
            self.texts[idx],
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        return {
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "label": torch.tensor(self.labels[idx]),
        }

class BERTClassifier(nn.Module):
    """DistilBERT + classification head."""
    def __init__(self, model_name: str = "distilbert-base-uncased", n_classes: int = 2, dropout: float = 0.3):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        hidden_size = self.bert.config.hidden_size  # 768
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, n_classes),
        )
        nn.init.xavier_uniform_(self.classifier[0].weight)

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # outputs.last_hidden_state: [B, seq_len, 768]
        # Use the [CLS] token representation (position 0) as the sentence embedding
        cls_output = outputs.last_hidden_state[:, 0, :]  # [B, 768]
        return self.classifier(cls_output)
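# Optional sanity check before training (a sketch using random dummy tensors,
# not real data; _model, _ids, and _mask are throwaway names):
# _model = BERTClassifier(n_classes=2)
# _ids = torch.randint(0, _model.bert.config.vocab_size, (4, 16))  # [B=4, seq_len=16]
# _mask = torch.ones_like(_ids)
# print(_model(_ids, _mask).shape)  # torch.Size([4, 2]) -> one logit per class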
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Training — SST-2 (Stanford Sentiment Treebank)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = BERTClassifier(n_classes=2).to(device)
# Sample data (in practice, load SST-2 from HuggingFace Datasets)
train_texts = ["I loved this movie!", "Terrible film, waste of time.", "Absolutely wonderful.", "Boring and predictable."] * 100
train_labels = [1, 0, 1, 0] * 100 # 1=positive, 0=negative
dataset = TextDataset(train_texts, train_labels, tokenizer)
loader = DataLoader(dataset, batch_size=16, shuffle=True)
# Optimizer with different LRs: BERT needs tiny lr to avoid catastrophic forgetting
optimizer = AdamW([
    {"params": model.bert.parameters(), "lr": 2e-5},        # pre-trained body: very small LR
    {"params": model.classifier.parameters(), "lr": 1e-4},  # new head: normal LR
], weight_decay=0.01)
total_steps = len(loader) * 3 # 3 epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=total_steps // 10,  # 10% warmup
    num_training_steps=total_steps,
)
loss_fn = nn.CrossEntropyLoss()
for epoch in range(3):
    model.train()
    total_loss = 0.0
    for batch in loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)
        optimizer.zero_grad()
        logits = model(input_ids, attention_mask)
        loss = loss_fn(logits, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} | avg loss: {total_loss/len(loader):.4f}")
# Expected: ~90-91% accuracy on the SST-2 dev set with DistilBERT (BERT-base reaches ~92-93%)
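The loop above only tracks training loss. To verify accuracy you need an evaluation pass; here is a minimal sketch, assuming held-out val_texts and val_labels lists (hypothetical names, loaded the same way as the training data):

model.eval()
val_loader = DataLoader(TextDataset(val_texts, val_labels, tokenizer), batch_size=32)
correct = total = 0
with torch.no_grad():
    for batch in val_loader:
        logits = model(batch["input_ids"].to(device), batch["attention_mask"].to(device))
        preds = logits.argmax(dim=-1).cpu()          # predicted class per example
        correct += (preds == batch["label"]).sum().item()
        total += batch["label"].size(0)
print(f"Validation accuracy: {correct / total:.4f}")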
Tip
Practice BERT-based text classification in small, isolated examples before integrating it into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Modern NLP = Transformer-based. Pre-train, then fine-tune.
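A single-sentence prediction makes a good first experiment. A minimal sketch reusing the model and tokenizer trained above (the predict_sentiment helper is an illustration, not part of the listing):

def predict_sentiment(text: str) -> str:
    """Classify one sentence as positive or negative."""
    model.eval()
    enc = tokenizer(text, max_length=128, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = model(enc["input_ids"].to(device), enc["attention_mask"].to(device))
    return "positive" if logits.argmax(dim=-1).item() == 1 else "negative"

print(predict_sentiment("A sharp, funny, and moving film."))  # likely: positive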
Practice Task
(1) Write a working example of text classification with BERT from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Common Mistake
A common mistake with BERT-based text classification is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.
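A minimal sketch of such validation, wrapping the hypothetical predict_sentiment helper shown earlier (the exact checks depend on your application):

def predict_sentiment_safe(text) -> str:
    # Reject non-strings and empty/whitespace-only inputs before tokenization.
    if not isinstance(text, str):
        raise TypeError(f"expected str, got {type(text).__name__}")
    if not text.strip():
        raise ValueError("input text is empty or whitespace-only")
    return predict_sentiment(text)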