Mini Project: Sentiment Analysis — IMDB with BiLSTM + BERT Comparison
Build two sentiment classifiers on the IMDB movie review dataset (50K reviews): first with a BiLSTM and GloVe embeddings (~88% accuracy), then with fine-tuned DistilBERT (~94% accuracy). Directly compare the two approaches to understand why Transformers outperform RNNs.
IMDB Sentiment Analysis Project
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import numpy as np
# Pick the GPU when available. NOTE(review): `device` appears unused below —
# the HuggingFace Trainer manages device placement itself; confirm before removing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# APPROACH 1: DistilBERT Fine-tuning (recommended)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Pretrained checkpoint: uncased DistilBERT (smaller/faster than BERT-base).
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Load IMDB from HuggingFace Datasets (50K reviews, balanced pos/neg)
dataset = load_dataset("imdb")
train_ds = dataset["train"] # 25,000 reviews
test_ds = dataset["test"] # 25,000 reviews
def tokenize(batch):
    """Tokenize a batch of raw review texts into fixed-length encodings.

    Truncates to 512 tokens (DistilBERT's max) and pads every sequence to
    max_length so all tensors share one shape.
    """
    texts = batch["text"]
    return tokenizer(
        texts,
        truncation=True,
        max_length=512,
        padding="max_length",
    )
# Tokenize both splits; map() feeds `tokenize` 512 examples per batched call.
tokenized_train = train_ds.map(tokenize, batched=True, batch_size=512)
tokenized_test = test_ds.map(tokenize, batched=True, batch_size=512)
# Expose only the torch-tensor columns the Trainer consumes.
tokenized_train.set_format("torch", columns=["input_ids", "attention_mask", "label"])
tokenized_test.set_format("torch", columns=["input_ids", "attention_mask", "label"])
# Model
# Fresh binary classification head (pos/neg) on top of the pretrained encoder.
model_bert = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# HuggingFace Trainer — handles training loop for you
def compute_metrics(eval_pred):
    """Accuracy metric hook for the HuggingFace Trainer.

    `eval_pred` is a (logits, labels) pair of numpy arrays; returns a dict
    the Trainer logs under the "eval_" prefix.
    """
    logits, labels = eval_pred
    preds = logits.argmax(axis=-1)
    acc = (preds == labels).mean()
    return {"accuracy": acc}
training_args = TrainingArguments(
    output_dir="./results/imdb-distilbert",
    num_train_epochs=3,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=64,
    learning_rate=2e-5,  # standard fine-tuning LR for BERT-family models
    weight_decay=0.01,
    evaluation_strategy="epoch",
    # FIX: save_strategy="best" is rejected by most transformers releases
    # (valid values: "no" | "epoch" | "steps"), and load_best_model_at_end=True
    # requires save_strategy to match evaluation_strategy — so save per epoch.
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",  # pick the best checkpoint by eval accuracy
    fp16=torch.cuda.is_available(), # AMP
    logging_steps=100,
)
# Wire model, args, data, and metric hook into the Trainer.
# NOTE(review): the test split doubles as the eval set here — with
# load_best_model_at_end this selects on test data; a held-out validation
# split would be cleaner. Confirm this is intentional for the tutorial.
trainer = Trainer(
model=model_bert,
args=training_args,
train_dataset=tokenized_train,
eval_dataset=tokenized_test,
compute_metrics=compute_metrics,
)
trainer.train()
# Final evaluation on the (same) eval set; returns metrics prefixed "eval_".
results = trainer.evaluate()
print(f"\nDistilBERT IMDB accuracy: {results['eval_accuracy']:.2%}")
# Expected: ~93-94%
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# APPROACH 2: BiLSTM from scratch for comparison
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
class BiLSTMClassifier(nn.Module):
    """2-layer bidirectional LSTM for binary sentiment classification.

    Embeds token ids, runs a BiLSTM, max-pools over time (padding excluded),
    and classifies with a linear head.
    """

    def __init__(self, vocab_size=30000, embed_dim=256, hidden=128, dropout=0.4):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden, 2, batch_first=True,
                            dropout=dropout, bidirectional=True)
        self.fc = nn.Linear(hidden * 2, 2)  # *2: forward + backward states
        self.drop = nn.Dropout(dropout)

    def forward(self, x, mask):
        """x: (batch, seq) token ids; mask: (batch, seq), 1 = real token, 0 = pad."""
        e = self.drop(self.emb(x))
        o, _ = self.lstm(e)
        # FIX: the original zeroed padded positions, so 0 won the max-pool
        # whenever every real activation in a channel was negative. Fill
        # padding with -inf instead so pooling only ever selects real tokens
        # (assumes each row has at least one real token — TODO confirm).
        neg_inf = torch.finfo(o.dtype).min
        o = o.masked_fill(~mask.bool().unsqueeze(-1), neg_inf)
        pooled = o.max(1).values
        return self.fc(self.drop(pooled))
# Expected BiLSTM accuracy: ~85-88% — Transformers win by 6-8 points
# FIX: removed a stray "Tip" token fused onto the final print line
# (copy/paste artifact) that made this a SyntaxError.
print("\nComparison Summary:")
print(" BiLSTM (scratch, GloVe init): ~85-88% accuracy")
print(" DistilBERT (fine-tuned): ~93-94% accuracy")
print(" BERT-Large (fine-tuned): ~95-96% accuracy")
print(" Key advantage: Transformers see FULL context simultaneously")
print(" BiLSTM advantage: 20x fewer parameters, faster inference")
Tip
Practice this mini project — IMDB sentiment analysis with a BiLSTM vs. BERT — in small, isolated examples before integrating it into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
GPT-4 = strong reasoning. Claude = safety + long context. Gemini = multimodal. Llama = local/open.
Practice Task
Note
Practice Task — (1) Write a working example of Mini Project Sentiment Analysis IMDB with BiLSTM BERT Comparison from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake in this mini project (IMDB sentiment analysis with BiLSTM vs. BERT) is skipping edge-case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.