Model Monitoring Dashboard
Production ML monitoring tracks four signals: the prediction distribution (has the output shifted?), input feature statistics (drift), model performance metrics once ground truth becomes available (often with a delay), and system metrics (latency, error rate). Effective monitoring catches silent model degradation, the most common production ML failure mode, before it impacts business outcomes.
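The monitor built in this section focuses on the first two signals, prediction and feature drift. For the system-metrics side, a minimal sketch might wrap each inference call with timing and error counting; timed_predict and its counters are illustrative names, not from any library, and a scikit-learn-style predict_proba model is assumed.

import time

latencies = []              # per-call latency in seconds
error_count = call_count = 0

def timed_predict(model, features):
    """Wrap an inference call with latency and error tracking (sketch)."""
    global error_count, call_count
    call_count += 1
    start = time.perf_counter()
    try:
        return model.predict_proba(features)[0, 1]
    except Exception:
        error_count += 1
        raise
    finally:
        latencies.append(time.perf_counter() - start)

# Dashboard tiles can then report, e.g., p95 latency and the error rate:
# np.percentile(latencies, 95), error_count / max(call_count, 1)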
Building a Model Performance Monitor
import datetime

import numpy as np
import pandas as pd
from scipy.stats import ks_2samp

np.random.seed(42)
class ModelMonitor:
"""Production model monitoring with automatic alerting."""
def __init__(self, model, reference_df: pd.DataFrame, feature_cols: list, threshold_psi: float = 0.2):
self.model = model
self.reference_df = reference_df
self.feature_cols = feature_cols
self.threshold_psi = threshold_psi
self.inference_log: list = []
self.alert_log: list = []
# Pre-compute reference prediction distribution
self.ref_probs = model.predict_proba(reference_df[feature_cols])[:, 1]
    def log_inference(self, features: pd.DataFrame, prediction_prob: float, timestamp=None):
        """Log a single production inference (expects a one-row feature DataFrame)."""
        record = {
            "timestamp": timestamp or datetime.datetime.now().isoformat(),
            "pred_prob": prediction_prob,
            **features.iloc[0].to_dict(),
        }
        self.inference_log.append(record)
    def compute_psi(self, ref: np.ndarray, prod: np.ndarray, n_bins: int = 10) -> float:
        """Population Stability Index between binned reference and production samples."""
        # Bin edges from reference quantiles; widen the outer edges so extremes fall inside
        bins = np.percentile(ref, np.linspace(0, 100, n_bins + 1))
        bins[0] -= 1e-8
        bins[-1] += 1e-8
        # Clip bin proportions away from zero to keep the log term finite
        ref_pct = np.clip(np.histogram(ref, bins=bins)[0] / len(ref), 1e-6, None)
        prod_pct = np.clip(np.histogram(prod, bins=bins)[0] / len(prod), 1e-6, None)
        return float(np.sum((prod_pct - ref_pct) * np.log(prod_pct / ref_pct)))
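    # Rule-of-thumb PSI reading (widely used, though not universal):
    # PSI < 0.1 stable, 0.1-0.2 moderate shift, > 0.2 significant drift.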
def run_monitoring_report(self) -> dict:
"""Generate monitoring snapshot from logged inferences."""
if len(self.inference_log) < 50:
return {"status": "NOT_ENOUGH_DATA", "n_inferences": len(self.inference_log)}
log_df = pd.DataFrame(self.inference_log)
prod_probs = log_df["pred_prob"].values
# 1. PREDICTION DISTRIBUTION DRIFT
pred_psi = self.compute_psi(self.ref_probs, prod_probs)
pred_ks = ks_2samp(self.ref_probs, prod_probs)
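        # The two-sample KS test complements PSI: a small p-value suggests the
        # production predictions genuinely differ from the reference distribution.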
# 2. FEATURE DRIFT
feature_drift = {}
for col in self.feature_cols:
if col not in log_df.columns: continue
psi = self.compute_psi(self.reference_df[col].values, log_df[col].values)
feature_drift[col] = {"psi": round(psi, 4), "status": "DRIFT" if psi > self.threshold_psi else "OK"}
# 3. PREDICTION STATISTICS
stats = {
"n_inferences": len(log_df),
"pred_mean": round(prod_probs.mean(), 4),
"pred_std": round(prod_probs.std(), 4),
"positive_rate": round((prod_probs > 0.5).mean(), 4),
"ref_pos_rate": round((self.ref_probs > 0.5).mean(), 4),
}
# 4. ALERTS
alerts = []
if pred_psi > self.threshold_psi:
alerts.append(f"ALERT: Prediction distribution drift PSI={pred_psi:.3f} (threshold={self.threshold_psi})")
for col, drift in feature_drift.items():
if drift["status"] == "DRIFT":
alerts.append(f"ALERT: Feature '{col}' PSI={drift['psi']} -- significant drift!")
        if abs(stats["positive_rate"] - stats["ref_pos_rate"]) > 0.1:
            alerts.append(f"ALERT: Positive rate shifted from {stats['ref_pos_rate']:.1%} to {stats['positive_rate']:.1%}")
        self.alert_log.extend(alerts)  # persist alert history across reports
        report = {
            "status": "ALERT" if alerts else "OK",
            "alerts": alerts,
            "prediction_psi": round(pred_psi, 4),
            "prediction_ks_pvalue": round(float(pred_ks.pvalue), 4),
            "feature_drift": feature_drift,
            "stats": stats,
        }
return report
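Ground-truth labels usually arrive with a delay, but once they do they can be joined back onto the inference log to measure realized performance. A minimal sketch, assuming labels arrive as a dict keyed by the logged timestamp; evaluate_with_labels is an illustrative helper, not part of the class above.

from sklearn.metrics import roc_auc_score

def evaluate_with_labels(monitor: ModelMonitor, labels: dict) -> dict:
    """Join delayed labels (timestamp -> 0/1) onto the inference log (sketch)."""
    log_df = pd.DataFrame(monitor.inference_log)
    log_df["label"] = log_df["timestamp"].map(labels)
    scored = log_df.dropna(subset=["label"])  # keep rows whose labels have arrived
    if len(scored) < 30 or scored["label"].nunique() < 2:
        return {"status": "NOT_ENOUGH_LABELS", "n_labeled": len(scored)}
    return {
        "status": "OK",
        "n_labeled": len(scored),
        "auc": round(roc_auc_score(scored["label"], scored["pred_prob"]), 4),
    }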
# SIMULATE MONITOR IN ACTION
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# Build and train a simple model
X_ref = pd.DataFrame({
"age": np.random.normal(38, 12, 1000).clip(18, 75),
"income": np.random.exponential(55000, 1000).clip(15000, 200000),
"credit": np.random.normal(680, 80, 1000).clip(300, 850),
})
y_ref = np.random.choice([0, 1], 1000, p=[0.83, 0.17])  # synthetic labels, independent of features (fine for a monitoring demo)
pipe = Pipeline([("sc", StandardScaler()), ("m", GradientBoostingClassifier(n_estimators=50, random_state=42))])
pipe.fit(X_ref, y_ref)
monitor = ModelMonitor(pipe, X_ref, ["age", "income", "credit"], threshold_psi=0.15)
# Simulate 200 inferences with drift
for i in range(200):
features = pd.DataFrame([{
"age": np.random.normal(30, 10), # younger customers (drift!)
"income": np.random.exponential(40000), # lower income
"credit": np.random.normal(620, 90), # lower credit
}])
prob = pipe.predict_proba(features)[0, 1]
monitor.log_inference(features, prob)
report = monitor.run_monitoring_report()
print(f"Monitoring Status: {report['status']}")
print(f"Alerts ({len(report['alerts'])}):")
for alert in report["alerts"]:
print(f" {alert}")
print(f"\nPrediction PSI: {report['prediction_psi']}")
print(f"Feature drift summary:")
for feat, d in report["feature_drift"].items():
print(f" {feat}: PSI={d['psi']} ({d['status']})")Tip
Tip
Practice building a model monitoring dashboard in small, isolated examples before integrating it into larger projects. Breaking the concept into small experiments builds genuine understanding faster than reading alone.
Practice Task
(1) Write a working model monitoring dashboard from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null values, or an error state). (3) Share your solution in the Priygop community for feedback.
Common Mistake
A common mistake with model monitoring dashboards is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready ML code.
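For example, a small guard called before log_inference can reject malformed rows before they pollute the drift statistics. A minimal sketch; validate_features is an illustrative helper, not part of the monitor above.

import pandas as pd

def validate_features(features: pd.DataFrame, expected_cols: list) -> None:
    """Raise early on empty, misshapen, or NaN-bearing inference rows (sketch)."""
    if features.empty:
        raise ValueError("features frame is empty")
    missing = [c for c in expected_cols if c not in features.columns]
    if missing:
        raise ValueError(f"missing feature columns: {missing}")
    if features[expected_cols].iloc[0].isna().any():
        raise ValueError("NaN values in feature row")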