Model Interpretability — SHAP and LIME
AI systems must be explainable, especially in high-stakes domains such as medicine, law, and finance. SHAP (SHapley Additive exPlanations) provides mathematically rigorous feature contributions grounded in cooperative game theory. LIME (Local Interpretable Model-agnostic Explanations) fits a simple linear surrogate around a single prediction to approximate the model locally. Gradient-based saliency maps show which parts of the input most influenced a neural network's prediction.
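Concretely, SHAP estimates the Shapley value of feature i: its marginal contribution averaged over all subsets S of the remaining features N \ {i}, where v(S) denotes the model's expected output when only the features in S are known:

\[
\phi_i = \sum_{S \subseteq N \setminus \{i\}} \frac{|S|!\,(|N| - |S| - 1)!}{|N|!}\,\bigl(v(S \cup \{i\}) - v(S)\bigr)
\]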
SHAP and LIME Explanations for ML Models
import shap
import lime
import lime.lime_tabular
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# DATASET: Breast cancer classification (high-stakes!)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target # 0=malignant, 1=benign
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = GradientBoostingClassifier(n_estimators=200, max_depth=3, learning_rate=0.1)
model.fit(X_train, y_train)
print(f"Test accuracy: {model.score(X_test, y_test):.2%}")
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# SHAP -- global AND local explanations
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
explainer = shap.TreeExplainer(model) # TreeExplainer: fast exact SHAP for tree models
shap_values = explainer.shap_values(X_test)
# shap_values: [n_samples, n_features]
# shap_values[i, j]: feature j's contribution to sample i's prediction
# Positive value: pushed prediction toward class 1 (benign)
# Negative value: pushed prediction toward class 0 (malignant)
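# Sanity check (additivity property): the base value plus a row's SHAP values should
# reconstruct the model's raw log-odds output for that row. A minimal sketch, assuming
# TreeExplainer's default raw (log-odds) output for this sklearn model; expit is the sigmoid.
from scipy.special import expit
base_value = np.ravel(explainer.expected_value)[0]
reconstructed_proba = expit(base_value + shap_values.sum(axis=1))
print("Max gap vs. predict_proba:",
      np.abs(reconstructed_proba - model.predict_proba(X_test)[:, 1]).max())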
# GLOBAL: Feature importance (mean absolute SHAP across all samples)
global_importance = pd.DataFrame({
"feature": X.columns,
"mean_abs_shap": np.abs(shap_values).mean(axis=0),
}).sort_values("mean_abs_shap", ascending=False)
print("\nTop 5 Most Important Features (SHAP):")
for _, row in global_importance.head(5).iterrows():
print(f" {row['feature']:30s}: {row['mean_abs_shap']:.4f}")
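# Optional global visualization (assumes a matplotlib-capable environment such as a notebook):
# shap.summary_plot(shap_values, X_test)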
# LOCAL: Explain a single patient's prediction
patient_idx = 5
patient = X_test.iloc[[patient_idx]]
patient_shap = shap_values[patient_idx]
patient_pred = model.predict_proba(patient)[0]  # [P(malignant), P(benign)]
print(f"\nPatient {patient_idx} prediction: {patient_pred[0]:.1%} probability malignant")
print("Feature contributions for this patient:")
feature_contributions = list(zip(X.columns, patient_shap))
feature_contributions.sort(key=lambda x: abs(x[1]), reverse=True)
for feature, shap_val in feature_contributions[:5]:
    direction = "towards benign" if shap_val > 0 else "towards malignant"
print(f" {feature:30s}: {shap_val:+.4f} ({direction})")
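# Optional single-prediction visualization (sketch; assumes a notebook / matplotlib backend):
# shap.force_plot(explainer.expected_value, patient_shap, patient)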
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# LIME -- local linear approximation
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
lime_explainer = lime.lime_tabular.LimeTabularExplainer(
training_data=X_train.values,
feature_names=data.feature_names.tolist(),
class_names=data.target_names.tolist(),
mode="classification",
)
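# Note: by default LimeTabularExplainer discretizes continuous features (discretize_continuous=True),
# so explanations are expressed as bins such as "worst radius > 16.80" rather than raw values.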
lime_exp = lime_explainer.explain_instance(
data_row=X_test.iloc[patient_idx].values,
predict_fn=model.predict_proba,
num_features=6, # top 6 features in local explanation
num_samples=1000, # perturb sample 1000 times to build local model
)
print("\nLIME Local Explanation:")
for feat, weight in lime_exp.as_list(label=1):  # label=1: explanation for class 1 (benign)
    direction = "-> benign" if weight > 0 else "-> malignant"
print(f" {feat:40s}: {weight:+.4f} ({direction})")
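# Optional: save this LIME explanation as a standalone HTML report for later review
# (the output file name below is illustrative):
# lime_exp.save_to_file("lime_patient_explanation.html")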
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# SHAP FOR TRANSFORMERS (text)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
from transformers import pipeline
classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")
text_explainer = shap.Explainer(classifier)
shap_text_values = text_explainer(["The movie was surprisingly bad, I expected so much more."])
# shap_text_values shows each word's contribution to the sentiment prediction
# "bad" -> strongly negative contribution
# "surprisingly" -> slightly positive (reduces negativity)
# "more" -> neutral
print("\nText SHAP values are available for visualization with shap.plots.text(shap_text_values)")
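The intro above also mentions gradient-based saliency maps; the sketch below is a minimal, illustrative example (it assumes PyTorch is installed and uses a toy untrained network, not the breast-cancer model): backpropagate the predicted class score to the input and read off the absolute input gradients.
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# GRADIENT-BASED SALIENCY (sketch) -- which inputs most influenced a neural net's prediction
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
import torch
import torch.nn as nn
net = nn.Sequential(nn.Linear(30, 16), nn.ReLU(), nn.Linear(16, 2))  # toy untrained classifier
x = torch.randn(1, 30, requires_grad=True)  # one sample; track gradients w.r.t. the input
logits = net(x)
target_class = logits.argmax(dim=1).item()
logits[0, target_class].backward()  # backprop the chosen class score down to the input
saliency = x.grad.abs().squeeze()   # |d(score)/d(input_j)| = per-feature saliency
print("Top 5 most influential input dimensions:", torch.topk(saliency, k=5).indices.tolist())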
Tip
Practice SHAP and LIME in small, isolated examples before integrating them into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Practice Task
(1) Write a working SHAP and LIME example from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Common Mistake
A common mistake with SHAP and LIME is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.