AI Bias — Detection, Measurement, and Mitigation
AI bias is not a bug to fix — it's a systematic property of models trained on biased data, with biased labels, for biased objectives. Understanding WHEN bias is harmful (hiring, lending, medical), how to MEASURE it (disparate impact, equalized odds, calibration), and how to MITIGATE it (reweighting, fairness constraints, post-processing) is essential for responsible AI engineering.
Measuring and Mitigating Model Bias
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# TYPES OF BIAS in AI systems
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
bias_types = {
"Historical bias": "Training data reflects past discrimination (e.g., hiring data biased toward men)",
"Representation bias": "Underrepresentation of minorities in training data -> poor performance on them",
"Measurement bias": "Proxy variables introduce proxy discrimination (zip code as proxy for race)",
"Deployment bias": "Model used in context different from training (e.g., tool trained on adults used on children)",
"Automation bias": "Humans over-rely on AI predictions, amplifying model errors for protected groups",
"Feedback loops": "Biased predictions -> biased data -> retrained biased model (amplifying bias over time)",
}
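# A minimal sketch (an illustrative toy, not from the original text) of the
# "Feedback loops" entry above: a lending model only observes outcomes for the
# applicants it approves, so a small initial bias against one group hardens as
# the model is effectively retrained on its own filtered output.
rng = np.random.default_rng(0)
group = rng.integers(0, 2, 1000)        # 0 = majority, 1 = minority
merit = rng.normal(0, 1, 1000)          # true qualification, identical distribution for both groups
approval_bias = -0.3                    # assumed initial penalty applied to the minority group
for round_idx in range(3):
    approved = (merit + approval_bias * group) > 0
    rate_maj = approved[group == 0].mean()
    rate_min = approved[group == 1].mean()
    print(f"round {round_idx}: minority approval rate = {rate_min:.2f}")
    # Crude stand-in for retraining on approved-only data: the observed gap
    # feeds back into an even larger penalty in the next round.
    approval_bias -= 0.5 * (rate_maj - rate_min)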
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MEASURING FAIRNESS -- multiple metrics (choose based on context!)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
np.random.seed(42)
n = 2000
protected_attr = np.random.choice([0, 1], n, p=[0.6, 0.4]) # 0=majority, 1=minority
features = np.random.randn(n, 5)
# True labels: the minority group actually has a slightly higher positive rate
true_labels = ((features[:, 0] + features[:, 1] + protected_attr * 0.5) > 0).astype(int)
# Simulate biased predictions: the model flips the sign on the protected attribute and
# raises the threshold, so it systematically misses positives in the minority group
predictions = ((features[:, 0] + features[:, 1] + protected_attr * -0.5) > 0.1).astype(int)
def fairness_metrics(y_true: np.ndarray, y_pred: np.ndarray, protected: np.ndarray) -> dict:
    '''Compute multiple fairness metrics for a binary classifier.'''
    majority_mask = protected == 0
    minority_mask = protected == 1
    # Accuracy per group
    acc_maj = accuracy_score(y_true[majority_mask], y_pred[majority_mask])
    acc_min = accuracy_score(y_true[minority_mask], y_pred[minority_mask])
    # Positive prediction rates per group (for Demographic Parity)
    ppr_maj = y_pred[majority_mask].mean()
    ppr_min = y_pred[minority_mask].mean()
    # True Positive Rates per group (Equal Opportunity)
    tp_maj = ((y_true == 1) & (y_pred == 1) & majority_mask).sum()
    fn_maj = ((y_true == 1) & (y_pred == 0) & majority_mask).sum()
    tpr_maj = tp_maj / (tp_maj + fn_maj + 1e-10)
    tp_min = ((y_true == 1) & (y_pred == 1) & minority_mask).sum()
    fn_min = ((y_true == 1) & (y_pred == 0) & minority_mask).sum()
    tpr_min = tp_min / (tp_min + fn_min + 1e-10)
    # Disparate Impact ratio: minority positive rate / majority positive rate
    # Rule of thumb: a ratio of at least 0.8 (the EEOC "four-fifths rule")
    disparate_impact = ppr_min / (ppr_maj + 1e-10)
    return {
        "Accuracy (majority)": round(acc_maj, 3),
        "Accuracy (minority)": round(acc_min, 3),
        "Accuracy gap": round(acc_maj - acc_min, 3),
        "Positive pred. rate (majority)": round(ppr_maj, 3),
        "Positive pred. rate (minority)": round(ppr_min, 3),
        "Disparate Impact": round(disparate_impact, 3),  # < 0.8 signals potential adverse impact
        "TPR (majority)": round(tpr_maj, 3),
        "TPR (minority)": round(tpr_min, 3),
        "Equal Opportunity gap": round(tpr_maj - tpr_min, 3),
    }
metrics_biased = fairness_metrics(true_labels, predictions, protected_attr)
print("\nFairness Metrics (biased model):")
for metric, value in metrics_biased.items():
flag = " <-- FAIR" if "gap" not in metric.lower() and 0.8 <= value <= 1.25 else ""
flag = " <-- BIASED!" if metric == "Disparate Impact" and value < 0.8 else flag
print(f" {metric:35s}: {value}{flag}")
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MITIGATION -- reweighting (pre-processing)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Up-weight minority samples in the training loss so the smaller group carries
# comparable influence; the flat 2x factor below is a simple heuristic
# (principled reweighing schemes derive weights from group and label frequencies)
sample_weights = np.ones(n)
sample_weights[protected_attr == 1] = 2.0
sample_weights = sample_weights / sample_weights.mean() # normalize
model = LogisticRegression()
model.fit(features, true_labels, sample_weight=sample_weights)
predictions_fair = model.predict(features)
metrics_fair = fairness_metrics(true_labels, predictions_fair, protected_attr)
print("\nFairness Metrics (re-weighted model):")
for metric, value in metrics_fair.items():
print(f" {metric:35s}: {value}")Tip
Tip
Practice bias detection, measurement, and mitigation in small, isolated examples before integrating them into larger projects. Breaking the concepts into focused experiments builds genuine understanding faster than reading alone.
Practice Task
(1) Write a working bias-measurement example from scratch without looking at notes. (2) Modify it to handle an edge case (an empty input, a null value, or an error state). (3) Share your solution in the Priygop community for feedback.
Common Mistake
A common mistake with bias detection, measurement, and mitigation is skipping edge-case testing: empty inputs, null values, and unexpected data types. For example, a protected group with zero positive labels breaks the TPR calculation unless it is guarded (here, the small epsilon term). Always validate boundary conditions to write robust, production-ready AI code.