Privacy-Preserving AI — Differential Privacy & Federated Learning
AI models memorize training data and can leak private information: membership inference attacks can reveal whether a specific person's record was in the training set, and stronger attacks can even reconstruct training examples from model outputs. Differential Privacy provides mathematical privacy guarantees; Federated Learning trains on distributed data without centralizing it.
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# DIFFERENTIAL PRIVACY — mathematical privacy guarantee
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# PRIVACY ATTACKS ON ML MODELS:
privacy_attacks = {
    "Membership inference": "Given a model, determine if a specific sample was in the training data",
    "Model inversion": "Reconstruct training data from model predictions (e.g., faces from a face recognition model)",
    "Data extraction": "Prompt engineering to extract memorized training text from LLMs",
    "Attribute inference": "Infer sensitive attributes (health status, income) from model predictions",
}
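# A minimal sketch of the first attack (a loss-threshold membership inference
# test) -- names and the threshold value are illustrative; real attacks
# calibrate the threshold with shadow models:
import torch
import torch.nn as nn

def is_likely_member(model, x, y, threshold=0.5):
    # Models tend to have lower loss on samples they were trained on.
    model.eval()
    with torch.no_grad():
        loss = nn.CrossEntropyLoss()(model(x.unsqueeze(0)), y.unsqueeze(0)).item()
    return loss < threshold  # low loss -> likely in the training set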
# DIFFERENTIAL PRIVACY: Adding calibrated noise during training
# Formal guarantee ((epsilon, delta)-DP): presence/absence of any single
# person's data changes the probability of any model output by at most a
# factor of e^epsilon (up to a small failure probability delta, e.g. 1e-5)
# Epsilon (privacy budget):
# epsilon < 1: very high privacy, significant utility loss
# epsilon = 1-10: reasonable privacy, small utility loss
# epsilon > 10: mostly theoretical, weak practical guarantee
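# To make epsilon concrete, here is the classic Laplace mechanism on a counting
# query -- a sketch of basic DP, not the mechanism DP-SGD uses; dp_count is a
# hypothetical helper:
import numpy as np

def dp_count(records, epsilon=1.0):
    # A count has sensitivity 1: one person changes it by at most 1, so
    # Laplace noise with scale sensitivity/epsilon gives epsilon-DP.
    return len(records) + np.random.laplace(loc=0.0, scale=1.0 / epsilon)

print(dp_count(range(100), epsilon=0.1))   # very noisy, very private
print(dp_count(range(100), epsilon=10.0))  # close to the true count of 100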
# DP-SGD (Differentially Private Stochastic Gradient Descent):
# 1. Compute per-sample gradients (instead of batch average)
# 2. Clip each sample's gradient to bound sensitivity
# 3. Add Gaussian noise scaled to clipping + privacy budget
# 4. Take the average noisy gradient as the update
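# The four steps above, hand-rolled for a single batch (a sketch for intuition
# only; Opacus below does this efficiently and also tracks the privacy budget):
import torch

def dp_sgd_step(model, loss_fn, X_batch, y_batch,
                max_grad_norm=1.0, noise_multiplier=1.1, lr=1e-3):
    per_sample_grads = []
    for x, y in zip(X_batch, y_batch):                    # 1. per-sample gradients
        model.zero_grad()
        loss_fn(model(x.unsqueeze(0)), y.unsqueeze(0)).backward()
        grads = [p.grad.detach().clone() for p in model.parameters()]
        norm = torch.sqrt(sum(g.norm() ** 2 for g in grads))
        scale = min(1.0, max_grad_norm / (norm + 1e-6))   # 2. clip to bound sensitivity
        per_sample_grads.append([g * scale for g in grads])
    with torch.no_grad():
        for i, p in enumerate(model.parameters()):
            summed = sum(g[i] for g in per_sample_grads)
            noise = torch.normal(0.0, noise_multiplier * max_grad_norm, p.shape)  # 3. noise
            p -= lr * (summed + noise) / len(X_batch)     # 4. average noisy gradient step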
# pip install opacus (PyTorch DP library from Meta)
from opacus import PrivacyEngine
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
model = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 2))
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# Synthetic dataset
X = torch.randn(1000, 20)
y = torch.randint(0, 2, (1000,))
loader = DataLoader(TensorDataset(X, y), batch_size=64, shuffle=True)
privacy_engine = PrivacyEngine()
model_dp, optimizer_dp, loader_dp = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=loader,
    noise_multiplier=1.1,  # how much noise to add (larger = more private, worse accuracy)
    max_grad_norm=1.0,     # per-sample gradient clipping bound (sensitivity)
)
criterion = nn.CrossEntropyLoss()
for epoch in range(5):
    model_dp.train()
    for X_batch, y_batch in loader_dp:
        optimizer_dp.zero_grad()
        criterion(model_dp(X_batch), y_batch).backward()
        optimizer_dp.step()
    epsilon = privacy_engine.get_epsilon(delta=1e-5)  # privacy spent so far
    print(f"Epoch {epoch+1}: epsilon = {epsilon:.2f}")
# Lower epsilon = stronger privacy = slightly lower accuracy
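# ALTERNATIVE (a sketch, assuming Opacus >= 1.0): fix the privacy budget up
# front and let Opacus solve for the noise level -- replaces make_private above:
model2 = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 2))
optimizer2 = torch.optim.Adam(model2.parameters(), lr=1e-3)
model_dp2, optimizer_dp2, loader_dp2 = PrivacyEngine().make_private_with_epsilon(
    module=model2,
    optimizer=optimizer2,
    data_loader=loader,
    epochs=5,              # must be known in advance for budget accounting
    target_epsilon=3.0,    # noise calibrated so epsilon ~= 3.0 after 5 epochs
    target_delta=1e-5,
    max_grad_norm=1.0,
)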
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# FEDERATED LEARNING -- train without centralizing data
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# pip install flwr (Flower -- federated learning framework)
import flwr as fl
import torch
import torch.nn as nn  # used in fit/evaluate below
from collections import OrderedDict
class FederatedClient(fl.client.NumPyClient):
    '''Each hospital/phone/organization is one client.'''

    def __init__(self, model, local_train_data, local_val_data):
        self.model = model
        self.train_data = local_train_data
        self.val_data = local_val_data

    def get_parameters(self, config):
        '''Return current model weights to the server.'''
        return [val.cpu().numpy() for _, val in self.model.state_dict().items()]

    def set_parameters(self, parameters):
        '''Update the model with aggregated weights from the server.'''
        params_dict = zip(self.model.state_dict().keys(), parameters)
        state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
        self.model.load_state_dict(state_dict, strict=True)

    def fit(self, parameters, config):
        '''Train locally for a few epochs; NEVER share raw data.'''
        self.set_parameters(parameters)  # receive global model
        self.model.train()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        for _ in range(config.get("local_epochs", 3)):
            for X, y in self.train_data:
                optimizer.zero_grad()
                nn.CrossEntropyLoss()(self.model(X), y).backward()
                optimizer.step()
        # Return: updated weights (not data!) + number of samples trained on
        return self.get_parameters(config={}), len(self.train_data.dataset), {}

    def evaluate(self, parameters, config):
        '''Evaluate the received global model on local validation data.'''
        self.set_parameters(parameters)
        self.model.eval()
        loss = accuracy = 0.0
        with torch.no_grad():
            for X, y in self.val_data:
                out = self.model(X)
                loss += nn.CrossEntropyLoss()(out, y).item()
                accuracy += (out.argmax(1) == y).float().mean().item()
        n = len(self.val_data)  # number of batches
        return loss / n, len(self.val_data.dataset), {"accuracy": accuracy / n}
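# Launching a federation (a sketch; the exact entry points vary across Flower
# versions -- this follows the Flower 1.x NumPyClient API, and the addresses
# are illustrative):
#
# On each participant, in its own process/machine:
#   fl.client.start_numpy_client(
#       server_address="127.0.0.1:8080",
#       client=FederatedClient(model, train_loader, val_loader),
#   )
#
# On the coordinating server:
#   fl.server.start_server(
#       server_address="0.0.0.0:8080",
#       config=fl.server.ServerConfig(num_rounds=10),
#       strategy=fl.server.strategy.FedAvg(),
#   )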
# FedAvg: server aggregates client weights by weighted average
# 1. Server sends current global model to all clients
# 2. Each client trains locally for 3 epochs on their private data
# 3. Clients send ONLY updated weights back (data never leaves)
# 4. Server computes weighted average of all clients' weights (sketched below)
# 5. Updated global model sent to next round of clients
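# Step 4 above as a minimal sketch (illustrative): the server averages client
# state dicts, weighted by how many samples each client trained on.
def fedavg(client_state_dicts, client_sizes):
    total = sum(client_sizes)
    avg = OrderedDict()
    for key in client_state_dicts[0]:
        avg[key] = sum(
            sd[key] * (n / total) for sd, n in zip(client_state_dicts, client_sizes)
        )
    return avg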
# Real-world use cases:
federated_use_cases = {
    "Keyboard autocomplete": "Google Gboard -- learns your typing patterns on-device",
    "Medical AI": "Hospitals train on patient data without sharing records across institutions",
    "Fraud detection": "Banks collaborate on fraud models without sharing customer transactions",
    "Autonomous vehicles": "Cars share driving patterns without uploading personally identifiable routes",
}
for case, example in federated_use_cases.items():
    print(f" {case:30s}: {example}")
Tip
Practice differential privacy and federated learning in small, isolated examples before integrating them into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Practice Task
(1) Write a working DP-SGD or federated learning example from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Common Mistake
A common mistake with privacy-preserving AI is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready code.