MLOps — CI/CD for Machine Learning
MLOps applies DevOps principles to ML: version control for data and models, automated testing, continuous integration/deployment pipelines, and experiment tracking. Without MLOps, ML teams ship slowly, struggle to reproduce experiments, and deploy unreliably.
MLflow and GitHub Actions for ML CI/CD
import mlflow
import mlflow.pytorch
import torch
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MLFLOW -- experiment tracking, model registry
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Point the client at the tracking server; every run below is recorded there.
mlflow.set_tracking_uri("http://mlflow-server:5000")  # or use DAGsHub/Weights&Biases
mlflow.set_experiment("sentiment-classifier-v2")

# Everything you want tracked goes inside with mlflow.start_run().
# The context manager guarantees the run is closed (status set) even on error.
with mlflow.start_run(run_name="distilbert-sst2-lr2e5") as run:
    # Log hyperparameters once, up front -- params are immutable per run.
    params = {"model": "distilbert-base-uncased", "lr": 2e-5, "epochs": 3, "batch_size": 32}
    mlflow.log_params(params)

    # Simulate training -- in real training, log after each epoch.
    # step=epoch gives MLflow an x-axis for the metric charts.
    for epoch in range(3):
        train_loss = 0.15 - epoch * 0.03
        val_acc = 0.88 + epoch * 0.025
        mlflow.log_metrics({"train_loss": train_loss, "val_accuracy": val_acc}, step=epoch)

    # Log the model; registered_model_name also creates/updates a registry entry.
    dummy_model = torch.nn.Linear(768, 2)  # replace with real model
    mlflow.pytorch.log_model(
        dummy_model,
        artifact_path="model",
        registered_model_name="sentiment-classifier",
    )

    # Log evaluation artifacts (these files must exist on disk at this point).
    mlflow.log_artifact("confusion_matrix.png")
    mlflow.log_artifact("classification_report.txt")

    # get_artifact_uri needs an active run, so resolve it before the context exits.
    print(f"Run ID: {run.info.run_id}")
    print(f"Model URI: {mlflow.get_artifact_uri('model')}")
# Model Registry workflow
client = mlflow.tracking.MlflowClient()

# Register new version. Capture the returned ModelVersion so we promote the
# version we just created -- a hard-coded version number (e.g. 3) silently
# promotes the wrong model as soon as more versions exist.
mv = client.create_model_version(
    name="sentiment-classifier",
    source=f"runs:/{run.info.run_id}/model",
    run_id=run.info.run_id,
)

# Promote to staging/production after review.
# NOTE(review): stage transitions are deprecated in MLflow >= 2.9 in favor of
# model-version aliases (set_registered_model_alias) -- confirm server version.
client.transition_model_version_stage(
    name="sentiment-classifier",
    version=mv.version,
    stage="Production",  # Staging | Production | Archived
)

# Load the current production model by registry URI (no run ID needed).
production_model = mlflow.pytorch.load_model("models:/sentiment-classifier/Production")
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# CI/CD PIPELINE (GitHub Actions)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# .github/workflows/ml_pipeline.yml (simplified)
# Each job runs on a fresh runner, so each needs its own checkout step,
# and the Docker image must be built under the same fully-qualified tag
# that is pushed (building plain `ai-api:latest` and pushing
# `registry/ai-api:latest` would fail: that tag was never created).
cicd_pipeline = '''
name: ML Training and Deployment Pipeline
on:
  push:
    branches: [main]
    paths: ['src/**', 'configs/**', 'data/**']
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run unit tests
        run: pytest tests/ -v --cov=src --cov-report=xml
      - name: Run data validation
        run: python scripts/validate_data.py --config configs/data_schema.yaml
  train:
    needs: test
    runs-on: [self-hosted, gpu]  # your GPU runner
    steps:
      - uses: actions/checkout@v4
      - name: Train model
        run: python train.py --config configs/training.yaml
      - name: Register model to MLflow
        run: python scripts/register_model.py
  evaluate:
    needs: train
    runs-on: [self-hosted, gpu]
    steps:
      - uses: actions/checkout@v4
      - name: Evaluate on holdout test set
        run: python evaluate.py --model production-candidate
      - name: Check accuracy threshold
        run: python scripts/check_metrics.py --min-accuracy 0.92
  deploy:
    needs: evaluate
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build and push Docker image
        run: |
          docker build -t registry/ai-api:latest .
          docker push registry/ai-api:latest
      - name: Deploy to Kubernetes
        run: kubectl set image deployment/ai-api ai-api=registry/ai-api:latest
      - name: Verify rollout
        run: kubectl rollout status deployment/ai-api --timeout=5m
'''
# Print a short checklist of MLOps best practices.
print("MLOps best practices:")
mlops_practices = [
    "Version EVERYTHING: code, data, model, config -- reproducibility is critical",
    "Automated testing: unit tests for preprocessing, integration tests for API",
    "Shadow deployment: run new model in parallel, compare outputs before switching",
    "Feature stores: centralize feature computation (Feast, Tecton, Hopsworks)",
    "Model cards: document model capabilities, limitations, and fairness evaluations",
]
# enumerate(..., 1) gives human-friendly 1-based numbering.
for i, practice in enumerate(mlops_practices, 1):
    print(f" {i}. {practice}")
Tip
Practice MLOps CI/CD for machine learning in small, isolated examples before integrating it into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Deep Learning ⊂ Machine Learning ⊂ Artificial Intelligence
Practice Task
Note
Practice Task — (1) Write a working example of MLOps CI/CD for machine learning from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with MLOps CI/CD for machine learning is skipping edge-case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.