MLOps — CI/CD for Machine Learning
MLOps applies DevOps principles to ML: version control for data and models, automated testing, continuous integration/deployment pipelines, and experiment tracking. Without MLOps, ML teams ship slowly, struggle to reproduce experiments, and deploy unreliably.
MLflow and GitHub Actions for ML CI/CD
import mlflow
import mlflow.pytorch
import torch
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# MLFLOW -- experiment tracking, model registry
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# Point the client at the tracking server; every run below is recorded there.
mlflow.set_tracking_uri("http://mlflow-server:5000")  # or use DAGsHub/Weights&Biases
mlflow.set_experiment("sentiment-classifier-v2")

# Everything you want tracked goes inside with mlflow.start_run().
# The context manager guarantees the run is closed (status set) even on error.
with mlflow.start_run(run_name="distilbert-sst2-lr2e5") as run:
    # Log hyperparameters once, up front -- params are immutable per run.
    params = {"model": "distilbert-base-uncased", "lr": 2e-5, "epochs": 3, "batch_size": 32}
    mlflow.log_params(params)

    # Simulate training -- in real training, log after each epoch.
    # step=epoch gives MLflow an x-axis for the metric charts.
    for epoch in range(3):
        train_loss = 0.15 - epoch * 0.03
        val_acc = 0.88 + epoch * 0.025
        mlflow.log_metrics({"train_loss": train_loss, "val_accuracy": val_acc}, step=epoch)

    # Log the model; registered_model_name also creates/updates a registry entry.
    dummy_model = torch.nn.Linear(768, 2)  # replace with real model
    mlflow.pytorch.log_model(
        dummy_model,
        artifact_path="model",
        registered_model_name="sentiment-classifier",
    )

    # Log evaluation artifacts (these files must exist on disk at this point).
    mlflow.log_artifact("confusion_matrix.png")
    mlflow.log_artifact("classification_report.txt")

    # get_artifact_uri needs an active run, so resolve it before the context exits.
    print(f"Run ID: {run.info.run_id}")
    print(f"Model URI: {mlflow.get_artifact_uri('model')}")
# Model Registry workflow
client = mlflow.tracking.MlflowClient()

# Register new version. Capture the returned ModelVersion so we promote the
# version we just created -- a hard-coded version number (e.g. 3) silently
# promotes the wrong model as soon as more versions exist.
mv = client.create_model_version(
    name="sentiment-classifier",
    source=f"runs:/{run.info.run_id}/model",
    run_id=run.info.run_id,
)

# Promote to staging/production after review.
# NOTE(review): stage transitions are deprecated in MLflow >= 2.9 in favor of
# model-version aliases (set_registered_model_alias) -- confirm server version.
client.transition_model_version_stage(
    name="sentiment-classifier",
    version=mv.version,
    stage="Production",  # Staging | Production | Archived
)

# Load the current production model by registry URI (no run ID needed).
production_model = mlflow.pytorch.load_model("models:/sentiment-classifier/Production")
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# CI/CD PIPELINE (GitHub Actions)
# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
# .github/workflows/ml_pipeline.yml (simplified)
# Each job runs on a fresh runner, so each needs its own checkout step,
# and the Docker image must be built under the same fully-qualified tag
# that is pushed (building plain `ai-api:latest` and pushing
# `registry/ai-api:latest` would fail: that tag was never created).
cicd_pipeline = '''
name: ML Training and Deployment Pipeline
on:
  push:
    branches: [main]
    paths: ['src/**', 'configs/**', 'data/**']
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Run unit tests
        run: pytest tests/ -v --cov=src --cov-report=xml
      - name: Run data validation
        run: python scripts/validate_data.py --config configs/data_schema.yaml
  train:
    needs: test
    runs-on: [self-hosted, gpu]  # your GPU runner
    steps:
      - uses: actions/checkout@v4
      - name: Train model
        run: python train.py --config configs/training.yaml
      - name: Register model to MLflow
        run: python scripts/register_model.py
  evaluate:
    needs: train
    runs-on: [self-hosted, gpu]
    steps:
      - uses: actions/checkout@v4
      - name: Evaluate on holdout test set
        run: python evaluate.py --model production-candidate
      - name: Check accuracy threshold
        run: python scripts/check_metrics.py --min-accuracy 0.92
  deploy:
    needs: evaluate
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build and push Docker image
        run: |
          docker build -t registry/ai-api:latest .
          docker push registry/ai-api:latest
      - name: Deploy to Kubernetes
        run: kubectl set image deployment/ai-api ai-api=registry/ai-api:latest
      - name: Verify rollout
        run: kubectl rollout status deployment/ai-api --timeout=5m
'''
# Print a short checklist of MLOps best practices.
print("MLOps best practices:")
mlops_practices = [
    "Version EVERYTHING: code, data, model, config -- reproducibility is critical",
    "Automated testing: unit tests for preprocessing, integration tests for API",
    "Shadow deployment: run new model in parallel, compare outputs before switching",
    "Feature stores: centralize feature computation (Feast, Tecton, Hopsworks)",
    "Model cards: document model capabilities, limitations, and fairness evaluations",
]
# enumerate(..., 1) gives human-friendly 1-based numbering.
for i, practice in enumerate(mlops_practices, 1):
    print(f" {i}. {practice}")
Tip
Practice MLOps CI/CD for machine learning in small, isolated examples before integrating it into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Deep Learning ⊂ Machine Learning ⊂ Artificial Intelligence
Practice Task
Note
Practice Task — (1) Write a working example of MLOps CI/CD for machine learning from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Quick Quiz
Common Mistake
Warning
A common mistake with MLOps CI/CD for machine learning is skipping edge-case testing — empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready AI code.