Batch Inference & Scheduled Scoring
Not all ML predictions need to be real-time. Batch inference (scoring an entire dataset in one pass) is typically orders of magnitude more efficient than one-at-a-time API calls, making it a natural fit for nightly churn scoring, monthly credit reviews, and weekly recommendation updates. Batch scoring leverages vectorized NumPy operations and, where needed, parallelism across chunks, making it drastically faster than row-by-row inference.
Efficient Batch Inference with Chunking and Parallelism
```python
import time
from concurrent.futures import ProcessPoolExecutor, as_completed

import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

np.random.seed(42)

# SETUP: TRAIN AND SAVE A MODEL
def build_model():
    X = pd.DataFrame({
        "age": np.random.normal(38, 12, 2000).clip(18, 75),
        "income": np.random.exponential(55000, 2000).clip(15000, 200000),
        "credit": np.random.normal(680, 80, 2000).clip(300, 850),
        "loan": np.random.exponential(18000, 2000).clip(1000, 80000),
    })
    y = np.random.choice([0, 1], 2000, p=[0.83, 0.17])
    pipe = Pipeline([
        ("sc", StandardScaler()),
        ("m", GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ])
    pipe.fit(X, y)
    joblib.dump(pipe, "batch_model.joblib")

build_model()
model = joblib.load("batch_model.joblib")

# LARGE BATCH: 1M records to score
N_MILLION = 1_000_000
df_all = pd.DataFrame({
    "customer_id": range(N_MILLION),
    "age": np.random.normal(38, 12, N_MILLION).clip(18, 75),
    "income": np.random.exponential(55000, N_MILLION).clip(15000, 200000),
    "credit": np.random.normal(680, 80, N_MILLION).clip(300, 850),
    "loan": np.random.exponential(18000, N_MILLION).clip(1000, 80000),
})
feature_cols = ["age", "income", "credit", "loan"]

# APPROACH 1: NAIVE LOOP (extremely slow -- never do this)
print("Approach 1: Naive loop (showing why it's slow)...")
sample_100 = df_all.head(100)
t0 = time.time()
loop_probs = [
    model.predict_proba(sample_100.iloc[[i]][feature_cols])[0, 1]
    for i in range(len(sample_100))
]
t_loop = time.time() - t0
# 1M rows is 10,000x the 100-row sample, so scale the measured time accordingly
print(f"  100 rows via loop: {t_loop*1000:.0f}ms -> extrapolated 1M: {t_loop*10000:.0f}s (HOURS!)")

# APPROACH 2: FULL BATCH (vectorized)
t0 = time.time()
all_probs = model.predict_proba(df_all[feature_cols])[:, 1]
t_batch = time.time() - t0
throughput = N_MILLION / t_batch
print(f"\nApproach 2: Full batch vectorized: {t_batch:.1f}s | {throughput:,.0f} rows/sec")

# APPROACH 3: CHUNKED BATCH (memory efficient for very large datasets)
CHUNK_SIZE = 100_000

def score_chunk(chunk_df: pd.DataFrame) -> pd.Series:
    """Score one chunk -- callable by parallel workers."""
    model = joblib.load("batch_model.joblib")  # each worker loads its own model
    probs = model.predict_proba(chunk_df[feature_cols])[:, 1]
    return pd.Series(probs, index=chunk_df.index)

t0 = time.time()
results = pd.Series(dtype=float, index=df_all.index)
for chunk_start in range(0, len(df_all), CHUNK_SIZE):
    chunk = df_all.iloc[chunk_start:chunk_start + CHUNK_SIZE]
    results.iloc[chunk_start:chunk_start + CHUNK_SIZE] = model.predict_proba(chunk[feature_cols])[:, 1]
t_chunked = time.time() - t0
print(f"Approach 3: Chunked ({CHUNK_SIZE} rows): {t_chunked:.1f}s")

# To spread CPU-bound chunks across cores, dispatch score_chunk to worker processes:
# with ProcessPoolExecutor(max_workers=4) as ex:
#     futures = [ex.submit(score_chunk, df_all.iloc[s:s + CHUNK_SIZE])
#                for s in range(0, len(df_all), CHUNK_SIZE)]
#     for fut in as_completed(futures):
#         results.update(fut.result())

# OUTPUT: SCORED DATASET
df_all["default_probability"] = all_probs
df_all["risk_tier"] = pd.cut(all_probs, bins=[0, 0.25, 0.6, 1.0],
                             labels=["LOW", "MEDIUM", "HIGH"], include_lowest=True)
print(f"\nScored {N_MILLION:,} customers. Risk distribution:")
print(df_all["risk_tier"].value_counts().to_string())
print(f"Average default probability: {all_probs.mean():.1%}")

# BATCH OUTPUT: SAVE TO CSV OR DB
high_risk = df_all[df_all["risk_tier"] == "HIGH"][["customer_id", "default_probability"]]
print(f"\nHigh risk customers: {len(high_risk):,} -- send to collections team")
# In production, write to a database:
# high_risk.to_sql("risk_scores", engine, if_exists="replace", index=False, chunksize=10000)
# Or to Parquet for cost efficiency:
# df_all[["customer_id", "default_probability", "risk_tier"]].to_parquet("scores_2024_01_01.parquet")
```
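The script runs once and exits; the "scheduled" half of scheduled scoring usually just means invoking it on a timer (cron, Airflow, or similar). A minimal standard-library sketch of the idempotency pattern around that (the script path, output directory, and function names here are assumptions, not part of the code above): write a dated output file, and skip the run if today's file already exists.

```python
import datetime
import pathlib

def output_path_for(run_date: datetime.date, out_dir: str = "scores") -> pathlib.Path:
    """Dated, deterministic output path: rerunning the same day overwrites safely."""
    return pathlib.Path(out_dir) / f"scores_{run_date.isoformat()}.parquet"

def already_scored(run_date: datetime.date) -> bool:
    """Skip work if tonight's scores already exist (makes the cron job idempotent)."""
    return output_path_for(run_date).exists()

# A cron entry on the scheduler host would invoke the script nightly at 02:00, e.g.:
#   0 2 * * * /usr/bin/python3 /opt/jobs/score_batch.py >> /var/log/score_batch.log 2>&1

print(output_path_for(datetime.date(2024, 1, 1)))  # scores/scores_2024-01-01.parquet
```

The dated filename doubles as an audit trail: each night's scores are preserved, and a failed run can be retried without clobbering previous outputs.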
Tip
Practice batch inference & scheduled scoring in small, isolated examples before integrating them into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Practice Task
(1) Write a working example of batch inference & scheduled scoring from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, a null value, or an error state). (3) Share your solution in the Priygop community for feedback.
Common Mistake
A common mistake in batch inference pipelines is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready ML code.
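One way to guard a batch job against those edge cases is a small validation gate that runs before scoring. A sketch under stated assumptions (the function name and the required-column list mirror the feature columns used above, but are otherwise illustrative):

```python
import pandas as pd

REQUIRED_COLS = ["age", "income", "credit", "loan"]

def validate_batch(df: pd.DataFrame) -> pd.DataFrame:
    """Fail fast on structural problems; drop (and count) unscorable rows."""
    if df.empty:
        raise ValueError("Batch is empty -- nothing to score")
    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")
    clean = df.dropna(subset=REQUIRED_COLS)
    dropped = len(df) - len(clean)
    if dropped:
        print(f"Dropped {dropped} rows with null features")
    return clean

batch = pd.DataFrame({"age": [30, None], "income": [50000, 60000],
                      "credit": [700, 650], "loan": [10000, 12000]})
print(len(validate_batch(batch)))  # 1 -- the row with a null age was dropped
```

Raising on structural problems (empty batch, missing columns) while merely logging and dropping bad rows is a judgment call: a wholly malformed batch usually signals an upstream failure worth halting on, whereas a few null rows are routine.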