Learn AI and Machine Learning fundamentals including types of ML, AI applications, and machine learning workflow.
Learn AI and Machine Learning fundamentals including types of ML, AI applications, and machine learning workflow.
Understand the definition, types, and comparison of AI with human intelligence.
Content by: Nirav Khanpara
AI/ML Engineer
Artificial Intelligence (AI) is the simulation of human intelligence in machines that are programmed to think and learn like humans. The term can also be applied to any machine that exhibits traits associated with a human mind such as learning and problem-solving.
Test your understanding of this topic:
Explore the different types of machine learning: supervised, unsupervised, semi-supervised, and reinforcement learning.
Content by: Nirav Khanpara
AI/ML Engineer
Supervised learning: learning from labeled training data to predict outcomes for new data. It covers classification (e.g., spam vs. non-spam emails) and regression (e.g., house prices, temperature). Example algorithms: Linear Regression, Logistic Regression, Decision Trees, Random Forest.
Unsupervised learning: finding hidden patterns in data without labeled responses. It includes clustering (grouping similar data points) and dimensionality reduction (reducing the number of features). Examples: K-Means Clustering, Principal Component Analysis (PCA).
Semi-supervised learning: combines a small amount of labeled data with a large amount of unlabeled data for training, which is cost-effective when labeling data is expensive. Examples: Self-training, Co-training.
Reinforcement learning: learning through interaction with an environment. Its components are the agent (the learning system), the environment (the world the agent interacts with), and rewards (feedback that guides learning). Examples: game playing, robotics, autonomous vehicles.
Test your understanding of this topic:
Discover how AI is applied in various industries like healthcare, finance, transportation, and entertainment.
Content by: Nirav Khanpara
AI/ML Engineer
Test your understanding of this topic:
Learn the steps involved in a typical machine learning workflow, from problem definition to model deployment.
Content by: Nirav Khanpara
AI/ML Engineer
Test your understanding of this topic:
Get hands-on experience with practical AI implementations including basic ML models and data analysis.
Content by: Nirav Khanpara
AI/ML Engineer
# Example: Complete Machine Learning Workflow Implementation
# Implementing a simple ML project from start to finish
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')
class MLWorkflowExample:
    """End-to-end linear-regression demo on synthetic housing data.

    The public methods are meant to be called in workflow order:
    ``generate_sample_data`` -> ``explore_data`` -> ``prepare_data`` ->
    ``train_model`` -> ``evaluate_model`` -> ``make_predictions``.
    ``run_complete_workflow`` chains all of them.
    """

    def __init__(self):
        self.data = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        # Scaled versions of the feature splits; filled in by prepare_data().
        self.X_train_scaled = None
        self.X_test_scaled = None
        self.model = None
        self.scaler = StandardScaler()

    def _require(self, attribute, producer):
        """Fail with an actionable message when a workflow step was skipped.

        AttributeError is used on purpose: it is the exception the unguarded
        attribute access on ``None`` raised before this check existed.
        """
        if getattr(self, attribute, None) is None:
            raise AttributeError(
                f"'{attribute}' is not set; call {producer}() first"
            )

    def generate_sample_data(self, n_samples=1000, random_state=42):
        """Generate a synthetic housing dataset and store it in ``self.data``.

        Args:
            n_samples: Number of rows to generate.
            random_state: Seed for the private random generator. The default
                reproduces the values previously obtained with
                ``np.random.seed(42)``.

        Returns:
            The generated DataFrame with columns ``square_feet``,
            ``bedrooms``, ``bathrooms``, ``age`` and ``price``.
        """
        # A private legacy RandomState yields the same stream as seeding the
        # global generator, without disturbing NumPy's global random state.
        rng = np.random.RandomState(random_state)

        # Generate synthetic housing data
        square_feet = rng.normal(2000, 500, n_samples)
        bedrooms = rng.randint(1, 6, n_samples)
        bathrooms = rng.randint(1, 4, n_samples)
        age = rng.randint(0, 50, n_samples)

        # Generate target variable (house price) with some noise
        base_price = 200000
        price_per_sqft = 100
        price_per_bedroom = 25000
        price_per_bathroom = 15000
        price_per_age = -1000
        prices = (base_price +
                  square_feet * price_per_sqft +
                  bedrooms * price_per_bedroom +
                  bathrooms * price_per_bathroom +
                  age * price_per_age +
                  rng.normal(0, 10000, n_samples))

        # Create DataFrame
        self.data = pd.DataFrame({
            'square_feet': square_feet,
            'bedrooms': bedrooms,
            'bathrooms': bathrooms,
            'age': age,
            'price': prices,
        })
        print(f"Generated {n_samples} sample housing records")
        return self.data

    def explore_data(self):
        """Print summary information about ``self.data`` and plot it.

        Raises:
            AttributeError: If no data has been generated yet.
        """
        self._require('data', 'generate_sample_data')
        print("=== Data Exploration ===")
        print(f"Dataset shape: {self.data.shape}")
        print("\nFirst few rows:")
        print(self.data.head())
        print("\nData types:")
        print(self.data.dtypes)
        print("\nBasic statistics:")
        print(self.data.describe())
        print("\nMissing values:")
        print(self.data.isnull().sum())
        # Create visualizations
        self._create_visualizations()

    def _create_visualizations(self):
        """Show a 2x2 figure of exploratory plots and print price correlations."""
        # Computed once and shared by the heatmap and the printed ranking.
        correlation_matrix = self.data.corr()

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle('Housing Data Analysis', fontsize=16)

        # Price distribution
        axes[0, 0].hist(self.data['price'], bins=30, alpha=0.7, color='skyblue')
        axes[0, 0].set_title('House Price Distribution')
        axes[0, 0].set_xlabel('Price ($)')
        axes[0, 0].set_ylabel('Frequency')

        # Square feet vs Price
        axes[0, 1].scatter(self.data['square_feet'], self.data['price'], alpha=0.6)
        axes[0, 1].set_title('Square Feet vs Price')
        axes[0, 1].set_xlabel('Square Feet')
        axes[0, 1].set_ylabel('Price ($)')

        # Bedrooms distribution
        axes[1, 0].hist(self.data['bedrooms'], bins=range(1, 7), alpha=0.7, color='lightgreen')
        axes[1, 0].set_title('Bedrooms Distribution')
        axes[1, 0].set_xlabel('Number of Bedrooms')
        axes[1, 0].set_ylabel('Frequency')

        # Correlation heatmap
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', ax=axes[1, 1])
        axes[1, 1].set_title('Feature Correlation Matrix')

        plt.tight_layout()
        plt.show()

        # Print correlations with price
        print("\nCorrelations with Price:")
        price_correlations = correlation_matrix['price'].sort_values(ascending=False)
        print(price_correlations)

    def prepare_data(self):
        """Split ``self.data`` into train/test sets and standardize the features.

        The scaler is fitted on the training features only and then applied
        to the test features.

        Returns:
            Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``.

        Raises:
            AttributeError: If no data has been generated yet.
        """
        self._require('data', 'generate_sample_data')
        print("\n=== Data Preparation ===")

        # Separate features and target
        X = self.data.drop('price', axis=1)
        y = self.data['price']

        # Split data into training and testing sets
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        print(f"Training set size: {self.X_train.shape[0]} samples")
        print(f"Testing set size: {self.X_test.shape[0]} samples")
        print(f"Number of features: {self.X_train.shape[1]}")

        # Scale features
        self.X_train_scaled = self.scaler.fit_transform(self.X_train)
        self.X_test_scaled = self.scaler.transform(self.X_test)
        print("Features scaled using StandardScaler")
        return self.X_train_scaled, self.X_test_scaled, self.y_train, self.y_test

    def train_model(self):
        """Fit a linear regression on the scaled training data and report metrics.

        Returns:
            Tuple ``(y_train_pred, y_test_pred)`` with the model's predictions
            for the training and test splits.

        Raises:
            AttributeError: If ``prepare_data`` has not been called.
        """
        self._require('X_train_scaled', 'prepare_data')
        print("\n=== Model Training ===")

        # Initialize and train the model
        self.model = LinearRegression()
        self.model.fit(self.X_train_scaled, self.y_train)
        print("Linear Regression model trained successfully")
        print(f"Model coefficients: {self.model.coef_}")
        print(f"Model intercept: {self.model.intercept_:.2f}")

        # Make predictions
        y_train_pred = self.model.predict(self.X_train_scaled)
        y_test_pred = self.model.predict(self.X_test_scaled)

        # Calculate metrics
        train_mse = mean_squared_error(self.y_train, y_train_pred)
        test_mse = mean_squared_error(self.y_test, y_test_pred)
        train_r2 = r2_score(self.y_train, y_train_pred)
        test_r2 = r2_score(self.y_test, y_test_pred)

        print("\nModel Performance:")
        print(f"Training MSE: {train_mse:,.2f}")
        print(f"Testing MSE: {test_mse:,.2f}")
        print(f"Training R²: {train_r2:.4f}")
        print(f"Testing R²: {test_r2:.4f}")
        return y_train_pred, y_test_pred

    def evaluate_model(self, y_train_pred, y_test_pred):
        """Plot predicted-vs-actual and residual charts and print coefficients.

        Args:
            y_train_pred: Predictions for the training split.
            y_test_pred: Predictions for the test split.

        Raises:
            AttributeError: If the model has not been trained yet.
        """
        self._require('model', 'train_model')
        print("\n=== Model Evaluation ===")

        # Create evaluation visualizations
        fig, axes = plt.subplots(1, 2, figsize=(15, 5))

        # Training predictions vs actual
        axes[0].scatter(self.y_train, y_train_pred, alpha=0.6, color='blue')
        axes[0].plot([self.y_train.min(), self.y_train.max()],
                     [self.y_train.min(), self.y_train.max()], 'r--', lw=2)
        axes[0].set_title('Training: Predicted vs Actual')
        axes[0].set_xlabel('Actual Price ($)')
        axes[0].set_ylabel('Predicted Price ($)')

        # Testing predictions vs actual
        axes[1].scatter(self.y_test, y_test_pred, alpha=0.6, color='green')
        axes[1].plot([self.y_test.min(), self.y_test.max()],
                     [self.y_test.min(), self.y_test.max()], 'r--', lw=2)
        axes[1].set_title('Testing: Predicted vs Actual')
        axes[1].set_xlabel('Actual Price ($)')
        axes[1].set_ylabel('Predicted Price ($)')

        plt.tight_layout()
        plt.show()

        # Feature importance analysis. X_train holds exactly the feature
        # columns the model was fitted on, in the order of model.coef_.
        feature_importance = pd.DataFrame({
            'feature': self.X_train.columns,
            'coefficient': self.model.coef_,
        })
        feature_importance = feature_importance.sort_values(
            'coefficient', key=abs, ascending=False
        )
        print("\nFeature Importance (by absolute coefficient value):")
        print(feature_importance)

        # Residual analysis
        residuals = self.y_test - y_test_pred
        plt.figure(figsize=(12, 4))

        plt.subplot(1, 2, 1)
        plt.scatter(y_test_pred, residuals, alpha=0.6)
        plt.axhline(y=0, color='r', linestyle='--')
        plt.title('Residual Plot')
        plt.xlabel('Predicted Price ($)')
        plt.ylabel('Residuals')

        plt.subplot(1, 2, 2)
        plt.hist(residuals, bins=30, alpha=0.7, color='orange')
        plt.title('Residual Distribution')
        plt.xlabel('Residuals')
        plt.ylabel('Frequency')

        plt.tight_layout()
        plt.show()

    def make_predictions(self, new_data):
        """Predict prices for unseen houses.

        Args:
            new_data: DataFrame holding the same feature columns the scaler
                was fitted on in ``prepare_data``.

        Returns:
            Array of predicted prices, one per row of ``new_data``.

        Raises:
            AttributeError: If the model has not been trained yet.
        """
        self._require('model', 'train_model')
        print("\n=== Making Predictions ===")

        # Scale new data
        new_data_scaled = self.scaler.transform(new_data)

        # Make predictions
        predictions = self.model.predict(new_data_scaled)

        # Report the inputs next to their predictions; assign() returns a
        # new frame, so the caller's DataFrame is left untouched.
        results = new_data.assign(predicted_price=predictions)
        print("Predictions for new data:")
        print(results)
        return predictions

    def run_complete_workflow(self):
        """Run every workflow step in order on freshly generated data."""
        print("=== Complete Machine Learning Workflow ===")
        print("This example demonstrates a typical ML project from start to finish\n")

        # Step 1: Generate data
        print("Step 1: Data Generation")
        self.generate_sample_data()

        # Step 2: Explore data
        print("\nStep 2: Data Exploration")
        self.explore_data()

        # Step 3: Prepare data
        print("\nStep 3: Data Preparation")
        self.prepare_data()

        # Step 4: Train model
        print("\nStep 4: Model Training")
        y_train_pred, y_test_pred = self.train_model()

        # Step 5: Evaluate model
        print("\nStep 5: Model Evaluation")
        self.evaluate_model(y_train_pred, y_test_pred)

        # Step 6: Make predictions
        print("\nStep 6: Making Predictions")
        new_houses = pd.DataFrame({
            'square_feet': [2500, 1800, 3200],
            'bedrooms': [3, 2, 4],
            'bathrooms': [2, 2, 3],
            'age': [5, 15, 2],
        })
        self.make_predictions(new_houses)

        print("\n=== Workflow Complete ===")
        print("This demonstrates the complete ML workflow:")
        print("1. Data Generation/Collection")
        print("2. Data Exploration & Analysis")
        print("3. Data Preparation & Preprocessing")
        print("4. Model Training")
        print("5. Model Evaluation")
        print("6. Making Predictions")
        print("\nNext steps would include:")
        print("- Model deployment to production")
        print("- Continuous monitoring and retraining")
        print("- Model versioning and management")
# Example: Simple Classification Problem
class SimpleClassificationExample:
    """Binary-classification demo: random forest on synthetic data."""

    def __init__(self):
        # Set by train_classification_model().
        self.model = None

    def generate_classification_data(self):
        """Generate a synthetic two-class dataset and split it 80/20.

        Returns:
            Tuple ``(X_train, X_test, y_train, y_test)``.
        """
        from sklearn.datasets import make_classification
        from sklearn.model_selection import train_test_split

        # Generate synthetic classification data
        X, y = make_classification(
            n_samples=1000,
            n_features=20,
            n_informative=15,
            n_redundant=5,
            n_classes=2,
            random_state=42,
        )

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        return X_train, X_test, y_train, y_test

    def train_classification_model(self, X_train, y_train):
        """Fit a 100-tree random forest and keep it in ``self.model``.

        Args:
            X_train: Training feature matrix.
            y_train: Training labels.

        Returns:
            The fitted classifier.
        """
        from sklearn.ensemble import RandomForestClassifier

        # Train Random Forest classifier
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.model.fit(X_train, y_train)
        return self.model

    def evaluate_classification_model(self, X_test, y_test):
        """Print accuracy, classification report and confusion matrix.

        Args:
            X_test: Test feature matrix.
            y_test: True labels for ``X_test``.

        Returns:
            The accuracy score on the test set.

        Raises:
            AttributeError: If no model has been trained yet.
        """
        # Checked before anything else so that an untrained instance fails
        # with a clear message; AttributeError matches what calling
        # ``None.predict`` raised previously.
        if self.model is None:
            raise AttributeError(
                "model is not trained; call train_classification_model() first"
            )

        from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

        # Make predictions
        y_pred = self.model.predict(X_test)

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Classification Accuracy: {accuracy:.4f}")
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred))

        # Confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        print("\nConfusion Matrix:")
        print(cm)
        return accuracy
# Usage examples
def main():
    """Run the regression workflow demo, then the classification demo."""
    print("=== AI/ML Module 1: Practical Examples ===\n")

    # Example 1: Complete ML Workflow
    print("Example 1: Complete Machine Learning Workflow")
    print("=" * 50)
    ml_workflow = MLWorkflowExample()
    ml_workflow.run_complete_workflow()
    print("\n" + "=" * 50)

    # Example 2: Simple Classification
    print("Example 2: Simple Classification Problem")
    print("=" * 50)
    classification_example = SimpleClassificationExample()
    X_train, X_test, y_train, y_test = classification_example.generate_classification_data()
    # The return values of the next two calls are not needed here: the
    # model is kept on the instance and the evaluation prints its metrics.
    classification_example.train_classification_model(X_train, y_train)
    classification_example.evaluate_classification_model(X_test, y_test)

    print("\nBoth examples demonstrate:")
    print("- Data generation and exploration")
    print("- Data preprocessing and preparation")
    print("- Model training and evaluation")
    print("- Making predictions on new data")
    print("- Complete ML workflow implementation")


if __name__ == "__main__":
    main()
Test your understanding of this topic:
Continue your learning journey and master the next set of concepts.
Continue to Module 2