## 📚 Introduction
MLflow is an open-source platform designed to manage the complete machine learning lifecycle. It helps data scientists track experiments, reproduce results, deploy models, and create a centralized model registry.
### Why Experiment Tracking Matters

| Benefit | Description |
|---|---|
| 🔄 Reproducibility | Recreate results and understand past decisions |
| 🗂️ Organization | Keep track of work across many iterations and experiments |
| ⚙️ Optimization | Easily compare different approaches and parameter configurations |
| 👥 Collaboration | Share and communicate results with team members |
### Key Terminology
- ML experiment: The entire process of building a machine learning model
- Experiment run: A single trial within an ML experiment
- Run artifact: Files associated with a run (models, visualizations, datasets)
- Experiment metadata: Data describing the experiment (parameters, metrics, etc.)
## 🧩 MLflow Components
MLflow consists of four main modules:
- Tracking: Record and query experiments (parameters, metrics, code versions, etc.)
- Models: Package ML models in a standard format for deployment
- Model Registry: Store, annotate, and manage models in a central repository
- Projects: Package code in a reusable and reproducible form (not covered in this guide)
## 🚀 Getting Started

### Installation

```bash
# Using pip
pip install mlflow

# Using conda
conda install -c conda-forge mlflow
```
### Setting Up the MLflow Server

```bash
# Basic usage
mlflow ui

# With SQLite backend (recommended)
mlflow ui --backend-store-uri sqlite:///mlflow.db

# With a specific artifact location
mlflow ui --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns
```
After running the commands above, access the UI at: http://localhost:5000
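If the default port is already in use, or the UI needs to be reachable from other machines, the host and port can be overridden (a minimal sketch; adjust the values to your environment):

```bash
# Bind to all interfaces on a non-default port
mlflow ui --backend-store-uri sqlite:///mlflow.db --host 0.0.0.0 --port 5001
```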
## 💻 Working with MLflow

### Initializing MLflow in Your Code

```python
import mlflow

# Set the tracking server URI
mlflow.set_tracking_uri("sqlite:///mlflow.db")

# Set the experiment name
mlflow.set_experiment("my-classification-project")
```
### Basic Experiment Tracking

```python
# Assumes a trained scikit-learn estimator bound to `model`
with mlflow.start_run(run_name="basic-model"):
    # 1. Log parameters
    mlflow.log_param("algorithm", "RandomForest")
    mlflow.log_param("n_estimators", 100)

    # 2. Log metrics
    mlflow.log_metric("accuracy", 0.85)
    mlflow.log_metric("f1_score", 0.82)

    # 3. Log the model
    mlflow.sklearn.log_model(model, "model")

    # 4. Log artifacts (e.g., a feature importance plot saved to disk)
    mlflow.log_artifact("feature_importance.png")
```
### Using the MLflow Client API

```python
from mlflow.tracking import MlflowClient

# Initialize the client
client = MlflowClient(tracking_uri="sqlite:///mlflow.db")

# Create a new experiment
experiment_id = client.create_experiment("customer-churn-prediction")

# Get an experiment by name
experiment = client.get_experiment_by_name("customer-churn-prediction")
```
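The client can also query runs programmatically. A minimal sketch, assuming the experiment created above contains runs that logged an `accuracy` metric:

```python
# Search the experiment's runs, best accuracy first
runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="metrics.accuracy > 0.8",
    order_by=["metrics.accuracy DESC"],
    max_results=5,
)
for run in runs:
    print(run.info.run_id, run.data.metrics.get("accuracy"))
```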
## 📊 Advanced Tracking Features

### Nested Runs

```python
# Parent run
with mlflow.start_run(run_name="parent") as parent_run:
    mlflow.log_param("parent_param", "parent_value")

    # Child run
    with mlflow.start_run(run_name="child", nested=True) as child_run:
        mlflow.log_param("child_param", "child_value")
        mlflow.log_metric("child_metric", 1.0)
```
### Hyperparameter Optimization with MLflow

```python
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

def objective(params):
    with mlflow.start_run(nested=True):
        # Log hyperparameters
        mlflow.log_params(params)

        # Your model training code here
        # ...

        # Log results
        mlflow.log_metric("rmse", rmse)
        return {"loss": rmse, "status": STATUS_OK}

# Define the search space
search_space = {
    "learning_rate": hp.loguniform("learning_rate", -5, 0),
    "max_depth": hp.randint("max_depth", 3, 10),
    "min_child_weight": hp.randint("min_child_weight", 1, 10),
    "subsample": hp.uniform("subsample", 0.5, 1.0),
}

# Run the optimization under a parent run
with mlflow.start_run(run_name="hyperopt_tuning"):
    best_params = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=50,
        trials=Trials(),
    )
```
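Once the sweep finishes, the child runs can be compared programmatically. A minimal sketch using the fluent search API, assuming the experiment set earlier and the `rmse` metric logged in `objective`:

```python
# Returns a pandas DataFrame of runs in the active experiment, best RMSE first
results = mlflow.search_runs(order_by=["metrics.rmse ASC"], max_results=5)
print(results[["run_id", "metrics.rmse", "params.max_depth"]])
```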
### Autologging
MLflow can automatically log parameters, metrics, and models without manual logging statements:
```python
# Enable autologging for all supported frameworks
mlflow.autolog()

# Or enable it for a specific framework
mlflow.sklearn.autolog()
mlflow.xgboost.autolog()
mlflow.pytorch.autolog()
mlflow.tensorflow.autolog()
```
What gets autologged:
- Model parameters
- Evaluation metrics
- Model artifacts
- Model signatures
- Input examples
- Package dependencies
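To see it in action, the sketch below trains a model with no explicit logging calls; parameters, metrics, and the fitted model are captured during `fit()` (a minimal sketch on a toy dataset):

```python
import mlflow
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

mlflow.sklearn.autolog()  # every fit() from here on is logged automatically

X, y = make_classification(n_samples=500, random_state=42)

with mlflow.start_run(run_name="autolog-demo"):
    # No mlflow.log_* calls needed: the autologger records the estimator's
    # parameters, training metrics, and the model artifact itself
    RandomForestClassifier(n_estimators=50).fit(X, y)
```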
## 📦 Managing Models

### Saving and Loading Models

```python
from sklearn.ensemble import RandomForestClassifier

# Saving a model
with mlflow.start_run():
    # Train your model
    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    # Log the model
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="random_forest_model",
        registered_model_name="rf_classification"
    )

# Loading a model
model_uri = "runs:/<run_id>/random_forest_model"
loaded_model = mlflow.sklearn.load_model(model_uri)

# Make predictions
predictions = loaded_model.predict(X_test)
```
### Model Signatures and Input Examples

```python
import pandas as pd
from mlflow.models.signature import infer_signature

# Generate the model signature from sample inputs and outputs
X_sample = X_train.iloc[:5]
y_sample = model.predict(X_sample)
signature = infer_signature(X_sample, y_sample)

# Log the model with its signature and an input example
with mlflow.start_run():
    mlflow.sklearn.log_model(
        model,
        "model",
        signature=signature,
        input_example=X_sample
    )
```
## 📋 Model Registry
The Model Registry provides a centralized repository for managing the full lifecycle of MLflow Models.
### Model Registry Workflow
- Register a model from a run
- Transition models between stages (Staging, Production, Archived)
- Version models automatically
- Annotate models with descriptions and tags
- Deploy models to various serving platforms
### Registering Models

```python
# From the UI: click "Register Model" on the run page

# From code, at logging time
with mlflow.start_run():
    mlflow.sklearn.log_model(
        model,
        "model",
        registered_model_name="customer-churn-predictor"
    )

# Or from an existing run
run_id = "abcdef123456"
model_uri = f"runs:/{run_id}/model"
mlflow.register_model(model_uri, "customer-churn-predictor")
```
### Managing Model Versions

```python
from mlflow.tracking import MlflowClient

client = MlflowClient()
model_name = "customer-churn-predictor"

# Get the latest version of the model in each stage
versions = client.get_latest_versions(model_name)
for v in versions:
    print(f"Version: {v.version}, Stage: {v.current_stage}")

# Transition a model version to Production
client.transition_model_version_stage(
    name=model_name,
    version=2,
    stage="Production",
    archive_existing_versions=True  # archive any current Production versions
)

# Add a description to a model version
client.update_model_version(
    name=model_name,
    version=2,
    description="This model was trained on dataset v2 with improved features"
)
```
### Loading Models from the Registry

```python
# Load the latest Production model
production_model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{model_name}/Production"
)

# Load a specific version
specific_model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{model_name}/2"
)
```
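Registered models can also be served directly as a local REST endpoint via the MLflow CLI (a minimal sketch; the model name and port are placeholders for your own):

```bash
# Serve the latest Production version on port 5001
mlflow models serve -m "models:/customer-churn-predictor/Production" --port 5001
```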
## 📝 Best Practices

### Project Structure

```
my-ml-project/
├── data/               # Data files
├── notebooks/          # Jupyter notebooks for exploration
├── src/                # Source code
│   ├── train.py        # Training script
│   ├── predict.py      # Prediction script
│   └── utils.py        # Utility functions
├── mlruns/             # MLflow tracking data (if using local storage)
├── mlflow.db           # MLflow SQLite database
├── README.md           # Project documentation
└── requirements.txt    # Project dependencies
```
### Tips for Effective MLflow Usage
- Use descriptive run names to easily identify experiments
- Create separate experiments for different problems or approaches
- Log all parameters that affect your model's performance
- Use tags to add searchable metadata to runs (see the sketch after this list)
- Always version your data and log data paths/versions
- Set up a dedicated tracking server for team collaboration
- Integrate with CI/CD pipelines for automated model deployment
- Create standardized model training workflows with common logging patterns
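As an illustration of the tagging tip, a minimal sketch of setting tags on a run and searching by them later (the tag keys and values are arbitrary examples):

```python
import mlflow

with mlflow.start_run(run_name="tagged-run"):
    mlflow.set_tag("model_family", "random_forest")
    mlflow.set_tag("data_version", "v2")
    mlflow.log_metric("accuracy", 0.85)

# Later: find runs by tag (returns a pandas DataFrame)
tagged = mlflow.search_runs(filter_string="tags.model_family = 'random_forest'")
print(tagged[["run_id", "metrics.accuracy"]])
```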
## 🔍 Comparison with Other Tools

| Feature | MLflow | TensorBoard | Weights & Biases | Neptune |
|---|---|---|---|---|
| Experiment Tracking | ✅ | ✅ | ✅ | ✅ |
| Model Registry | ✅ | ❌ | ✅ | ✅ |
| Model Packaging | ✅ | ❌ | ❌ | ❌ |
| Open Source | ✅ | ✅ | ❌ | ❌ |
| UI Visualization | Basic | Advanced | Advanced | Advanced |
| Team Collaboration | Basic | Limited | Advanced | Advanced |
| Learning Curve | Easy | Moderate | Easy | Easy |
## 🔧 Complete Example: NYC Taxi Fare Prediction

```python
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Set up MLflow
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("nyc-taxi-fare-prediction")

# Load data
df = pd.read_parquet("data/green_tripdata_2021-01.parquet")

# Data preprocessing
# ... (feature engineering code here)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    df.drop('fare_amount', axis=1), df['fare_amount'], test_size=0.2, random_state=42
)

# Model training with MLflow tracking
with mlflow.start_run(run_name="random-forest-baseline"):
    # Set parameters
    n_estimators = 100
    max_depth = 10

    # Log parameters
    mlflow.log_param("model_type", "RandomForest")
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("train_data_shape", X_train.shape)
    mlflow.log_param("test_data_shape", X_test.shape)

    # Train model
    model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate model
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    # Log metrics
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)

    # Log feature importance as a figure
    feature_importance = pd.DataFrame(
        model.feature_importances_,
        index=X_train.columns,
        columns=['importance']
    ).sort_values('importance', ascending=False)

    plt.figure(figsize=(10, 8))
    feature_importance[:10].plot(kind='barh')
    plt.title('Feature Importance')
    plt.tight_layout()
    plt.savefig("feature_importance.png")
    mlflow.log_artifact("feature_importance.png")

    # Log model
    mlflow.sklearn.log_model(
        model,
        "random_forest_model",
        registered_model_name="nyc-taxi-fare-predictor"
    )

    print(f"Model trained with RMSE: {rmse:.4f}, R²: {r2:.4f}")
    print(f"Run ID: {mlflow.active_run().info.run_id}")
```