DEV Community

Sudhakar Daggubati
Sudhakar Daggubati

Posted on

PCI GPU passthrough

A small Bash script that prepares an Ubuntu host for PCI GPU passthrough with VFIO, and that can also undo the change or recover from a failed boot.

#!/usr/bin/env bash
# Strict mode: stop on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

echo " This enables vfio passthrough on ubuntu 24+ "

# =========================
# CONFIG
# =========================
# All values below are constants for the lifetime of the script, so they are
# marked readonly to catch accidental reassignment inside a function.

# PCI addresses (domain:bus:slot.function) of the GPU and its audio function.
readonly GPU_PCI="0000:01:00.0"
readonly GPU_AUDIO_PCI="0000:01:00.1"
# Comma-separated vendor:device IDs passed to vfio-pci (ids= / vfio-pci.ids=).
readonly GPU_IDS="10de:2820,10de:22bd"


# Files this script creates, edits or removes.
readonly GRUB="/etc/default/grub"
readonly VFIO_CONF="/etc/modprobe.d/vfio.conf"
readonly BLACKLIST="/etc/modprobe.d/blacklist-nvidia.conf"
readonly RECOVERY_FLAG="/boot/vfio_last_state"
readonly SAFE_ENTRY_FILE="/etc/grub.d/40_vfio_safe_mode"

# The root filesystem's UUID and type are detected automatically; both are
# embedded in the Safe Mode GRUB entry written by ensure_safe_mode_entry.
# `|| true` keeps `set -e` from aborting on a failing/missing findmnt so the
# explicit check below can print a helpful message instead.
ROOT_UUID="$(findmnt -no UUID / || true)"
ROOT_FSTYPE="$(findmnt -no FSTYPE / || true)"

# SAFETY CHECK: Ensure UUID was actually captured
if [[ -z "$ROOT_UUID" ]]; then
    echo "[!] ERROR: Could not determine ROOT_UUID." >&2
    echo "    Check if findmnt is installed or if you have sufficient permissions." >&2
    exit 1
fi


# 2. Check for Filesystem Compatibility
# ext2/3/4 all map to GRUB's 'ext2' module; for anything else the Safe Mode
# entry uses the detected type verbatim as the module name, which the user
# should double-check before relying on it for recovery.
if [[ "$ROOT_FSTYPE" != ext* ]]; then
    echo "===================================================="
    echo " [!] WARNING: NON-EXT FILESYSTEM DETECTED ($ROOT_FSTYPE)"
    echo "===================================================="
    echo " Your Safe Mode GRUB entry will use 'insmod $ROOT_FSTYPE' instead of 'insmod ext2'."
    echo " To ensure recovery works, confirm that GRUB provides a module with that name"
    echo " (see the 'insmod' line in the ensure_safe_mode_entry function)."
    echo ""
    # -r keeps backslashes literal; EOF / no terminal counts as "no".
    read -r -p "Continue anyway? (y/n): " confirm || confirm=""
    if [[ "$confirm" != "y" ]]; then
        exit 1
    fi
fi


# =========================
# BANNER
# =========================
#######################################
# Print the banner that tells the user how to boot back into a working
# desktop if a VFIO change leaves the machine unusable.
# The quoted GRUB title is the menuentry title that ensure_safe_mode_entry
# writes, so the user can find it verbatim in the boot menu.
# Outputs: banner text on stdout
#######################################
show_recovery_banner() {
    cat <<'EOF'

====================================================
 VFIO SAFE CONTROLLER
====================================================

RECOVERY OPTIONS:
  → GRUB: 'Ubuntu (SAFE MODE - iGPU only, NO VFIO)'
  → OR 'Advanced options'

SAFE MODE = NO VFIO, NO NVIDIA, iGPU ONLY
====================================================

EOF
}

# =========================
# VM SAFETY CHECK
# =========================
#######################################
# Report whether any libvirt domain is currently running.
# virsh being absent or unable to reach libvirtd is treated as "no VMs"
# (best effort) instead of aborting the script under set -e / pipefail.
# Outputs: progress messages and the names of running domains on stdout
# Returns: 0 when nothing is running, 1 when at least one domain is running
#######################################
check_vms() {
    echo "[*] Checking running VMs..."

    local running=""

    # Preferred query: one domain name per line, running domains only.
    if ! running=$(virsh list --name --state-running 2>/dev/null); then
        # Fallback: parse the default table output (Id / Name / State columns).
        running=$(virsh list 2>/dev/null | awk 'NR>2 && $3=="running" {print $2}') || running=""
    fi

    # virsh ends its listing with an empty line; drop blank lines.
    running=$(sed '/^[[:space:]]*$/d' <<<"$running")

    if [[ -n "$running" ]]; then
        echo "[!] Running VMs detected:"
        echo "$running"
        return 1
    fi

    echo "[✓] No running VMs"
    return 0
}

# =========================
# PREFLIGHT
# =========================
#######################################
# Refuse to change VFIO state while an active VM has the configured GPU
# (or its audio function) attached.
# libvirt writes a host PCI source in domain XML as
#   <address domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
# and not as "0000:01:00.0", so each configured address is matched in both
# forms. Guest-side addresses start with "<address type='pci' ..." and are
# therefore not mistaken for the host device.
# Globals:  GPU_PCI, GPU_AUDIO_PCI (read)
# Outputs:  progress messages on stdout
# Returns:  0 when it is safe to proceed, 1 when a VM uses the GPU
#######################################
preflight() {
    echo "[*] Preflight check..."

    local blocked=0
    local running_vms vm xml addr
    local -a needles=()

    # Best effort: an unavailable virsh counts as "no active VMs" rather than
    # aborting the whole script under set -e / pipefail.
    running_vms=$(virsh list --name 2>/dev/null | sed '/^$/d') || running_vms=""

    if [[ -z "$running_vms" ]]; then
        echo "[✓] No running VMs"
        echo "[✓] Preflight OK"
        return 0
    fi

    # Build the fixed-string patterns: the literal address plus its XML form.
    for addr in "$GPU_PCI" "$GPU_AUDIO_PCI"; do
        needles+=(-e "$addr")
        if [[ "$addr" =~ ^([[:xdigit:]]{4}):([[:xdigit:]]{2}):([[:xdigit:]]{2})\.([0-7])$ ]]; then
            needles+=(-e "<address domain='0x${BASH_REMATCH[1],,}' bus='0x${BASH_REMATCH[2],,}' slot='0x${BASH_REMATCH[3],,}' function='0x${BASH_REMATCH[4]}'")
        fi
    done

    while IFS= read -r vm; do
        [[ -n "$vm" ]] || continue
        # Capture first so grep -q exiting early cannot fail the pipeline.
        xml=$(virsh dumpxml "$vm" 2>/dev/null </dev/null) || xml=""
        if grep -qF "${needles[@]}" <<<"$xml"; then
            echo "[!] Running VM '$vm' is using configured GPU passthrough devices"
            blocked=1
        else
            echo "[i] Running VM '$vm' does not use passthrough GPU"
        fi
    done <<<"$running_vms"

    if [[ $blocked -eq 1 ]]; then
        echo "[!] Stop affected VM(s) before changing VFIO state"
        return 1
    fi

    echo "[✓] No running VM currently uses the dGPU"
    echo "[✓] Preflight OK"
    return 0
}

# =========================
# STATUS
# =========================
#######################################
# Show libvirt's domain list followed by what lspci reports (IDs and kernel
# driver, via -nnk) for the configured GPU. Both commands are best effort:
# a failure of either one is ignored so the other is still shown.
# Globals:  GPU_PCI (read)
# Outputs:  status report on stdout
#######################################
status() {

    echo "===================="
    echo " SYSTEM STATUS"
    echo "===================="

    virsh list 2>/dev/null || true
    echo ""

    # lspci -s accepts the full domain:bus:slot.func form, so the address is
    # passed whole (and quoted) instead of slicing the domain off by offset.
    lspci -nnk -s "$GPU_PCI" || true
}

# =========================
# SAFE MODE GRUB ENTRY
# =========================
#######################################
# (Re)write the custom GRUB script that adds a "SAFE MODE" boot entry with
# the IOMMU switched off and the vfio/nvidia modules blacklisted, then mark
# it executable.
# Globals:  ROOT_FSTYPE, ROOT_UUID, SAFE_ENTRY_FILE (read)
# Outputs:  one progress line on stdout; the entry goes to SAFE_ENTRY_FILE
#######################################
ensure_safe_mode_entry() {
    local grub_mod kernel_args

    echo "[*] Ensuring SAFE MODE GRUB entry for $ROOT_FSTYPE..."

    # Every ext variant is served by the 'ext2' module name; any other
    # filesystem type is used as the module name as-is.
    case "$ROOT_FSTYPE" in
        ext*) grub_mod="ext2" ;;
        *)    grub_mod="$ROOT_FSTYPE" ;;
    esac

    kernel_args="root=UUID=$ROOT_UUID ro quiet splash intel_iommu=off"
    kernel_args+=" modprobe.blacklist=vfio_pci,vfio,vfio_iommu_type1,nvidia,nvidia_drm,nvidia_modeset"

    {
        # The first two lines make the script print everything from line 3 on.
        printf '%s\n' '#!/bin/sh' 'exec tail -n +3 $0' ''
        printf '%s\n' 'menuentry "Ubuntu (SAFE MODE - iGPU only, NO VFIO)" {'
        printf '    %s\n' \
            'insmod part_gpt' \
            "insmod $grub_mod" \
            "search --no-floppy --fs-uuid --set=root $ROOT_UUID" \
            "linux /boot/vmlinuz $kernel_args" \
            'initrd /boot/initrd.img'
        printf '%s\n' '}'
    } | sudo tee "$SAFE_ENTRY_FILE" >/dev/null

    sudo chmod +x "$SAFE_ENTRY_FILE"
}


# =========================
# GRUB APPLY VFIO
# =========================
#######################################
# Back up the GRUB defaults file and add the VFIO kernel parameters to
# GRUB_CMDLINE_LINUX_DEFAULT.
# Parameters this script manages are removed first (so re-running is
# idempotent); every other parameter already on the line is preserved.
# If the line is not in the usual double-quoted form, it is replaced with
# "quiet splash" plus the VFIO parameters.
# Globals:  GRUB, GPU_IDS (read)
#######################################
apply_vfio_grub() {
    local -r key="GRUB_CMDLINE_LINUX_DEFAULT"
    local -r wanted="intel_iommu=on iommu=pt vfio-pci.ids=$GPU_IDS rd.driver.pre=vfio-pci"
    local current="quiet splash" value="" escaped i
    local -a tokens=()

    sudo cp "$GRUB" "$GRUB.bak.$(date +%s)"

    # Strip stale managed parameters anywhere in the file (this also covers
    # GRUB_CMDLINE_LINUX). [^ "]* stops at the closing quote so a parameter
    # at the end of a line cannot swallow it.
    sudo sed -i \
        -e 's/vfio-pci\.ids=[^ "]*//g' \
        -e 's/intel_iommu=on//g' \
        -e 's/iommu=pt//g' \
        "$GRUB"

    # Pick up whatever is left on the default command line.
    if grep -qE "^${key}=\".*\"[[:space:]]*\$" "$GRUB"; then
        current=$(sed -n "s/^${key}=\"\(.*\)\"[[:space:]]*\$/\1/p" "$GRUB" | tail -n 1)
    fi

    IFS=$' \t' read -r -a tokens <<<"$current" || true
    for ((i = 0; i < ${#tokens[@]}; i++)); do
        if [[ "${tokens[i]}" == "rd.driver.pre=vfio-pci" ]]; then
            continue
        fi
        value+="${value:+ }${tokens[i]}"
    done
    value+="${value:+ }${wanted}"

    # Escape characters that are special in a sed replacement ('|' is the
    # delimiter used below).
    escaped=$(printf '%s' "$value" | sed 's/[\\&|]/\\&/g')
    sudo sed -i "s|^${key}=.*|${key}=\"${escaped}\"|" "$GRUB"
}

#######################################
# Check that the kernel driver currently bound to the GPU is vfio-pci.
# Only lspci's "Kernel driver in use" line counts; a mention of vfio-pci
# elsewhere in the output does not. An lspci failure is reported as
# "not bound".
# Globals:  GPU_PCI (read)
# Outputs:  result messages on stdout
# Returns:  0 when bound to vfio-pci, 1 otherwise
#######################################
verify_vfio_binding() {
    echo "[*] Checking GPU driver binding..."

    local info
    # Capture first: piping straight into grep -q can fail the pipeline
    # under pipefail when grep exits before lspci has finished writing.
    info=$(lspci -nnk -s "$GPU_PCI") || info=""

    if grep -qE 'Kernel driver in use:[[:space:]]*vfio-pci' <<<"$info"; then
        echo "[✓] GPU correctly bound to VFIO"
        return 0
    else
        echo "[!] VFIO FAILED — GPU still not isolated"
        echo "    Do NOT launch VM"
        return 1
    fi
}


# =========================
# GRUB RESTORE (iGPU)
# =========================
#######################################
# Back up the GRUB defaults file and remove the VFIO kernel parameters this
# script manages, leaving every other parameter on
# GRUB_CMDLINE_LINUX_DEFAULT untouched.
# If the line is not in the usual double-quoted form, it is reset to
# "quiet splash".
# Globals:  GRUB (read)
#######################################
restore_igpu_grub() {
    local -r key="GRUB_CMDLINE_LINUX_DEFAULT"
    local current="quiet splash" value="" escaped i
    local -a tokens=()

    sudo cp "$GRUB" "$GRUB.bak.$(date +%s)"

    # Strip managed parameters anywhere in the file (this also covers
    # GRUB_CMDLINE_LINUX). [^ "]* stops at the closing quote so a parameter
    # at the end of a line cannot swallow it.
    sudo sed -i \
        -e 's/vfio-pci\.ids=[^ "]*//g' \
        -e 's/intel_iommu=on//g' \
        -e 's/iommu=pt//g' \
        "$GRUB"

    # Pick up whatever is left on the default command line.
    if grep -qE "^${key}=\".*\"[[:space:]]*\$" "$GRUB"; then
        current=$(sed -n "s/^${key}=\"\(.*\)\"[[:space:]]*\$/\1/p" "$GRUB" | tail -n 1)
    fi

    # Re-join the remaining parameters with single spaces, dropping the one
    # managed parameter the sed pass above does not cover.
    IFS=$' \t' read -r -a tokens <<<"$current" || true
    for ((i = 0; i < ${#tokens[@]}; i++)); do
        if [[ "${tokens[i]}" == "rd.driver.pre=vfio-pci" ]]; then
            continue
        fi
        value+="${value:+ }${tokens[i]}"
    done

    # Escape characters that are special in a sed replacement ('|' is the
    # delimiter used below).
    escaped=$(printf '%s' "$value" | sed 's/[\\&|]/\\&/g')
    sudo sed -i "s|^${key}=.*|${key}=\"${escaped}\"|" "$GRUB"
}

# =========================
# ENABLE VFIO
# =========================
#######################################
# Switch the host to VFIO passthrough for the configured GPU.
# Order of operations:
#   1. preflight (abort if a running VM uses the GPU)
#   2. edit the GRUB defaults and confirm the vfio-pci.ids parameter landed;
#      on failure the function exits before any other file is touched
#   3. write the modprobe blacklist / vfio options and the udev rule
#   4. disable the nvidia helper units (best effort)
#   5. write the Safe Mode GRUB entry and the recovery flag
#   6. regenerate the initramfs and the GRUB configuration
# Globals:  BLACKLIST, VFIO_CONF, GPU_IDS, GRUB, RECOVERY_FLAG (read)
# Outputs:  progress messages on stdout, failure message on stderr
#######################################
enable_flow() {
    local unit

    show_recovery_banner
    preflight || exit 1

    echo "[*] Enabling VFIO..."

    apply_vfio_grub
    echo "[*] Verifying GRUB configuration..."
    # -F: the parameter is a literal string, not a regular expression.
    if ! grep -qF -- "vfio-pci.ids=$GPU_IDS" "$GRUB"; then
        echo "[!] GRUB injection failed" >&2
        exit 1
    fi

    # Keep the host's NVIDIA drivers (proprietary and nouveau) off the GPU.
    cat <<EOF | sudo tee "$BLACKLIST" >/dev/null
blacklist nvidia
blacklist nvidia_drm
blacklist nvidia_modeset
blacklist nvidia_uvm
blacklist nouveau
EOF

    # Hand the device IDs to vfio-pci and make the nvidia modules depend on
    # vfio-pci being loaded first.
    printf '%s\n' \
    "options vfio-pci ids=$GPU_IDS disable_vga=1" \
    "softdep nvidia pre: vfio-pci" \
    "softdep nvidia_drm pre: vfio-pci" \
    "softdep nvidia_modeset pre: vfio-pci" \
    "softdep nvidia_uvm pre: vfio-pci" \
    | sudo tee "$VFIO_CONF" >/dev/null

    # Block udev from loading nvidia modules
    sudo tee /etc/udev/rules.d/71-nvidia.rules >/dev/null << 'EOF'
# VFIO override — block nvidia module loading via udev
ACTION=="add", DEVPATH=="/bus/pci/drivers/nvidia", RUN+="/bin/false"
EOF

    # Best effort: these units may not be installed on every host.
    for unit in nvidia-persistenced.service nvidia-powerd.service nvidia-cdi-refresh.path; do
        sudo systemctl disable --now "$unit" 2>/dev/null || true
    done

    ensure_safe_mode_entry

    echo "$GPU_IDS" | sudo tee "$RECOVERY_FLAG" >/dev/null

    sudo update-initramfs -u
    sudo update-grub

    echo "[✓] VFIO enabled safely"
}

# =========================
# DISABLE VFIO
# =========================
#######################################
# Undo the VFIO setup: delete the modprobe and udev files written when VFIO
# was enabled, restore the GRUB defaults, refresh the Safe Mode entry,
# re-enable the nvidia helper units and regenerate initramfs and GRUB config.
# Globals:  VFIO_CONF, BLACKLIST, RECOVERY_FLAG (read)
# Outputs:  recovery banner and a completion line on stdout
#######################################
disable_flow() {
    local target unit

    show_recovery_banner

    for target in "$VFIO_CONF" "$BLACKLIST" /etc/udev/rules.d/71-nvidia.rules; do
        sudo rm -f "$target"
    done
    sudo udevadm control --reload-rules

    restore_igpu_grub
    ensure_safe_mode_entry

    sudo rm -f "$RECOVERY_FLAG"

    # Re-enable nvidia services (best effort: a unit may not be installed)
    for unit in nvidia-persistenced.service nvidia-powerd.service nvidia-cdi-refresh.path; do
        sudo systemctl enable --now "$unit" 2>/dev/null || true
    done

    sudo update-initramfs -u
    sudo update-grub

    echo "[✓] VFIO disabled"
}

# =========================
# RECOVERY MODE
# =========================
#######################################
# Recovery path: restore the GRUB defaults first, then remove the files
# written when VFIO was enabled, refresh the Safe Mode entry, re-enable the
# nvidia helper units and regenerate initramfs and GRUB config.
# Globals:  VFIO_CONF, BLACKLIST, RECOVERY_FLAG (read)
# Outputs:  start and completion lines on stdout
#######################################
recover_flow() {
    local target unit

    echo "[!] RECOVERY MODE ACTIVATED"

    restore_igpu_grub

    for target in "$VFIO_CONF" "$BLACKLIST" "$RECOVERY_FLAG" /etc/udev/rules.d/71-nvidia.rules; do
        sudo rm -f "$target"
    done
    sudo udevadm control --reload-rules

    ensure_safe_mode_entry

    # Re-enable nvidia services (best effort: a unit may not be installed)
    for unit in nvidia-persistenced.service nvidia-powerd.service nvidia-cdi-refresh.path; do
        sudo systemctl enable --now "$unit" 2>/dev/null || true
    done

    sudo update-initramfs -u
    sudo update-grub

    echo "[✓] System restored to iGPU SAFE MODE"
}

# =========================
# MAIN CONTROLLER
# =========================
#######################################
# Command dispatcher: print the recovery banner, then run the sub-command
# named by the first argument.
# Arguments: $1 - status | preflight | enable | disable | recover | verify
#                 (or -h / --help / help to show usage)
# Outputs:   usage goes to stdout when asked for, to stderr on bad input
# Returns:   the sub-command's status; 2 for a missing or unknown
#            sub-command, so callers and scripts can detect the misuse
#######################################
main() {
    local -r usage="Usage: $0 {status|preflight|enable|disable|recover|verify}"

    show_recovery_banner

    case "${1:-}" in
        status)
            status
            ;;
        preflight)
            preflight
            ;;
        enable)
            enable_flow
            ;;
        disable)
            disable_flow
            ;;
        recover)
            recover_flow
            ;;
        verify)
            verify_vfio_binding || exit 1
            ;;
        -h|--help|help)
            echo "$usage"
            ;;
        *)
            echo "$usage" >&2
            return 2
            ;;
    esac
}

# Script entry point: pass every command-line argument on to main unchanged.
main "$@"

Enter fullscreen mode Exit fullscreen mode

Top comments (0)