Have you ever wanted to download large files from Google Drive but found the process frustrating? Browser downloads often fail for huge files (like models that are several GBs in size), and keeping track of multiple model files can be confusing. Additionally, inconsistent network bandwidth can result in cancellation of downloads altogether.
In this beginner-friendly guide, I'll walk you through a Python script that solves these problems elegantly. This tool automatically downloads large files, verifies they're complete, and helps you organize them, all with simple commands! For this guide, we'll be downloading some large AI models from Google Drive.
What You'll Need
Before we dive in, let's make sure you have the essentials:
- Python installed (version 3.6 or newer)
- Basic command line knowledge (don't worry, I'll guide you through it)
- Google Drive links for the models you want to download
Installing Required Packages
First, you'll need to install the required packages with this simple command:
pip install gdown
The gdown library is specifically designed to download large files from Google Drive without the headaches of browser downloads.
Script Breakdown
Let's break down what this script does in simple terms:
- It reads a configuration file that contains all your model details.
- It checks if files already exist before downloading (saving your bandwidth).
- It automatically retries failed downloads.
- It verifies file integrity by checking sizes.
- It gives you clean status reports.
Here's the full script:
import gdown
import os
import sys
import time
from pathlib import Path
import hashlib
import json
from typing import Dict, Optional
class GoogleDriveModelDownloader:
    """Download large model files from Google Drive with retries and size checks.

    Models are described in a JSON config file mapping each output filename to
    a Google Drive file ID, an optional description, and an optional expected
    size in MB (used to detect incomplete downloads).
    """

    def __init__(self, download_dir: str = "", config_file: str = "models_config.json"):
        """
        Args:
            download_dir: Directory to store downloads ("" means the current directory).
            config_file: Path to the JSON configuration file.
        """
        self.download_dir = Path(download_dir)
        self.config_file = config_file
        # parents=True so nested download paths work; exist_ok keeps reruns safe.
        self.download_dir.mkdir(parents=True, exist_ok=True)

    def load_config(self) -> Dict:
        """Load models configuration from JSON file.

        Returns:
            The parsed config dict, or ``{"models": {}}`` if the file is
            missing or contains invalid JSON (a message is printed either way).
        """
        try:
            with open(self.config_file, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            print(f"Config file {self.config_file} not found!")
            return {"models": {}}
        except json.JSONDecodeError as e:
            print(f"Error parsing config file: {e}")
            return {"models": {}}

    def get_file_hash(self, filepath: Path) -> Optional[str]:
        """Calculate MD5 hash of a file.

        Args:
            filepath: File to hash.

        Returns:
            Hex digest string, or ``None`` if the file does not exist.
        """
        if not filepath.exists():
            return None
        hash_md5 = hashlib.md5()
        with open(filepath, "rb") as f:
            # Read in chunks so arbitrarily large files don't exhaust memory.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def file_exists_and_complete(self, filepath: Path, expected_size_mb: Optional[int] = None) -> bool:
        """Check if a file exists and (optionally) matches its expected size.

        Args:
            filepath: File to check.
            expected_size_mb: Expected size in MB; if given, a file smaller
                than 95% of it is treated as incomplete.

        Returns:
            True if the file exists and passes the size check.
        """
        if not filepath.exists():
            return False
        # Check file size only when an expected size is provided.
        if expected_size_mb:
            file_size_mb = filepath.stat().st_size / (1024 * 1024)
            if file_size_mb < expected_size_mb * 0.95:  # Allow 5% tolerance
                print(f"File {filepath.name} exists but seems incomplete ({file_size_mb:.1f}MB vs expected {expected_size_mb}MB)")
                return False
        return True

    def download_with_retry(self, url: str, output_path: Path, max_retries: int = 3) -> bool:
        """Download a URL to ``output_path`` with retry logic.

        Args:
            url: Google Drive download URL.
            output_path: Destination path.
            max_retries: Maximum number of attempts.

        Returns:
            True on success, False if every attempt failed.
        """
        for attempt in range(max_retries):
            try:
                print(f"Attempt {attempt + 1}/{max_retries}: Downloading to {output_path}")
                # fuzzy=True lets gdown resolve share-style links for large files.
                gdown.download(url, str(output_path), quiet=False, fuzzy=True)
                if output_path.exists():
                    print(f"Successfully downloaded {output_path.name}")
                    return True
                print(f"Download failed - file not found after download")
            except Exception as e:
                print(f"Download attempt {attempt + 1} failed: {str(e)}")
            # Wait before retrying whether the attempt raised or silently
            # produced no file (linear backoff: 10s, 20s, 30s).
            if attempt < max_retries - 1:
                wait_time = (attempt + 1) * 10
                print(f"Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
        print(f"All {max_retries} attempts failed for {output_path.name}")
        return False

    def download_model(self, filename: str, file_id: str, description: str = "",
                       expected_size_mb: Optional[int] = None, force: bool = False) -> bool:
        """Download a single model file.

        Args:
            filename: Output filename inside the download directory.
            file_id: Google Drive file ID.
            description: Human-readable description (printed only).
            expected_size_mb: Expected size in MB for completeness checks.
            force: If True, redownload even if a complete file already exists.

        Returns:
            True if the file is present and the download (if any) succeeded.
        """
        output_path = self.download_dir / filename
        # Skip the download if a complete copy already exists (unless forced).
        if not force and self.file_exists_and_complete(output_path, expected_size_mb):
            print(f"{filename} already exists and appears complete, skipping download")
            return True
        # Remove any stale/incomplete file so gdown writes a fresh copy.
        if output_path.exists():
            print(f"Removing incomplete file: {filename}")
            output_path.unlink()
        print(f"\n{'='*60}")
        print(f"Downloading: {filename}")
        print(f"Description: {description}")
        if expected_size_mb:
            print(f"Expected size: {expected_size_mb} MB")
        print(f"{'='*60}")
        url = f"https://drive.google.com/uc?id={file_id}"
        start_time = time.time()
        success = self.download_with_retry(url, output_path)
        if success:
            duration = time.time() - start_time
            file_size_mb = output_path.stat().st_size / (1024 * 1024)
            print(f"Download completed in {duration:.1f} seconds")
            print(f"File size: {file_size_mb:.1f} MB")
            # Warn when the final size deviates more than 5% from expectation.
            if expected_size_mb and abs(file_size_mb - expected_size_mb) > expected_size_mb * 0.05:
                print(f"Warning: File size differs significantly from expected size")
        return success

    def download_all_models(self, force_redownload: bool = False):
        """Download all models listed in the configuration.

        Args:
            force_redownload: If True, redownload every model even if a
                complete local copy exists.
        """
        config = self.load_config()
        models = config.get("models", {})
        if not models:
            print("No models configured. Please edit the config file.")
            return
        print(f"Found {len(models)} models to download")
        print(f"Download directory: {self.download_dir.absolute()}")
        successful_downloads = 0
        failed_downloads = 0
        for filename, model_info in models.items():
            file_id = model_info.get("file_id", "")
            description = model_info.get("description", "")
            expected_size_mb = model_info.get("expected_size_mb")
            # Skip entries still holding the sample placeholder ID.
            if not file_id or "replace_with_actual" in file_id:
                print(f"Skipping {filename} - no valid file ID configured")
                continue
            success = self.download_model(filename, file_id, description,
                                          expected_size_mb, force=force_redownload)
            if success:
                successful_downloads += 1
            else:
                failed_downloads += 1
            # Brief pause between downloads to be gentle on Drive quotas.
            if successful_downloads + failed_downloads < len(models):
                time.sleep(2)
        print(f"Download Summary:")
        print(f"Successful: {successful_downloads}")
        print(f"Failed: {failed_downloads}")
        print(f"Total models: {len(models)}")

    def list_models(self):
        """Print all configured models with their local download status."""
        config = self.load_config()
        models = config.get("models", {})
        if not models:
            print("No models configured.")
            return
        print(f"Configured Models:")
        for filename, model_info in models.items():
            file_id = model_info.get("file_id", "")
            description = model_info.get("description", "")
            expected_size_mb = model_info.get("expected_size_mb")
            output_path = self.download_dir / filename
            status = "Downloaded" if output_path.exists() else "Not downloaded"
            print(f"File: {filename}")
            print(f"Description: {description}")
            if expected_size_mb:
                print(f"Expected size: {expected_size_mb} MB")
            print(f"Status: {status}")
            print("-" * 40)
def main():
    """Command-line entry point: dispatch on the first CLI argument.

    Commands:
        list     - show configured models and their download status
        download - fetch models that are missing or incomplete
        force    - redownload everything, ignoring existing files
    """
    print("Google Drive Large Model Downloader")
    # Initialize downloader (also creates the download directory if needed).
    downloader = GoogleDriveModelDownloader()
    if len(sys.argv) < 2:
        # No command given: print usage and exit. NOTE: the script does not
        # create a config file itself; the user must write models_config.json.
        print("Usage:")
        print(" python script.py list - List configured models")
        print(" python script.py download - Download all models")
        print(" python script.py force - Force redownload all models")
        print("\nCreate a models_config.json file to configure your downloads.")
        return
    command = sys.argv[1].lower()
    if command == "list":
        downloader.list_models()
    elif command == "download":
        downloader.download_all_models()
    elif command == "force":
        downloader.download_all_models(force_redownload=True)
    else:
        print("Unknown command. Use: list, download, or force")


if __name__ == "__main__":
    main()
Setting Up Your Configuration
The magic of this script is in its configuration file. Create a new file called models_config.json in the same folder as your script with this structure:
{
"models": {
"my_model.safetensors": {
"file_id": "1ABC123xyz...",
"description": "My text generation model",
"expected_size_mb": 4000
},
"another_model.bin": {
"file_id": "1DEF456uvw...",
"description": "Image processing model",
"expected_size_mb": 2500
}
}
}
How to find the file_id:
- Open your Google Drive share link (looks like: https://drive.google.com/file/d/FILE_ID/view?usp=sharing)
- The FILE_ID part (between /d/ and /view) is what you need
- Copy just that ID part into your config file
The Google Drive share link should look like this: https://drive.google.com/file/d/FILE_ID/view?usp=sharing
How to Use the Script
Once you have your script and config file ready, you can use three simple commands:
1. List The Model Files
python <script_name>.py list
This will show all configured models and their download status without downloading anything.
2. Download The Model Files
python <script_name>.py download
This will download only models that aren't already present or appear incomplete.
3. Redownload Everything
python <script_name>.py force
This ignores existing files and downloads everything again (useful when files get corrupted).
How the Script Works
Let me explain some of the smart features that make this script reliable:
Smart Download Resumption
If your internet connection drops during a large download, the script automatically retries up to 3 times. It uses a linear backoff strategy, waiting longer between each retry (10 seconds, then 20, then 30).
File Integrity Checking
After downloading, the script checks if the file size matches what was expected (within a 5% tolerance). This helps catch incomplete downloads that might otherwise seem successful.
Progress Feedback
Unlike many download scripts, this one gives you clear feedback:
- Expected file sizes
- Download duration
- Success/failure status
- File size verification
Troubleshooting Common Issues
Issue: "Config file not found"
Solution: The script does not create a config file automatically. Create a models_config.json file containing at least {"models": {}}, then add your model entries following the structure shown above. Run python <script_name>.py list to confirm the file is being read.
Issue: Download fails repeatedly
Solution: Google Drive sometimes blocks too many downloads. Try:
- Waiting a few hours before retrying.
- Using a different network.
- Breaking large downloads into smaller sessions.
Issue: Files appear incomplete despite successful download
Solution: Use the force command to redownload those specific files. Also check if your disk has enough free space.
Practical Use Case
Let's say you're working with Stable Diffusion models for AI image generation. You might have:
- A base model (~4GB)
- Several LoRA adapters (~150MB each)
- An upscaler model (~1GB)
Instead of manually downloading each file (and risking failures on the large ones), you can configure them all in your JSON file and download them with one command!
Downloading large AI models doesn't have to be frustrating. With this script, you can reliably download, verify, and organize all your model files with minimal effort.
Remember that Google Drive has download quotas, so if you're downloading many large files, space out your downloads to avoid temporary blocks.
Happy model downloading!!!
Need help? If you run into issues or want to share how you're using this script, leave a comment below!
Top comments (0)