Sitemap Generation at Scale
I needed to generate sitemaps for 3.5 million URLs at PlayTeraBox.online.
The Approach
import psycopg2
BATCH = 50000 # Google's max per sitemap
def generate_sitemaps():
conn = psycopg2.connect('postgresql://...')
cur = conn.cursor()
cur.execute('SELECT COUNT(*) FROM videos')
total = cur.fetchone()[0]
batches = (total // BATCH) + 1
for i in range(min(batches, 70)):
cur.execute('SELECT id FROM videos LIMIT %s OFFSET %s', (BATCH, i*BATCH))
ids = cur.fetchall()
# Write sitemap XML
print(f'Sitemap {i}: {len(ids)} URLs')
Result
- 70 sitemaps × 50k = 3.5M URLs
- All submitted to Google Search Console
Live at PlayTeraBox.online
Top comments (0)