Python CSV to Database: Import 100,000 Rows in 10 Seconds
Importing CSV data manually through a UI is slow and error-prone. Python can do it in seconds with proper batching.
import sqlite3
import csv
import time
from pathlib import Path
def import_csv_to_db(csv_path, db_path, table_name, batch_size=1000):
    """Bulk-import a CSV file into a SQLite table.

    Creates the table (all TEXT columns) from the CSV header row if it
    does not already exist, then inserts rows in batches with
    executemany() for speed.

    Args:
        csv_path: Path to the input CSV file (UTF-8; a BOM is tolerated).
        db_path: Path to the SQLite database file.
        table_name: Destination table name. NOTE: interpolated directly
            into SQL — must come from trusted code, never user input.
        batch_size: Rows per executemany()/commit cycle.

    Returns:
        Total number of rows imported.

    Raises:
        ValueError: If the CSV file has no header row.
    """
    conn = sqlite3.connect(db_path)
    try:
        with open(csv_path, 'r', encoding='utf-8-sig') as f:
            reader = csv.DictReader(f)
            headers = reader.fieldnames
            if not headers:
                # Without this guard an empty file fails later with an
                # opaque TypeError on len(None).
                raise ValueError(f"No header row found in {csv_path}")
            # Create table from headers; backtick-quoting guards column
            # names containing spaces or punctuation.
            col_defs = ', '.join(f'`{h}` TEXT' for h in headers)
            conn.execute(f'CREATE TABLE IF NOT EXISTS {table_name} ({col_defs})')
            placeholders = ', '.join('?' * len(headers))
            insert_sql = f'INSERT INTO {table_name} VALUES ({placeholders})'
            batch = []
            total = 0
            start = time.time()
            for row in reader:
                batch.append([row[h] for h in headers])
                if len(batch) >= batch_size:
                    conn.executemany(insert_sql, batch)
                    conn.commit()
                    total += len(batch)
                    elapsed = time.time() - start
                    print(f"Imported {total:,} rows ({total/elapsed:.0f} rows/sec)")
                    batch = []
            # Flush the final partial batch.
            if batch:
                conn.executemany(insert_sql, batch)
                conn.commit()
                total += len(batch)
            # max() guards against ZeroDivisionError on an instant import
            # (coarse clocks can report elapsed == 0.0).
            elapsed = time.time() - start
            print(f"\n✅ Done: {total:,} rows in {elapsed:.2f}s ({total/max(elapsed, 1e-9):.0f} rows/sec)")
    finally:
        # Always release the connection, even if reading or inserting fails.
        conn.close()
    return total
# Usage: expects sales_data.csv in the working directory; creates/updates business.db
rows = import_csv_to_db('sales_data.csv', 'business.db', 'sales')
Add data validation
def import_with_validation(csv_path, db_path, table_name, validators=None):
    """Validate every CSV row, then import only if all rows pass.

    Args:
        csv_path: Path to the input CSV file (UTF-8; a BOM is tolerated).
        db_path: SQLite database path, forwarded to import_csv_to_db().
        table_name: Destination table, forwarded to import_csv_to_db().
        validators: Optional mapping of field name -> callable that raises
            ValueError when the field's value is invalid.

    Returns:
        The row count from import_csv_to_db() on success, or False when
        any row fails validation (first 10 errors are printed).
    """
    validators = validators or {}
    errors = []
    # utf-8-sig matches import_csv_to_db: a BOM would otherwise be glued
    # onto the first field name and silently skip that field's validator.
    with open(csv_path, 'r', encoding='utf-8-sig') as f:
        for i, row in enumerate(csv.DictReader(f), 1):
            for field, validator in validators.items():
                if field not in row:
                    # Report a missing column as a validation error
                    # instead of crashing with KeyError.
                    errors.append(f"Row {i}: {field} - missing field")
                    continue
                try:
                    validator(row[field])
                except ValueError as e:
                    errors.append(f"Row {i}: {field} - {e}")
    if errors:
        print(f"Validation errors ({len(errors)} total):")
        for err in errors[:10]:  # Show first 10
            print(f"  {err}")
        return False
    return import_csv_to_db(csv_path, db_path, table_name)
# Example validators: each callable raises ValueError for an invalid value.
# The `(_ for _ in ()).throw(...)` construct is a trick to raise an
# exception from inside a lambda (lambdas cannot contain a raise statement).
import_with_validation('orders.csv', 'db.sqlite', 'orders', validators={
    'email': lambda x: x if '@' in x else (_ for _ in ()).throw(ValueError("Invalid email")),
    'amount': lambda x: float(x) if float(x) > 0 else (_ for _ in ()).throw(ValueError("Must be positive")),
})
Performance tips
- Use `executemany()`, not `execute()` in a loop — 10-100x faster
- Set `PRAGMA journal_mode=WAL` for concurrent reads
- Set `PRAGMA synchronous=NORMAL` for 3x faster writes during import
This script handles 100,000+ rows comfortably.
Get 50+ Python automation scripts: https://lukassbrad.gumroad.com/l/ugeka
Top comments (0)