Python CSV to Database: Import 100,000 Rows in 10 Seconds

Importing CSV data manually through a UI is slow and error-prone. Python can do it in seconds with proper batching.

import sqlite3
import csv
import time

def import_csv_to_db(csv_path, db_path, table_name, batch_size=1000):
    conn = sqlite3.connect(db_path)

    with open(csv_path, 'r', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        headers = reader.fieldnames

        # Create table from headers
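        # (table_name and the headers are interpolated straight into the SQL,
        # so they should come from a trusted source)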
        col_defs = ', '.join(f'`{h}` TEXT' for h in headers)
        conn.execute(f'CREATE TABLE IF NOT EXISTS {table_name} ({col_defs})')

        placeholders = ', '.join('?' * len(headers))
        insert_sql = f'INSERT INTO {table_name} VALUES ({placeholders})'

        batch = []
        total = 0
        start = time.time()

        for row in reader:
            batch.append([row[h] for h in headers])

            if len(batch) >= batch_size:
                conn.executemany(insert_sql, batch)
                conn.commit()
                total += len(batch)
                elapsed = time.time() - start
                print(f"Imported {total:,} rows ({total/elapsed:.0f} rows/sec)")
                batch = []

        if batch:
            conn.executemany(insert_sql, batch)
            conn.commit()
            total += len(batch)

        elapsed = time.time() - start
        print(f"\n✅ Done: {total:,} rows in {elapsed:.2f}s ({total/elapsed:.0f} rows/sec)")

    conn.close()
    return total

# Usage
rows = import_csv_to_db('sales_data.csv', 'business.db', 'sales')
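To reproduce the 100,000-row benchmark yourself, you can generate a synthetic CSV first. A quick sketch (the column names and value ranges here are arbitrary placeholders):

import csv
import random

# Write a 100,000-row test file for the importer to consume
with open('sales_data.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['order_id', 'region', 'amount'])
    for i in range(100_000):
        writer.writerow([i, random.choice(['north', 'south', 'east', 'west']),
                         round(random.uniform(1, 500), 2)])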

Add data validation

def import_with_validation(csv_path, db_path, table_name, validators=None):
    validators = validators or {}
    errors = []

    with open(csv_path, 'r', encoding='utf-8-sig') as f:
        for i, row in enumerate(csv.DictReader(f), 1):
            row_errors = []
            for field, validator in validators.items():
                try:
                    validator(row[field])
                except ValueError as e:
                    row_errors.append(f"Row {i}: {field} - {e}")

            if row_errors:
                errors.extend(row_errors)

    if errors:
        print(f"Validation errors:")
        for err in errors[:10]:  # Show first 10
            print(f"  {err}")
        return False

    return import_csv_to_db(csv_path, db_path, table_name)

# Example validators: plain functions that raise ValueError on bad input
def validate_email(value):
    if '@' not in value:
        raise ValueError("Invalid email")

def validate_amount(value):
    # float() itself raises ValueError for non-numeric input
    if float(value) <= 0:
        raise ValueError("Must be positive")

import_with_validation('orders.csv', 'db.sqlite', 'orders', validators={
    'email': validate_email,
    'amount': validate_amount,
})
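Note the design: validation runs as a separate pass before any rows are written, so a file with errors is rejected whole rather than half-imported. The trade-off is that the CSV gets read twice.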

Performance tips

  • Use executemany() instead of calling execute() in a loop — typically 10-100x faster
  • Set PRAGMA journal_mode=WAL so other connections can read during the import
  • Set PRAGMA synchronous=NORMAL for roughly 3x faster writes during import (see the sketch below)
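A minimal sketch of applying both pragmas inside import_csv_to_db, right after connecting. Note that journal_mode=WAL persists in the database file, while synchronous is a per-connection setting, so it belongs on the connection doing the writes:

    conn = sqlite3.connect(db_path)
    # WAL lets other connections read while the import writes;
    # this setting persists in the database file
    conn.execute('PRAGMA journal_mode=WAL')
    # NORMAL skips some fsyncs for faster commits; per-connection setting
    conn.execute('PRAGMA synchronous=NORMAL')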

This script handles 100,000+ rows comfortably.

Get 50+ Python automation scripts: https://lukassbrad.gumroad.com/l/ugeka
