You've found data on a website. You need it in your database. The workflow usually involves: export to CSV, import into SQL tool, deal with type mismatches, fix encoding issues.
What if you could go directly from HTML table to valid SQL?
This guide shows how to generate CREATE TABLE and INSERT INTO statements from any web table, handling type inference, identifier sanitization, and proper escaping. This is the approach I use in HTML Table Exporter.
The End Result
From a table like this:
| Product Name | Price | In Stock |
|-----------------|--------|----------|
| Widget Pro | 29.99 | Yes |
| Gadget Basic | 14.50 | No |
Generate:
-- Exported from HTML Table Exporter PRO
CREATE TABLE products (
product_name TEXT,
price REAL,
in_stock TEXT
);
INSERT INTO products (product_name, price, in_stock) VALUES
('Widget Pro', 29.99, 'Yes'),
('Gadget Basic', 14.50, 'No');
Let's build this step by step.
Step 1: Extract the Table Matrix
First, extract raw data from the HTML table:
function extractTableMatrix(table) {
const rows = Array.from(table.rows);
const grid = [];
rows.forEach((rowEl, rowIndex) => {
if (!grid[rowIndex]) grid[rowIndex] = [];
let colIndex = 0;
Array.from(rowEl.cells).forEach(cell => {
while (grid[rowIndex][colIndex] !== undefined) colIndex++;
const text = cell.textContent.trim();
const rowSpan = parseInt(cell.rowSpan) || 1;
const colSpan = parseInt(cell.colSpan) || 1;
for (let r = 0; r < rowSpan; r++) {
if (!grid[rowIndex + r]) grid[rowIndex + r] = [];
for (let c = 0; c < colSpan; c++) {
grid[rowIndex + r][colIndex + c] = text;
}
}
colIndex += colSpan;
});
});
return grid;
}
Output: A 2D array where rows[0] is headers and rows[1+] is data.
Step 2: Sanitize Column Names
SQL identifiers have strict rules. Headers like "Product Name" or "Price ($)" need sanitization:
function sanitizeSqlIdentifier(header, fallbackIndex) {
let id = (header || "").toString().trim();
if (!id) {
return `col_${fallbackIndex + 1}`;
}
// Normalize unicode (remove accents)
id = id.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
// Convert to lowercase snake_case
id = id
.toLowerCase()
.replace(/[^a-z0-9]+/g, "_")
.replace(/^_+|_+$/g, "");
// Ensure doesn't start with number
if (/^\d/.test(id)) {
id = `col_${id}`;
}
// Fallback if empty after sanitization
if (!id) {
return `col_${fallbackIndex + 1}`;
}
return id;
}
// Examples:
// "Product Name" → "product_name"
// "Price ($)" → "price"
// "2024 Revenue" → "col_2024_revenue"
// "" → "col_1"
Step 3: Handle Duplicate Column Names
Tables sometimes have duplicate headers. SQL requires unique column names:
function makeUniqueColumnNames(headers) {
const used = new Set();
return headers.map((header, idx) => {
let name = sanitizeSqlIdentifier(header, idx);
let candidate = name;
let counter = 1;
while (used.has(candidate)) {
candidate = `${name}_${counter}`;
counter++;
}
used.add(candidate);
return candidate;
});
}
// ["Name", "Name", "Value"] → ["name", "name_1", "value"]
Step 4: Infer SQL Types
Analyze data to determine appropriate SQL types:
function inferSqlColumnTypes(rows, headerRowIndex = 0) {
const headerRow = rows[headerRowIndex] || [];
const dataRows = rows.slice(headerRowIndex + 1);
const types = new Array(headerRow.length).fill("TEXT");
for (let col = 0; col < headerRow.length; col++) {
// Sample up to 50 values per column
const values = [];
for (let r = 0; r < Math.min(dataRows.length, 50); r++) {
const cell = dataRows[r][col];
const v = cell != null ? String(cell).trim() : "";
if (v !== "") values.push(v);
}
if (values.length === 0) {
types[col] = "TEXT";
continue;
}
// Check if all values match a type
let allInt = true;
let allNumeric = true;
for (const v of values) {
if (!/^[-+]?\d+$/.test(v)) {
allInt = false;
}
if (!/^[-+]?\d+([.,]\d+)?$/.test(v)) {
allNumeric = false;
}
}
if (allInt) {
types[col] = "INTEGER";
} else if (allNumeric) {
types[col] = "REAL";
} else {
types[col] = "TEXT";
}
}
return types;
}
Type priority: INTEGER > REAL > TEXT
If 100% of non-empty values are integers, use INTEGER. If they're all numeric (including decimals), use REAL. Otherwise, TEXT.
Step 5: Escape Values Properly
SQL injection isn't just a security concern—it corrupts your data. Proper escaping:
function sqlEscapeValue(raw, type) {
// NULL handling
if (raw == null) return "NULL";
const v = String(raw).trim();
if (v === "") return "NULL";
// Numeric types
if (type === "INTEGER" || type === "REAL") {
// Normalize decimal separator
const normalized = v.replace(",", ".");
const num = Number(normalized);
if (!Number.isNaN(num) && Number.isFinite(num)) {
return normalized;
}
// Fall through to TEXT if not a valid number
}
// TEXT: escape single quotes by doubling them
const escaped = v.replace(/'/g, "''");
return `'${escaped}'`;
}
// Examples:
// sqlEscapeValue("Hello", "TEXT") → "'Hello'"
// sqlEscapeValue("It's good", "TEXT") → "'It''s good'"
// sqlEscapeValue("29.99", "REAL") → "29.99"
// sqlEscapeValue("", "TEXT") → "NULL"
// sqlEscapeValue(null, "TEXT") → "NULL"
Step 6: Generate the SQL
Putting it all together:
function tableToSql(tableInfo) {
const rows = tableInfo.rows || [];
if (rows.length < 2) return "";
const headerRowIndex = tableInfo.headerRowIndex || 0;
const headerRow = rows[headerRowIndex];
const dataRows = rows.slice(headerRowIndex + 1);
if (!headerRow || headerRow.length === 0) return "";
// Generate column names
const columnNames = makeUniqueColumnNames(headerRow);
// Infer types
const types = inferSqlColumnTypes(rows, headerRowIndex);
// Generate table name
const rawTableName = tableInfo.name || tableInfo.slug || "table";
const tableName = sanitizeSqlIdentifier(rawTableName, 0) || "table_export";
// CREATE TABLE statement
const createLines = columnNames.map((col, i) =>
` ${col} ${types[i] || "TEXT"}`
);
const createStmt = `CREATE TABLE ${tableName} (\n${createLines.join(",\n")}\n);`;
// INSERT statements
const insertHeader = `INSERT INTO ${tableName} (${columnNames.join(", ")}) VALUES`;
const valueLines = dataRows.map(row => {
const values = columnNames.map((_, i) => {
const cell = row[i];
const type = types[i] || "TEXT";
return sqlEscapeValue(cell, type);
});
return ` (${values.join(", ")})`;
});
// Combine
let sql = `-- Exported from HTML Table Exporter\n\n${createStmt}\n\n`;
if (valueLines.length > 0) {
sql += `${insertHeader}\n${valueLines.join(",\n")};\n`;
}
return sql;
}
Full Example
Input table:
const tableInfo = {
name: "Q1 Sales Data",
rows: [
["Product", "Units Sold", "Revenue ($)", "Profitable"],
["Widget A", "1,234", "45,678.90", "Yes"],
["Widget B", "567", "12,345.67", "No"],
["Gadget X", "890", "23,456.78", "Yes"]
]
};
Output:
-- Exported from HTML Table Exporter
CREATE TABLE q1_sales_data (
product TEXT,
units_sold INTEGER,
revenue REAL,
profitable TEXT
);
INSERT INTO q1_sales_data (product, units_sold, revenue, profitable) VALUES
('Widget A', 1234, 45678.90, 'Yes'),
('Widget B', 567, 12345.67, 'No'),
('Gadget X', 890, 23456.78, 'Yes');
Note:
- Column names sanitized (
Revenue ($)→revenue) - Numbers detected and unquoted
- Commas in numbers handled
- Table name from metadata
Handling Edge Cases
NULL Values
Many web tables represent missing data differently:
const NULL_PATTERNS = ["N/A", "n/a", "-", "--", "null", "none", "."];
function sqlEscapeValue(raw, type, nullPatterns = NULL_PATTERNS) {
if (raw == null) return "NULL";
const v = String(raw).trim();
if (v === "" || nullPatterns.includes(v.toLowerCase())) {
return "NULL";
}
// ... rest of function
}
Large Datasets
For tables with thousands of rows, batch the INSERTs:
function tableToSqlBatched(tableInfo, batchSize = 1000) {
// ... same setup ...
const batches = [];
for (let i = 0; i < valueLines.length; i += batchSize) {
const batch = valueLines.slice(i, i + batchSize);
batches.push(`${insertHeader}\n${batch.join(",\n")};`);
}
return `${createStmt}\n\n${batches.join("\n\n")}`;
}
Database-Specific Syntax
SQLite, PostgreSQL, and MySQL have slight syntax differences:
function tableToSql(tableInfo, dialect = "sqlite") {
// ... same column/type logic ...
// Dialect-specific type mapping
const typeMap = {
sqlite: { INTEGER: "INTEGER", REAL: "REAL", TEXT: "TEXT" },
postgresql: { INTEGER: "INTEGER", REAL: "NUMERIC", TEXT: "TEXT" },
mysql: { INTEGER: "INT", REAL: "DECIMAL(10,2)", TEXT: "VARCHAR(255)" }
};
const dialectTypes = typeMap[dialect] || typeMap.sqlite;
const createLines = columnNames.map((col, i) =>
` ${col} ${dialectTypes[types[i]] || dialectTypes.TEXT}`
);
// ... rest of function ...
}
Browser Bookmarklet
Quick solution for occasional use:
javascript:(function(){
const table = document.querySelector("table");
if (!table) { alert("No table found"); return; }
const rows = Array.from(table.rows).map(r =>
Array.from(r.cells).map(c => c.textContent.trim())
);
const headers = rows[0].map((h, i) =>
(h || `col_${i+1}`).toLowerCase().replace(/[^a-z0-9]+/g, "_")
);
const values = rows.slice(1).map(row =>
"(" + row.map(v => `'${v.replace(/'/g, "''")}'`).join(", ") + ")"
).join(",\n");
const sql = `INSERT INTO table_data (${headers.join(", ")}) VALUES\n${values};`;
navigator.clipboard.writeText(sql);
alert("SQL copied to clipboard!");
})();
When to Use This
Good for:
- Quick data imports into SQLite/PostgreSQL
- Seeding test databases
- One-time data migrations
- Learning SQL by example
Not ideal for:
- Large-scale ETL (use proper tools)
- Complex schemas (foreign keys, constraints)
- Binary data
For production data pipelines, proper ETL tools with validation are better. For "I need this table in my database right now," direct SQL generation is fast.
Try It Without Code
If you don't want to build this yourself, HTML Table Exporter PRO generates SQL with one click. The free version exports to CSV/JSON/Excel; PRO adds SQL, NDJSON, and data cleaning.
For a comparison of different export approaches, see our guide on how to export HTML tables to CSV.
Learn more at gauchogrid.com/html-table-exporter or try it on the Chrome Web Store.
What database are you importing web data into? I'm curious about the use cases.
Top comments (0)