
Import CSV to Database — PostgreSQL, MySQL, SQLite, and Node.js


Mian Ali Khalid · 5 min read

Importing CSV into a database efficiently requires the right bulk-import command for your database engine. Native import commands are 10–100x faster than row-by-row INSERT statements.

Convert CSV to JSON first with the CSV to JSON Converter.
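If you want to see the gap on your own data, psql's \timing meta-command is enough for a rough comparison. A minimal sketch, using the same users table and users.csv as the examples below (the inserted values are placeholders):

\timing on

-- Row-by-row (slow): one round trip and, in autocommit mode, one transaction per statement:
INSERT INTO users (name, email) VALUES ('Ada', 'ada@example.com');
-- ...repeated once per row...

-- Bulk (fast): a single statement for the whole file:
\copy users FROM 'users.csv' WITH (FORMAT CSV, HEADER true)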

PostgreSQL: COPY command

-- Create target table:
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    name TEXT NOT NULL,
    email TEXT UNIQUE NOT NULL,
    age INTEGER,
    created_at DATE
);

-- Import from a server-side file (requires superuser or the pg_read_server_files role):
COPY users (name, email, age, created_at)
FROM '/path/to/users.csv'
WITH (
    FORMAT CSV,
    HEADER true,       -- Skip header row
    DELIMITER ',',
    NULL '',           -- Empty string → NULL
    ENCODING 'UTF8'
);

-- Import from a client-side file (psql \copy is a meta-command and must fit on one line):
\copy users (name, email, age, created_at) FROM 'users.csv' WITH (FORMAT CSV, HEADER true)

# Command line:
psql -d mydb -c "\copy users FROM 'users.csv' WITH (FORMAT CSV, HEADER true)"

# Or stream the file through STDIN for large files (no server-side file access needed):
psql -d mydb -c "COPY users FROM STDIN WITH (FORMAT CSV, HEADER true)" < users.csv

PostgreSQL: handle errors with a staging table

-- Import into staging table first (all TEXT columns):
CREATE TEMP TABLE users_staging (
    name TEXT,
    email TEXT,
    age TEXT,
    created_at TEXT
);

\copy users_staging FROM 'users.csv' WITH (FORMAT CSV, HEADER true)

-- Validate and cast:
INSERT INTO users (name, email, age, created_at)
SELECT
    TRIM(name),
    LOWER(TRIM(email)),
    CASE WHEN age ~ '^[0-9]+$' THEN age::INTEGER ELSE NULL END,
    CASE WHEN created_at ~ '^\d{4}-\d{2}-\d{2}$' THEN created_at::DATE ELSE NULL END
FROM users_staging
WHERE email IS NOT NULL AND email != ''
ON CONFLICT (email) DO UPDATE SET
    name = EXCLUDED.name,
    age = EXCLUDED.age;
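
Before dropping the staging table, it can be worth inspecting the rows the CASE expressions above would null out. A quick check that mirrors the same regexes (shown here as a sketch):

-- Rows with a non-empty but invalid age or date:
SELECT name, email, age, created_at
FROM users_staging
WHERE (age <> '' AND age !~ '^[0-9]+$')
   OR (created_at <> '' AND created_at !~ '^\d{4}-\d{2}-\d{2}$');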

DROP TABLE users_staging;

MySQL: LOAD DATA INFILE

-- Load CSV (requires FILE privilege):
LOAD DATA INFILE '/path/to/users.csv'
INTO TABLE users
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS  -- Skip header
(name, email, @age, @created_at)
SET
    age = NULLIF(@age, ''),
    created_at = STR_TO_DATE(@created_at, '%Y-%m-%d');

-- Local file (client-side; requires local_infile enabled on both client and server, see below):
LOAD DATA LOCAL INFILE 'users.csv'
INTO TABLE users
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
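
LOAD DATA LOCAL INFILE only works when local-file loading is allowed on both ends, and LOAD DATA reports bad values as warnings rather than hard errors. A short checklist (a sketch; assumes you have privileges to change the server variable, and youruser/yourdb are placeholders):

-- Server side (SUPER or SYSTEM_VARIABLES_ADMIN required):
SET GLOBAL local_infile = 1;

-- Client side: start mysql with local-infile enabled, e.g.
--   mysql --local-infile=1 -u youruser -p yourdb

-- After LOAD DATA, check for truncated or coerced values:
SHOW WARNINGS;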

SQLite: import CSV

# SQLite CLI:
sqlite3 database.db

.mode csv
.import users.csv users

If the table does not exist, .import creates it with all TEXT columns and uses the header row as the column names. If the table already exists, the header row is loaded as a data row; skip it with .import --skip 1 users.csv users (SQLite 3.32+).

For typed columns and a bit of validation, the same import is easy to script with Python's built-in sqlite3 module:
import sqlite3
import csv

def import_csv_to_sqlite(csv_path: str, db_path: str, table: str):
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()
    
    with open(csv_path, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        
        # Create table from headers:
        fields = reader.fieldnames
        cur.execute(f"""
            CREATE TABLE IF NOT EXISTS {table} (
                {', '.join(f'"{f}" TEXT' for f in fields)}
            )
        """)
        
        # Bulk insert:
        placeholders = ', '.join(['?'] * len(fields))
        rows = [[row[f] for f in fields] for row in reader]
        cur.executemany(
            f"INSERT INTO {table} VALUES ({placeholders})",
            rows
        )
    
    conn.commit()
    print(f"Imported {cur.rowcount} rows")
    conn.close()

Node.js: stream CSV into PostgreSQL

import { createReadStream } from 'fs';
import { parse } from 'csv-parse';
import { Pool } from 'pg'; // npm install pg csv-parse

const pool = new Pool({ connectionString: process.env.DATABASE_URL });

async function importCSVToPostgres(filePath) {
  const client = await pool.connect();

  try {
    await client.query('BEGIN');

    let count = 0;
    const batchSize = 1000;
    let batch = [];

    // Bulk insert one batch with UNNEST (one round trip per batch):
    const flush = async () => {
      if (batch.length === 0) return;
      await client.query(`
        INSERT INTO users (name, email, age)
        SELECT * FROM UNNEST($1::text[], $2::text[], $3::int[])
        ON CONFLICT (email) DO NOTHING
      `, [
        batch.map(r => r[0]),
        batch.map(r => r[1]),
        batch.map(r => r[2]),
      ]);
      count += batch.length;
      batch = [];
    };

    const parser = createReadStream(filePath)
      .pipe(parse({ columns: true, skip_empty_lines: true, cast: true }));

    // Async iteration applies backpressure: the loop does not pull more rows
    // while an insert is in flight, so COMMIT cannot run before the inserts.
    // (An async 'data' handler would race ahead of the queries instead.)
    for await (const row of parser) {
      batch.push([row.name, row.email?.toLowerCase(), row.age]);
      if (batch.length >= batchSize) await flush();
    }

    await flush(); // Insert the remaining partial batch

    await client.query('COMMIT');
    console.log(`Imported ${count} rows`);
  } catch (err) {
    await client.query('ROLLBACK');
    throw err;
  } finally {
    client.release();
  }
}

Performance tips

Technique                                        Speedup
Use COPY/LOAD DATA instead of INSERT             10–100×
Disable indexes before import, rebuild after     2–5×    (sketch below)
Use transactions for batches of 1000 rows        5–10×
Set synchronous_commit = off (PostgreSQL)        (sketch below)
Increase innodb_buffer_pool_size (MySQL)

-- PostgreSQL: disable triggers (this also skips FK checks) around COPY, then refresh stats:
ALTER TABLE users DISABLE TRIGGER ALL;
-- ... run COPY ...
ALTER TABLE users ENABLE TRIGGER ALL;
VACUUM ANALYZE users;
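
The trigger block above skips per-row trigger and FK work; the "disable indexes" and synchronous_commit rows in the table work along the same lines. A sketch, assuming a secondary index named users_created_at_idx (hypothetical; keep the unique index on email, since ON CONFLICT relies on it):

-- Session-level: commits return before WAL is flushed to disk; a crash can
-- lose the most recent commits but never corrupts the table:
SET synchronous_commit = off;

-- Drop secondary indexes, import, then rebuild them in one pass:
DROP INDEX IF EXISTS users_created_at_idx;
-- ... run COPY ...
CREATE INDEX users_created_at_idx ON users (created_at);
VACUUM ANALYZE users;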


Related tool

CSV to JSON Converter

Convert CSV files to JSON with proper quoting and escaping.

Written by Mian Ali Khalid. Part of the Data & Format pillar.