CSV to JSON Basics
CSV files are flat tabular data that converts to JSON arrays of objects:
Conversion Process:
- Parse headers - First row becomes object keys
- Parse rows - Each row becomes a JSON object
- Map values - Columns map to object properties
- Type conversion - Optionally parse numbers/booleans
Input CSV:
name,age,active
John,30,true
Jane,25,false
Output JSON:
[
  {"name": "John", "age": "30", "active": "true"},
  {"name": "Jane", "age": "25", "active": "false"}
]
With type detection:
[
  {"name": "John", "age": 30, "active": true},
  {"name": "Jane", "age": 25, "active": false}
]
Convert CSV to JSON in JavaScript
Build a robust CSV parser:
// Basic CSV to JSON
// Basic CSV to JSON.
// Converts a CSV string into an array of objects, one per data row.
//
// @param {string} csv - Raw CSV text; the first line is treated as headers.
// @param {Object} [options]
// @param {string} [options.delimiter=','] - Field separator.
// @param {boolean} [options.typeDetection=false] - When true, values are run
//   through parseValue() to convert numbers/booleans/null.
// @returns {Array<Object>} One object per non-empty data row.
function csvToJson(csv, options = {}) {
  const { delimiter = ',', typeDetection = false } = options;
  // Normalize CRLF/CR line endings first so Windows-exported CSVs don't
  // leave a stray '\r' on the last column of every row.
  const lines = csv.replace(/\r\n/g, '\n').replace(/\r/g, '\n').trim().split('\n');
  const headers = parseCsvLine(lines[0], delimiter);
  return lines.slice(1)
    .filter((line) => line.trim())
    .map((line) => {
      const values = parseCsvLine(line, delimiter);
      return headers.reduce((obj, header, i) => {
        // Short rows yield '' for missing columns; ?? only falls back on
        // undefined, so an intentionally empty field stays ''.
        let value = values[i] ?? '';
        if (typeDetection) {
          value = parseValue(value);
        }
        obj[header.trim()] = value;
        return obj;
      }, {});
    });
}
// Parse CSV line handling quotes
// Split one CSV record into its fields, honoring double-quoted fields.
// Inside quotes the delimiter is literal text, and a doubled quote ("")
// is an escaped quote character.
//
// @param {string} line - One CSV record (no trailing newline).
// @param {string} [delimiter=','] - Field separator.
// @returns {string[]} The field values, quotes removed and escapes resolved.
function parseCsvLine(line, delimiter = ',') {
  const fields = [];
  let field = '';
  let quoted = false;
  let pos = 0;
  while (pos < line.length) {
    const ch = line[pos];
    if (ch === '"') {
      if (quoted && line[pos + 1] === '"') {
        // Escaped quote: emit one '"' and skip both characters.
        field += '"';
        pos += 2;
        continue;
      }
      quoted = !quoted;
    } else if (!quoted && ch === delimiter) {
      fields.push(field);
      field = '';
    } else {
      field += ch;
    }
    pos += 1;
  }
  // The final field has no trailing delimiter, so push it explicitly.
  fields.push(field);
  return fields;
}
// Detect and convert types
// Detect and convert types.
// Maps CSV string values onto JSON-native types: '' and 'null' -> null,
// 'true'/'false' (any case) -> boolean, numeric strings -> number,
// everything else passes through unchanged.
//
// @param {string} value - A single raw CSV field.
// @returns {null|boolean|number|string} The converted value.
function parseValue(value) {
  if (value === '') return null;
  const lowered = value.toLowerCase();
  if (lowered === 'true') return true;
  if (lowered === 'false') return false;
  if (lowered === 'null') return null;
  // Number.isNaN avoids the coercing global isNaN; the trim check keeps
  // whitespace-only fields as strings (Number('  ') would be 0).
  const num = Number(value);
  if (!Number.isNaN(num) && value.trim() !== '') return num;
  return value;
}
// Usage
const csv = `name,age,active
John,30,true
Jane,25,false`;
console.log(JSON.stringify(csvToJson(csv, { typeDetection: true }), null, 2));
Convert CSV to JSON in Python
Python's csv module and pandas handle CSV parsing:
import csv
import json
# Using csv module
def csv_to_json(csv_string, type_detection=False):
    """Convert a CSV string into a list of dicts (one per data row).

    The first line is used as the header row (via csv.DictReader).

    :param csv_string: Raw CSV text.
    :param type_detection: When True, run every value through parse_value()
        to convert numbers, booleans and nulls.
    :return: list of dicts keyed by the header names.
    """
    # splitlines() handles '\n', '\r\n' and '\r', so CSVs exported on
    # Windows don't leak a trailing '\r' into the last column's values
    # (split('\n') would).
    lines = csv_string.strip().splitlines()
    reader = csv.DictReader(lines)
    result = []
    for row in reader:
        if type_detection:
            row = {k: parse_value(v) for k, v in row.items()}
        result.append(row)
    return result
def parse_value(value):
    """Best-effort conversion of a CSV field to a JSON-native Python type.

    '' and 'null' become None, 'true'/'false' (any case) become booleans,
    numeric strings become int or float, anything else is returned as-is.

    :param value: A single raw CSV field (str).
    :return: None, bool, int, float or the original str.
    """
    if value == '':
        return None
    lowered = value.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    # Mirror the JavaScript parseValue(): treat 'null' as a null marker.
    if lowered == 'null':
        return None
    # Try int first, then float; this also handles scientific notation
    # ('1e3') which the previous "'.' in value" heuristic missed.
    try:
        return int(value)
    except ValueError:
        pass
    try:
        return float(value)
    except ValueError:
        return value
# Using pandas (recommended for large files)
import pandas as pd
# CSV file to JSON: read_csv infers column types, to_json serializes rows.
df = pd.read_csv("data.csv")
# orient='records' produces a JSON array of row objects.
json_data = df.to_json(orient='records', indent=2)
print(json_data)
# Or save to file
df.to_json("output.json", orient='records', indent=2)
# Forcing column types with dtype (pandas infers types automatically otherwise).
# NOTE(review): dtype *coerces* rather than infers; 'active' columns holding
# the strings 'true'/'false' may not convert cleanly to bool — verify on real data.
df = pd.read_csv("data.csv", dtype={
'id': int,
'price': float,
'active': bool
})
# CSV string to JSON: wrap the string in StringIO so read_csv can consume it.
from io import StringIO
csv_data = """name,age,active
John,30,true
Jane,25,false"""
df = pd.read_csv(StringIO(csv_data))
print(df.to_json(orient='records'))
Command Line Conversion
Convert CSV to JSON using CLI tools:
# Using miller (mlr): --csv sets the input format, --ojson the output format.
# Install: brew install miller
mlr --csv --ojson cat data.csv > output.json
# Using jq with @csv (reverse)
# Note: jq reads JSON, not CSV directly
# Using csvkit
# pip install csvkit
csvjson data.csv > output.json
# With indentation (pipe through Python's JSON pretty-printer)
csvjson data.csv | python -m json.tool > output.json
# Using Node.js csvtojson
# npm install -g csvtojson
csvtojson data.csv > output.json
# Python one-liner: stdlib csv.DictReader -> list of dicts -> pretty JSON
python -c "import csv,json,sys; print(json.dumps(list(csv.DictReader(sys.stdin)), indent=2))" < data.csv > output.json
# Convert multiple files (${f%.csv} strips the .csv suffix for the output name)
for f in *.csv; do
csvjson "$f" > "${f%.csv}.json"
done
# With specific delimiter (--fs sets the field separator)
mlr --csv --fs ';' --ojson cat semicolon-data.csv > output.json
Handle Edge Cases
Common CSV parsing challenges:
// Different delimiters
// Guess the delimiter of a CSV line by trying common candidates and
// keeping whichever splits the line into the most fields.
// Falls back to ',' when no candidate appears in the line.
//
// @param {string} line - Typically the header (first) line of the file.
// @returns {string} The most likely delimiter character.
function detectDelimiter(line) {
  let best = { delimiter: ',', count: 0 };
  for (const candidate of [',', ';', '\t', '|']) {
    const fieldCount = line.split(candidate).length;
    if (fieldCount > best.count) {
      best = { delimiter: candidate, count: fieldCount };
    }
  }
  return best.delimiter;
}
// Handle BOM (Byte Order Mark)
function removeBOM(text) {
return text.replace(/^\uFEFF/, '');
}
// Normalize line endings
function normalizeLineEndings(text) {
return text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
}
// Handle inconsistent columns
function csvToJsonWithMissingValues(csv) {
const lines = csv.trim().split('\n');
const headers = parseCsvLine(lines[0]);
return lines.slice(1).map(line => {
const values = parseCsvLine(line);
return headers.reduce((obj, header, i) => {
// Use null for missing values
obj[header] = values[i] !== undefined ? values[i] : null;
return obj;
}, {});
});
}
// Handle multiline values (quoted)
const csvWithMultiline = `name,description
John,"This is a
multiline description"
Jane,"Single line"`;
// The parseCsvLine function handles this with quote tracking
// Skip empty rows
// Illustrative stub: shows only the row-filtering step. Drops lines that
// are blank or contain nothing but delimiters (e.g. ",,,"); the actual
// header/value parsing would proceed as in csvToJson above.
function csvToJsonSkipEmpty(csv) {
const lines = csv.trim().split('\n')
.filter(line => line.trim() && !line.match(/^,*$/));
// ... rest of parsing
}
// Nested JSON from dot notation headers
const flatCsv = "user.name,user.email,orders\nJohn,john@example.com,5";
// Can be transformed to nested objects if needed
Frequently Asked Questions
How do I handle CSV files with different delimiters?
Auto-detect by counting occurrences of common delimiters (comma, semicolon, tab) in the first line, or specify the delimiter explicitly.
Are numbers automatically converted?
By default, all values are strings. Enable type detection to convert numbers, booleans, and null values automatically.
How do I handle quoted fields with commas?
Proper CSV parsers handle this by tracking quote state. Fields like "New York, NY" are parsed as single values.
What about CSV files from Excel?
Excel CSVs may have BOM characters and use different encodings. Remove BOM and ensure UTF-8 encoding for best results.