Why Convert XML to JSON?
Common reasons to convert XML to JSON:
- REST API Integration - Most modern REST APIs exchange JSON rather than XML
- JavaScript/Node.js - JSON is native to JavaScript
- Smaller Payload - JSON is typically 30-50% smaller than XML
- Frontend Development - React, Vue, Angular work better with JSON
- Mobile Apps - JSON parsing is faster on mobile devices
- Legacy System Migration - Moving from SOAP to REST
Key Differences Between XML and JSON:
| Feature | XML | JSON |
|---|---|---|
| Syntax | Tags with attributes | Key-value pairs |
| Data types | All text (needs parsing) | Native types |
| Arrays | Repeated elements | Native arrays |
| Comments | Supported | Not supported |
| Size | Larger | Smaller |
Convert XML to JSON in JavaScript
Use the built-in DOMParser in the browser, or the xml2js package in Node.js:
// Browser: Using DOMParser
/**
 * Convert an XML string into a plain, JSON-serializable object using the
 * browser's DOMParser.
 *
 * Mapping rules:
 * - attributes become keys prefixed with "@";
 * - an element with no attributes and no child elements becomes its trimmed
 *   text, or null when it has no text;
 * - text next to attributes or child elements is stored under "#text";
 * - child elements sharing a tag name are collected into an array.
 *
 * @param {string} xmlString - XML source text.
 * @returns {Object} An object with a single key, the root tag name.
 * @throws {Error} If the parser reports the input as malformed.
 */
function xmlToJson(xmlString) {
  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;
  const CDATA_SECTION_NODE = 4;

  const doc = new DOMParser().parseFromString(xmlString, 'text/xml');

  // DOMParser does not throw on malformed XML; it reports the failure as a
  // <parsererror> element inside the returned document.
  const parseError = doc.querySelector('parsererror');
  if (parseError) {
    throw new Error(`Invalid XML: ${parseError.textContent}`);
  }

  // Own-property test: a plain truthiness check would treat an earlier null
  // (empty element) as "not seen yet" and would see inherited names such as
  // "constructor" as already present.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  function nodeToJson(node) {
    const result = {};

    for (const attr of Array.from(node.attributes ?? [])) {
      result[`@${attr.name}`] = attr.value;
    }

    const children = Array.from(node.childNodes);
    const elementChildren = children.filter((n) => n.nodeType === ELEMENT_NODE);

    // Join first, trim afterwards, so inner spacing of mixed content survives
    // ("Hello <b>world</b>!" keeps "Hello !"). CDATA counts as text.
    const text = children
      .filter((n) => n.nodeType === TEXT_NODE || n.nodeType === CDATA_SECTION_NODE)
      .map((n) => n.textContent)
      .join('')
      .trim();

    // Pure leaf: collapse to the bare text value.
    if (elementChildren.length === 0 && Object.keys(result).length === 0) {
      return text || null;
    }

    if (text) {
      result['#text'] = text;
    }

    // Group children by tag name; the second occurrence promotes the value
    // to an array.
    for (const child of elementChildren) {
      const tagName = child.tagName;
      const childValue = nodeToJson(child);
      if (!hasOwn(result, tagName)) {
        result[tagName] = childValue;
      } else if (Array.isArray(result[tagName])) {
        result[tagName].push(childValue);
      } else {
        result[tagName] = [result[tagName], childValue];
      }
    }

    return result;
  }

  const root = doc.documentElement;
  return { [root.tagName]: nodeToJson(root) };
}
// Example usage
// Sample document with attributes on both the root element and a child.
const xml = `
<book id="1" category="fiction">
<title lang="en">The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<year>1925</year>
</book>`;
// Convert first, then pretty-print with two-space indentation.
const converted = xmlToJson(xml);
console.log(JSON.stringify(converted, null, 2));
// Node.js: Using xml2js (npm install xml2js)
const xml2js = require('xml2js');
async function parseXml(xmlString) {
const parser = new xml2js.Parser({
explicitArray: false,
mergeAttrs: true
});
return await parser.parseStringPromise(xmlString);
}Convert XML to JSON in Python
Python's xmltodict library provides elegant XML to JSON conversion:
# pip install xmltodict
import xmltodict
import json
# Sample document: a <bookstore> root holding two <book> elements, each with
# a "category" attribute and a <title> that carries a "lang" attribute.
xml_string = """
<bookstore>
<book category="fiction">
<title lang="en">Harry Potter</title>
<author>J.K. Rowling</author>
<price>29.99</price>
</book>
<book category="non-fiction">
<title lang="en">Learning Python</title>
<author>Mark Lutz</author>
<price>39.95</price>
</book>
</bookstore>
"""
# Convert XML to Python dict
data = xmltodict.parse(xml_string)
# Convert to JSON (pretty-printed with a two-space indent)
json_string = json.dumps(data, indent=2)
print(json_string)
# Convert XML file to JSON file
def xml_to_json_file(xml_path, json_path):
    """Convert the XML file at ``xml_path`` into a JSON file at ``json_path``.

    Args:
        xml_path: Path of the XML file to read.
        json_path: Path of the JSON file to write (overwritten if present).
    """
    # Read raw bytes so the XML parser can honour the document's own encoding
    # declaration; text mode would decode with the platform default encoding
    # and corrupt non-ASCII content on mismatching systems.
    with open(xml_path, 'rb') as xml_file:
        data = xmltodict.parse(xml_file.read())
    # Pin the output encoding so the result does not depend on the locale.
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=2)
# Using ElementTree (built-in, more control)
import xml.etree.ElementTree as ET
def element_to_dict(element):
    """Recursively convert an ElementTree element into JSON-friendly data.

    Mapping rules:
      * attributes become keys prefixed with ``@``;
      * child elements are keyed by tag, and repeated tags become a list;
      * an element with text only (no attributes, no children) becomes that
        stripped text;
      * text next to attributes or children is stored under ``#text``;
      * an element with nothing at all becomes an empty dict.

    Args:
        element: The element to convert.

    Returns:
        A ``str`` for text-only elements, otherwise a ``dict``.
    """
    result = {}

    # Attributes (prefixed with @)
    for key, value in element.attrib.items():
        result[f'@{key}'] = value

    # ElementTree stores text that follows a child element on that child's
    # ``tail``, not on the parent's ``text``; collect both so mixed content
    # such as "<p>Hello <b>world</b>!</p>" keeps the trailing "!".
    text_parts = [element.text or '']

    # Child elements
    for child in element:
        child_data = element_to_dict(child)
        if child.tag in result:
            if not isinstance(result[child.tag], list):
                result[child.tag] = [result[child.tag]]
            result[child.tag].append(child_data)
        else:
            result[child.tag] = child_data
        text_parts.append(child.tail or '')

    # Text content: join first, strip afterwards, so inner spacing is kept and
    # whitespace-only indentation disappears.
    text = ''.join(text_parts).strip()
    if text:
        if not result:
            return text
        result['#text'] = text
    return result
# Parse the sample XML and convert it, keeping the root tag as the single
# top-level key of the resulting dict.
root = ET.fromstring(xml_string)
json_data = {root.tag: element_to_dict(root)}
print(json.dumps(json_data, indent=2))
Convert XML to JSON in PHP
PHP has built-in functions for XML to JSON conversion:
<?php
// Simple conversion using simplexml
// Sample document: a <users> root with two <user> elements, each carrying an
// "id" attribute plus <name> and <email> children.
$xml_string = <<<XML
<users>
<user id="1">
<name>John Doe</name>
<email>john@example.com</email>
</user>
<user id="2">
<name>Jane Smith</name>
<email>jane@example.com</email>
</user>
</users>
XML;
// Method 1: Simple conversion
// NOTE(review): the return value of simplexml_load_string() is not checked
// here; confirm how malformed input should be handled before reusing this.
$xml = simplexml_load_string($xml_string);
// Encode the SimpleXMLElement directly, pretty-printed.
$json = json_encode($xml, JSON_PRETTY_PRINT);
echo $json;
// Method 2: Preserve attributes (prefixed with @)
/**
 * Recursively convert a SimpleXMLElement into nested PHP arrays and strings.
 *
 * Attributes are stored under "@"-prefixed keys, child elements under their
 * tag name (repeated tags become a list), and text that sits next to
 * attributes or children under "#text". An element holding only text is
 * returned as that trimmed string.
 *
 * @param SimpleXMLElement $xml Element to convert.
 * @return array|string Converted value.
 */
function xmlToArray($xml) {
    $node = [];

    // Attributes first, each under an "@"-prefixed key.
    foreach ($xml->attributes() as $attrName => $attrValue) {
        $node['@' . $attrName] = (string)$attrValue;
    }

    foreach ($xml->children() as $tag => $childElement) {
        $converted = xmlToArray($childElement);

        // First occurrence of this tag: store the value as-is.
        if (!isset($node[$tag])) {
            $node[$tag] = $converted;
            continue;
        }

        // Repeated tag: promote the stored value to a list unless it already
        // is one (a list is recognised by having index 0).
        $existing = $node[$tag];
        $alreadyList = is_array($existing) && isset($existing[0]);
        if (!$alreadyList) {
            $node[$tag] = [$existing];
        }
        $node[$tag][] = $converted;
    }

    $text = trim((string)$xml);
    if ($text === '') {
        return $node;
    }
    // Text-only element collapses to the bare string.
    if (empty($node)) {
        return $text;
    }
    $node['#text'] = $text;
    return $node;
}
// Re-parse the sample and wrap the converted tree under the root element's
// name so the root tag is preserved in the JSON output.
$xml = simplexml_load_string($xml_string);
$array = [$xml->getName() => xmlToArray($xml)];
echo json_encode($array, JSON_PRETTY_PRINT);
?>
Command Line Conversion
Convert XML to JSON using CLI tools:
# Using xq (ships with the Python yq package: pip install yq, or brew install python-yq;
# plain "brew install yq" installs a different tool that has no xq command)
xq . input.xml > output.json
# Using Python one-liner with xmltodict
python -c "import xmltodict,json,sys; print(json.dumps(xmltodict.parse(sys.stdin.read()), indent=2))" < input.xml
# Using Node.js with xml2js (run "npm install xml2js" first).
# node -e evaluates JavaScript; npx -c would run the string as a shell command.
node -e 'const xml2js=require("xml2js"); const fs=require("fs"); new xml2js.Parser().parseString(fs.readFileSync(0,"utf8"),(e,r)=>{if(e)throw e;console.log(JSON.stringify(r,null,2))})' < input.xml
# Using Ruby
ruby -r'rexml/document' -rjson -e '
# Convert a REXML element into nested Hash / Array / String / nil values:
# attributes under "@name", repeated child tags as arrays, text next to
# attributes or children under "#text".
def to_h(e)
  h = {}
  e.attributes.each { |k,v| h["@#{k}"] = v.to_s }
  e.elements.each { |c|
    v = to_h(c)
    if h.key?(c.name)
      # Wrap explicitly: splatting the old value would explode a Hash into
      # [key, value] pairs, and a truthiness test would drop an earlier nil.
      h[c.name] = [h[c.name]] unless h[c.name].is_a?(Array)
      h[c.name] << v
    else
      h[c.name] = v
    end
  }
  # value gives the unescaped text; to_s would keep entities such as &amp;
  t = e.texts.map(&:value).join.strip
  h.empty? ? (t.empty? ? nil : t) : (t.empty? ? h : h.merge("#text"=>t))
end
d = REXML::Document.new(ARGF.read)
puts JSON.pretty_generate({d.root.name => to_h(d.root)})
' < input.xml
Handling XML Attributes
XML attributes need special handling in JSON. Common conventions:
Convention 1: @ prefix for attributes
<book id="1" category="fiction"><title>The Great Gatsby</title></book>
Becomes:
{
  "book": {
    "@id": "1",
    "@category": "fiction",
    "title": "The Great Gatsby"
  }
}
Convention 2: Separate _attributes object
{
  "book": {
    "_attributes": { "id": "1", "category": "fiction" },
    "title": "The Great Gatsby"
  }
}
Mixed content (text + elements)
<p>Hello <b>world</b>!</p>
Becomes (with #text for text nodes):
{
  "p": {
    "#text": "Hello !",
    "b": "world"
  }
}
Our tool uses the @ prefix convention which is widely adopted.
Converting RSS/Atom Feeds to JSON
RSS and Atom feeds are XML-based. Here's how to convert an RSS feed (Atom uses entry elements instead of item, so the selectors below would need to be adapted):
// RSS Feed to JSON
/**
 * Fetch an RSS feed and reduce it to a plain, JSON-serializable object
 * (browser: relies on fetch and DOMParser).
 *
 * Fields whose element is missing from the feed are left undefined.
 *
 * @param {string} url - Address of the feed.
 * @returns {Promise<{title: (string|undefined), description: (string|undefined),
 *   link: (string|undefined), items: Object[]}>} Channel metadata plus one
 *   entry per <item> with title, link, description, pubDate and guid.
 * @throws {Error} If the HTTP response is not OK or the body is not valid XML.
 */
async function rssFeedToJson(url) {
  const response = await fetch(url);
  // fetch only rejects on network failure; a 404/500 page must be caught here
  // or it would be parsed as if it were the feed.
  if (!response.ok) {
    throw new Error(`Failed to fetch feed: HTTP ${response.status}`);
  }

  const xmlText = await response.text();
  const doc = new DOMParser().parseFromString(xmlText, 'text/xml');

  // DOMParser signals malformed XML with a <parsererror> element, not a throw.
  const parseError = doc.querySelector('parsererror');
  if (parseError) {
    throw new Error(`Invalid XML: ${parseError.textContent}`);
  }

  // Text of the first match below `scope`, or undefined when nothing matches.
  const textOf = (scope, selector) => scope.querySelector(selector)?.textContent;

  const items = Array.from(doc.querySelectorAll('item'), (item) => ({
    title: textOf(item, 'title'),
    link: textOf(item, 'link'),
    description: textOf(item, 'description'),
    pubDate: textOf(item, 'pubDate'),
    guid: textOf(item, 'guid'),
  }));

  return {
    title: textOf(doc, 'channel > title'),
    description: textOf(doc, 'channel > description'),
    link: textOf(doc, 'channel > link'),
    items,
  };
}
// Usage
// Top-level await: run this inside an ES module or an async function.
const feed = await rssFeedToJson('https://example.com/feed.xml');
console.log(JSON.stringify(feed, null, 2));
Best Practices
Follow these guidelines for XML to JSON conversion:
1. Handle Arrays Consistently
- Repeated XML elements should become JSON arrays
- Even single elements might need array treatment for consistency
2. Preserve Data Types
- Parse numbers: "123" → 123
- Parse booleans: "true" → true
- Keep strings as strings
3. Handle Namespaces
<ns:element xmlns:ns="http://example.com">
Options:
- Strip namespaces: "element"
- Preserve with prefix: "ns:element"
- Use full URI: "http://example.com:element"
4. Error Handling
DOMParser does not throw on malformed XML, so always check the parsed document for a parsererror element:
// DOMParser reports malformed XML as a <parsererror> element inside the
// returned document instead of throwing, so look for it explicitly.
const parser = new DOMParser();
const doc = parser.parseFromString(xml, 'text/xml');
const error = doc.querySelector('parsererror');
if (error) {
  throw new Error('Invalid XML: ' + error.textContent);
}
5. Large Files
For large XML files, use streaming parsers like SAX instead of DOM parsing.