This repository has been archived by the owner on May 28, 2024. It is now read-only.
forked from hay/xml2json
-
Notifications
You must be signed in to change notification settings - Fork 0
/
xml2json.py
executable file
·210 lines (179 loc) · 7.11 KB
/
xml2json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!/usr/bin/env python
"""
xml2json.py Convert XML to JSON
Forked from http://github.com/hay/xml2json
The aim of this fork is to preserve the order of XML subelements.
Relies on ElementTree for the XML parsing. This is based on
pesterfish.py but uses a different XML->JSON mapping.
The XML->JSON mapping is described at
http://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
Rewritten to a command line utility by Hay Kranen < github.com/hay > with
contributions from George Hamilton (gmh04) and Dan Brown (jdanbrown)
XML JSON
<e/> {"#tag":"e", "#children":[]}
<e>text</e> {"#tag":"e", "#children":["text"]}
<e name="value" /> {"#tag":"e", "@name":"value", "#children":[]}
<e name="value">text</e>
{"#tag":"e", "@name":"value", "#children":["text"]}
<e> <a>text</a> <b>text</b> </e>
{"#tag":"e", "#children": [{"#tag":"a","#children":["text"]},
{"#tag":"b","#children":["text"]}]}
<e> <a>text</a> <a>text</a> </e>
{"#tag":"e", "#children": [{"#tag":"a","#children":["text"]},
{"#tag":"a","#children":["text"]}]}
<e> text <a>text</a> </e>
{"#tag":"e", "#children": ["text",{"#tag":"a","#children":["text"]}]}
This is very similar to the mapping used for Yahoo Web Services
(http://developer.yahoo.com/common/json.html#xml).
This is a mess in that it is so unpredictable -- it requires lots of testing
(e.g. to see if values are lists or strings or dictionaries). For use
in Python this could be vastly cleaner. Think about whether the internal
form can be more self-consistent while maintaining good external characteristics
for the JSON.
Look at the Yahoo version closely to see how it works. Maybe can adopt
that completely if it makes more sense...
R. White, 2006 November 6
"""
import xml.etree.cElementTree as ET
import json, optparse, sys
def elem_to_internal(elem, strip=1):
    """
    Build the order-preserving internal dictionary for an Element.

    The result always has a ``'#tag'`` entry holding the element tag, one
    ``'@<name>'`` entry per XML attribute, and a ``'#children'`` list. That
    list holds text chunks (plain strings) and nested dictionaries of the
    same shape, in document order.

    :param elem: the element to be converted
    :type elem: Element
    :param strip: flag to indicate whether leading/trailing whitespace
        should be removed from text chunks; chunks that end up
        empty are omitted
    :type strip: bool
    :returns: the internal representation of ``elem`` and its subtree
    :rtype: dict
    """
    def _chunk(raw):
        # Missing text is treated like empty text: both are left out.
        if raw is None:
            return ''
        return raw.strip() if strip else raw

    node = {'#tag': elem.tag}
    node.update(('@' + name, val) for name, val in elem.attrib.items())
    kids = node['#children'] = []

    # Text that appears before the first subelement.
    leading = _chunk(elem.text)
    if leading:
        kids.append(leading)

    for sub in elem:
        kids.append(elem_to_internal(sub, strip=strip))
        # Text between this subelement and the next one (or the end tag).
        trailing = _chunk(sub.tail)
        if trailing:
            kids.append(trailing)

    return node
try:
    _TEXT_TYPES = (basestring,)  # Python 2: matches both str and unicode
except NameError:
    _TEXT_TYPES = (str,)  # Python 3: basestring no longer exists


def internal_to_elem(pfsh, factory=ET.Element):
    """
    Convert an internal dictionary (not JSON!) into an Element.

    :param pfsh: the internal dictionary structure; must contain ``'#tag'``,
        may contain ``'@<name>'`` attribute entries and a
        ``'#children'`` list of strings and nested dictionaries
    :type pfsh: dict
    :param factory: element factory which should be used. Whatever
        Element implementation we could import will be
        used by default; if you want to use something else,
        pass the Element class as the factory parameter.
        It is called as ``factory(tag, attrib_dict)`` for the
        root and for every descendant.
    :type factory: ET.Element factory
    :returns: the ElementTree DOM structure for the XML.
    :rtype: ET.Element
    :raises KeyError: if ``pfsh`` (or a nested dictionary) has no ``'#tag'``
    """
    # startswith() also copes with an empty key, where key[0] would raise.
    attribs = {key[1:]: value for key, value in pfsh.items()
               if key.startswith('@')}
    my_el = factory(pfsh['#tag'], attribs)

    last_child = None
    # '#children' is optional so that hand-written input such as
    # {"#tag": "e"} is accepted.
    for child in pfsh.get('#children', ()):
        if isinstance(child, _TEXT_TYPES):
            # Text before the first subelement is the parent's .text; text
            # after a subelement is that subelement's .tail. Consecutive
            # strings are concatenated rather than overwriting each other.
            if last_child is None:
                my_el.text = (my_el.text or '') + child
            else:
                last_child.tail = (last_child.tail or '') + child
        else:
            # Pass the factory down so the whole tree uses it.
            last_child = internal_to_elem(child, factory)
            my_el.append(last_child)
    return my_el
def elem2json(elem, strip=1):
    """
    Serialize an ElementTree or a single Element as a JSON string.

    :param elem: the tree or element to be converted; anything exposing a
        ``getroot`` attribute is replaced by its root element first
    :type elem: ElementTree or Element
    :param strip: flag to indicate whether leading/trailing whitespace
        should be ignored during conversion
    :type strip: bool
    :returns: the JSON encoding of the internal dictionary for the element.
    :rtype: string
    """
    root = elem.getroot() if hasattr(elem, 'getroot') else elem
    internal = elem_to_internal(root, strip=strip)
    return json.dumps(internal)
def json2elem(json_data, factory=ET.Element):
    """
    Decode a JSON string and build the corresponding Element.

    :param json_data: the JSON data, in the internal dictionary layout
    :type json_data: string
    :param factory: element factory which should be used; defaults to the
        Element implementation imported by this module. Pass
        another Element class to build the tree with it.
    :type factory: ET.Element factory
    :returns: the ElementTree DOM structure for the XML.
    :rtype: ET.Element
    """
    internal = json.loads(json_data)
    return internal_to_elem(internal, factory)
def xml2json(xmlstring, strip=1):
    """
    Parse an XML document held in a string and return it as JSON.

    :param xmlstring: the XML string to be parsed
    :type xmlstring: string
    :param strip: flag to indicate whether leading/trailing whitespace
        should be ignored during conversion
    :type strip: bool
    :returns: the JSON encoding of the parsed document.
    :rtype: string
    """
    return elem2json(ET.fromstring(xmlstring), strip=strip)
def json2xml(json_data, factory=ET.Element):
    """
    Convert a JSON string into an XML string.

    :param json_data: the JSON data
    :type json_data: string
    :param factory: element factory which should be used. Whatever
        Element implementation we could import will be
        used by default; if you want to use something else,
        pass the Element class as the factory parameter.
    :type factory: ET.Element factory
    :returns: the XML string
    :rtype: string
    """
    elem = internal_to_elem(json.loads(json_data), factory)
    xml_data = ET.tostring(elem)
    # With its default encoding, ET.tostring() returns bytes on Python 3.
    # Decode so the documented "string" return type holds and the result
    # can be written to a text-mode file. On Python 2 bytes is str, so this
    # branch is never taken and the value is returned unchanged.
    if isinstance(xml_data, bytes) and not isinstance(xml_data, str):
        xml_data = xml_data.decode('ascii')
    return xml_data
def main():
    """
    Command line access for this module.

    Expects exactly one positional argument, the input file. With
    ``-t xml2json`` the input is converted from XML to JSON (whitespace is
    preserved, i.e. ``strip=0``); with any other ``-t`` value, or none, the
    input is converted from JSON to XML. The result is written to the file
    given with ``-o``, or printed to stdout when ``-o`` is absent.

    Prints the help text and exits with status -1 when the number of
    positional arguments is not exactly one.
    """
    opt = optparse.OptionParser(
        description='Converts XML to JSON or the other way around',
        prog='xml2json',
        usage='%prog -t xml2json -o file.json file.xml'
    )
    opt.add_option('--type', '-t', help="'xml2json' or 'json2xml'")
    opt.add_option('--out', '-o', help="Write to OUT instead of stdout")
    options, arguments = opt.parse_args()

    if len(arguments) != 1:
        opt.print_help()
        sys.exit(-1)

    # Context manager so the input handle is closed even if read() fails.
    with open(arguments[0]) as in_file:
        my_input = in_file.read()

    if options.type == "xml2json":
        out = xml2json(my_input, strip=0)
    else:
        out = json2xml(my_input)

    # The XML serializer may hand back bytes on Python 3; normalize to text
    # so both the text-mode file write and the print below behave. On
    # Python 2 bytes is str, so this is a no-op.
    if isinstance(out, bytes) and not isinstance(out, str):
        out = out.decode('ascii')

    if options.out:
        with open(options.out, 'w') as out_file:
            out_file.write(out)
    else:
        # Parenthesized single argument: same output under the Python 2
        # print statement and the Python 3 print function.
        print(out)
# Run the command line interface only when this file is executed as a
# script; importing the module defines the functions without running main().
if __name__ == "__main__":
    main()