This repository has been archived by the owner on Feb 20, 2024. It is now read-only.
forked from mcspring/XML2Dict
-
Notifications
You must be signed in to change notification settings - Fork 0
/
encoder.py
160 lines (128 loc) · 4.71 KB
/
encoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python
# encoding: utf-8
'''
XML2Dict: Convert xml string to python dict
@author: Mc.Spring
@contact: [email protected]
@since: Created on 2009-5-18
@todo: Add namespace support
@copyright: Copyright (C) 2009 MC.Spring Team. All rights reserved.
@license: http://www.apache.org/licenses/LICENSE-2.0 Apache License
'''
try:
import xml.etree.ElementTree as ET
except:
import cElementTree as ET # for 2.4
__all__ = ['XML2Dict']
class XML2Dict(object):
    '''Convert an XML string into nested python dicts and lists.

    Output conventions:
      * an element with children maps its tag to a dict of those children;
      * a leaf element maps its tag to its stripped text, encoded with
        ``coding`` (so a byte string; ``''`` when the element has no text);
      * attributes of a single element are stored next to it under '@tag';
      * a tag repeated under one parent maps to a list with one entry per
        occurrence, and each entry's attributes are stored inside that
        entry under '#tag'.

    Text of an element that also has children is not kept.
    '''

    def __init__(self, coding='UTF-8'):
        '''Remember the encoding applied to every text value.'''
        self._coding = coding

    def _parse_node(self, node):
        '''Return a dict describing the children of ``node``.

        ``node`` itself (its tag and attributes) is not included; the
        caller wraps the result with ``_make_dict``.
        '''
        tree = {}

        # Iterate the element directly: Element.getchildren() was removed
        # in Python 3.9, iteration works on every version.
        for child in node:
            ctag = child.tag
            cattr = child.attrib
            ctext = child.text.strip().encode(self._coding) if child.text is not None else ''
            ctree = self._parse_node(child)
            # Children win over text; a leaf falls back to its text.
            cvalue = ctree or ctext

            if ctag not in tree:  # First time found
                tree.update(self._make_dict(ctag, cvalue, cattr))
                continue

            if not isinstance(tree[ctag], list):
                # Second occurrence: turn the single value into a list and
                # move the first element's '@tag' attributes into its entry.
                first_attr = tree.pop('@' + ctag, None)
                tree[ctag] = [self._list_entry(ctag, tree[ctag], first_attr)]

            tree[ctag].append(self._list_entry(ctag, cvalue, cattr))

        return tree

    def _list_entry(self, tag, value, attr=None):
        '''Build one entry of the list used for a repeated tag.

        A dict value is used as the entry, and a leaf with non-empty text
        and no attributes is kept as its text so the text is not lost.
        Every other leaf becomes a dict.  Attributes, when present, are
        stored in the entry under '#tag'.

        NOTE: a leaf that has both text and attributes keeps only its
        attributes; the output shape has no key reserved for the text.
        '''
        if isinstance(value, dict):
            entry = value
        elif value and not attr:
            return value
        else:
            entry = {}

        if attr:
            entry['#' + tag] = attr
        return entry

    def _make_dict(self, tag, value, attr=None):
        '''Generate a new dict with tag and value

        If attr is not empty, a copy of it is also stored under '@tag'.
        '''
        ret = {tag: value}

        # Save attributes as @tag value
        if attr:
            ret['@' + tag] = dict(attr)

        return ret

    def parse(self, xml):
        '''Parse xml string to python dict

        The result maps the root tag to its parsed children and, when the
        root has attributes, '@roottag' to those attributes.
        '''
        EL = ET.fromstring(xml)

        return self._make_dict(EL.tag, self._parse_node(EL), EL.attrib)
# Demo: parse a few sample documents and print the resulting dicts.
if __name__ == '__main__':
    test = {'one': '''<rss author="Mc.Spring" version="2.0">
<channel>
<description>je m' appelle twinsen.</description>
<copyright>Copyright 2000-2009 Twinsen Liang all rights reserved</copyright>
<title>Twinsen Liang</title>
<language>zh-cn</language>
<image>
<url>http://www.twinsenliang.net/logo.gif</url>
<link>http://www.twinsenliang.net/</link>
<description>Twinsen Liang</description>
<title>Twinsen Liang</title>
</image>
<generator>TXmlSave 2.0</generator>
<item>
<category>skill</category>
<description>This is the second article content, thanks!</description>
<pubDate>Mon, 15 Apr 200902:04:52 +0800</pubDate>
<author>[email protected](TwinsenLiang)</author>
<title>This is the second article title</title>
<link target="_blank">http://www.twinsenliang.net/skill/20090414.html</link>
<guid>http://www.twinsenliang.net/skill/20090414.html</guid>
</item>
<item>
<category>skill</category>
<description>This isthe second article content, thanks!</description>
<pubDate>Mon, 15 Apr 2009 02:04:52 +0800</pubDate>
<author>[email protected](TwinsenLiang)</author>
<title>This is the second article title</title>
<link target="_blank">http://www.twinsenliang.net/skill/20090414.html</link>
<guid>http://www.twinsenliang.net/skill/20090414.html</guid>
</item>
<link>http://www.twinsenliang.net</link>
<webMaster>[email protected](twinsen)</webMaster>
</channel>
</rss>''',
            'two': '''<class id="test">
<student id="1234">
<age>24</age>
<name>thiru</name>
</student>
<student id="5678">
<age>28</age>
<name>bharath</name>
</student>
</class>''',
            'three': '''<class id="test"></class>''',
            'four': '''<person>
<name>spring</name>
<age></age>
<address />
</person>''',
            'five': '''<doc>
<x a="1" />
<x a="2" />
</doc>'''}

    # The parser keeps no per-document state, so one instance serves all samples.
    obj = XML2Dict(coding='utf-8')
    for item, xml in test.items():
        print(obj.parse(xml))
        # A bare `print` is a no-op expression on Python 3; print('') emits
        # the empty separator line on both Python 2 and Python 3.
        print('')