forked from evanhunter/PJMT
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathXML.php
397 lines (297 loc) · 15 KB
/
XML.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
<?php
/******************************************************************************
*
* Filename: XML.php
*
* Description: Provides functions for parsing and constructing XML information
*
* Author: Evan Hunter
*
* Date: 27/7/2004
*
* Project: JPEG Metadata
*
* Revision: 1.10
*
* Changes: 1.00 -> 1.10 : Changed read_xml_array_from_text to fix problem that
* caused the whitespace (especially newlines) to be
* destroyed when converting xml text to an xml array
*
* URL: http://electronics.ozhiker.com
*
* License: This file is part of the PHP JPEG Metadata Toolkit.
*
* The PHP JPEG Metadata Toolkit is free software; you can
* redistribute it and/or modify it under the terms of the
* GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* The PHP JPEG Metadata Toolkit is distributed in the hope
* that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public
* License along with the PHP JPEG Metadata Toolkit; if not,
* write to the Free Software Foundation, Inc., 59 Temple
* Place, Suite 330, Boston, MA 02111-1307 USA
*
* If you require a different license for commercial or other
* purposes, please contact the author: [email protected]
*
******************************************************************************/
include_once 'Unicode.php'; // Unicode is required as XML is always Unicode encoded
/******************************************************************************
*
* Function: read_xml_array_from_text
*
* Description: Parses a string containing XML, and returns the resulting
* tree structure array, which contains all the XML information.
* Note: Values which consist only of white space are discarded.
* White space inside any other value (e.g. newlines) is kept.
* Note: The parser is created with UTF-8 encoding, so all text
* information contained in the tree structure is UTF-8. Hence
* text will appear as normal ASCII except where there is an
* extended character.
*
* Parameters: xmltext - a string containing the XML to be parsed
*
* Returns: output - the tree structure array containing the XML information
* (as built by xml_get_children)
* FALSE - if the text is blank, the parser could not be
* configured, or the XML could not be parsed
*
******************************************************************************/
function read_xml_array_from_text( $xmltext )
{
    // Check if there actually is any text to parse
    if ( trim( $xmltext ) === "" )
    {
        return FALSE;
    }

    // Create an instance of a xml parser to parse the XML text
    $xml_parser = xml_parser_create( "UTF-8" );

    // Change (revision 1.10): white space skipping is turned OFF here.
    // Letting the parser skip white space would also destroy newlines
    // (e.g. those written as the character reference &#x0A;) inside the XML
    // values, so white space only values are removed further down instead.
    //
    // Case folding (uppercasing) is also turned off - XML is case sensitive
    // and upper casing the tag names would violate the XML standard.
    $options_ok = xml_parser_set_option( $xml_parser, XML_OPTION_SKIP_WHITE, 0 )
               && xml_parser_set_option( $xml_parser, XML_OPTION_CASE_FOLDING, 0 );

    // Parse the XML text into a flat array structure, but only if the parser
    // was configured successfully. A result of 0 (or FALSE) means failure.
    $vals = array( );
    $parsed_ok = $options_ok
              && ( xml_parse_into_struct( $xml_parser, $xmltext, $vals ) != 0 );

    // Destroy the xml parser - it is no longer needed whether or not
    // the parse succeeded, so there is a single clean-up point
    xml_parser_free( $xml_parser );

    if ( ! $parsed_ok )
    {
        // Error setting a parser option or error parsing the XML
        return FALSE;
    }

    // Since the xml was processed with white space skipping disabled, it will
    // have many values which are only white space. These need to be removed
    // to make a sensible array.
    $newvals = array( );

    // Cycle through each of the items
    foreach ( $vals as $val )
    {
        // Does the item carry a value which is more than just white space?
        $has_text = array_key_exists( 'value', $val ) && ( trim( $val[ 'value' ] ) !== "" );

        if ( ! $has_text )
        {
            // Remove a white space only value (no effect if there is no value)
            unset( $val[ 'value' ] );

            // A cdata item without a value carries no information at all - drop it
            if ( $val[ 'type' ] == 'cdata' )
            {
                continue;
            }
        }

        $newvals[] = $val;
    }

    // The xml_parse_into_struct function returns a flat version
    // of the XML data, where each tag has a level number attached.
    // This is very difficult to work with, so it needs to be
    // converted to a tree structure before being returned
    $i = 0;
    return xml_get_children( $newvals, $i );
}
/******************************************************************************
* End of Function: read_xml_array_from_text
******************************************************************************/
/******************************************************************************
*
* Function: write_xml_array_to_text
*
* Description: Takes a tree structure array (in the same format as returned
* by read_xml_array_from_text), and constructs a string containing
* the equivalent XML. This function is recursive: sub-elements
* are written one indent level deeper than their parent.
* Every tag name, attribute name, attribute value and text
* value is passed through xml_UTF8_clean before being written.
*
* Parameters: xmlarray - the tree structure array containing the information to
* be converted to XML. Each element must have a 'tag' entry,
* and may have 'attributes', 'value' and 'children' entries
* indentlevel - the indent level of the top level tags (usually zero)
*
* Returns: output - the string containing the equivalent XML
*
******************************************************************************/
function write_xml_array_to_text( $xmlarray, $indentlevel )
{
    // The XML text accumulated for every element at this level
    $xml = "";

    foreach ( $xmlarray as $node )
    {
        // Leading indent shared by the opening tag and (when there are
        // sub-elements) the closing tag of this element
        $padding = str_repeat( " ", $indentlevel );

        // Build the opening tag: indent, '<' and the cleaned tag name ...
        $opening = $padding . "<" . xml_UTF8_clean( $node['tag'] );

        // ... followed by each attribute as cleaned name ='cleaned value'
        if ( array_key_exists( 'attributes', $node ) )
        {
            foreach ( $node['attributes'] as $attr_name => $attr_value )
            {
                $opening .= " " . xml_UTF8_clean( $attr_name );
                $opening .= " ='" . xml_UTF8_clean( $attr_value ) . "'";
            }
        }

        // ... and the 'greater-than' which finishes the opening tag
        $xml .= $opening . ">";

        // Text directly inside the element comes straight after the opening tag
        if ( array_key_exists( 'value', $node ) )
        {
            $xml .= xml_UTF8_clean( $node['value'] );
        }

        // Sub-elements start on a fresh line, are written recursively one
        // level deeper, and leave the closing tag needing its own indent
        if ( array_key_exists( 'children', $node ) )
        {
            $nested = write_xml_array_to_text( $node['children'], $indentlevel + 1 );
            $xml .= "\n" . $nested . $padding;
        }

        // Finish with the cleaned closing tag and a newline
        $xml .= "</" . xml_UTF8_clean( $node['tag'] ) . ">\n";
    }

    return $xml;
}
/******************************************************************************
* End of Function: write_xml_array_to_text
******************************************************************************/
/******************************************************************************
*
* INTERNAL FUNCTIONS
*
******************************************************************************/
/******************************************************************************
*
* Internal Function: xml_get_children
*
* Description: Used by the read_xml_array_from_text function.
* This function recursively converts the flat list of elements
* produced by xml_parse_into_struct into a tree structure array,
* which is much more useful and easier to use.
*
* Parameters: input_xml_array - the flat array of XML elements retrieved
* from xml_parse_into_struct (passed by reference)
* item_num - the number of the element at which the conversion
* should start (usually zero when called from another
* function). Passed by reference and advanced past
* every element consumed, so that a recursive call
* hands its position back to its caller.
*
* Returns: children - the tree structure array containing the XML elements
* found from item_num up to (and consuming) the matching
* 'close' element, or up to the end of the input
*
******************************************************************************/
function xml_get_children( &$input_xml_array, &$item_num )
{
    // The tree elements collected at this nesting level
    $children = array();

    // Walk the flat input until it runs out (or a closing tag ends this level)
    while ( $item_num < count( $input_xml_array ) )
    {
        // Take the current element, then step the shared position past it
        $element = &$input_xml_array[ $item_num ];
        $item_num++;

        // Loose comparisons in this exact order mirror a switch statement
        $type = $element['type'];

        if ( ( $type == 'cdata' ) || ( $type == 'complete' ) )
        {
            // Character data, or a matching pair of tags with no tags
            // inside - either way it becomes a leaf of the tree
            $children[] = xml_get_child( $element );
        }
        elseif ( $type == 'open' )
        {
            // An opening tag - everything up to its closing tag is gathered
            // recursively and attached to it as its sub-elements
            $descendants = xml_get_children( $input_xml_array, $item_num );
            $children[] = xml_get_child( $element, $descendants );
        }
        elseif ( $type == 'close' )
        {
            // A closing tag ends this nesting level
            return $children;
        }
        // Any other type of element is skipped
    }

    // Ran out of input - return what was collected
    return $children;
}
/******************************************************************************
* End of Function: xml_get_children
******************************************************************************/
/******************************************************************************
*
* Internal Function: xml_get_child
*
* Description: Used by the xml_get_children function.
* Takes an element from an array provided by xml_parse_into_struct
* and returns an element for a tree structure array, keeping only
* the 'tag', 'value' and 'attributes' entries which are set.
*
* Parameters: input_xml_item - the item from the array provided by xml_parse_into_struct
* children - an array of sub-elements to be added to the tree
* structure array. Null, a missing value or any other
* non-array indicates no sub-elements are to be added.
*
* Returns: child - the element for a tree structure array
*
******************************************************************************/
function xml_get_child( &$input_xml_item, $children = NULL )
{
    // The tree element being assembled
    $child = array();

    // Carry across, in this order, each entry which is set on the input item
    foreach ( array( 'tag', 'value', 'attributes' ) as $field )
    {
        if ( isset( $input_xml_item[ $field ] ) )
        {
            $child[ $field ] = $input_xml_item[ $field ];
        }
    }

    // Attach the sub-elements only when an array of them was supplied
    if ( is_array( $children ) )
    {
        $child['children'] = $children;
    }

    return $child;
}
/******************************************************************************
* End of Function: xml_get_child
******************************************************************************/
?>