1: <?php
2: /**
3: * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
4: * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
5: *
6: * Licensed under The MIT License
7: * For full copyright and license information, please see the LICENSE.txt
8: * Redistributions of files must retain the above copyright notice.
9: *
10: * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
11: * @link https://cakephp.org CakePHP(tm) Project
12: * @since 0.10.3
13: * @license https://opensource.org/licenses/mit-license.php MIT License
14: */
15: namespace Cake\Utility;
16:
17: use Cake\Utility\Exception\XmlException;
18: use DOMDocument;
19: use DOMNode;
20: use DOMText;
21: use Exception;
22: use SimpleXMLElement;
23:
24: /**
25: * XML handling for CakePHP.
26: *
27: * The methods in these classes enable the datasources that use XML to work.
28: */
29: class Xml
30: {
31:
32: /**
33: * Initialize SimpleXMLElement or DOMDocument from a given XML string, file path, URL or array.
34: *
35: * ### Usage:
36: *
37: * Building XML from a string:
38: *
39: * ```
40: * $xml = Xml::build('<example>text</example>');
41: * ```
42: *
43: * Building XML from string (output DOMDocument):
44: *
45: * ```
46: * $xml = Xml::build('<example>text</example>', ['return' => 'domdocument']);
47: * ```
48: *
49: * Building XML from a file path:
50: *
51: * ```
52: * $xml = Xml::build('/path/to/an/xml/file.xml');
53: * ```
54: *
55: * Building XML from a remote URL:
56: *
57: * ```
58: * use Cake\Http\Client;
59: *
60: * $http = new Client();
61: * $response = $http->get('http://example.com/example.xml');
62: * $xml = Xml::build($response->body());
63: * ```
64: *
65: * Building from an array:
66: *
67: * ```
68: * $value = [
69: * 'tags' => [
70: * 'tag' => [
71: * [
72: * 'id' => '1',
73: * 'name' => 'defect'
74: * ],
75: * [
76: * 'id' => '2',
77: * 'name' => 'enhancement'
78: * ]
79: * ]
80: * ]
81: * ];
82: * $xml = Xml::build($value);
83: * ```
84: *
85: * When building XML from an array ensure that there is only one top level element.
86: *
87: * ### Options
88: *
89: * - `return` Can be 'simplexml' to return object of SimpleXMLElement or 'domdocument' to return DOMDocument.
90: * - `loadEntities` Defaults to false. Set to true to enable loading of `<!ENTITY` definitions. This
91: * is disabled by default for security reasons.
92: * - `readFile` Set to false to disable file reading. This is important to disable when
93: * putting user data into Xml::build(). If enabled local files will be read if they exist.
94: * Defaults to true for backwards compatibility reasons.
95: * - `parseHuge` Enable the `LIBXML_PARSEHUGE` flag.
96: *
97: * If using array as input, you can pass `options` from Xml::fromArray.
98: *
99: * @param string|array $input XML string, a path to a file, a URL or an array
100: * @param array $options The options to use
101: * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
102: * @throws \Cake\Utility\Exception\XmlException
103: */
public static function build($input, array $options = [])
{
    $options += [
        'return' => 'simplexml',
        'loadEntities' => false,
        'readFile' => true,
        'parseHuge' => false,
    ];

    // Arrays and objects (e.g. collections) are converted through fromArray().
    if (is_array($input) || is_object($input)) {
        return static::fromArray($input, $options);
    }

    // Reject every remaining non-string value *before* it is handed to
    // strpos()/file_exists(), which only operate on strings.
    if (!is_string($input)) {
        throw new XmlException('Invalid input.');
    }

    // Any string containing '<' is treated as an XML document.
    if (strpos($input, '<') !== false) {
        return static::_loadXml($input, $options);
    }

    // Otherwise the string may name a local file, if file reading is allowed.
    if ($options['readFile'] && file_exists($input)) {
        $contents = file_get_contents($input);
        if ($contents === false) {
            // The path exists but could not be read; do not pass `false` to the parser.
            throw new XmlException('XML cannot be read.');
        }

        return static::_loadXml($contents, $options);
    }

    throw new XmlException('XML cannot be read.');
}
132:
133: /**
134: * Parse the input data and create either a SimpleXmlElement object or a DOMDocument.
135: *
136: * @param string $input The input to load.
137: * @param array $options The options to use. See Xml::build()
138: * @return \SimpleXMLElement|\DOMDocument
139: * @throws \Cake\Utility\Exception\XmlException
140: */
protected static function _loadXml($input, $options)
{
    $flags = 0;
    if (!empty($options['parseHuge'])) {
        $flags |= LIBXML_PARSEHUGE;
    }

    // Collect libxml errors internally instead of emitting PHP warnings; the
    // previous setting is put back in the finally block.
    $internalErrors = libxml_use_internal_errors(true);

    // Disable the entity loader while parsing unless the caller opted in.
    // libxml_disable_entity_loader() returns the previous state, which is kept
    // so that it can be restored instead of being forced back to `false`.
    $entityLoaderChanged = false;
    $previousEntityLoader = false;
    if (function_exists('libxml_disable_entity_loader') && !$options['loadEntities']) {
        $previousEntityLoader = libxml_disable_entity_loader(true);
        $entityLoaderChanged = true;
    }

    try {
        if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
            // SimpleXMLElement throws on malformed input by itself.
            return new SimpleXMLElement($input, $flags | LIBXML_NOCDATA);
        }

        $dom = new DOMDocument();
        if ($dom->loadXML($input, $flags) === false) {
            // loadXML() signals failure only through its return value while
            // internal errors are enabled, so turn it into an exception that
            // the catch block below wraps in an XmlException.
            $error = libxml_get_last_error();
            throw new Exception($error ? trim($error->message) : 'String could not be parsed as XML');
        }

        return $dom;
    } catch (Exception $e) {
        throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e);
    } finally {
        if ($entityLoaderChanged) {
            libxml_disable_entity_loader($previousEntityLoader);
        }
        libxml_use_internal_errors($internalErrors);
    }
}
171:
172: /**
173: * Parse the input html string and create either a SimpleXmlElement object or a DOMDocument.
174: *
175: * @param string $input The input html string to load.
176: * @param array $options The options to use. See Xml::build()
177: * @return \SimpleXMLElement|\DOMDocument
178: * @throws \Cake\Utility\Exception\XmlException
179: */
public static function loadHtml($input, $options = [])
{
    $options += [
        'return' => 'simplexml',
        'loadEntities' => false,
        'parseHuge' => false,
    ];

    $flags = 0;
    if (!empty($options['parseHuge'])) {
        $flags |= LIBXML_PARSEHUGE;
    }

    // Collect libxml errors internally instead of emitting PHP warnings; the
    // previous setting is put back in the finally block.
    $internalErrors = libxml_use_internal_errors(true);

    // Disable the entity loader while parsing unless the caller opted in, and
    // keep the previous state (the function's return value) so it can be
    // restored instead of being forced back to `false`.
    $entityLoaderChanged = false;
    $previousEntityLoader = false;
    if (function_exists('libxml_disable_entity_loader') && !$options['loadEntities']) {
        $previousEntityLoader = libxml_disable_entity_loader(true);
        $entityLoaderChanged = true;
    }

    try {
        $dom = new DOMDocument();
        if ($dom->loadHTML($input, $flags) === false) {
            // Surface a failed parse instead of returning an empty document.
            $error = libxml_get_last_error();
            throw new Exception($error ? trim($error->message) : 'String could not be parsed as HTML');
        }

        if ($options['return'] !== 'simplexml' && $options['return'] !== 'simplexmlelement') {
            return $dom;
        }

        $xml = simplexml_import_dom($dom);
        if (!($xml instanceof SimpleXMLElement)) {
            // The import yields a non-object when the document has no usable root element.
            throw new Exception('Document could not be converted to SimpleXMLElement');
        }

        return $xml;
    } catch (Exception $e) {
        throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e);
    } finally {
        if ($entityLoaderChanged) {
            libxml_disable_entity_loader($previousEntityLoader);
        }
        libxml_use_internal_errors($internalErrors);
    }
}
216:
217: /**
218: * Transform an array into a SimpleXMLElement
219: *
220: * ### Options
221: *
222: * - `format` Whether nested keys are created as child elements ('tags') or as attributes ('attributes').
223: * - `pretty` Returns formatted Xml when set to `true`. Defaults to `false`
224: * - `version` Version of XML document. Default is 1.0.
225: * - `encoding` Encoding of XML document. If null, it is removed from the XML header. Defaults to the application's internal encoding (`mb_internal_encoding()`).
226: * - `return` If return object of SimpleXMLElement ('simplexml') or DOMDocument ('domdocument'). Default is SimpleXMLElement.
227: *
228: * Using the following data:
229: *
230: * ```
231: * $value = [
232: * 'root' => [
233: * 'tag' => [
234: * 'id' => 1,
235: * 'value' => 'defect',
236: * '@' => 'description'
237: * ]
238: * ]
239: * ];
240: * ```
241: *
242: * Calling `Xml::fromArray($value, 'tags');` Will generate:
243: *
244: * `<root><tag><id>1</id><value>defect</value>description</tag></root>`
245: *
246: * And calling `Xml::fromArray($value, 'attributes');` Will generate:
247: *
248: * `<root><tag id="1" value="defect">description</tag></root>`
249: *
250: * @param array|\Cake\Collection\Collection $input Array with data or a collection instance.
251: * @param string|array $options The options to use or a string to use as format.
252: * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
253: * @throws \Cake\Utility\Exception\XmlException
254: */
public static function fromArray($input, $options = [])
{
    // Collections and other objects exposing toArray() are unwrapped first.
    if (is_object($input) && method_exists($input, 'toArray') && is_callable([$input, 'toArray'])) {
        $input = $input->toArray();
    }
    // Exactly one top-level key is required: it becomes the root element.
    if (!is_array($input) || count($input) !== 1) {
        throw new XmlException('Invalid input.');
    }
    // key() reads the array's internal pointer; rewind it so that the single
    // key is inspected even if the pointer was moved before the call.
    reset($input);
    if (is_int(key($input))) {
        throw new XmlException('The key of input must be alphanumeric');
    }

    // A non-array $options value is shorthand for the `format` option.
    if (!is_array($options)) {
        $options = ['format' => (string)$options];
    }
    $options += [
        'format' => 'tags',
        'version' => '1.0',
        'encoding' => mb_internal_encoding(),
        'return' => 'simplexml',
        'pretty' => false,
    ];

    $dom = new DOMDocument($options['version'], $options['encoding']);
    if ($options['pretty']) {
        $dom->formatOutput = true;
    }
    // Late static binding, like every other internal call in this class, so
    // that subclasses overriding _fromArray() are honoured.
    static::_fromArray($dom, $dom, $input, $options['format']);

    $options['return'] = strtolower($options['return']);
    if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
        return new SimpleXMLElement($dom->saveXML());
    }

    return $dom;
}
293:
294: /**
295: * Recursive method to create child nodes from an array
296: *
297: * @param \DOMDocument $dom Handler to DOMDocument
298: * @param \DOMElement $node Handler to DOMElement (child)
299: * @param array $data Array of data to append to the $node.
300: * @param string $format Either 'attributes' or 'tags'. This determines where nested keys go.
301: * @return void
302: * @throws \Cake\Utility\Exception\XmlException
303: */
protected static function _fromArray($dom, $node, &$data, $format)
{
    if (empty($data) || !is_array($data)) {
        return;
    }
    foreach ($data as $key => $value) {
        // Only string keys can become element or attribute names.
        if (!is_string($key)) {
            throw new XmlException('Invalid array');
        }
        if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
            $value = $value->toArray();
        }

        if (is_array($value)) {
            // An '@'-prefixed key denotes an attribute and cannot hold nested data.
            if ($key[0] === '@') {
                throw new XmlException('Invalid array');
            }
            if (is_numeric(implode('', array_keys($value)))) {
                // List: one element named $key is created per item.
                foreach ($value as $item) {
                    $itemData = compact('dom', 'node', 'key', 'format');
                    $itemData['value'] = $item;
                    static::_createChild($itemData);
                }
            } else {
                // Struct: a single element named $key holding the nested data.
                static::_createChild(compact('dom', 'node', 'key', 'value', 'format'));
            }
            continue;
        }

        // Scalar value: booleans are written as 1/0 and null as an empty string.
        if (is_bool($value)) {
            $value = (int)$value;
        } elseif ($value === null) {
            $value = '';
        }

        // Namespace declarations are keys that *start with* `xmlns:`. A key
        // that merely contains that text elsewhere is an ordinary name.
        if (strpos($key, 'xmlns:') === 0) {
            $node->setAttributeNS('http://www.w3.org/2000/xmlns/', $key, $value);
            continue;
        }

        if ($key[0] !== '@' && $format === 'tags') {
            if (!is_numeric($value)) {
                // Escape special characters
                // https://www.w3.org/TR/REC-xml/#syntax
                // https://bugs.php.net/bug.php?id=36795
                $child = $dom->createElement($key, '');
                $child->appendChild(new DOMText($value));
            } else {
                $child = $dom->createElement($key, $value);
            }
            $node->appendChild($child);
            continue;
        }

        // Attribute: either explicitly marked with '@' or implied by the format.
        if ($key[0] === '@') {
            $key = substr($key, 1);
        }
        $attribute = $dom->createAttribute($key);
        $attribute->appendChild($dom->createTextNode($value));
        $node->appendChild($attribute);
    }
}
366:
367: /**
368: * Helper for _fromArray(). Creates child elements from arrays.
369: *
370: * @param array $data Array with the information needed to create the children
371: * @return void
372: */
protected static function _createChild($data)
{
    // Fill in any entry the caller left out.
    $data += [
        'dom' => null,
        'node' => null,
        'key' => null,
        'value' => null,
        'format' => null,
    ];

    $document = $data['dom'];
    $parent = $data['node'];
    $content = $data['value'];

    // Objects exposing toArray() are treated like the array they produce.
    if (is_object($content) && method_exists($content, 'toArray') && is_callable([$content, 'toArray'])) {
        $content = $content->toArray();
    }

    $text = null;
    $defaultNamespace = null;
    if (is_array($content)) {
        // '@' holds the element's own text; 'xmlns:' holds its default namespace.
        // Both are removed so the recursion below does not see them again.
        if (isset($content['@'])) {
            $text = (string)$content['@'];
            unset($content['@']);
        }
        if (isset($content['xmlns:'])) {
            $defaultNamespace = $content['xmlns:'];
            unset($content['xmlns:']);
        }
    } elseif ($content === 0 || $content === '0' || !empty($content)) {
        // Zero is kept as text even though empty() considers it empty.
        $text = (string)$content;
    }

    $element = $document->createElement($data['key']);
    if ($text !== null) {
        $element->appendChild($document->createTextNode($text));
    }
    if ($defaultNamespace) {
        $element->setAttribute('xmlns', $defaultNamespace);
    }

    // Populate the new element with whatever nested data remains, then attach it.
    static::_fromArray($document, $element, $content, $data['format']);
    $parent->appendChild($element);
}
417:
418: /**
419: * Returns this XML structure as an array.
420: *
421: * @param \SimpleXMLElement|\DOMDocument|\DOMNode $obj SimpleXMLElement, DOMDocument or DOMNode instance
422: * @return array Array representation of the XML structure.
423: * @throws \Cake\Utility\Exception\XmlException
424: */
public static function toArray($obj)
{
    // DOM input is converted to SimpleXML first; everything else is checked as-is.
    $element = $obj instanceof DOMNode ? simplexml_import_dom($obj) : $obj;
    if (!($element instanceof SimpleXMLElement)) {
        throw new XmlException('The input is not instance of SimpleXMLElement, DOMDocument or DOMNode.');
    }

    // The empty prefix comes first so that un-prefixed nodes are always visited.
    $prefixes = array_keys(array_merge(['' => ''], $element->getNamespaces(true)));

    $result = [];
    static::_toArray($element, $result, '', $prefixes);

    return $result;
}
439:
440: /**
441: * Recursive method to toArray
442: *
443: * @param \SimpleXMLElement $xml SimpleXMLElement object
444: * @param array $parentData Parent array with data
445: * @param string $ns Namespace of current child
446: * @param array $namespaces List of namespaces in XML
447: * @return void
448: */
protected static function _toArray($xml, &$parentData, $ns, $namespaces)
{
    $node = [];

    // Gather attributes and child elements for every known namespace prefix.
    foreach ($namespaces as $prefix) {
        $qualifier = empty($prefix) ? '' : $prefix . ':';

        foreach ($xml->attributes($prefix, true) as $attrName => $attrValue) {
            $node['@' . $qualifier . $attrName] = (string)$attrValue;
        }

        foreach ($xml->children($prefix, true) as $childElement) {
            static::_toArray($childElement, $node, $prefix, $namespaces);
        }
    }

    // An element without attributes or children collapses to its text; otherwise
    // non-empty text is stored under the '@' key.
    $text = trim((string)$xml);
    if ($node === []) {
        $node = $text;
    } elseif ($text !== '') {
        $node['@'] = $text;
    }

    $name = (empty($ns) ? $ns : $ns . ':') . $xml->getName();

    // First element with this name: store it directly.
    if (!isset($parentData[$name])) {
        $parentData[$name] = $node;

        return;
    }

    // Repeated name: turn the existing entry into a list (once) and append.
    $existing = $parentData[$name];
    if (!(is_array($existing) && isset($existing[0]))) {
        $existing = [$existing];
    }
    $existing[] = $node;
    $parentData[$name] = $existing;
}
486: }
487: