lxml.tests.selftest
1
2
3
4
5
6
7
8
9
10
11
12 import re , sys
13
15 if sys . version_info [ 0 ] < 3 :
16 return sys . stdout
17 class bytes_stdout ( object ) :
18 def write ( self , data ) :
19 if isinstance ( data , bytes ) :
20 data = data . decode ( 'ISO8859-1' )
21 sys . stdout . write ( data )
22 return bytes_stdout ( )
23
24 try :
25 from StringIO import StringIO as BytesIO
26 except ImportError :
27 from io import BytesIO
28
29 from lxml import etree as ElementTree
30 from lxml import _elementpath as ElementPath
31 from lxml import ElementInclude
32 ET = ElementTree
33
34
35
36
37
38
39
41 xml_data = re . sub ( r'\s*xmlns:[a-z0-9]+="http://www.w3.org/2001/XInclude"' , '' , xml_data )
42 xml_data = xml_data . replace ( ' />' , '/>' )
43 if xml_data [ - 1 : ] == '\n' :
44 xml_data = xml_data [ : - 1 ]
45 return xml_data
46
62
65
68
73
74 SAMPLE_XML = ElementTree . XML ( """
75 <body>
76 <tag class='a'>text</tag>
77 <tag class='b' />
78 <section>
79 <tag class='b' id='inner'>subtext</tag>
80 </section>
81 </body>
82 """ )
83
84
85
86
88 len ( string )
89 for char in string :
90 if len ( char ) != 1 :
91 print ( "expected one-character string, got %r" % char )
92 new_string = string + ""
93 new_string = string + " "
94 string [ : 0 ]
95
100
102 len ( mapping )
103 keys = mapping . keys ( )
104 items = mapping . items ( )
105 for key in keys :
106 item = mapping [ key ]
107 mapping [ "key" ] = "value"
108 if mapping [ "key" ] != "value" :
109 print ( "expected value string, got %r" % mapping [ "key" ] )
110
126
129
130
131
132
134 """
135 >>> from elementtree.ElementTree import *
136 >>> from elementtree.ElementInclude import *
137 >>> from elementtree.ElementPath import *
138 >>> from elementtree.HTMLTreeBuilder import *
139 >>> from elementtree.SimpleXMLWriter import *
140 >>> from elementtree.TidyTools import *
141 """
142
143
144 del sanity
145
147 """
148 >>> ElementTree.VERSION
149 '1.3a2'
150 """
151
152
153 del version
154
156 """
157 Test element tree interface.
158
159 >>> element = ElementTree.Element("tag")
160 >>> check_element(element)
161 >>> tree = ElementTree.ElementTree(element)
162 >>> check_element_tree(tree)
163 """
164
166 """
167 >>> elem = ElementTree.XML("<body><tag/></body>")
168 >>> serialize(elem)
169 '<body><tag/></body>'
170 >>> e = ElementTree.Element("tag2")
171 >>> elem.append(e)
172 >>> serialize(elem)
173 '<body><tag/><tag2/></body>'
174 >>> elem.remove(e)
175 >>> serialize(elem)
176 '<body><tag/></body>'
177 >>> elem.insert(0, e)
178 >>> serialize(elem)
179 '<body><tag2/><tag/></body>'
180 >>> elem.remove(e)
181 >>> elem.extend([e])
182 >>> serialize(elem)
183 '<body><tag/><tag2/></body>'
184 >>> elem.remove(e)
185 """
186
188 """
189 Test find methods using the elementpath fallback.
190
191 >>> CurrentElementPath = ElementTree.ElementPath
192 >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
193 >>> elem = SAMPLE_XML
194 >>> elem.find("tag").tag
195 'tag'
196 >>> ElementTree.ElementTree(elem).find("tag").tag
197 'tag'
198 >>> elem.findtext("tag")
199 'text'
200 >>> elem.findtext("tog")
201 >>> elem.findtext("tog", "default")
202 'default'
203 >>> ElementTree.ElementTree(elem).findtext("tag")
204 'text'
205 >>> summarize_list(elem.findall("tag"))
206 ['tag', 'tag']
207 >>> summarize_list(elem.findall(".//tag"))
208 ['tag', 'tag', 'tag']
209
210 Path syntax doesn't work in this case.
211
212 >>> elem.find("section/tag")
213 >>> elem.findtext("section/tag")
214 >>> elem.findall("section/tag")
215 []
216
217 >>> ElementTree.ElementPath = CurrentElementPath
218 """
219
220
221 del simplefind
222
224 """
225 Test find methods (including xpath syntax).
226
227 >>> elem = SAMPLE_XML
228 >>> elem.find("tag").tag
229 'tag'
230 >>> ElementTree.ElementTree(elem).find("tag").tag
231 'tag'
232 >>> elem.find("section/tag").tag
233 'tag'
234 >>> ElementTree.ElementTree(elem).find("section/tag").tag
235 'tag'
236 >>> elem.findtext("tag")
237 'text'
238 >>> elem.findtext("tog")
239 >>> elem.findtext("tog", "default")
240 'default'
241 >>> ElementTree.ElementTree(elem).findtext("tag")
242 'text'
243 >>> elem.findtext("section/tag")
244 'subtext'
245 >>> ElementTree.ElementTree(elem).findtext("section/tag")
246 'subtext'
247 >>> summarize_list(elem.findall("tag"))
248 ['tag', 'tag']
249 >>> summarize_list(elem.findall("*"))
250 ['tag', 'tag', 'section']
251 >>> summarize_list(elem.findall(".//tag"))
252 ['tag', 'tag', 'tag']
253 >>> summarize_list(elem.findall("section/tag"))
254 ['tag']
255 >>> summarize_list(elem.findall("section//tag"))
256 ['tag']
257 >>> summarize_list(elem.findall("section/*"))
258 ['tag']
259 >>> summarize_list(elem.findall("section//*"))
260 ['tag']
261 >>> summarize_list(elem.findall("section/.//*"))
262 ['tag']
263 >>> summarize_list(elem.findall("*/*"))
264 ['tag']
265 >>> summarize_list(elem.findall("*//*"))
266 ['tag']
267 >>> summarize_list(elem.findall("*/tag"))
268 ['tag']
269 >>> summarize_list(elem.findall("*/./tag"))
270 ['tag']
271 >>> summarize_list(elem.findall("./tag"))
272 ['tag', 'tag']
273 >>> summarize_list(elem.findall(".//tag"))
274 ['tag', 'tag', 'tag']
275 >>> summarize_list(elem.findall("././tag"))
276 ['tag', 'tag']
277 >>> summarize_list(elem.findall(".//tag[@class]"))
278 ['tag', 'tag', 'tag']
279 >>> summarize_list(elem.findall(".//tag[@class='a']"))
280 ['tag']
281 >>> summarize_list(elem.findall(".//tag[@class='b']"))
282 ['tag', 'tag']
283 >>> summarize_list(elem.findall(".//tag[@id]"))
284 ['tag']
285 >>> summarize_list(elem.findall(".//section[tag]"))
286 ['section']
287 >>> summarize_list(elem.findall(".//section[element]"))
288 []
289 >>> summarize_list(elem.findall("../tag"))
290 []
291 >>> summarize_list(elem.findall("section/../tag"))
292 ['tag', 'tag']
293 >>> summarize_list(ElementTree.ElementTree(elem).findall("./tag"))
294 ['tag', 'tag']
295
296 FIXME: ET's Path module handles this case incorrectly; this gives
297 a warning in 1.3, and the behaviour will be modified in 1.4.
298
299 >>> summarize_list(ElementTree.ElementTree(elem).findall("/tag"))
300 ['tag', 'tag']
301 """
302
304 """
305 Check bad or unsupported path expressions.
306
307 >>> elem = SAMPLE_XML
308 >>> elem.findall("/tag")
309 Traceback (most recent call last):
310 SyntaxError: cannot use absolute path on element
311
312 # this is supported in ET 1.3:
313 #>>> elem.findall("section//")
314 #Traceback (most recent call last):
315 #SyntaxError: invalid path
316 """
317
319 """
320 Test parsing from file.
321
322 >>> tree = ElementTree.parse("samples/simple.xml")
323 >>> normalize_crlf(tree)
324 >>> tree.write(stdout())
325 <root>
326 <element key="value">text</element>
327 <element>text</element>tail
328 <empty-element/>
329 </root>
330 >>> tree = ElementTree.parse("samples/simple-ns.xml")
331 >>> normalize_crlf(tree)
332 >>> tree.write(stdout())
333 <root xmlns="http://namespace/">
334 <element key="value">text</element>
335 <element>text</element>tail
336 <empty-element/>
337 </root>
338
339 ## <ns0:root xmlns:ns0="http://namespace/">
340 ## <ns0:element key="value">text</ns0:element>
341 ## <ns0:element>text</ns0:element>tail
342 ## <ns0:empty-element/>
343 ## </ns0:root>
344 """
345
347 """
348 Test HTML parsing.
349
350 >>> # p = HTMLTreeBuilder.TreeBuilder()
351 >>> p = ElementTree.HTMLParser()
352 >>> p.feed("<p><p>spam<b>egg</b></p>")
353 >>> serialize(p.close())
354 '<p>spam<b>egg</b></p>'
355 """
356
357
358 del parsehtml
359
361 r"""
362 >>> element = ElementTree.XML("<html><body>text</body></html>")
363 >>> ElementTree.ElementTree(element).write(stdout())
364 <html><body>text</body></html>
365 >>> element = ElementTree.fromstring("<html><body>text</body></html>")
366 >>> ElementTree.ElementTree(element).write(stdout())
367 <html><body>text</body></html>
368
369 ## >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
370 ## >>> element = ElementTree.fromstringlist(sequence)
371 ## >>> ElementTree.ElementTree(element).write(stdout())
372 ## <html><body>text</body></html>
373
374 >>> print(repr(ElementTree.tostring(element)).lstrip('b'))
375 '<html><body>text</body></html>'
376
377 # looks different in lxml
378 # >>> print(ElementTree.tostring(element, "ascii"))
379 # <?xml version='1.0' encoding='ascii'?>
380 # <html><body>text</body></html>
381
382 >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
383 >>> len(ids)
384 0
385 >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
386 >>> len(ids)
387 1
388 >>> ids["body"].tag
389 'body'
390 """
391
393 """
394 Test the xmllib-based parser.
395
396 >>> from elementtree import SimpleXMLTreeBuilder
397 >>> parser = SimpleXMLTreeBuilder.TreeBuilder()
398 >>> tree = ElementTree.parse("samples/simple.xml", parser)
399 >>> normalize_crlf(tree)
400 >>> tree.write(sys.stdout)
401 <root>
402 <element key="value">text</element>
403 <element>text</element>tail
404 <empty-element />
405 </root>
406 """
407
408
409 del simpleparsefile
410
412 """
413 Test iterparse interface.
414
415 >>> iterparse = ElementTree.iterparse
416
417 >>> context = iterparse("samples/simple.xml")
418 >>> for action, elem in context:
419 ... print("%s %s" % (action, elem.tag))
420 end element
421 end element
422 end empty-element
423 end root
424 >>> context.root.tag
425 'root'
426
427 >>> context = iterparse("samples/simple-ns.xml")
428 >>> for action, elem in context:
429 ... print("%s %s" % (action, elem.tag))
430 end {http://namespace/}element
431 end {http://namespace/}element
432 end {http://namespace/}empty-element
433 end {http://namespace/}root
434
435 >>> events = ()
436 >>> context = iterparse("samples/simple.xml", events)
437 >>> for action, elem in context:
438 ... print("%s %s" % (action, elem.tag))
439
440 >>> events = ()
441 >>> context = iterparse("samples/simple.xml", events=events)
442 >>> for action, elem in context:
443 ... print("%s %s" % (action, elem.tag))
444
445 >>> events = ("start", "end")
446 >>> context = iterparse("samples/simple.xml", events)
447 >>> for action, elem in context:
448 ... print("%s %s" % (action, elem.tag))
449 start root
450 start element
451 end element
452 start element
453 end element
454 start empty-element
455 end empty-element
456 end root
457
458 >>> events = ("start", "end", "start-ns", "end-ns")
459 >>> context = iterparse("samples/simple-ns.xml", events)
460 >>> for action, elem in context:
461 ... if action in ("start", "end"):
462 ... print("%s %s" % (action, elem.tag))
463 ... else:
464 ... print("%s %s" % (action, elem))
465 start-ns ('', 'http://namespace/')
466 start {http://namespace/}root
467 start {http://namespace/}element
468 end {http://namespace/}element
469 start {http://namespace/}element
470 end {http://namespace/}element
471 start {http://namespace/}empty-element
472 end {http://namespace/}empty-element
473 end {http://namespace/}root
474 end-ns None
475
476 """
477
479 """
480 Test the "fancy" parser.
481
482 Sanity check.
483 >>> from elementtree import XMLTreeBuilder
484 >>> parser = XMLTreeBuilder.FancyTreeBuilder()
485 >>> tree = ElementTree.parse("samples/simple.xml", parser)
486 >>> normalize_crlf(tree)
487 >>> tree.write(sys.stdout)
488 <root>
489 <element key="value">text</element>
490 <element>text</element>tail
491 <empty-element />
492 </root>
493
494 Callback check.
495 >>> class MyFancyParser(XMLTreeBuilder.FancyTreeBuilder):
496 ... def start(self, elem):
497 ... print("START %s" % elem.tag)
498 ... def end(self, elem):
499 ... print("END %s" % elem.tag)
500 >>> parser = MyFancyParser()
501 >>> tree = ElementTree.parse("samples/simple.xml", parser)
502 START root
503 START element
504 END element
505 START element
506 END element
507 START empty-element
508 END empty-element
509 END root
510 """
511
512
513 del fancyparsefile
514
516 """
517 >>> elem = ElementTree.Element("tag")
518 >>> elem.text = "text"
519 >>> serialize(elem)
520 '<tag>text</tag>'
521 >>> ElementTree.SubElement(elem, "subtag").text = "subtext"
522 >>> serialize(elem)
523 '<tag>text<subtag>subtext</subtag></tag>'
524
525 ## Test tag suppression
526 ## >>> elem.tag = None
527 ## >>> serialize(elem)
528 ## 'text<subtag>subtext</subtag>'
529 """
530
532 """
533 >>> elem = ElementTree.XML("<html><body>text</body></html>")
534 >>> print(repr(ElementTree.tostring(elem)).lstrip('b'))
535 '<html><body>text</body></html>'
536 >>> elem = ElementTree.fromstring("<html><body>text</body></html>")
537 >>> print(repr(ElementTree.tostring(elem)).lstrip('b'))
538 '<html><body>text</body></html>'
539 """
540
542 r"""
543 Test encoding issues.
544
545 >>> elem = ElementTree.Element("tag")
546 >>> elem.text = u'abc'
547 >>> serialize(elem)
548 '<tag>abc</tag>'
549 >>> serialize(elem, encoding="utf-8")
550 '<tag>abc</tag>'
551 >>> serialize(elem, encoding="us-ascii")
552 '<tag>abc</tag>'
553 >>> serialize(elem, encoding="iso-8859-1").lower()
554 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
555
556 >>> elem.text = "<&\"\'>"
557 >>> serialize(elem)
558 '<tag><&"\'></tag>'
559 >>> serialize(elem, encoding="utf-8")
560 '<tag><&"\'></tag>'
561 >>> serialize(elem, encoding="us-ascii") # cdata characters
562 '<tag><&"\'></tag>'
563 >>> serialize(elem, encoding="iso-8859-1").lower()
564 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag><&"\'></tag>'
565
566 >>> elem.attrib["key"] = "<&\"\'>"
567 >>> elem.text = None
568 >>> serialize(elem)
569 '<tag key="<&"\'>"/>'
570 >>> serialize(elem, encoding="utf-8")
571 '<tag key="<&"\'>"/>'
572 >>> serialize(elem, encoding="us-ascii")
573 '<tag key="<&"\'>"/>'
574 >>> serialize(elem, encoding="iso-8859-1").lower()
575 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="<&"\'>"/>'
576
577 >>> elem.text = u'\xe5\xf6\xf6<>'
578 >>> elem.attrib.clear()
579 >>> serialize(elem)
580 '<tag>åöö<></tag>'
581 >>> serialize(elem, encoding="utf-8")
582 '<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>'
583 >>> serialize(elem, encoding="us-ascii")
584 '<tag>åöö<></tag>'
585 >>> serialize(elem, encoding="iso-8859-1").lower()
586 "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6<></tag>"
587
588 >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
589 >>> elem.text = None
590 >>> serialize(elem)
591 '<tag key="åöö<>"/>'
592 >>> serialize(elem, encoding="utf-8")
593 '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>"/>'
594 >>> serialize(elem, encoding="us-ascii")
595 '<tag key="åöö<>"/>'
596 >>> serialize(elem, encoding="iso-8859-1").lower()
597 '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6<>"/>'
598 """
599
# On Python 3, rewrite the ``encoding`` doctest so that every ``u'`` in its
# docstring becomes a plain ``'``.  The docstring is ``None`` when the
# interpreter discards docstrings (``python -OO``); skip the rewrite then
# instead of failing at import time with an AttributeError.
if sys.version_info[0] >= 3 and encoding.__doc__:
    encoding.__doc__ = encoding.__doc__.replace("u'", "'")
602
604 r"""
605 Test serialization methods.
606
607 >>> e = ET.XML("<html><link/><script>1 < 2</script></html>")
608 >>> e.tail = "\n"
609 >>> serialize(e)
610 '<html><link /><script>1 < 2</script></html>\n'
611 >>> serialize(e, method=None)
612 '<html><link /><script>1 < 2</script></html>\n'
613 >>> serialize(e, method="xml")
614 '<html><link /><script>1 < 2</script></html>\n'
615 >>> serialize(e, method="html")
616 '<html><link><script>1 < 2</script></html>\n'
617 >>> serialize(e, method="text")
618 '1 < 2\n'
619
620 """
621
622
623 del methods
624
626 """
627 Test iterators.
628
629 >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
630 >>> summarize_list(e.iter())
631 ['html', 'body', 'i']
632 >>> summarize_list(e.find("body").iter())
633 ['body', 'i']
634 >>> "".join(e.itertext())
635 'this is a paragraph...'
636 >>> "".join(e.find("body").itertext())
637 'this is a paragraph.'
638 """
639
640 ENTITY_XML = """\
641 <!DOCTYPE points [
642 <!ENTITY % user-entities SYSTEM 'user-entities.xml'>
643 %user-entities;
644 ]>
645 <document>&entity;</document>
646 """
647
649 """
650 Test entity handling.
651
652 1) bad entities
653
654 >>> ElementTree.XML("<document>&entity;</document>")
655 Traceback (most recent call last):
656 ExpatError: undefined entity: line 1, column 10
657
658 >>> ElementTree.XML(ENTITY_XML)
659 Traceback (most recent call last):
660 ExpatError: undefined entity &entity;: line 5, column 10
661
662 (add more tests here)
663
664 """
665
666
667 del entity
668
670 """
671 Test error handling.
672
673 >>> error("foo").position
674 (1, 0)
675 >>> error("<tag>&foo;</tag>").position
676 (1, 5)
677 >>> error("foobar<").position
678 (1, 6)
679
680 """
681 try :
682 ET . XML ( xml )
683 except ET . ParseError :
684 return sys . exc_value
685
686
687 del error
688
690 """
691 Test namespace issues.
692
693 1) xml namespace
694
695 >>> elem = ElementTree.XML("<tag xml:lang='en' />")
696 >>> serialize(elem) # 1.1
697 '<tag xml:lang="en"/>'
698
699 2) other "well-known" namespaces
700
701 >>> elem = ElementTree.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
702 >>> serialize(elem) # 2.1
703 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"/>'
704
705 >>> elem = ElementTree.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
706 >>> serialize(elem) # 2.2
707 '<html:html xmlns:html="http://www.w3.org/1999/xhtml"/>'
708
709 >>> elem = ElementTree.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
710 >>> serialize(elem) # 2.3
711 '<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope"/>'
712
713 3) unknown namespaces
714
715 """
716
718 """
719 Test QName handling.
720
721 1) decorated tags
722
723 >>> elem = ElementTree.Element("{uri}tag")
724 >>> serialize(elem) # 1.1
725 '<ns0:tag xmlns:ns0="uri"/>'
726 >>> elem = ElementTree.Element(ElementTree.QName("{uri}tag"))
727 >>> serialize(elem) # 1.2
728 '<ns0:tag xmlns:ns0="uri"/>'
729 >>> elem = ElementTree.Element(ElementTree.QName("uri", "tag"))
730 >>> serialize(elem) # 1.3
731 '<ns0:tag xmlns:ns0="uri"/>'
732
733 # ns/attribute order ...
734
735 ## 2) decorated attributes
736
737 ## >>> elem.clear()
738 ## >>> elem.attrib["{uri}key"] = "value"
739 ## >>> serialize(elem) # 2.1
740 ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'
741
742 ## >>> elem.clear()
743 ## >>> elem.attrib[ElementTree.QName("{uri}key")] = "value"
744 ## >>> serialize(elem) # 2.2
745 ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'
746
747 ## 3) decorated values are not converted by default, but the
748 ## QName wrapper can be used for values
749
750 ## >>> elem.clear()
751 ## >>> elem.attrib["{uri}key"] = "{uri}value"
752 ## >>> serialize(elem) # 3.1
753 ## '<ns0:tag ns0:key="{uri}value" xmlns:ns0="uri"/>'
754
755 ## >>> elem.clear()
756 ## >>> elem.attrib["{uri}key"] = ElementTree.QName("{uri}value")
757 ## >>> serialize(elem) # 3.2
758 ## '<ns0:tag ns0:key="ns0:value" xmlns:ns0="uri"/>'
759
760 ## >>> elem.clear()
761 ## >>> subelem = ElementTree.Element("tag")
762 ## >>> subelem.attrib["{uri1}key"] = ElementTree.QName("{uri2}value")
763 ## >>> elem.append(subelem)
764 ## >>> elem.append(subelem)
765 ## >>> serialize(elem) # 3.3
766 ## '<ns0:tag xmlns:ns0="uri"><tag ns1:key="ns2:value" xmlns:ns1="uri1" xmlns:ns2="uri2"/><tag ns1:key="ns2:value" xmlns:ns1="uri1" xmlns:ns2="uri2"/></ns0:tag>'
767
768 """
769
771 """
772 Test the XPath tokenizer.
773
774 >>> # tests from the xml specification
775 >>> xpath_tokenizer("*")
776 ['*']
777 >>> xpath_tokenizer("text()")
778 ['text', '()']
779 >>> xpath_tokenizer("@name")
780 ['@', 'name']
781 >>> xpath_tokenizer("@*")
782 ['@', '*']
783 >>> xpath_tokenizer("para[1]")
784 ['para', '[', '1', ']']
785 >>> xpath_tokenizer("para[last()]")
786 ['para', '[', 'last', '()', ']']
787 >>> xpath_tokenizer("*/para")
788 ['*', '/', 'para']
789 >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
790 ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
791 >>> xpath_tokenizer("chapter//para")
792 ['chapter', '//', 'para']
793 >>> xpath_tokenizer("//para")
794 ['//', 'para']
795 >>> xpath_tokenizer("//olist/item")
796 ['//', 'olist', '/', 'item']
797 >>> xpath_tokenizer(".")
798 ['.']
799 >>> xpath_tokenizer(".//para")
800 ['.', '//', 'para']
801 >>> xpath_tokenizer("..")
802 ['..']
803 >>> xpath_tokenizer("../@lang")
804 ['..', '/', '@', 'lang']
805 >>> xpath_tokenizer("chapter[title]")
806 ['chapter', '[', 'title', ']']
807 >>> xpath_tokenizer("employee[@secretary and @assistant]")
808 ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']
809
810 >>> # additional tests
811 >>> xpath_tokenizer("{http://spam}egg")
812 ['{http://spam}egg']
813 >>> xpath_tokenizer("./spam.egg")
814 ['.', '/', 'spam.egg']
815 >>> xpath_tokenizer(".//{http://spam}egg")
816 ['.', '//', '{http://spam}egg']
817 """
818 out = [ ]
819 for op , tag in ElementPath . xpath_tokenizer ( p ) :
820 out . append ( op or tag )
821 return out
822
823
824
825
# In-memory XInclude fixtures: resource name (e.g. "C1.xml") -> document
# source text.  The entries are filled in by the assignments that follow.
# NOTE(review): presumably served to the XInclude doctests through
# ``xinclude_loader``; its definition is not visible here -- confirm.
XINCLUDE = {}
827
828 XINCLUDE [ "C1.xml" ] = """\
829 <?xml version='1.0'?>
830 <document xmlns:xi="http://www.w3.org/2001/XInclude">
831 <p>120 Mz is adequate for an average home user.</p>
832 <xi:include href="disclaimer.xml"/>
833 </document>
834 """
835
836 XINCLUDE [ "disclaimer.xml" ] = """\
837 <?xml version='1.0'?>
838 <disclaimer>
839 <p>The opinions represented herein represent those of the individual
840 and should not be interpreted as official policy endorsed by this
841 organization.</p>
842 </disclaimer>
843 """
844
845 XINCLUDE [ "C2.xml" ] = """\
846 <?xml version='1.0'?>
847 <document xmlns:xi="http://www.w3.org/2001/XInclude">
848 <p>This document has been accessed
849 <xi:include href="count.txt" parse="text"/> times.</p>
850 </document>
851 """
852
853 XINCLUDE [ "count.txt" ] = "324387"
854
855 XINCLUDE [ "C3.xml" ] = """\
856 <?xml version='1.0'?>
857 <document xmlns:xi="http://www.w3.org/2001/XInclude">
858 <p>The following is the source of the "data.xml" resource:</p>
859 <example><xi:include href="data.xml" parse="text"/></example>
860 </document>
861 """
862
863 XINCLUDE [ "data.xml" ] = """\
864 <?xml version='1.0'?>
865 <data>
866 <item><![CDATA[Brooks & Shields]]></item>
867 </data>
868 """
869
870 XINCLUDE [ "C5.xml" ] = """\
871 <?xml version='1.0'?>
872 <div xmlns:xi="http://www.w3.org/2001/XInclude">
873 <xi:include href="example.txt" parse="text">
874 <xi:fallback>
875 <xi:include href="fallback-example.txt" parse="text">
876 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
877 </xi:include>
878 </xi:fallback>
879 </xi:include>
880 </div>
881 """
882
883 XINCLUDE [ "default.xml" ] = """\
884 <?xml version='1.0'?>
885 <document xmlns:xi="http://www.w3.org/2001/XInclude">
886 <p>Example.</p>
887 <xi:include href="samples/simple.xml"/>
888 </document>
889 """
890
899
901 r"""
902 Basic inclusion example (XInclude C.1)
903
904 >>> document = xinclude_loader("C1.xml")
905 >>> ElementInclude.include(document, xinclude_loader)
906 >>> print(serialize(document)) # C1
907 <document>
908 <p>120 Mz is adequate for an average home user.</p>
909 <disclaimer>
910 <p>The opinions represented herein represent those of the individual
911 and should not be interpreted as official policy endorsed by this
912 organization.</p>
913 </disclaimer>
914 </document>
915
916 Textual inclusion example (XInclude C.2)
917
918 >>> document = xinclude_loader("C2.xml")
919 >>> ElementInclude.include(document, xinclude_loader)
920 >>> print(serialize(document)) # C2
921 <document>
922 <p>This document has been accessed
923 324387 times.</p>
924 </document>
925
926 Textual inclusion of XML example (XInclude C.3)
927
928 >>> document = xinclude_loader("C3.xml")
929 >>> ElementInclude.include(document, xinclude_loader)
930 >>> print(serialize(document)) # C3
931 <document>
932 <p>The following is the source of the "data.xml" resource:</p>
933 <example><?xml version='1.0'?>
934 <data>
935 <item><![CDATA[Brooks & Shields]]></item>
936 </data>
937 </example>
938 </document>
939
940 ## Fallback example (XInclude C.5)
941 ## Note! Fallback support is not yet implemented
942
943 ## >>> document = xinclude_loader("C5.xml")
944 ## >>> ElementInclude.include(document, xinclude_loader)
945 ## Traceback (most recent call last):
946 ## IOError: resource not found
947 ## >>> # print(serialize(document)) # C5
948
949 """
950
952 """
953 >>> document = xinclude_loader("default.xml")
954 >>> ElementInclude.include(document)
955 >>> print(serialize(document)) # default
956 <document>
957 <p>Example.</p>
958 <root>
959 <element key="value">text</element>
960 <element>text</element>tail
961 <empty-element/>
962 </root>
963 </document>
964 """
965
966
967
968
970 r"""
971 >>> file = BytesIO()
972 >>> w = SimpleXMLWriter.XMLWriter(file)
973 >>> html = w.start("html")
974 >>> x = w.start("head")
975 >>> w.element("title", "my document")
976 >>> w.data("\n")
977 >>> w.element("meta", name="hello", value="goodbye")
978 >>> w.data("\n")
979 >>> w.end()
980 >>> x = w.start("body")
981 >>> w.element("h1", "this is a heading")
982 >>> w.data("\n")
983 >>> w.element("p", u"this is a paragraph")
984 >>> w.data("\n")
985 >>> w.element("p", u"reserved characters: <&>")
986 >>> w.data("\n")
987 >>> w.element("p", u"detta är också ett stycke")
988 >>> w.data("\n")
989 >>> w.close(html)
990 >>> print(file.getvalue())
991 <html><head><title>my document</title>
992 <meta name="hello" value="goodbye" />
993 </head><body><h1>this is a heading</h1>
994 <p>this is a paragraph</p>
995 <p>reserved characters: <&></p>
996 <p>detta är också ett stycke</p>
997 </body></html>
998 """
999
1000
1001 del xmlwriter
1002
1003
1004
1005
1036
1037
1038 del bug_xmltoolkit21
1039
1050
1061
1071
1072
1073 del bug_xmltoolkitX1
1074
1103
1104
1105 del bug_xmltoolkit39
1106
1154
1155
1156 del bug_xmltoolkit45
1157
1168
1169
1170 del bug_xmltoolkit46
1171
1180
1181
1182 del bug_xmltoolkit54
1183
1192
1193
1194 del bug_xmltoolkit55
1195
1197 """
1198 >>> parser = ET.XMLParser()
1199 >>> parser.version
1200 'Expat 2.0.0'
1201 >>> parser.feed(open("samples/simple.xml").read())
1202 >>> print(serialize(parser.close()))
1203 <root>
1204 <element key="value">text</element>
1205 <element>text</element>tail
1206 <empty-element />
1207 </root>
1208 """
1209
1210
1211 del bug_200708_version
1212
1214 r"""
1215
1216 Preserve newlines in attributes.
1217
1218 >>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
1219 >>> ET.tostring(e)
1220 '<SomeTag text="def _f(): return 3 " />'
1221 >>> ET.XML(ET.tostring(e)).get("text")
1222 'def _f():\n return 3\n'
1223 >>> ET.tostring(ET.XML(ET.tostring(e)))
1224 '<SomeTag text="def _f(): return 3 " />'
1225 """
1226
1227
1228 del bug_200708_newline
1229
1231 """
1232
1233 >>> e = ET.Element("{default}elem")
1234 >>> s = ET.SubElement(e, "{default}elem")
1235 >>> serialize(e, default_namespace="default") # 1
1236 '<elem xmlns="default"><elem /></elem>'
1237
1238 >>> e = ET.Element("{default}elem")
1239 >>> s = ET.SubElement(e, "{default}elem")
1240 >>> s = ET.SubElement(e, "{not-default}elem")
1241 >>> serialize(e, default_namespace="default") # 2
1242 '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'
1243
1244 >>> e = ET.Element("{default}elem")
1245 >>> s = ET.SubElement(e, "{default}elem")
1246 >>> s = ET.SubElement(e, "elem") # unprefixed name
1247 >>> serialize(e, default_namespace="default") # 3
1248 Traceback (most recent call last):
1249 ValueError: cannot use non-qualified names with default_namespace option
1250
1251 """
1252
1253
1254 del bug_200709_default_namespace
1255
1256
1257
def main(module=None):
    """Run the doctests of *module* and report the outcome on stdout.

    Args:
        module: Module whose docstrings are scanned for doctests.  ``None``
            (the default) makes ``doctest.testmod`` fall back to the
            ``__main__`` module, i.e. this file when it is run as a script.

    Returns:
        1 if at least one doctest example failed, otherwise 0.
    """
    import doctest

    failed, tested = doctest.testmod(module)
    print("%d tests ok." % (tested - failed))
    if failed > 0:
        print("%d tests failed. Exiting with non-zero return code." % failed)
        return 1
    return 0


if __name__ == "__main__":
    # Test the running module itself instead of re-importing it under the
    # bare name ``selftest``: that import only resolves when this file's
    # directory is on ``sys.path``, and it executes the module a second time.
    if main():
        sys.exit(1)
1265