lxml.tests.test_etree
1
2
3 """
4 Tests specific to the extended etree API
5
6 Tests that apply to the general ElementTree API should go into
7 test_elementtree
8 """
9
10 from __future__ import absolute_import
11
12 import os . path
13 import unittest
14 import copy
15 import sys
16 import re
17 import gc
18 import operator
19 import tempfile
20 import textwrap
21 import zlib
22 import gzip
23 from contextlib import closing , contextmanager
24
25 from . common_imports import etree , StringIO , BytesIO , HelperTestCase
26 from . common_imports import fileInTestDir , fileUrlInTestDir , read_file , path2url
27 from . common_imports import SillyFileLike , LargeFileLikeUnicode , doctest , make_doctest
28 from . common_imports import canonicalize , _str , _bytes
29
30 print ( "" )
31 print ( "TESTED VERSION: %s" % etree . __version__ )
32 print ( " Python: " + repr ( sys . version_info ) )
33 print ( " lxml.etree: " + repr ( etree . LXML_VERSION ) )
34 print ( " libxml used: " + repr ( etree . LIBXML_VERSION ) )
35 print ( " libxml compiled: " + repr ( etree . LIBXML_COMPILED_VERSION ) )
36 print ( " libxslt used: " + repr ( etree . LIBXSLT_VERSION ) )
37 print ( " libxslt compiled: " + repr ( etree . LIBXSLT_COMPILED_VERSION ) )
38 print ( "" )
39
40 try :
41 _unicode = unicode
42 except NameError :
43
44 _unicode = str
45
46
47 @ contextmanager
48 - def tmpfile ( ) :
55
58 """Tests only for etree, not ElementTree"""
59 etree = etree
60
71
80
88
95
97 Element = self . etree . Element
98 el = Element ( 'name' )
99 self . assertRaises ( ValueError , Element , '{}' )
100 self . assertRaises ( ValueError , setattr , el , 'tag' , '{}' )
101
102 self . assertRaises ( ValueError , Element , '{test}' )
103 self . assertRaises ( ValueError , setattr , el , 'tag' , '{test}' )
104
106 Element = self . etree . Element
107 self . assertRaises ( ValueError , Element , 'p:name' )
108 self . assertRaises ( ValueError , Element , '{test}p:name' )
109
110 el = Element ( 'name' )
111 self . assertRaises ( ValueError , setattr , el , 'tag' , 'p:name' )
112
114 Element = self . etree . Element
115 self . assertRaises ( ValueError , Element , "p'name" )
116 self . assertRaises ( ValueError , Element , 'p"name' )
117
118 self . assertRaises ( ValueError , Element , "{test}p'name" )
119 self . assertRaises ( ValueError , Element , '{test}p"name' )
120
121 el = Element ( 'name' )
122 self . assertRaises ( ValueError , setattr , el , 'tag' , "p'name" )
123 self . assertRaises ( ValueError , setattr , el , 'tag' , 'p"name' )
124
126 Element = self . etree . Element
127 self . assertRaises ( ValueError , Element , ' name ' )
128 self . assertRaises ( ValueError , Element , 'na me' )
129 self . assertRaises ( ValueError , Element , '{test} name' )
130
131 el = Element ( 'name' )
132 self . assertRaises ( ValueError , setattr , el , 'tag' , ' name ' )
133
141
149
151 Element = self . etree . Element
152 SubElement = self . etree . SubElement
153
154 el = Element ( 'name' )
155 self . assertRaises ( ValueError , SubElement , el , "p'name" )
156 self . assertRaises ( ValueError , SubElement , el , "{test}p'name" )
157
158 self . assertRaises ( ValueError , SubElement , el , 'p"name' )
159 self . assertRaises ( ValueError , SubElement , el , '{test}p"name' )
160
169
178
180 QName = self . etree . QName
181 self . assertRaises ( ValueError , QName , '' )
182 self . assertRaises ( ValueError , QName , None )
183 self . assertRaises ( ValueError , QName , None , None )
184 self . assertRaises ( ValueError , QName , 'test' , '' )
185
192
194 QName = self . etree . QName
195 self . assertRaises ( ValueError , QName , 'p:name' )
196 self . assertRaises ( ValueError , QName , 'test' , 'p:name' )
197
199 QName = self . etree . QName
200 self . assertRaises ( ValueError , QName , ' name ' )
201 self . assertRaises ( ValueError , QName , 'na me' )
202 self . assertRaises ( ValueError , QName , 'test' , ' name' )
203
211
213
214 QName = self . etree . QName
215 qname1 = QName ( 'http://myns' , 'a' )
216 a = self . etree . Element ( qname1 , nsmap = { 'p' : 'http://myns' } )
217
218 qname2 = QName ( a )
219 self . assertEqual ( a . tag , qname1 . text )
220 self . assertEqual ( a . tag , qname1 )
221 self . assertEqual ( qname1 . text , qname2 . text )
222 self . assertEqual ( qname1 , qname2 . text )
223 self . assertEqual ( qname1 . text , qname2 )
224 self . assertEqual ( qname1 , qname2 )
225
227
228 etree = self . etree
229 qname = etree . QName ( 'http://myns' , 'a' )
230 a = etree . Element ( qname , nsmap = { 'p' : 'http://myns' } )
231 a . text = qname
232
233 self . assertEqual ( "p:a" , a . text )
234
243
258
264
274
286
288 Element = self . etree . Element
289
290 keys = [ "attr%d" % i for i in range ( 10 ) ]
291 values = [ "TEST-%d" % i for i in range ( 10 ) ]
292 items = list ( zip ( keys , values ) )
293
294 root = Element ( "root" )
295 for key , value in items :
296 root . set ( key , value )
297 self . assertEqual ( keys , root . attrib . keys ( ) )
298 self . assertEqual ( values , root . attrib . values ( ) )
299
300 root2 = Element ( "root2" , root . attrib ,
301 attr_99 = 'TOAST-1' , attr_98 = 'TOAST-2' )
302 self . assertEqual ( [ 'attr_98' , 'attr_99' ] + keys ,
303 root2 . attrib . keys ( ) )
304 self . assertEqual ( [ 'TOAST-2' , 'TOAST-1' ] + values ,
305 root2 . attrib . values ( ) )
306
307 self . assertEqual ( keys , root . attrib . keys ( ) )
308 self . assertEqual ( values , root . attrib . values ( ) )
309
311
312
313 Element = self . etree . Element
314 root = Element ( "root" )
315 self . assertRaises ( TypeError , root . set , "newattr" , 5 )
316 self . assertRaises ( TypeError , root . set , "newattr" , object )
317 self . assertRaises ( TypeError , root . set , "newattr" , None )
318 self . assertRaises ( TypeError , root . set , "newattr" )
319
333
355
357 XML = self . etree . XML
358 xml = _bytes ( '<test><a><b><c/></b></a><x><a><b/><c/></a></x></test>' )
359
360 root = XML ( xml )
361 self . etree . strip_elements ( root , 'a' )
362 self . assertEqual ( _bytes ( '<test><x></x></test>' ) ,
363 self . _writeElement ( root ) )
364
365 root = XML ( xml )
366 self . etree . strip_elements ( root , 'b' , 'c' , 'X' , 'Y' , 'Z' )
367 self . assertEqual ( _bytes ( '<test><a></a><x><a></a></x></test>' ) ,
368 self . _writeElement ( root ) )
369
370 root = XML ( xml )
371 self . etree . strip_elements ( root , 'c' )
372 self . assertEqual ( _bytes ( '<test><a><b></b></a><x><a><b></b></a></x></test>' ) ,
373 self . _writeElement ( root ) )
374
376 XML = self . etree . XML
377 xml = _bytes ( '<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"/>C</b>BT</n:a>AT<x>X<a>A<b xmlns="urn:a"/>BT<c xmlns="urn:x"/>CT</a>AT</x>XT</test>' )
378
379 root = XML ( xml )
380 self . etree . strip_elements ( root , 'a' )
381 self . assertEqual ( _bytes ( '<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X</x>XT</test>' ) ,
382 self . _writeElement ( root ) )
383
384 root = XML ( xml )
385 self . etree . strip_elements ( root , '{urn:a}b' , 'c' )
386 self . assertEqual ( _bytes ( '<test>TEST<n:a xmlns:n="urn:a">A<b>B<c xmlns="urn:c"></c>C</b>BT</n:a>AT<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>' ) ,
387 self . _writeElement ( root ) )
388
389 root = XML ( xml )
390 self . etree . strip_elements ( root , '{urn:a}*' , 'c' )
391 self . assertEqual ( _bytes ( '<test>TEST<x>X<a>A<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>' ) ,
392 self . _writeElement ( root ) )
393
394 root = XML ( xml )
395 self . etree . strip_elements ( root , '{urn:a}*' , 'c' , with_tail = False )
396 self . assertEqual ( _bytes ( '<test>TESTAT<x>X<a>ABT<c xmlns="urn:x"></c>CT</a>AT</x>XT</test>' ) ,
397 self . _writeElement ( root ) )
398
417
443
470
497
516
529
540
546
548 XML = self . etree . XML
549 root = XML ( _bytes ( "<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>" ) )
550 self . assertEqual ( root [ 0 ] . target , "mypi" )
551 self . assertEqual ( root [ 0 ] . get ( 'my' ) , "1" )
552 self . assertEqual ( root [ 0 ] . get ( 'test' ) , " abc " )
553 self . assertEqual ( root [ 0 ] . get ( 'quotes' ) , "' '" )
554 self . assertEqual ( root [ 0 ] . get ( 'only' ) , None )
555 self . assertEqual ( root [ 0 ] . get ( 'names' ) , None )
556 self . assertEqual ( root [ 0 ] . get ( 'nope' ) , None )
557
559 XML = self . etree . XML
560 root = XML ( _bytes ( "<test><?mypi my='1' test=\" abc \" quotes=\"' '\" only names ?></test>" ) )
561 self . assertEqual ( root [ 0 ] . target , "mypi" )
562 self . assertEqual ( root [ 0 ] . attrib [ 'my' ] , "1" )
563 self . assertEqual ( root [ 0 ] . attrib [ 'test' ] , " abc " )
564 self . assertEqual ( root [ 0 ] . attrib [ 'quotes' ] , "' '" )
565 self . assertRaises ( KeyError , root [ 0 ] . attrib . __getitem__ , 'only' )
566 self . assertRaises ( KeyError , root [ 0 ] . attrib . __getitem__ , 'names' )
567 self . assertRaises ( KeyError , root [ 0 ] . attrib . __getitem__ , 'nope' )
568
570
571 ProcessingInstruction = self . etree . ProcessingInstruction
572
573 a = ProcessingInstruction ( "PI" , "ONE" )
574 b = copy . deepcopy ( a )
575 b . text = "ANOTHER"
576
577 self . assertEqual ( 'ONE' , a . text )
578 self . assertEqual ( 'ANOTHER' , b . text )
579
595
610
621
633
652
657
670
681
682 f = BytesIO ( '<a><!--A--><b><!-- B --><c/></b><!--C--></a>' )
683 events = list ( iterparse ( f , events = ( 'end' , 'comment' ) ) )
684 root = events [ - 1 ] [ 1 ]
685 self . assertEqual ( 6 , len ( events ) )
686 self . assertEqual ( [ 'A' , ' B ' , 'c' , 'b' , 'C' , 'a' ] ,
687 [ name ( * item ) for item in events ] )
688 self . assertEqual (
689 _bytes ( '<a><!--A--><b><!-- B --><c/></b><!--C--></a>' ) ,
690 tostring ( root ) )
691
703
704 f = BytesIO ( '<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>' )
705 events = list ( iterparse ( f , events = ( 'end' , 'pi' ) ) )
706 root = events [ - 2 ] [ 1 ]
707 self . assertEqual ( 8 , len ( events ) )
708 self . assertEqual ( [ ( 'pia' , 'a' ) , ( 'pib' , 'b' ) , ( 'pic' , 'c' ) , 'c' , 'b' ,
709 ( 'pid' , 'd' ) , 'a' , ( 'pie' , 'e' ) ] ,
710 [ name ( * item ) for item in events ] )
711 self . assertEqual (
712 _bytes ( '<?pia a?><a><?pib b?><b><?pic c?><c/></b><?pid d?></a><?pie e?>' ) ,
713 tostring ( ElementTree ( root ) ) )
714
729
735
737 iterparse = self . etree . iterparse
738 f = BytesIO ( '<a><b><c/></a>' )
739 it = iterparse ( f , events = ( 'start' , 'end' ) , recover = True )
740 events = [ ( ev , el . tag ) for ev , el in it ]
741 root = it . root
742 self . assertTrue ( root is not None )
743
744 self . assertEqual ( 1 , events . count ( ( 'start' , 'a' ) ) )
745 self . assertEqual ( 1 , events . count ( ( 'end' , 'a' ) ) )
746
747 self . assertEqual ( 1 , events . count ( ( 'start' , 'b' ) ) )
748 self . assertEqual ( 1 , events . count ( ( 'end' , 'b' ) ) )
749
750 self . assertEqual ( 1 , events . count ( ( 'start' , 'c' ) ) )
751 self . assertEqual ( 1 , events . count ( ( 'end' , 'c' ) ) )
752
754 iterparse = self . etree . iterparse
755 f = BytesIO ( '<a><b><c/></d><b><c/></a></b>' )
756 it = iterparse ( f , events = ( 'start' , 'end' ) , recover = True )
757 events = [ ( ev , el . tag ) for ev , el in it ]
758 root = it . root
759 self . assertTrue ( root is not None )
760
761 self . assertEqual ( 1 , events . count ( ( 'start' , 'a' ) ) )
762 self . assertEqual ( 1 , events . count ( ( 'end' , 'a' ) ) )
763
764 self . assertEqual ( 2 , events . count ( ( 'start' , 'b' ) ) )
765 self . assertEqual ( 2 , events . count ( ( 'end' , 'b' ) ) )
766
767 self . assertEqual ( 2 , events . count ( ( 'start' , 'c' ) ) )
768 self . assertEqual ( 2 , events . count ( ( 'end' , 'c' ) ) )
769
771 iterparse = self . etree . iterparse
772 f = BytesIO ( """
773 <a> \n \n <b> b test </b> \n
774
775 \n\t <c> \n </c> </a> \n """ )
776 iterator = iterparse ( f , remove_blank_text = True )
777 text = [ ( element . text , element . tail )
778 for event , element in iterator ]
779 self . assertEqual (
780 [ ( " b test " , None ) , ( " \n " , None ) , ( None , None ) ] ,
781 text )
782
784 iterparse = self . etree . iterparse
785 f = BytesIO ( '<a><b><d/></b><c/></a>' )
786
787 iterator = iterparse ( f , tag = "b" , events = ( 'start' , 'end' ) )
788 events = list ( iterator )
789 root = iterator . root
790 self . assertEqual (
791 [ ( 'start' , root [ 0 ] ) , ( 'end' , root [ 0 ] ) ] ,
792 events )
793
795 iterparse = self . etree . iterparse
796 f = BytesIO ( '<a><b><d/></b><c/></a>' )
797
798 iterator = iterparse ( f , tag = "*" , events = ( 'start' , 'end' ) )
799 events = list ( iterator )
800 self . assertEqual (
801 8 ,
802 len ( events ) )
803
805 iterparse = self . etree . iterparse
806 f = BytesIO ( '<a xmlns="urn:test:1"><b><d/></b><c/></a>' )
807
808 iterator = iterparse ( f , tag = "{urn:test:1}b" , events = ( 'start' , 'end' ) )
809 events = list ( iterator )
810 root = iterator . root
811 self . assertEqual (
812 [ ( 'start' , root [ 0 ] ) , ( 'end' , root [ 0 ] ) ] ,
813 events )
814
816 iterparse = self . etree . iterparse
817 f = BytesIO ( '<a><b><d/></b><c/></a>' )
818 iterator = iterparse ( f , tag = "{}b" , events = ( 'start' , 'end' ) )
819 events = list ( iterator )
820 root = iterator . root
821 self . assertEqual (
822 [ ( 'start' , root [ 0 ] ) , ( 'end' , root [ 0 ] ) ] ,
823 events )
824
825 f = BytesIO ( '<a xmlns="urn:test:1"><b><d/></b><c/></a>' )
826 iterator = iterparse ( f , tag = "{}b" , events = ( 'start' , 'end' ) )
827 events = list ( iterator )
828 root = iterator . root
829 self . assertEqual ( [ ] , events )
830
832 iterparse = self . etree . iterparse
833 f = BytesIO ( '<a xmlns="urn:test:1"><b><d/></b><c/></a>' )
834 iterator = iterparse ( f , tag = "{urn:test:1}*" , events = ( 'start' , 'end' ) )
835 events = list ( iterator )
836 self . assertEqual ( 8 , len ( events ) )
837
839 iterparse = self . etree . iterparse
840 f = BytesIO ( '<a xmlns="urn:test:1"><b><d/></b><c/></a>' )
841 iterator = iterparse ( f , tag = "{}*" , events = ( 'start' , 'end' ) )
842 events = list ( iterator )
843 self . assertEqual ( [ ] , events )
844
845 f = BytesIO ( '<a><b><d/></b><c/></a>' )
846 iterator = iterparse ( f , tag = "{}*" , events = ( 'start' , 'end' ) )
847 events = list ( iterator )
848 self . assertEqual ( 8 , len ( events ) )
849
851 text = _str ( 'Søk på nettet' )
852 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
853 xml_latin1 = ( _str ( '%s<a>%s</a>' ) % ( wrong_declaration , text )
854 ) . encode ( 'iso-8859-1' )
855
856 self . assertRaises ( self . etree . ParseError ,
857 list , self . etree . iterparse ( BytesIO ( xml_latin1 ) ) )
858
860 text = _str ( 'Søk på nettet' , encoding = "UTF-8" )
861 wrong_declaration = "<?xml version='1.0' encoding='UTF-8'?>"
862 xml_latin1 = ( _str ( '%s<a>%s</a>' ) % ( wrong_declaration , text )
863 ) . encode ( 'iso-8859-1' )
864
865 iterator = self . etree . iterparse ( BytesIO ( xml_latin1 ) ,
866 encoding = "iso-8859-1" )
867 self . assertEqual ( 1 , len ( list ( iterator ) ) )
868
869 a = iterator . root
870 self . assertEqual ( a . text , text )
871
873 tostring = self . etree . tostring
874 f = BytesIO ( '<root><![CDATA[test]]></root>' )
875 context = self . etree . iterparse ( f , strip_cdata = False )
876 content = [ el . text for event , el in context ]
877
878 self . assertEqual ( [ 'test' ] , content )
879 self . assertEqual ( _bytes ( '<root><![CDATA[test]]></root>' ) ,
880 tostring ( context . root ) )
881
885
890
909
910
911
934
935
936
938 assertEqual = self . assertEqual
939 assertFalse = self . assertFalse
940
941 events = [ ]
942 class Target ( object ) :
943 def start ( self , tag , attrib ) :
944 events . append ( "start" )
945 assertFalse ( attrib )
946 assertEqual ( "TAG" , tag )
947 def end ( self , tag ) :
948 events . append ( "end" )
949 assertEqual ( "TAG" , tag )
950 def close ( self ) :
951 return "DONE"
952
953 parser = self . etree . XMLParser ( target = Target ( ) )
954 tree = self . etree . ElementTree ( )
955
956 self . assertRaises ( TypeError ,
957 tree . parse , BytesIO ( "<TAG/>" ) , parser = parser )
958 self . assertEqual ( [ "start" , "end" ] , events )
959
961
962 events = [ ]
963 class Target ( object ) :
964 def start ( self , tag , attrib ) :
965 events . append ( "start-" + tag )
966 def end ( self , tag ) :
967 events . append ( "end-" + tag )
968 if tag == 'a' :
969 raise ValueError ( "dead and gone" )
970 def data ( self , data ) :
971 events . append ( "data-" + data )
972 def close ( self ) :
973 events . append ( "close" )
974 return "DONE"
975
976 parser = self . etree . XMLParser ( target = Target ( ) )
977
978 try :
979 parser . feed ( _bytes ( '<root>A<a>ca</a>B</root>' ) )
980 done = parser . close ( )
981 self . fail ( "error expected, but parsing succeeded" )
982 except ValueError :
983 done = 'value error received as expected'
984
985 self . assertEqual ( [ "start-root" , "data-A" , "start-a" ,
986 "data-ca" , "end-a" , "close" ] ,
987 events )
988
990
991 events = [ ]
992 class Target ( object ) :
993 def start ( self , tag , attrib ) :
994 events . append ( "start-" + tag )
995 def end ( self , tag ) :
996 events . append ( "end-" + tag )
997 if tag == 'a' :
998 raise ValueError ( "dead and gone" )
999 def data ( self , data ) :
1000 events . append ( "data-" + data )
1001 def close ( self ) :
1002 events . append ( "close" )
1003 return "DONE"
1004
1005 parser = self . etree . XMLParser ( target = Target ( ) )
1006
1007 try :
1008 done = self . etree . fromstring ( _bytes ( '<root>A<a>ca</a>B</root>' ) ,
1009 parser = parser )
1010 self . fail ( "error expected, but parsing succeeded" )
1011 except ValueError :
1012 done = 'value error received as expected'
1013
1014 self . assertEqual ( [ "start-root" , "data-A" , "start-a" ,
1015 "data-ca" , "end-a" , "close" ] ,
1016 events )
1017
1019
1020 events = [ ]
1021 class Target ( object ) :
1022 def start ( self , tag , attrib ) :
1023 events . append ( "start-" + tag )
1024 def end ( self , tag ) :
1025 events . append ( "end-" + tag )
1026 def data ( self , data ) :
1027 events . append ( "data-" + data )
1028 def comment ( self , text ) :
1029 events . append ( "comment-" + text )
1030 def close ( self ) :
1031 return "DONE"
1032
1033 parser = self . etree . XMLParser ( target = Target ( ) , collect_ids = False )
1034
1035 parser . feed ( _bytes ( '<!--a--><root xml:id="123">A<!--b-->' ) )
1036 parser . feed ( _bytes ( '<sub xml:id="321"/>B</root>' ) )
1037 done = parser . close ( )
1038
1039 self . assertEqual ( "DONE" , done )
1040 self . assertEqual ( [ "comment-a" , "start-root" , "data-A" , "comment-b" ,
1041 "start-sub" , "end-sub" , "data-B" , "end-root" ] ,
1042 events )
1043
1049 def end ( self , tag ) :
1050 events . append ( "end-" + tag )
1051 def data ( self , data ) :
1052 events . append ( "data-" + data )
1053 def comment ( self , text ) :
1054 events . append ( "comment-" + text )
1055 def close ( self ) :
1056 return "DONE"
1057
1058 parser = self . etree . XMLParser ( target = Target ( ) )
1059
1060 parser . feed ( _bytes ( '<!--a--><root>A<!--b--><sub/><!--c-->B</root><!--d-->' ) )
1061 done = parser . close ( )
1062
1063 self . assertEqual ( "DONE" , done )
1064 self . assertEqual ( [ "comment-a" , "start-root" , "data-A" , "comment-b" ,
1065 "start-sub" , "end-sub" , "comment-c" , "data-B" ,
1066 "end-root" , "comment-d" ] ,
1067 events )
1068
1070 events = [ ]
1071 class Target ( object ) :
1072 def start ( self , tag , attrib ) :
1073 events . append ( "start-" + tag )
1074 def end ( self , tag ) :
1075 events . append ( "end-" + tag )
1076 def data ( self , data ) :
1077 events . append ( "data-" + data )
1078 def pi ( self , target , data ) :
1079 events . append ( "pi-" + target + "-" + data )
1080 def close ( self ) :
1081 return "DONE"
1082
1083 parser = self . etree . XMLParser ( target = Target ( ) )
1084
1085 parser . feed ( _bytes ( '<?test a?><root>A<?test b?>B</root><?test c?>' ) )
1086 done = parser . close ( )
1087
1088 self . assertEqual ( "DONE" , done )
1089 self . assertEqual ( [ "pi-test-a" , "start-root" , "data-A" , "pi-test-b" ,
1090 "data-B" , "end-root" , "pi-test-c" ] ,
1091 events )
1092
1094 events = [ ]
1095 class Target ( object ) :
1096 def start ( self , tag , attrib ) :
1097 events . append ( "start-" + tag )
1098 def end ( self , tag ) :
1099 events . append ( "end-" + tag )
1100 def data ( self , data ) :
1101 events . append ( "data-" + data )
1102 def close ( self ) :
1103 return "DONE"
1104
1105 parser = self . etree . XMLParser ( target = Target ( ) ,
1106 strip_cdata = False )
1107
1108 parser . feed ( _bytes ( '<root>A<a><![CDATA[ca]]></a>B</root>' ) )
1109 done = parser . close ( )
1110
1111 self . assertEqual ( "DONE" , done )
1112 self . assertEqual ( [ "start-root" , "data-A" , "start-a" ,
1113 "data-ca" , "end-a" , "data-B" , "end-root" ] ,
1114 events )
1115
1117 events = [ ]
1118 class Target ( object ) :
1119 def start ( self , tag , attrib ) :
1120 events . append ( "start-" + tag )
1121 def end ( self , tag ) :
1122 events . append ( "end-" + tag )
1123 def data ( self , data ) :
1124 events . append ( "data-" + data )
1125 def close ( self ) :
1126 events . append ( "close" )
1127 return "DONE"
1128
1129 parser = self . etree . XMLParser ( target = Target ( ) ,
1130 recover = True )
1131
1132 parser . feed ( _bytes ( '<root>A<a>ca</a>B</not-root>' ) )
1133 done = parser . close ( )
1134
1135 self . assertEqual ( "DONE" , done )
1136 self . assertEqual ( [ "start-root" , "data-A" , "start-a" ,
1137 "data-ca" , "end-a" , "data-B" ,
1138 "end-root" , "close" ] ,
1139 events )
1140
1150
1160
1169
1179
1181 iterwalk = self . etree . iterwalk
1182 root = self . etree . XML ( _bytes ( '<a><b></b><c/></a>' ) )
1183
1184 iterator = iterwalk ( root , events = ( 'start' , 'end' ) )
1185 events = list ( iterator )
1186 self . assertEqual (
1187 [ ( 'start' , root ) , ( 'start' , root [ 0 ] ) , ( 'end' , root [ 0 ] ) ,
1188 ( 'start' , root [ 1 ] ) , ( 'end' , root [ 1 ] ) , ( 'end' , root ) ] ,
1189 events )
1190
1200
1210
1224
1235
1237 iterwalk = self . etree . iterwalk
1238 root = self . etree . XML ( _bytes ( '<a xmlns="ns1"><b><c xmlns="ns2"/></b></a>' ) )
1239
1240 attr_name = '{testns}bla'
1241 events = [ ]
1242 iterator = iterwalk ( root , events = ( 'start' , 'end' , 'start-ns' , 'end-ns' ) )
1243 for event , elem in iterator :
1244 events . append ( event )
1245 if event == 'start' :
1246 if elem . tag != '{ns1}a' :
1247 elem . set ( attr_name , 'value' )
1248
1249 self . assertEqual (
1250 [ 'start-ns' , 'start' , 'start' , 'start-ns' , 'start' ,
1251 'end' , 'end-ns' , 'end' , 'end' , 'end-ns' ] ,
1252 events )
1253
1254 self . assertEqual (
1255 None ,
1256 root . get ( attr_name ) )
1257 self . assertEqual (
1258 'value' ,
1259 root [ 0 ] . get ( attr_name ) )
1260
1273
1275 iterwalk = self . etree . iterwalk
1276 root = self . etree . XML ( _bytes ( '<a><b><c/></b><d><e/></d></a>' ) )
1277
1278 iterator = iterwalk ( root , events = ( 'start' , 'end' ) )
1279 tags = [ ]
1280 for event , elem in iterator :
1281 tags . append ( ( event , elem . tag ) )
1282 if elem . tag in ( 'b' , 'e' ) :
1283
1284 iterator . skip_subtree ( )
1285
1286 self . assertEqual (
1287 [ ( 'start' , 'a' ) ,
1288 ( 'start' , 'b' ) , ( 'end' , 'b' ) ,
1289 ( 'start' , 'd' ) ,
1290 ( 'start' , 'e' ) , ( 'end' , 'e' ) ,
1291 ( 'end' , 'd' ) ,
1292 ( 'end' , 'a' ) ] ,
1293 tags )
1294
1296 iterwalk = self . etree . iterwalk
1297 root = self . etree . XML ( _bytes (
1298 '<a xmlns="ns1"><b xmlns="nsb"><c xmlns="ns2"/></b><d xmlns="ns2"><e/></d></a>' ) )
1299
1300 events = [ ]
1301 iterator = iterwalk ( root , events = ( 'start' , 'start-ns' , 'end-ns' ) )
1302 for event , elem in iterator :
1303 if event in ( 'start-ns' , 'end-ns' ) :
1304 events . append ( ( event , elem ) )
1305 if event == 'start-ns' and elem == ( '' , 'nsb' ) :
1306 events . append ( 'skip' )
1307 iterator . skip_subtree ( )
1308 else :
1309 events . append ( ( event , elem . tag ) )
1310
1311 self . assertEqual (
1312 [ ( 'start-ns' , ( '' , 'ns1' ) ) ,
1313 ( 'start' , '{ns1}a' ) ,
1314 ( 'start-ns' , ( '' , 'nsb' ) ) ,
1315 'skip' ,
1316 ( 'start' , '{nsb}b' ) ,
1317 ( 'end-ns' , None ) ,
1318 ( 'start-ns' , ( '' , 'ns2' ) ) ,
1319 ( 'start' , '{ns2}d' ) ,
1320 ( 'start' , '{ns2}e' ) ,
1321 ( 'end-ns' , None ) ,
1322 ( 'end-ns' , None )
1323 ] ,
1324 events )
1325
1336
1338 parse = self . etree . parse
1339 parser = self . etree . XMLParser ( dtd_validation = True )
1340 assertEqual = self . assertEqual
1341 test_url = _str ( "__nosuch.dtd" )
1342
1343 class MyResolver ( self . etree . Resolver ) :
1344 def resolve ( self , url , id , context ) :
1345 assertEqual ( url , test_url )
1346 return self . resolve_string (
1347 _str ( '''<!ENTITY myentity "%s">
1348 <!ELEMENT doc ANY>''' ) % url , context )
1349
1350 parser . resolvers . add ( MyResolver ( ) )
1351
1352 xml = _str ( '<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>' ) % test_url
1353 tree = parse ( StringIO ( xml ) , parser )
1354 root = tree . getroot ( )
1355 self . assertEqual ( root . text , test_url )
1356
1358 parse = self . etree . parse
1359 parser = self . etree . XMLParser ( dtd_validation = True )
1360 assertEqual = self . assertEqual
1361 test_url = _str ( "__nosuch.dtd" )
1362
1363 class MyResolver ( self . etree . Resolver ) :
1364 def resolve ( self , url , id , context ) :
1365 assertEqual ( url , test_url )
1366 return self . resolve_string (
1367 ( _str ( '''<!ENTITY myentity "%s">
1368 <!ELEMENT doc ANY>''' ) % url ) . encode ( 'utf-8' ) ,
1369 context )
1370
1371 parser . resolvers . add ( MyResolver ( ) )
1372
1373 xml = _str ( '<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>' ) % test_url
1374 tree = parse ( StringIO ( xml ) , parser )
1375 root = tree . getroot ( )
1376 self . assertEqual ( root . text , test_url )
1377
1379 parse = self . etree . parse
1380 parser = self . etree . XMLParser ( dtd_validation = True )
1381 assertEqual = self . assertEqual
1382 test_url = _str ( "__nosuch.dtd" )
1383
1384 class MyResolver ( self . etree . Resolver ) :
1385 def resolve ( self , url , id , context ) :
1386 assertEqual ( url , test_url )
1387 return self . resolve_file (
1388 SillyFileLike (
1389 _str ( '''<!ENTITY myentity "%s">
1390 <!ELEMENT doc ANY>''' ) % url ) , context )
1391
1392 parser . resolvers . add ( MyResolver ( ) )
1393
1394 xml = _str ( '<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>' ) % test_url
1395 tree = parse ( StringIO ( xml ) , parser )
1396 root = tree . getroot ( )
1397 self . assertEqual ( root . text , test_url )
1398
1400 parse = self . etree . parse
1401 parser = self . etree . XMLParser ( attribute_defaults = True )
1402 assertEqual = self . assertEqual
1403 test_url = _str ( "__nosuch.dtd" )
1404
1405 class MyResolver ( self . etree . Resolver ) :
1406 def resolve ( self , url , id , context ) :
1407 assertEqual ( url , test_url )
1408 return self . resolve_filename (
1409 fileInTestDir ( 'test.dtd' ) , context )
1410
1411 parser . resolvers . add ( MyResolver ( ) )
1412
1413 xml = _str ( '<!DOCTYPE a SYSTEM "%s"><a><b/></a>' ) % test_url
1414 tree = parse ( StringIO ( xml ) , parser )
1415 root = tree . getroot ( )
1416 self . assertEqual (
1417 root . attrib , { 'default' : 'valueA' } )
1418 self . assertEqual (
1419 root [ 0 ] . attrib , { 'default' : 'valueB' } )
1420
1435
1436 parser . resolvers . add ( MyResolver ( ) )
1437
1438 xml = _str ( '<!DOCTYPE a SYSTEM "%s"><a><b/></a>' ) % test_url
1439 tree = parse ( StringIO ( xml ) , parser ,
1440 base_url = fileUrlInTestDir ( '__test.xml' ) )
1441 root = tree . getroot ( )
1442 self . assertEqual (
1443 root . attrib , { 'default' : 'valueA' } )
1444 self . assertEqual (
1445 root [ 0 ] . attrib , { 'default' : 'valueB' } )
1446
1448 parse = self . etree . parse
1449 parser = self . etree . XMLParser ( attribute_defaults = True )
1450 assertEqual = self . assertEqual
1451 test_url = _str ( "__nosuch.dtd" )
1452
1453 class MyResolver ( self . etree . Resolver ) :
1454 def resolve ( self , url , id , context ) :
1455 assertEqual ( url , test_url )
1456 return self . resolve_file (
1457 open ( fileInTestDir ( 'test.dtd' ) , 'rb' ) , context )
1458
1459 parser . resolvers . add ( MyResolver ( ) )
1460
1461 xml = _str ( '<!DOCTYPE a SYSTEM "%s"><a><b/></a>' ) % test_url
1462 tree = parse ( StringIO ( xml ) , parser )
1463 root = tree . getroot ( )
1464 self . assertEqual (
1465 root . attrib , { 'default' : 'valueA' } )
1466 self . assertEqual (
1467 root [ 0 ] . attrib , { 'default' : 'valueB' } )
1468
1470 parse = self . etree . parse
1471 parser = self . etree . XMLParser ( load_dtd = True )
1472 assertEqual = self . assertEqual
1473 test_url = _str ( "__nosuch.dtd" )
1474
1475 class check ( object ) :
1476 resolved = False
1477
1478 class MyResolver ( self . etree . Resolver ) :
1479 def resolve ( self , url , id , context ) :
1480 assertEqual ( url , test_url )
1481 check . resolved = True
1482 return self . resolve_empty ( context )
1483
1484 parser . resolvers . add ( MyResolver ( ) )
1485
1486 xml = _str ( '<!DOCTYPE doc SYSTEM "%s"><doc>&myentity;</doc>' ) % test_url
1487 self . assertRaises ( etree . XMLSyntaxError , parse , StringIO ( xml ) , parser )
1488 self . assertTrue ( check . resolved )
1489
1496
1497 class MyResolver ( self . etree . Resolver ) :
1498 def resolve ( self , url , id , context ) :
1499 raise _LocalException
1500
1501 parser . resolvers . add ( MyResolver ( ) )
1502
1503 xml = '<!DOCTYPE doc SYSTEM "test"><doc>&myentity;</doc>'
1504 self . assertRaises ( _LocalException , parse , BytesIO ( xml ) , parser )
1505
1506 if etree . LIBXML_VERSION > ( 2 , 6 , 20 ) :
1523
1525 xml = _bytes ( '''<!DOCTYPE root [ <!ENTITY nbsp " "> ]>
1526 <root>
1527 <child1/>
1528 <child2/>
1529 <child3> </child3>
1530 </root>''' )
1531
1532 parser = self . etree . XMLParser ( resolve_entities = False )
1533 root = etree . fromstring ( xml , parser )
1534 self . assertEqual ( [ el . tag for el in root ] ,
1535 [ 'child1' , 'child2' , 'child3' ] )
1536
1537 root [ 0 ] = root [ - 1 ]
1538 self . assertEqual ( [ el . tag for el in root ] ,
1539 [ 'child3' , 'child2' ] )
1540 self . assertEqual ( root [ 0 ] [ 0 ] . text , ' ' )
1541 self . assertEqual ( root [ 0 ] [ 0 ] . name , 'nbsp' )
1542
1558
1565
1567 Entity = self . etree . Entity
1568 self . assertRaises ( ValueError , Entity , 'a b c' )
1569 self . assertRaises ( ValueError , Entity , 'a,b' )
1570 self . assertRaises ( ValueError , Entity , 'a\0b' )
1571 self . assertRaises ( ValueError , Entity , '#abc' )
1572 self . assertRaises ( ValueError , Entity , '#xxyz' )
1573
1586
1607
1620
1632
1641
1650
1651
1661
1670
1672 Element = self . etree . Element
1673 SubElement = self . etree . SubElement
1674 root = Element ( 'root' )
1675 self . assertRaises ( ValueError , root . append , root )
1676 child = SubElement ( root , 'child' )
1677 self . assertRaises ( ValueError , child . append , root )
1678 child2 = SubElement ( child , 'child2' )
1679 self . assertRaises ( ValueError , child2 . append , root )
1680 self . assertRaises ( ValueError , child2 . append , child )
1681 self . assertEqual ( 'child2' , root [ 0 ] [ 0 ] . tag )
1682
1695
1708
1719
1730
1740
1750
1766
1782
1788
1803
1816
1831
1844
1859
1872
1887
1900
1901
1909
1910
1920
1921
1936
1937
1947
1948
1959
1986
1987
1989 self . assertRaises ( TypeError , self . etree . dump , None )
1990
2003
2016
2037
2046
2048 XML = self . etree . XML
2049
2050 root = XML ( _bytes ( '<doc><one/><two>Two</two>Hm<three/></doc>' ) )
2051 result = [ ]
2052 for el in root . iterchildren ( reversed = True ) :
2053 result . append ( el . tag )
2054 self . assertEqual ( [ 'three' , 'two' , 'one' ] , result )
2055
2064
2073
2082
2084 XML = self . etree . XML
2085
2086 root = XML ( _bytes ( '<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>' ) )
2087 result = [ ]
2088 for el in root . iterchildren ( tag = [ 'two' , 'three' ] ) :
2089 result . append ( el . text )
2090 self . assertEqual ( [ 'Two' , 'Bla' , None ] , result )
2091
2093 XML = self . etree . XML
2094
2095 root = XML ( _bytes ( '<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>' ) )
2096 result = [ ]
2097 for el in root . iterchildren ( 'two' , 'three' ) :
2098 result . append ( el . text )
2099 self . assertEqual ( [ 'Two' , 'Bla' , None ] , result )
2100
2102 XML = self . etree . XML
2103
2104 root = XML ( _bytes ( '<doc><one/><two>Two</two>Hm<two>Bla</two><three/></doc>' ) )
2105 result = [ ]
2106 for el in root . iterchildren ( reversed = True , tag = [ 'two' , 'three' ] ) :
2107 result . append ( el . text )
2108 self . assertEqual ( [ None , 'Bla' , 'Two' ] , result )
2109
2130
2152
2154 Element = self . etree . Element
2155 SubElement = self . etree . SubElement
2156
2157 a = Element ( 'a' )
2158 b = SubElement ( a , 'b' )
2159 c = SubElement ( a , 'c' )
2160 d = SubElement ( b , 'd' )
2161 self . assertEqual (
2162 [ b , a ] ,
2163 list ( d . iterancestors ( tag = ( 'a' , 'b' ) ) ) )
2164 self . assertEqual (
2165 [ b , a ] ,
2166 list ( d . iterancestors ( 'a' , 'b' ) ) )
2167
2168 self . assertEqual (
2169 [ ] ,
2170 list ( d . iterancestors ( tag = ( 'w' , 'x' , 'y' , 'z' ) ) ) )
2171 self . assertEqual (
2172 [ ] ,
2173 list ( d . iterancestors ( 'w' , 'x' , 'y' , 'z' ) ) )
2174
2175 self . assertEqual (
2176 [ ] ,
2177 list ( d . iterancestors ( tag = ( 'd' , 'x' ) ) ) )
2178 self . assertEqual (
2179 [ ] ,
2180 list ( d . iterancestors ( 'd' , 'x' ) ) )
2181
2182 self . assertEqual (
2183 [ b , a ] ,
2184 list ( d . iterancestors ( tag = ( 'b' , '*' ) ) ) )
2185 self . assertEqual (
2186 [ b , a ] ,
2187 list ( d . iterancestors ( 'b' , '*' ) ) )
2188
2189 self . assertEqual (
2190 [ b ] ,
2191 list ( d . iterancestors ( tag = ( 'b' , 'c' ) ) ) )
2192 self . assertEqual (
2193 [ b ] ,
2194 list ( d . iterancestors ( 'b' , 'c' ) ) )
2195
2212
2214 Element = self . etree . Element
2215 SubElement = self . etree . SubElement
2216
2217 a = Element ( 'a' )
2218 b = SubElement ( a , 'b' )
2219 c = SubElement ( a , 'c' )
2220 d = SubElement ( b , 'd' )
2221 e = SubElement ( c , 'e' )
2222
2223 self . assertEqual (
2224 [ ] ,
2225 list ( a . iterdescendants ( 'a' ) ) )
2226 self . assertEqual (
2227 [ ] ,
2228 list ( a . iterdescendants ( tag = 'a' ) ) )
2229
2230 a2 = SubElement ( e , 'a' )
2231 self . assertEqual (
2232 [ a2 ] ,
2233 list ( a . iterdescendants ( 'a' ) ) )
2234
2235 self . assertEqual (
2236 [ a2 ] ,
2237 list ( c . iterdescendants ( 'a' ) ) )
2238 self . assertEqual (
2239 [ a2 ] ,
2240 list ( c . iterdescendants ( tag = 'a' ) ) )
2241
2243 Element = self . etree . Element
2244 SubElement = self . etree . SubElement
2245
2246 a = Element ( 'a' )
2247 b = SubElement ( a , 'b' )
2248 c = SubElement ( a , 'c' )
2249 d = SubElement ( b , 'd' )
2250 e = SubElement ( c , 'e' )
2251
2252 self . assertEqual (
2253 [ b , e ] ,
2254 list ( a . iterdescendants ( tag = ( 'a' , 'b' , 'e' ) ) ) )
2255 self . assertEqual (
2256 [ b , e ] ,
2257 list ( a . iterdescendants ( 'a' , 'b' , 'e' ) ) )
2258
2259 a2 = SubElement ( e , 'a' )
2260 self . assertEqual (
2261 [ b , a2 ] ,
2262 list ( a . iterdescendants ( tag = ( 'a' , 'b' ) ) ) )
2263 self . assertEqual (
2264 [ b , a2 ] ,
2265 list ( a . iterdescendants ( 'a' , 'b' ) ) )
2266
2267 self . assertEqual (
2268 [ ] ,
2269 list ( c . iterdescendants ( tag = ( 'x' , 'y' , 'z' ) ) ) )
2270 self . assertEqual (
2271 [ ] ,
2272 list ( c . iterdescendants ( 'x' , 'y' , 'z' ) ) )
2273
2274 self . assertEqual (
2275 [ b , d , c , e , a2 ] ,
2276 list ( a . iterdescendants ( tag = ( 'x' , 'y' , 'z' , '*' ) ) ) )
2277 self . assertEqual (
2278 [ b , d , c , e , a2 ] ,
2279 list ( a . iterdescendants ( 'x' , 'y' , 'z' , '*' ) ) )
2280
2298
2315
2333
2357
2359 Element = self . etree . Element
2360 SubElement = self . etree . SubElement
2361
2362 a = Element ( 'a' )
2363 b = SubElement ( a , 'b' )
2364 c = SubElement ( a , 'c' )
2365 d = SubElement ( b , 'd' )
2366 self . assertEqual (
2367 [ ] ,
2368 list ( a . itersiblings ( tag = 'XXX' ) ) )
2369 self . assertEqual (
2370 [ c ] ,
2371 list ( b . itersiblings ( tag = 'c' ) ) )
2372 self . assertEqual (
2373 [ c ] ,
2374 list ( b . itersiblings ( tag = '*' ) ) )
2375 self . assertEqual (
2376 [ b ] ,
2377 list ( c . itersiblings ( preceding = True , tag = 'b' ) ) )
2378 self . assertEqual (
2379 [ ] ,
2380 list ( c . itersiblings ( preceding = True , tag = 'c' ) ) )
2381
2383 Element = self . etree . Element
2384 SubElement = self . etree . SubElement
2385
2386 a = Element ( 'a' )
2387 b = SubElement ( a , 'b' )
2388 c = SubElement ( a , 'c' )
2389 d = SubElement ( b , 'd' )
2390 e = SubElement ( a , 'e' )
2391 self . assertEqual (
2392 [ ] ,
2393 list ( a . itersiblings ( tag = ( 'XXX' , 'YYY' ) ) ) )
2394 self . assertEqual (
2395 [ c , e ] ,
2396 list ( b . itersiblings ( tag = ( 'c' , 'd' , 'e' ) ) ) )
2397 self . assertEqual (
2398 [ b ] ,
2399 list ( c . itersiblings ( preceding = True , tag = ( 'b' , 'b' , 'c' , 'd' ) ) ) )
2400 self . assertEqual (
2401 [ c , b ] ,
2402 list ( e . itersiblings ( preceding = True , tag = ( 'c' , '*' ) ) ) )
2403
2405 parseid = self . etree . parseid
2406 XML = self . etree . XML
2407 xml_text = _bytes ( '''
2408 <!DOCTYPE document [
2409 <!ELEMENT document (h1,p)*>
2410 <!ELEMENT h1 (#PCDATA)>
2411 <!ATTLIST h1 myid ID #REQUIRED>
2412 <!ELEMENT p (#PCDATA)>
2413 <!ATTLIST p someid ID #REQUIRED>
2414 ]>
2415 <document>
2416 <h1 myid="chapter1">...</h1>
2417 <p id="note1" class="note">...</p>
2418 <p>Regular paragraph.</p>
2419 <p xml:id="xmlid">XML:ID paragraph.</p>
2420 <p someid="warn1" class="warning">...</p>
2421 </document>
2422 ''' )
2423
2424 tree , dic = parseid ( BytesIO ( xml_text ) )
2425 root = tree . getroot ( )
2426 root2 = XML ( xml_text )
2427 self . assertEqual ( self . _writeElement ( root ) ,
2428 self . _writeElement ( root2 ) )
2429 expected = {
2430 "chapter1" : root [ 0 ] ,
2431 "xmlid" : root [ 3 ] ,
2432 "warn1" : root [ 4 ]
2433 }
2434 self . assertTrue ( "chapter1" in dic )
2435 self . assertTrue ( "warn1" in dic )
2436 self . assertTrue ( "xmlid" in dic )
2437 self . _checkIDDict ( dic , expected )
2438
2440 XMLDTDID = self . etree . XMLDTDID
2441 XML = self . etree . XML
2442 xml_text = _bytes ( '''
2443 <!DOCTYPE document [
2444 <!ELEMENT document (h1,p)*>
2445 <!ELEMENT h1 (#PCDATA)>
2446 <!ATTLIST h1 myid ID #REQUIRED>
2447 <!ELEMENT p (#PCDATA)>
2448 <!ATTLIST p someid ID #REQUIRED>
2449 ]>
2450 <document>
2451 <h1 myid="chapter1">...</h1>
2452 <p id="note1" class="note">...</p>
2453 <p>Regular paragraph.</p>
2454 <p xml:id="xmlid">XML:ID paragraph.</p>
2455 <p someid="warn1" class="warning">...</p>
2456 </document>
2457 ''' )
2458
2459 root , dic = XMLDTDID ( xml_text )
2460 root2 = XML ( xml_text )
2461 self . assertEqual ( self . _writeElement ( root ) ,
2462 self . _writeElement ( root2 ) )
2463 expected = {
2464 "chapter1" : root [ 0 ] ,
2465 "xmlid" : root [ 3 ] ,
2466 "warn1" : root [ 4 ]
2467 }
2468 self . assertTrue ( "chapter1" in dic )
2469 self . assertTrue ( "warn1" in dic )
2470 self . assertTrue ( "xmlid" in dic )
2471 self . _checkIDDict ( dic , expected )
2472
2474 XMLDTDID = self . etree . XMLDTDID
2475 XML = self . etree . XML
2476 xml_text = _bytes ( '''
2477 <document>
2478 <h1 myid="chapter1">...</h1>
2479 <p id="note1" class="note">...</p>
2480 <p>Regular paragraph.</p>
2481 <p someid="warn1" class="warning">...</p>
2482 </document>
2483 ''' )
2484
2485 root , dic = XMLDTDID ( xml_text )
2486 root2 = XML ( xml_text )
2487 self . assertEqual ( self . _writeElement ( root ) ,
2488 self . _writeElement ( root2 ) )
2489 expected = { }
2490 self . _checkIDDict ( dic , expected )
2491
2493 XMLDTDID = self . etree . XMLDTDID
2494 XML = self . etree . XML
2495 xml_text = _bytes ( '''
2496 <!DOCTYPE document [
2497 <!ELEMENT document (h1,p)*>
2498 <!ELEMENT h1 (#PCDATA)>
2499 <!ATTLIST h1 myid ID #REQUIRED>
2500 <!ELEMENT p (#PCDATA)>
2501 <!ATTLIST p someid ID #REQUIRED>
2502 ]>
2503 <document>
2504 <h1 myid="chapter1">...</h1>
2505 <p id="note1" class="note">...</p>
2506 <p>Regular paragraph.</p>
2507 <p xml:id="xmlid">XML:ID paragraph.</p>
2508 <p someid="warn1" class="warning">...</p>
2509 </document>
2510 ''' )
2511
2512 parser = etree . XMLParser ( collect_ids = False )
2513 root , dic = XMLDTDID ( xml_text , parser = parser )
2514 root2 = XML ( xml_text )
2515 self . assertEqual ( self . _writeElement ( root ) ,
2516 self . _writeElement ( root2 ) )
2517 self . assertFalse ( dic )
2518 self . _checkIDDict ( dic , { } )
2519
2521 self . assertEqual ( len ( dic ) ,
2522 len ( expected ) )
2523 self . assertEqual ( sorted ( dic . items ( ) ) ,
2524 sorted ( expected . items ( ) ) )
2525 if sys . version_info < ( 3 , ) :
2526 self . assertEqual ( sorted ( dic . iteritems ( ) ) ,
2527 sorted ( expected . iteritems ( ) ) )
2528 self . assertEqual ( sorted ( dic . keys ( ) ) ,
2529 sorted ( expected . keys ( ) ) )
2530 if sys . version_info < ( 3 , ) :
2531 self . assertEqual ( sorted ( dic . iterkeys ( ) ) ,
2532 sorted ( expected . iterkeys ( ) ) )
2533 if sys . version_info < ( 3 , ) :
2534 self . assertEqual ( sorted ( dic . values ( ) ) ,
2535 sorted ( expected . values ( ) ) )
2536 self . assertEqual ( sorted ( dic . itervalues ( ) ) ,
2537 sorted ( expected . itervalues ( ) ) )
2538
2540 etree = self . etree
2541
2542 r = { 'foo' : 'http://ns.infrae.com/foo' }
2543 e = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2544 self . assertEqual (
2545 'foo' ,
2546 e . prefix )
2547 self . assertEqual (
2548 _bytes ( '<foo:bar xmlns:foo="http://ns.infrae.com/foo"></foo:bar>' ) ,
2549 self . _writeElement ( e ) )
2550
2552 etree = self . etree
2553
2554 r = { None : 'http://ns.infrae.com/foo' }
2555 e = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2556 self . assertEqual (
2557 None ,
2558 e . prefix )
2559 self . assertEqual (
2560 '{http://ns.infrae.com/foo}bar' ,
2561 e . tag )
2562 self . assertEqual (
2563 _bytes ( '<bar xmlns="http://ns.infrae.com/foo"></bar>' ) ,
2564 self . _writeElement ( e ) )
2565
2567 etree = self . etree
2568
2569 r = { None : 'http://ns.infrae.com/foo' , 'p' : 'http://test/' }
2570 e = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2571 self . assertEqual ( None , e . prefix )
2572 self . assertEqual ( '{http://ns.infrae.com/foo}bar' , e . tag )
2573 self . assertEqual (
2574 _bytes ( '<bar xmlns="http://ns.infrae.com/foo" xmlns:p="http://test/"></bar>' ) ,
2575 self . _writeElement ( e ) )
2576
2578 etree = self . etree
2579
2580 r = { None : 'http://ns.infrae.com/foo' ,
2581 'hoi' : 'http://ns.infrae.com/hoi' }
2582 e = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2583 e . set ( '{http://ns.infrae.com/hoi}test' , 'value' )
2584 self . assertEqual (
2585 _bytes ( '<bar xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi" hoi:test="value"></bar>' ) ,
2586 self . _writeElement ( e ) )
2587
2589 etree = self . etree
2590
2591 root = etree . Element ( '{http://test/ns}root' ,
2592 nsmap = { None : 'http://test/ns' } )
2593 sub = etree . Element ( '{http://test/ns}sub' ,
2594 nsmap = { 'test' : 'http://test/ns' } )
2595
2596 sub . attrib [ '{http://test/ns}attr' ] = 'value'
2597 self . assertEqual ( sub . attrib [ '{http://test/ns}attr' ] , 'value' )
2598 self . assertEqual (
2599 _bytes ( '<test:sub xmlns:test="http://test/ns" test:attr="value"/>' ) ,
2600 etree . tostring ( sub ) )
2601
2602 root . append ( sub )
2603 self . assertEqual (
2604 _bytes ( '<root xmlns="http://test/ns">'
2605 '<sub xmlns:test="http://test/ns" test:attr="value"/>'
2606 '</root>' ) ,
2607 etree . tostring ( root ) )
2608
2610 etree = self . etree
2611
2612 root = etree . Element ( 'root' )
2613 sub = etree . Element ( '{http://test/ns}sub' ,
2614 nsmap = { 'test' : 'http://test/ns' } )
2615
2616 sub . attrib [ '{http://test/ns}attr' ] = 'value'
2617 self . assertEqual ( sub . attrib [ '{http://test/ns}attr' ] , 'value' )
2618 self . assertEqual (
2619 _bytes ( '<test:sub xmlns:test="http://test/ns" test:attr="value"/>' ) ,
2620 etree . tostring ( sub ) )
2621
2622 root . append ( sub )
2623 self . assertEqual (
2624 _bytes ( '<root>'
2625 '<test:sub xmlns:test="http://test/ns" test:attr="value"/>'
2626 '</root>' ) ,
2627 etree . tostring ( root ) )
2628
2630 etree = self . etree
2631
2632 root = etree . Element ( 'root' )
2633 sub = etree . Element ( '{http://test/ns}sub' ,
2634 nsmap = { None : 'http://test/ns' } )
2635
2636 sub . attrib [ '{http://test/ns}attr' ] = 'value'
2637 self . assertEqual ( sub . attrib [ '{http://test/ns}attr' ] , 'value' )
2638 self . assertEqual (
2639 _bytes ( '<sub xmlns="http://test/ns" '
2640 'xmlns:ns0="http://test/ns" ns0:attr="value"/>' ) ,
2641 etree . tostring ( sub ) )
2642
2643 root . append ( sub )
2644 self . assertEqual (
2645 _bytes ( '<root>'
2646 '<sub xmlns="http://test/ns"'
2647 ' xmlns:ns0="http://test/ns" ns0:attr="value"/>'
2648 '</root>' ) ,
2649 etree . tostring ( root ) )
2650
2652 etree = self . etree
2653
2654 root = etree . Element ( '{http://test/ns}root' ,
2655 nsmap = { 'test' : 'http://test/ns' ,
2656 None : 'http://test/ns' } )
2657 sub = etree . Element ( '{http://test/ns}sub' ,
2658 nsmap = { None : 'http://test/ns' } )
2659
2660 sub . attrib [ '{http://test/ns}attr' ] = 'value'
2661 self . assertEqual ( sub . attrib [ '{http://test/ns}attr' ] , 'value' )
2662 self . assertEqual (
2663 _bytes ( '<sub xmlns="http://test/ns" '
2664 'xmlns:ns0="http://test/ns" ns0:attr="value"/>' ) ,
2665 etree . tostring ( sub ) )
2666
2667 root . append ( sub )
2668 self . assertEqual (
2669 _bytes ( '<test:root xmlns:test="http://test/ns" xmlns="http://test/ns">'
2670 '<test:sub test:attr="value"/>'
2671 '</test:root>' ) ,
2672 etree . tostring ( root ) )
2673
2675 etree = self . etree
2676 r = { None : 'http://ns.infrae.com/foo' ,
2677 'hoi' : 'http://ns.infrae.com/hoi' }
2678 e = etree . Element ( '{http://ns.infrae.com/foo}z' , nsmap = r )
2679 tree = etree . ElementTree ( element = e )
2680 etree . SubElement ( e , '{http://ns.infrae.com/hoi}x' )
2681 self . assertEqual (
2682 _bytes ( '<z xmlns="http://ns.infrae.com/foo" xmlns:hoi="http://ns.infrae.com/hoi"><hoi:x></hoi:x></z>' ) ,
2683 self . _writeElement ( e ) )
2684
2686 etree = self . etree
2687
2688 r = { None : 'http://ns.infrae.com/foo' }
2689 e1 = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2690 e2 = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2691
2692 e1 . append ( e2 )
2693
2694 self . assertEqual (
2695 None ,
2696 e1 . prefix )
2697 self . assertEqual (
2698 None ,
2699 e1 [ 0 ] . prefix )
2700 self . assertEqual (
2701 '{http://ns.infrae.com/foo}bar' ,
2702 e1 . tag )
2703 self . assertEqual (
2704 '{http://ns.infrae.com/foo}bar' ,
2705 e1 [ 0 ] . tag )
2706
2708 etree = self . etree
2709
2710 r = { None : 'http://ns.infrae.com/BAR' }
2711 e1 = etree . Element ( '{http://ns.infrae.com/BAR}bar' , nsmap = r )
2712 e2 = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2713
2714 e1 . append ( e2 )
2715
2716 self . assertEqual (
2717 None ,
2718 e1 . prefix )
2719 self . assertNotEqual (
2720 None ,
2721 e2 . prefix )
2722 self . assertEqual (
2723 '{http://ns.infrae.com/BAR}bar' ,
2724 e1 . tag )
2725 self . assertEqual (
2726 '{http://ns.infrae.com/foo}bar' ,
2727 e2 . tag )
2728
2730 ns_href = "http://a.b.c"
2731 one = self . etree . fromstring (
2732 _bytes ( '<foo><bar xmlns:ns="%s"><ns:baz/></bar></foo>' % ns_href ) )
2733 baz = one [ 0 ] [ 0 ]
2734
2735 two = self . etree . fromstring (
2736 _bytes ( '<root xmlns:ns="%s"/>' % ns_href ) )
2737 two . append ( baz )
2738 del one
2739
2740 self . assertEqual ( '{%s}baz' % ns_href , baz . tag )
2741 self . assertEqual (
2742 _bytes ( '<root xmlns:ns="%s"><ns:baz/></root>' % ns_href ) ,
2743 self . etree . tostring ( two ) )
2744
2758
2775
2786
2788 xml = ( '<root>' +
2789 '' . join ( '<a xmlns:n{n}="NS{n}">' . format ( n = i ) for i in range ( 100 ) ) +
2790 '<n64:x/>' + '</a>' * 100 + '</root>' ) . encode ( 'utf8' )
2791 root = self . etree . fromstring ( xml )
2792 self . assertEqual ( xml , self . etree . tostring ( root ) )
2793 self . etree . cleanup_namespaces ( root )
2794 self . assertEqual (
2795 b'<root>' + b'<a>' * 64 + b'<a xmlns:n64="NS64">' + b'<a>' * 35 +
2796 b'<n64:x/>' + b'</a>' * 100 + b'</root>' ,
2797 self . etree . tostring ( root ) )
2798
2800 xml = ( '<root>' +
2801 '' . join ( '<a xmlns:n{n}="NS{n}">' . format ( n = i ) for i in range ( 100 ) ) +
2802 '<n64:x xmlns:a="A" a:attr="X"/>' +
2803 '</a>' * 100 +
2804 '</root>' ) . encode ( 'utf8' )
2805 root = self . etree . fromstring ( xml )
2806 self . assertEqual ( xml , self . etree . tostring ( root ) )
2807 self . etree . cleanup_namespaces ( root , top_nsmap = { 'n64' : 'NS64' } )
2808 self . assertEqual (
2809 b'<root xmlns:n64="NS64">' + b'<a>' * 100 +
2810 b'<n64:x xmlns:a="A" a:attr="X"/>' + b'</a>' * 100 + b'</root>' ,
2811 self . etree . tostring ( root ) )
2812
2814 xml = ( '<root xmlns:n64="NS64" xmlns:foo="FOO" xmlns:unused1="UNUSED" xmlns:no="NO">'
2815 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2816 '<foo>foo:bar</foo>'
2817 '</root>' ) . encode ( 'utf8' )
2818 root = self . etree . fromstring ( xml )
2819 self . assertEqual ( xml , self . etree . tostring ( root ) )
2820 self . etree . cleanup_namespaces ( root , keep_ns_prefixes = [ 'foo' ] )
2821 self . assertEqual (
2822 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2823 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2824 b'<foo>foo:bar</foo>'
2825 b'</root>' ,
2826 self . etree . tostring ( root ) )
2827
2829 xml = ( '<root xmlns:n64="NS64" xmlns:unused1="UNUSED" xmlns:no="NO">'
2830 '<sub xmlns:foo="FOO">'
2831 '<a xmlns:unused2="UNUSED"><n64:x xmlns:a="A" a:attr="X"/></a>'
2832 '<foo>foo:bar</foo>'
2833 '</sub>'
2834 '</root>' ) . encode ( 'utf8' )
2835 root = self . etree . fromstring ( xml )
2836 self . assertEqual ( xml , self . etree . tostring ( root ) )
2837 self . etree . cleanup_namespaces (
2838 root ,
2839 top_nsmap = { 'foo' : 'FOO' , 'unused1' : 'UNUSED' } ,
2840 keep_ns_prefixes = [ 'foo' ] )
2841 self . assertEqual (
2842 b'<root xmlns:n64="NS64" xmlns:foo="FOO">'
2843 b'<sub>'
2844 b'<a><n64:x xmlns:a="A" a:attr="X"/></a>'
2845 b'<foo>foo:bar</foo>'
2846 b'</sub>'
2847 b'</root>' ,
2848 self . etree . tostring ( root ) )
2849
2851 etree = self . etree
2852
2853 r = { None : 'http://ns.infrae.com/foo' ,
2854 'hoi' : 'http://ns.infrae.com/hoi' }
2855 e = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = r )
2856 self . assertEqual (
2857 r ,
2858 e . nsmap )
2859
2861 etree = self . etree
2862
2863 re = { None : 'http://ns.infrae.com/foo' ,
2864 'hoi' : 'http://ns.infrae.com/hoi' }
2865 e = etree . Element ( '{http://ns.infrae.com/foo}bar' , nsmap = re )
2866
2867 rs = { None : 'http://ns.infrae.com/honk' ,
2868 'top' : 'http://ns.infrae.com/top' }
2869 s = etree . SubElement ( e , '{http://ns.infrae.com/honk}bar' , nsmap = rs )
2870
2871 r = re . copy ( )
2872 r . update ( rs )
2873 self . assertEqual ( re , e . nsmap )
2874 self . assertEqual ( r , s . nsmap )
2875
2877 etree = self . etree
2878 el = etree . HTML ( '<hha:page-description>aa</hha:page-description>' ) . find ( './/page-description' )
2879 self . assertEqual ( { 'hha' : None } , el . nsmap )
2880
2882 Element = self . etree . Element
2883 SubElement = self . etree . SubElement
2884
2885 a = Element ( 'a' )
2886 b = SubElement ( a , 'b' )
2887 c = SubElement ( a , 'c' )
2888 d = SubElement ( b , 'd' )
2889 e = SubElement ( c , 'e' )
2890 f = SubElement ( c , 'f' )
2891
2892 self . assertEqual (
2893 [ a , b ] ,
2894 list ( a . getiterator ( 'a' , 'b' ) ) )
2895 self . assertEqual (
2896 [ ] ,
2897 list ( a . getiterator ( 'x' , 'y' ) ) )
2898 self . assertEqual (
2899 [ a , f ] ,
2900 list ( a . getiterator ( 'f' , 'a' ) ) )
2901 self . assertEqual (
2902 [ c , e , f ] ,
2903 list ( c . getiterator ( 'c' , '*' , 'a' ) ) )
2904 self . assertEqual (
2905 [ ] ,
2906 list ( a . getiterator ( ( ) , ( ) ) ) )
2907
2909 Element = self . etree . Element
2910 SubElement = self . etree . SubElement
2911
2912 a = Element ( 'a' )
2913 b = SubElement ( a , 'b' )
2914 c = SubElement ( a , 'c' )
2915 d = SubElement ( b , 'd' )
2916 e = SubElement ( c , 'e' )
2917 f = SubElement ( c , 'f' )
2918
2919 self . assertEqual (
2920 [ a , b ] ,
2921 list ( a . getiterator ( ( 'a' , 'b' ) ) ) )
2922 self . assertEqual (
2923 [ ] ,
2924 list ( a . getiterator ( ( 'x' , 'y' ) ) ) )
2925 self . assertEqual (
2926 [ a , f ] ,
2927 list ( a . getiterator ( ( 'f' , 'a' ) ) ) )
2928 self . assertEqual (
2929 [ c , e , f ] ,
2930 list ( c . getiterator ( ( 'c' , '*' , 'a' ) ) ) )
2931 self . assertEqual (
2932 [ ] ,
2933 list ( a . getiterator ( ( ) ) ) )
2934
2936 Element = self . etree . Element
2937 SubElement = self . etree . SubElement
2938
2939 a = Element ( '{a}a' )
2940 b = SubElement ( a , '{a}b' )
2941 c = SubElement ( a , '{a}c' )
2942 d = SubElement ( b , '{b}d' )
2943 e = SubElement ( c , '{a}e' )
2944 f = SubElement ( c , '{b}f' )
2945 g = SubElement ( c , 'g' )
2946
2947 self . assertEqual (
2948 [ a ] ,
2949 list ( a . getiterator ( '{a}a' ) ) )
2950 self . assertEqual (
2951 [ ] ,
2952 list ( a . getiterator ( '{b}a' ) ) )
2953 self . assertEqual (
2954 [ ] ,
2955 list ( a . getiterator ( 'a' ) ) )
2956 self . assertEqual (
2957 [ a , b , d , c , e , f , g ] ,
2958 list ( a . getiterator ( '*' ) ) )
2959 self . assertEqual (
2960 [ f ] ,
2961 list ( c . getiterator ( '{b}*' ) ) )
2962 self . assertEqual (
2963 [ d , f ] ,
2964 list ( a . getiterator ( '{b}*' ) ) )
2965 self . assertEqual (
2966 [ g ] ,
2967 list ( a . getiterator ( 'g' ) ) )
2968 self . assertEqual (
2969 [ g ] ,
2970 list ( a . getiterator ( '{}g' ) ) )
2971 self . assertEqual (
2972 [ g ] ,
2973 list ( a . getiterator ( '{}*' ) ) )
2974
2976 Element = self . etree . Element
2977 SubElement = self . etree . SubElement
2978
2979 a = Element ( '{a}a' )
2980 b = SubElement ( a , '{nsA}b' )
2981 c = SubElement ( b , '{nsB}b' )
2982 d = SubElement ( a , 'b' )
2983 e = SubElement ( a , '{nsA}e' )
2984 f = SubElement ( e , '{nsB}e' )
2985 g = SubElement ( e , 'e' )
2986
2987 self . assertEqual (
2988 [ b , c , d ] ,
2989 list ( a . getiterator ( '{*}b' ) ) )
2990 self . assertEqual (
2991 [ e , f , g ] ,
2992 list ( a . getiterator ( '{*}e' ) ) )
2993 self . assertEqual (
2994 [ a , b , c , d , e , f , g ] ,
2995 list ( a . getiterator ( '{*}*' ) ) )
2996
3021
3037
3054
3056 a = etree . Element ( "a" )
3057 b = etree . SubElement ( a , "b" )
3058 c = etree . SubElement ( a , "c" )
3059 d1 = etree . SubElement ( c , "d" )
3060 d2 = etree . SubElement ( c , "d" )
3061 c . text = d1 . text = 'TEXT'
3062
3063 tree = etree . ElementTree ( a )
3064 self . assertEqual ( '.' , tree . getelementpath ( a ) )
3065 self . assertEqual ( 'c/d[1]' , tree . getelementpath ( d1 ) )
3066 self . assertEqual ( 'c/d[2]' , tree . getelementpath ( d2 ) )
3067
3068 self . assertEqual ( d1 , tree . find ( tree . getelementpath ( d1 ) ) )
3069 self . assertEqual ( d2 , tree . find ( tree . getelementpath ( d2 ) ) )
3070
3071 tree = etree . ElementTree ( c )
3072 self . assertEqual ( '.' , tree . getelementpath ( c ) )
3073 self . assertEqual ( 'd[2]' , tree . getelementpath ( d2 ) )
3074 self . assertEqual ( d2 , tree . find ( tree . getelementpath ( d2 ) ) )
3075
3076 tree = etree . ElementTree ( b )
3077 self . assertEqual ( '.' , tree . getelementpath ( b ) )
3078 self . assertRaises ( ValueError , tree . getelementpath , a )
3079 self . assertRaises ( ValueError , tree . getelementpath , c )
3080 self . assertRaises ( ValueError , tree . getelementpath , d2 )
3081
3083 a = etree . Element ( "{http://ns1/}a" )
3084 b = etree . SubElement ( a , "{http://ns1/}b" )
3085 c = etree . SubElement ( a , "{http://ns1/}c" )
3086 d1 = etree . SubElement ( c , "{http://ns1/}d" )
3087 d2 = etree . SubElement ( c , "{http://ns2/}d" )
3088 d3 = etree . SubElement ( c , "{http://ns1/}d" )
3089
3090 tree = etree . ElementTree ( a )
3091 self . assertEqual ( '.' , tree . getelementpath ( a ) )
3092 self . assertEqual ( '{http://ns1/}c/{http://ns1/}d[1]' ,
3093 tree . getelementpath ( d1 ) )
3094 self . assertEqual ( '{http://ns1/}c/{http://ns2/}d' ,
3095 tree . getelementpath ( d2 ) )
3096 self . assertEqual ( '{http://ns1/}c/{http://ns1/}d[2]' ,
3097 tree . getelementpath ( d3 ) )
3098
3099 self . assertEqual ( a , tree . find ( tree . getelementpath ( a ) ) )
3100 self . assertEqual ( b , tree . find ( tree . getelementpath ( b ) ) )
3101 self . assertEqual ( c , tree . find ( tree . getelementpath ( c ) ) )
3102 self . assertEqual ( d1 , tree . find ( tree . getelementpath ( d1 ) ) )
3103 self . assertEqual ( d2 , tree . find ( tree . getelementpath ( d2 ) ) )
3104 self . assertEqual ( d3 , tree . find ( tree . getelementpath ( d3 ) ) )
3105
3106 tree = etree . ElementTree ( c )
3107 self . assertEqual ( '{http://ns1/}d[1]' , tree . getelementpath ( d1 ) )
3108 self . assertEqual ( '{http://ns2/}d' , tree . getelementpath ( d2 ) )
3109 self . assertEqual ( '{http://ns1/}d[2]' , tree . getelementpath ( d3 ) )
3110 self . assertEqual ( d1 , tree . find ( tree . getelementpath ( d1 ) ) )
3111 self . assertEqual ( d2 , tree . find ( tree . getelementpath ( d2 ) ) )
3112 self . assertEqual ( d3 , tree . find ( tree . getelementpath ( d3 ) ) )
3113
3114 tree = etree . ElementTree ( b )
3115 self . assertRaises ( ValueError , tree . getelementpath , d1 )
3116 self . assertRaises ( ValueError , tree . getelementpath , d2 )
3117
3124
3131
3140
3142 XML = self . etree . XML
3143 root = XML ( _bytes ( '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><b/></a>' ) )
3144 self . assertEqual ( len ( root . findall ( ".//{X}b" ) ) , 2 )
3145 self . assertEqual ( len ( root . findall ( ".//{X}*" ) ) , 2 )
3146 self . assertEqual ( len ( root . findall ( ".//b" ) ) , 3 )
3147
3149 XML = self . etree . XML
3150 root = XML ( _bytes ( '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>' ) )
3151 nsmap = { 'xx' : 'X' }
3152 self . assertEqual ( len ( root . findall ( ".//xx:b" , namespaces = nsmap ) ) , 2 )
3153 self . assertEqual ( len ( root . findall ( ".//xx:*" , namespaces = nsmap ) ) , 2 )
3154 self . assertEqual ( len ( root . findall ( ".//b" , namespaces = nsmap ) ) , 2 )
3155 nsmap = { 'xx' : 'Y' }
3156 self . assertEqual ( len ( root . findall ( ".//xx:b" , namespaces = nsmap ) ) , 1 )
3157 self . assertEqual ( len ( root . findall ( ".//xx:*" , namespaces = nsmap ) ) , 1 )
3158 self . assertEqual ( len ( root . findall ( ".//b" , namespaces = nsmap ) ) , 2 )
3159
3161 XML = self . etree . XML
3162 root = XML ( _bytes ( '<a xmlns:x="X" xmlns:y="Y"><x:b><c/></x:b><b/><c><x:b/><b/></c><y:b/></a>' ) )
3163 nsmap = { 'xx' : 'X' }
3164 self . assertEqual ( len ( root . findall ( ".//xx:b" , namespaces = nsmap ) ) , 2 )
3165 nsmap = { 'xx' : 'X' , None : 'Y' }
3166 self . assertEqual ( len ( root . findall ( ".//b" , namespaces = nsmap ) ) , 1 )
3167 nsmap = { 'xx' : 'X' , '' : 'Y' }
3168 self . assertRaises ( ValueError , root . findall , ".//xx:b" , namespaces = nsmap )
3169
3176
3178 etree = self . etree
3179 e = etree . Element ( 'foo' )
3180 for i in range ( 10 ) :
3181 etree . SubElement ( e , 'a%s' % i )
3182 for i in range ( 10 ) :
3183 self . assertEqual (
3184 i ,
3185 e . index ( e [ i ] ) )
3186 self . assertEqual (
3187 3 , e . index ( e [ 3 ] , 3 ) )
3188 self . assertRaises (
3189 ValueError , e . index , e [ 3 ] , 4 )
3190 self . assertRaises (
3191 ValueError , e . index , e [ 3 ] , 0 , 2 )
3192 self . assertRaises (
3193 ValueError , e . index , e [ 8 ] , 0 , - 3 )
3194 self . assertRaises (
3195 ValueError , e . index , e [ 8 ] , - 5 , - 3 )
3196 self . assertEqual (
3197 8 , e . index ( e [ 8 ] , 0 , - 1 ) )
3198 self . assertEqual (
3199 8 , e . index ( e [ 8 ] , - 12 , - 1 ) )
3200 self . assertEqual (
3201 0 , e . index ( e [ 0 ] , - 12 , - 1 ) )
3202
3204 etree = self . etree
3205 e = etree . Element ( 'foo' )
3206 for i in range ( 10 ) :
3207 el = etree . SubElement ( e , 'a%s' % i )
3208 el . text = "text%d" % i
3209 el . tail = "tail%d" % i
3210
3211 child0 = e [ 0 ]
3212 child1 = e [ 1 ]
3213 child2 = e [ 2 ]
3214
3215 e . replace ( e [ 0 ] , e [ 1 ] )
3216 self . assertEqual (
3217 9 , len ( e ) )
3218 self . assertEqual (
3219 child1 , e [ 0 ] )
3220 self . assertEqual (
3221 child1 . text , "text1" )
3222 self . assertEqual (
3223 child1 . tail , "tail1" )
3224 self . assertEqual (
3225 child0 . tail , "tail0" )
3226 self . assertEqual (
3227 child2 , e [ 1 ] )
3228
3229 e . replace ( e [ - 1 ] , e [ 0 ] )
3230 self . assertEqual (
3231 child1 , e [ - 1 ] )
3232 self . assertEqual (
3233 child1 . text , "text1" )
3234 self . assertEqual (
3235 child1 . tail , "tail1" )
3236 self . assertEqual (
3237 child2 , e [ 0 ] )
3238
3240 etree = self . etree
3241 e = etree . Element ( 'foo' )
3242 for i in range ( 10 ) :
3243 etree . SubElement ( e , 'a%s' % i )
3244
3245 new_element = etree . Element ( "test" )
3246 new_element . text = "TESTTEXT"
3247 new_element . tail = "TESTTAIL"
3248 child1 = e [ 1 ]
3249 e . replace ( e [ 0 ] , new_element )
3250 self . assertEqual (
3251 new_element , e [ 0 ] )
3252 self . assertEqual (
3253 "TESTTEXT" ,
3254 e [ 0 ] . text )
3255 self . assertEqual (
3256 "TESTTAIL" ,
3257 e [ 0 ] . tail )
3258 self . assertEqual (
3259 child1 , e [ 1 ] )
3260
3276
3294
3312
3330
3332 Element = self . etree . Element
3333 SubElement = self . etree . SubElement
3334 try :
3335 slice
3336 except NameError :
3337 print ( "slice() not found" )
3338 return
3339
3340 a = Element ( 'a' )
3341 b = SubElement ( a , 'b' )
3342 c = SubElement ( a , 'c' )
3343 d = SubElement ( a , 'd' )
3344 e = SubElement ( a , 'e' )
3345
3346 x = Element ( 'x' )
3347 y = Element ( 'y' )
3348 z = Element ( 'z' )
3349
3350 self . assertRaises (
3351 ValueError ,
3352 operator . setitem , a , slice ( 1 , None , 2 ) , [ x , y , z ] )
3353
3354 self . assertEqual (
3355 [ b , c , d , e ] ,
3356 list ( a ) )
3357
3370
3372 XML = self . etree . XML
3373 root = XML ( _bytes (
3374 '<?xml version="1.0"?>\n'
3375 '<root>' + '\n' * 65536 +
3376 '<p>' + '\n' * 65536 + '</p>\n' +
3377 '<br/>\n'
3378 '</root>' ) )
3379
3380 if self . etree . LIBXML_VERSION >= ( 2 , 9 ) :
3381 expected = [ 2 , 131074 , 131076 ]
3382 else :
3383 expected = [ 2 , 65535 , 65535 ]
3384
3385 self . assertEqual ( expected , [ el . sourceline for el in root . iter ( ) ] )
3386
3394
3403
3413
3423
3429
3437
3443
3450
3456
3458 etree = self . etree
3459 xml_header = '<?xml version="1.0" encoding="ascii"?>'
3460 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3461 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3462 doctype_string = '<!DOCTYPE html PUBLIC "%s" "%s">' % ( pub_id , sys_id )
3463
3464 xml = _bytes ( xml_header + doctype_string + '<html><body></body></html>' )
3465
3466 tree = etree . parse ( BytesIO ( xml ) )
3467 docinfo = tree . docinfo
3468 self . assertEqual ( docinfo . encoding , "ascii" )
3469 self . assertEqual ( docinfo . xml_version , "1.0" )
3470 self . assertEqual ( docinfo . public_id , pub_id )
3471 self . assertEqual ( docinfo . system_url , sys_id )
3472 self . assertEqual ( docinfo . root_name , 'html' )
3473 self . assertEqual ( docinfo . doctype , doctype_string )
3474
3490
3502
3514
3520
3522 etree = self . etree
3523 pub_id = "-//W3C//DTD XHTML 1.0 Transitional//EN"
3524 sys_id = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
3525 doctype_string = _bytes ( '<!DOCTYPE html PUBLIC "%s" "%s">' % ( pub_id , sys_id ) )
3526
3527 xml = _bytes ( '<!DOCTYPE root>\n<root/>' )
3528 tree = etree . parse ( BytesIO ( xml ) )
3529 self . assertEqual ( xml . replace ( _bytes ( '<!DOCTYPE root>' ) , doctype_string ) ,
3530 etree . tostring ( tree , doctype = doctype_string ) )
3531
# root.base starts out as the base_url given to XML() with no xml:base
# attribute on the element; assigning root.base is then expected to update
# both root.base and the xml:base attribute.
3533 etree = self . etree
3534 root = etree . XML ( _bytes ( "<root/>" ) , base_url = "http://no/such/url" )
3535 self . assertEqual ( root . base , "http://no/such/url" )
3536 self . assertEqual (
3537 root . get ( '{http://www.w3.org/XML/1998/namespace}base' ) , None )
3538 root . base = "https://secret/url"
3539 self . assertEqual ( root . base , "https://secret/url" )
3540 self . assertEqual (
3541 root . get ( '{http://www.w3.org/XML/1998/namespace}base' ) ,
3542 "https://secret/url" )
3543
# Same starting point as an element created with a base_url, but here the
# xml:base attribute is written with set(); root.base is expected to follow
# the attribute value.
3545 etree = self . etree
3546 root = etree . XML ( _bytes ( "<root/>" ) , base_url = "http://no/such/url" )
3547 self . assertEqual ( root . base , "http://no/such/url" )
3548 self . assertEqual (
3549 root . get ( '{http://www.w3.org/XML/1998/namespace}base' ) , None )
3550 root . set ( '{http://www.w3.org/XML/1998/namespace}base' ,
3551 "https://secret/url" )
3552 self . assertEqual ( root . base , "https://secret/url" )
3553 self . assertEqual (
3554 root . get ( '{http://www.w3.org/XML/1998/namespace}base' ) ,
3555 "https://secret/url" )
3556
3562
3567
3574
3588
# A NUL character inside text, tail or a tag name must raise ValueError.
3590 Element = self . etree . Element
3591
3592 a = Element ( 'a' )
3593 self . assertRaises ( ValueError , setattr , a , "text" , 'ha\0ho' )
3594 self . assertRaises ( ValueError , setattr , a , "tail" , 'ha\0ho' )
3595
3596 self . assertRaises ( ValueError , Element , 'ha\0ho' )
3597
# Same NUL-character checks for text, tail and tag name, with the values
# passed through the _str() helper first.
3599 Element = self . etree . Element
3600
3601 a = Element ( 'a' )
3602 self . assertRaises ( ValueError , setattr , a , "text" ,
3603 _str ( 'ha\0ho' ) )
3604 self . assertRaises ( ValueError , setattr , a , "tail" ,
3605 _str ( 'ha\0ho' ) )
3606
3607 self . assertRaises ( ValueError , Element ,
3608 _str ( 'ha\0ho' ) )
3609
# The control characters \x07 and \x02 must raise ValueError when used in
# text, tail or a tag name.
3611 Element = self . etree . Element
3612
3613 a = Element ( 'a' )
3614 self . assertRaises ( ValueError , setattr , a , "text" , 'ha\x07ho' )
3615 self . assertRaises ( ValueError , setattr , a , "text" , 'ha\x02ho' )
3616
3617 self . assertRaises ( ValueError , setattr , a , "tail" , 'ha\x07ho' )
3618 self . assertRaises ( ValueError , setattr , a , "tail" , 'ha\x02ho' )
3619
3620 self . assertRaises ( ValueError , Element , 'ha\x07ho' )
3621 self . assertRaises ( ValueError , Element , 'ha\x02ho' )
3622
# Same \x07 / \x02 control-character checks for text, tail and tag name,
# with the values passed through the _str() helper first.
3624 Element = self . etree . Element
3625
3626 a = Element ( 'a' )
3627 self . assertRaises ( ValueError , setattr , a , "text" ,
3628 _str ( 'ha\x07ho' ) )
3629 self . assertRaises ( ValueError , setattr , a , "text" ,
3630 _str ( 'ha\x02ho' ) )
3631
3632 self . assertRaises ( ValueError , setattr , a , "tail" ,
3633 _str ( 'ha\x07ho' ) )
3634 self . assertRaises ( ValueError , setattr , a , "tail" ,
3635 _str ( 'ha\x02ho' ) )
3636
3637 self . assertRaises ( ValueError , Element ,
3638 _str ( 'ha\x07ho' ) )
3639 self . assertRaises ( ValueError , Element ,
3640 _str ( 'ha\x02ho' ) )
3641
# Control characters must still raise ValueError when they follow a
# non-ASCII character (\u1234) in text, tail or a tag name.
3643 Element = self . etree . Element
3644
3645 a = Element ( 'a' )
3646 self . assertRaises ( ValueError , setattr , a , "text" ,
3647 _str ( 'ha\u1234\x07ho' ) )
3648 self . assertRaises ( ValueError , setattr , a , "text" ,
3649 _str ( 'ha\u1234\x02ho' ) )
3650
3651 self . assertRaises ( ValueError , setattr , a , "tail" ,
3652 _str ( 'ha\u1234\x07ho' ) )
3653 self . assertRaises ( ValueError , setattr , a , "tail" ,
3654 _str ( 'ha\u1234\x02ho' ) )
3655
3656 self . assertRaises ( ValueError , Element ,
3657 _str ( 'ha\u1234\x07ho' ) )
3658 self . assertRaises ( ValueError , Element ,
3659 _str ( 'ha\u1234\x02ho' ) )
3660
3674
3679
3697
3717
# Parses an HTML fragment with the HTMLParser and serialises the <div> with
# method='html'.  The '\r\n' tail of the div is expected in the output by
# default and with with_tail=True, and expected to be left out with
# with_tail=False.
3719 tostring = self . etree . tostring
3720 html = self . etree . fromstring (
3721 '<html><body>'
3722 '<div><p>Some text<i>\r\n</i></p></div>\r\n'
3723 '</body></html>' ,
3724 parser = self . etree . HTMLParser ( ) )
3725 self . assertEqual ( html . tag , 'html' )
3726 div = html . find ( './/div' )
3727 self . assertEqual ( div . tail , '\r\n' )
3728 result = tostring ( div , method = 'html' )
3729 self . assertEqual (
3730 result ,
3731 _bytes ( "<div><p>Some text<i>\r\n</i></p></div>\r\n" ) )
3732 result = tostring ( div , method = 'html' , with_tail = True )
3733 self . assertEqual (
3734 result ,
3735 _bytes ( "<div><p>Some text<i>\r\n</i></p></div>\r\n" ) )
3736 result = tostring ( div , method = 'html' , with_tail = False )
3737 self . assertEqual (
3738 result ,
3739 _bytes ( "<div><p>Some text<i>\r\n</i></p></div>" ) )
3740
3762
# Checks the 'standalone' handling of tostring(): no standalone pseudo
# attribute when the argument is omitted, standalone='yes' for True and
# standalone='no' for False.  Each result with a standalone flag is parsed
# again to confirm docinfo.standalone round-trips as True / False; a tree
# built from a document without the flag reports None.
3764 tostring = self . etree . tostring
3765 XML = self . etree . XML
3766 ElementTree = self . etree . ElementTree
3767
3768 root = XML ( _bytes ( "<root/>" ) )
3769
3770 tree = ElementTree ( root )
3771 self . assertEqual ( None , tree . docinfo . standalone )
3772
3773 result = tostring ( root , xml_declaration = True , encoding = "ASCII" )
3774 self . assertEqual ( result , _bytes (
3775 "<?xml version='1.0' encoding='ASCII'?>\n<root/>" ) )
3776
3777 result = tostring ( root , xml_declaration = True , encoding = "ASCII" ,
3778 standalone = True )
3779 self . assertEqual ( result , _bytes (
3780 "<?xml version='1.0' encoding='ASCII' standalone='yes'?>\n<root/>" ) )
3781
3782 tree = ElementTree ( XML ( result ) )
3783 self . assertEqual ( True , tree . docinfo . standalone )
3784
3785 result = tostring ( root , xml_declaration = True , encoding = "ASCII" ,
3786 standalone = False )
3787 self . assertEqual ( result , _bytes (
3788 "<?xml version='1.0' encoding='ASCII' standalone='no'?>\n<root/>" ) )
3789
3790 tree = ElementTree ( XML ( result ) )
3791 self . assertEqual ( False , tree . docinfo . standalone )
3792
3812
# Serialises a small tree with method="text" and encoding="UTF-16".  The
# expected bytes are the concatenated text and tail content (including the
# non-ASCII tail of <b> and the tail of the root element) encoded as UTF-16.
3814 tostring = self . etree . tostring
3815 Element = self . etree . Element
3816 SubElement = self . etree . SubElement
3817
3818 a = Element ( 'a' )
3819 a . text = "A"
3820 a . tail = "tail"
3821 b = SubElement ( a , 'b' )
3822 b . text = "B"
3823 b . tail = _str ( "Søk på nettet" )
3824 c = SubElement ( a , 'c' )
3825 c . text = "C"
3826
3827 result = tostring ( a , method = "text" , encoding = "UTF-16" )
3828
3829 self . assertEqual ( _str ( 'ABSøk på nettetCtail' ) . encode ( "UTF-16" ) ,
3830 result )
3831
# With non-ASCII text in the tree, tostring(method="text") without an
# explicit encoding is expected to raise UnicodeEncodeError, while
# encoding="UTF-8" is expected to return the UTF-8 encoded text content.
3833 tostring = self . etree . tostring
3834 Element = self . etree . Element
3835 SubElement = self . etree . SubElement
3836
3837 a = Element ( 'a' )
3838 a . text = _str ( 'Søk på nettetA' )
3839 a . tail = "tail"
3840 b = SubElement ( a , 'b' )
3841 b . text = "B"
3842 b . tail = _str ( 'Søk på nettetB' )
3843 c = SubElement ( a , 'c' )
3844 c . text = "C"
3845
3846 self . assertRaises ( UnicodeEncodeError ,
3847 tostring , a , method = "text" )
3848
3849 self . assertEqual (
3850 _str ( 'Søk på nettetABSøk på nettetBCtail' ) . encode ( 'utf-8' ) ,
3851 tostring ( a , encoding = "UTF-8" , method = "text" ) )
3852
3865
3881
3885
3900
3918
3931
# tostring(..., encoding=_unicode) on sub-elements must return a text
# (_unicode) object; the serialised form is compared after passing it
# through the canonicalize() helper.
3933 tostring = self . etree . tostring
3934 Element = self . etree . Element
3935 SubElement = self . etree . SubElement
3936
3937 a = Element ( 'a' )
3938 b = SubElement ( a , 'b' )
3939 c = SubElement ( a , 'c' )
3940 d = SubElement ( c , 'd' )
3941 self . assertTrue ( isinstance ( tostring ( b , encoding = _unicode ) , _unicode ) )
3942 self . assertTrue ( isinstance ( tostring ( c , encoding = _unicode ) , _unicode ) )
3943 self . assertEqual ( _bytes ( '<b></b>' ) ,
3944 canonicalize ( tostring ( b , encoding = _unicode ) ) )
3945 self . assertEqual ( _bytes ( '<c><d></d></c>' ) ,
3946 canonicalize ( tostring ( c , encoding = _unicode ) ) )
3947
3952
3967
# tostring(encoding=_unicode) output without pretty_print, with
# pretty_print=False (same compact result) and with pretty_print=True
# (children on separate lines plus a trailing newline).
3969 tostring = self . etree . tostring
3970 Element = self . etree . Element
3971 SubElement = self . etree . SubElement
3972
3973 a = Element ( 'a' )
3974 b = SubElement ( a , 'b' )
3975 c = SubElement ( a , 'c' )
3976
3977 result = tostring ( a , encoding = _unicode )
3978 self . assertEqual ( result , "<a><b/><c/></a>" )
3979
3980 result = tostring ( a , encoding = _unicode , pretty_print = False )
3981 self . assertEqual ( result , "<a><b/><c/></a>" )
3982
3983 result = tostring ( a , encoding = _unicode , pretty_print = True )
3984 self . assertEqual ( result , "<a>\n <b/>\n <c/>\n</a>\n" )
3985
3997
# Creates two instances of an ElementBase subclass (the tag defaults to the
# class name), links them to each other through a Python attribute to form
# a reference cycle, then drops both names and forces a garbage collection.
# There is no assertion after gc.collect(); presumably the check is simply
# that collecting the cycle does not crash -- confirm.
3999 class SubEl ( etree . ElementBase ) :
4000 pass
4001
4002 el1 = SubEl ( )
4003 el2 = SubEl ( )
4004 self . assertEqual ( 'SubEl' , el1 . tag )
4005 self . assertEqual ( 'SubEl' , el2 . tag )
4006 el1 . other = el2
4007 el2 . other = el1
4008
4009 del el1 , el2
4010 gc . collect ( )
4011
4012
4026
# Removes both children from their parent, re-links them as siblings with
# addnext(), sets their tails and then drops the Python reference to c1.
# c2.getprevious() must still return the 'child1' element with its tail.
4028 root = etree . Element ( 'parent' )
4029 c1 = etree . SubElement ( root , 'child1' )
4030 c2 = etree . SubElement ( root , 'child2' )
4031
4032 root . remove ( c1 )
4033 root . remove ( c2 )
4034 c1 . addnext ( c2 )
4035 c1 . tail = 'abc'
4036 c2 . tail = 'xyz'
4037 del c1
4038 # result is discarded on purpose; the call is repeated in the asserts below
4039 c2 . getprevious ( )
4040
4041 self . assertEqual ( 'child1' , c2 . getprevious ( ) . tag )
4042 self . assertEqual ( 'abc' , c2 . getprevious ( ) . tail )
4043
4044
4045
4046 - def _writeElement ( self , element , encoding = 'us-ascii' , compression = 0 ) :
4057
4102
4103 res_instance = res ( )
4104 parser = etree . XMLParser ( load_dtd = True )
4105 parser . resolvers . add ( res_instance )
4106
4107 tree = etree . parse ( fileInTestDir ( 'include/test_xinclude.xml' ) ,
4108 parser = parser )
4109
4110 self . include ( tree )
4111
4112 called = list ( res_instance . called . items ( ) )
4113 called . sort ( )
4114 self . assertEqual (
4115 [ ( "dtd" , True ) , ( "include" , True ) , ( "input" , True ) ] ,
4116 called )
4117
# Drives XInclude processing through a custom resolver that serves chained
# include documents from memory:
#   * URLs ending in "test_xinclude.xml" are recorded as "input" and left to
#     the default resolution (resolve() returns None);
#   * a URL ending in "/test5.xml" is recorded as "DONE" and answered with
#     '<DONE/>', which contains no further include;
#   * any other URL is recorded under its file name and answered with a copy
#     of `data` whose include href is renamed to test<N>.xml, N being the
#     number of entries recorded so far.
# The asserts inside resolve() fail if the same entry is requested twice.
# Note that `called` is a class-level dict on Resolver.
# The final assertion lists the entries expected to have been recorded.
4119 data = textwrap . dedent ( '''
4120 <doc xmlns:xi="http://www.w3.org/2001/XInclude">
4121 <foo/>
4122 <xi:include href="./test.xml" />
4123 </doc>
4124 ''' )
4125
4126 class Resolver ( etree . Resolver ) :
4127 called = { }
4128
4129 def resolve ( self , url , id , context ) :
4130 if url . endswith ( "test_xinclude.xml" ) :
4131 assert not self . called . get ( "input" )
4132 self . called [ "input" ] = True
4133 return None
4134 elif url . endswith ( '/test5.xml' ) :
4135 assert not self . called . get ( "DONE" )
4136 self . called [ "DONE" ] = True
4137 return self . resolve_string ( '<DONE/>' , context )
4138 else :
4139 _ , filename = url . rsplit ( '/' , 1 )
4140 assert not self . called . get ( filename )
4141 self . called [ filename ] = True
4142 next_data = data . replace (
4143 'test.xml' , 'test%d.xml' % len ( self . called ) )
4144 return self . resolve_string ( next_data , context )
4145
4146 res_instance = Resolver ( )
4147 parser = etree . XMLParser ( load_dtd = True )
4148 parser . resolvers . add ( res_instance )
4149
4150 tree = etree . parse ( fileInTestDir ( 'include/test_xinclude.xml' ) ,
4151 parser = parser )
4152
4153 self . include ( tree )
4154
4155 called = list ( res_instance . called . items ( ) )
4156 called . sort ( )
4157 self . assertEqual (
4158 [ ( "DONE" , True ) , ( "input" , True ) , ( "test.xml" , True ) ,
4159 ( "test2.xml" , True ) , ( "test3.xml" , True ) , ( "test4.xml" , True ) ] ,
4160 called )
4161
4166
4172
# write_c14n() to an in-memory file: the empty element <b/> is expected to
# be written as an explicit start/end tag pair.
4176 tree = self . parse ( _bytes ( '<a><b/></a>' ) )
4177 f = BytesIO ( )
4178 tree . write_c14n ( f )
4179 s = f . getvalue ( )
4180 self . assertEqual ( _bytes ( '<a><b></b></a>' ) ,
4181 s )
4182
# write_c14n() with compression=9: the written bytes are read back through
# gzip.GzipFile and must decompress to the C14N serialisation.
4184 tree = self . parse ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) )
4185 f = BytesIO ( )
4186 tree . write_c14n ( f , compression = 9 )
4187 with closing ( gzip . GzipFile ( fileobj = BytesIO ( f . getvalue ( ) ) ) ) as gzfile :
4188 s = gzfile . read ( )
4189 self . assertEqual ( _bytes ( '<a>' + '<b></b>' * 200 + '</a>' ) ,
4190 s )
4191
4199
4208
4226
4238
4250
# write_c14n() namespace handling.  By default and with exclusive=False all
# namespace declarations stay on the root element.  With exclusive=True the
# unused 'y' declaration is dropped and 'z' moves to the element that uses
# it; listing 'z' in inclusive_ns_prefixes keeps its declaration on the root.
4252 tree = self . parse ( _bytes (
4253 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>' ) )
4254 f = BytesIO ( )
4255 tree . write_c14n ( f )
4256 s = f . getvalue ( )
4257 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4258 s )
4259 f = BytesIO ( )
4260 tree . write_c14n ( f , exclusive = False )
4261 s = f . getvalue ( )
4262 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4263 s )
4264 f = BytesIO ( )
4265 tree . write_c14n ( f , exclusive = True )
4266 s = f . getvalue ( )
4267 self . assertEqual ( _bytes ( '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>' ) ,
4268 s )
4269
4270 f = BytesIO ( )
4271 tree . write_c14n ( f , exclusive = True , inclusive_ns_prefixes = [ 'z' ] )
4272 s = f . getvalue ( )
4273 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4274 s )
4275
# Same namespace expectations as for write_c14n(), exercised through
# tostring(tree, method='c14n'): default / exclusive=False keep every
# declaration on the root, exclusive=True keeps only the ones in use, and
# inclusive_ns_prefixes=['y'] retains the otherwise unused 'y' declaration.
4277 tree = self . parse ( _bytes (
4278 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>' ) )
4279 s = etree . tostring ( tree , method = 'c14n' )
4280 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4281 s )
4282 s = etree . tostring ( tree , method = 'c14n' , exclusive = False )
4283 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4284 s )
4285 s = etree . tostring ( tree , method = 'c14n' , exclusive = True )
4286 self . assertEqual ( _bytes ( '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>' ) ,
4287 s )
4288
4289 s = etree . tostring ( tree , method = 'c14n' , exclusive = True , inclusive_ns_prefixes = [ 'y' ] )
4290 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:y="http://bcd"><z:b xmlns:z="http://cde"></z:b></a>' ) ,
4291 s )
4292
# C14N serialisation of individual elements via tostring(method='c14n').
# First the root element (same expectations as for the whole tree), then
# the child <z:b>: with exclusive=False it carries all in-scope namespace
# declarations, with exclusive=True only the 'z' declaration it uses, and
# inclusive_ns_prefixes=['y'] adds the 'y' declaration back.
4294 tree = self . parse ( _bytes (
4295 '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>' ) )
4296 s = etree . tostring ( tree . getroot ( ) , method = 'c14n' )
4297 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4298 s )
4299 s = etree . tostring ( tree . getroot ( ) , method = 'c14n' , exclusive = False )
4300 self . assertEqual ( _bytes ( '<a xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4301 s )
4302 s = etree . tostring ( tree . getroot ( ) , method = 'c14n' , exclusive = True )
4303 self . assertEqual ( _bytes ( '<a xmlns="http://abc"><z:b xmlns:z="http://cde"></z:b></a>' ) ,
4304 s )
4305
4306 s = etree . tostring ( tree . getroot ( ) [ 0 ] , method = 'c14n' , exclusive = False )
4307 self . assertEqual ( _bytes ( '<z:b xmlns="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>' ) ,
4308 s )
4309 s = etree . tostring ( tree . getroot ( ) [ 0 ] , method = 'c14n' , exclusive = True )
4310 self . assertEqual ( _bytes ( '<z:b xmlns:z="http://cde"></z:b>' ) ,
4311 s )
4312
4313 s = etree . tostring ( tree . getroot ( ) [ 0 ] , method = 'c14n' , exclusive = True , inclusive_ns_prefixes = [ 'y' ] )
4314 self . assertEqual ( _bytes ( '<z:b xmlns:y="http://bcd" xmlns:z="http://cde"></z:b>' ) ,
4315 s )
4316
4318 """ Regression test to fix memory allocation issues (use 3+ inclusive NS spaces)"""
# Passes three prefixes in inclusive_ns_prefixes to an exclusive C14N
# serialisation; all three declarations are expected on the root element.
4319 tree = self . parse ( _bytes (
4320 '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b/></a>' ) )
4321
4322 s = etree . tostring ( tree , method = 'c14n' , exclusive = True , inclusive_ns_prefixes = [ 'x' , 'y' , 'z' ] )
4323 self . assertEqual ( _bytes ( '<a xmlns:x="http://abc" xmlns:y="http://bcd" xmlns:z="http://cde"><z:b></z:b></a>' ) ,
4324 s )
4325
# tree.write() to an in-memory file must reproduce the parsed document
# unchanged, keeping <b/> in its self-closing form.
4329 tree = self . parse ( _bytes ( '<a><b/></a>' ) )
4330 f = BytesIO ( )
4331 tree . write ( f )
4332 s = f . getvalue ( )
4333 self . assertEqual ( _bytes ( '<a><b/></a>' ) ,
4334 s )
4335
# The doctype argument of tree.write() is emitted verbatim ('HUHU' here),
# followed by a newline, in front of the serialised document.
4337 tree = self . parse ( _bytes ( '<a><b/></a>' ) )
4338 f = BytesIO ( )
4339 tree . write ( f , doctype = 'HUHU' )
4340 s = f . getvalue ( )
4341 self . assertEqual ( _bytes ( 'HUHU\n<a><b/></a>' ) ,
4342 s )
4343
# tree.write() with compression=9: the written bytes are read back through
# gzip.GzipFile and must decompress to the plain serialisation.
4345 tree = self . parse ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) )
4346 f = BytesIO ( )
4347 tree . write ( f , compression = 9 )
4348 with closing ( gzip . GzipFile ( fileobj = BytesIO ( f . getvalue ( ) ) ) ) as gzfile :
4349 s = gzfile . read ( )
4350 self . assertEqual ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) ,
4351 s )
4352
# Combines compression=9 with a doctype argument: after decompression the
# doctype line must precede the serialised document.
4354 tree = self . parse ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) )
4355 f = BytesIO ( )
4356 tree . write ( f , compression = 9 , doctype = '<!DOCTYPE a>' )
4357 with closing ( gzip . GzipFile ( fileobj = BytesIO ( f . getvalue ( ) ) ) ) as gzfile :
4358 s = gzfile . read ( )
4359 self . assertEqual ( _bytes ( '<!DOCTYPE a>\n<a>' + '<b/>' * 200 + '</a>' ) ,
4360 s )
4361
# Compares tree.write() output across compression levels:
#   * compression=0 must equal the output written without the argument;
#   * levels 1 and 9 must not be larger than the uncompressed output and
#     must decompress (via gzip.GzipFile) to the same document.
# s0 is the raw output, s1 / s9 are the decompressed level-1 / level-9 data.
4363 tree = self . parse ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) )
4364 f = BytesIO ( )
4365 tree . write ( f , compression = 0 )
4366 s0 = f . getvalue ( )
4367
4368 f = BytesIO ( )
4369 tree . write ( f )
4370 self . assertEqual ( f . getvalue ( ) , s0 )
4371
4372 f = BytesIO ( )
4373 tree . write ( f , compression = 1 )
4374 s = f . getvalue ( )
4375 self . assertTrue ( len ( s ) <= len ( s0 ) )
4376 with closing ( gzip . GzipFile ( fileobj = BytesIO ( s ) ) ) as gzfile :
4377 s1 = gzfile . read ( )
4378
4379 f = BytesIO ( )
4380 tree . write ( f , compression = 9 )
4381 s = f . getvalue ( )
4382 self . assertTrue ( len ( s ) <= len ( s0 ) )
4383 with closing ( gzip . GzipFile ( fileobj = BytesIO ( s ) ) ) as gzfile :
4384 s9 = gzfile . read ( )
4385
4386 self . assertEqual ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) ,
4387 s0 )
4388 self . assertEqual ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) ,
4389 s1 )
4390 self . assertEqual ( _bytes ( '<a>' + '<b/>' * 200 + '</a>' ) ,
4391 s9 )
4392
4400
4409
4417
4426
# Rebinds the imported etree module under the name `etree`; the code that
# follows reaches it as self.etree.  Presumably a class-level attribute --
# the enclosing class header is not visible here.
4429 etree = etree
4430
4452
4454 """This can't really be tested as long as there isn't a way to
4455 reset the logging setup ...
4456 """
# Installs a PyErrorLog subclass as the global Python log, collecting every
# message it receives, then parses a document with mismatched tags and
# checks that the collected messages mention the mismatch, the PARSER
# domain, the ERR_TAG_NAME_MISMATCH type and the position 1:15.
# NOTE(review): BytesIO is given a text literal below while other code in
# this file wraps literals in _bytes(); presumably the BytesIO imported from
# common_imports accepts text -- confirm.
4457 parse = self . etree . parse
4458
4459 messages = [ ]
4460 class Logger ( self . etree . PyErrorLog ) :
4461 def log ( self , entry , message , * args ) :
4462 messages . append ( message )
4463
4464 self . etree . use_global_python_log ( Logger ( ) )
4465 f = BytesIO ( '<a><b></c></b></a>' )
4466 try :
4467 parse ( f )
4468 except SyntaxError :
4469 pass
4470 f . close ( )
4471
4472 self . assertTrue ( [ message for message in messages
4473 if 'mismatch' in message ] )
4474 self . assertTrue ( [ message for message in messages
4475 if ':PARSER:' in message ] )
4476 self . assertTrue ( [ message for message in messages
4477 if ':ERR_TAG_NAME_MISMATCH:' in message ] )
4478 self . assertTrue ( [ message for message in messages
4479 if ':1:15:' in message ] )
4480
# Rebinds the imported etree module under the name `etree`; the code that
# follows reaches it as self.etree.  Presumably a class-level attribute --
# the enclosing class header is not visible here.
4483 etree = etree
4484
4488
# XMLPullParser with a custom parser target and no explicit event list.
# Only 'end' events are expected from read_events(), and each event carries
# the value returned by the target's end() method instead of an element.
# Nothing is reported while only start tags have been fed, and
# parser.close() returns the value of the target's close().
4490 class Target ( object ) :
4491 def start ( self , tag , attrib ) :
4492 return 'start(%s)' % tag
4493 def end ( self , tag ) :
4494 return 'end(%s)' % tag
4495 def close ( self ) :
4496 return 'close()'
4497
4498 parser = self . etree . XMLPullParser ( target = Target ( ) )
4499 events = parser . read_events ( )
4500
4501 parser . feed ( '<root><element>' )
4502 self . assertFalse ( list ( events ) )
4503 self . assertFalse ( list ( events ) )
4504 parser . feed ( '</element><child>' )
4505 self . assertEqual ( [ ( 'end' , 'end(element)' ) ] , list ( events ) )
4506 parser . feed ( '</child>' )
4507 self . assertEqual ( [ ( 'end' , 'end(child)' ) ] , list ( events ) )
4508 parser . feed ( '</root>' )
4509 self . assertEqual ( [ ( 'end' , 'end(root)' ) ] , list ( events ) )
4510 self . assertFalse ( list ( events ) )
4511 self . assertEqual ( 'close()' , parser . close ( ) )
4512
# XMLPullParser with a custom parser target and the events 'start' and
# 'end' requested.  Each event is expected to carry the return value of the
# matching target method, in document order and split by feed() call; once
# read, events are not reported again.  parser.close() returns the value of
# the target's close().
4514 class Target ( object ) :
4515 def start ( self , tag , attrib ) :
4516 return 'start(%s)' % tag
4517 def end ( self , tag ) :
4518 return 'end(%s)' % tag
4519 def close ( self ) :
4520 return 'close()'
4521
4522 parser = self . etree . XMLPullParser (
4523 [ 'start' , 'end' ] , target = Target ( ) )
4524 events = parser . read_events ( )
4525
4526 parser . feed ( '<root><element>' )
4527 self . assertEqual (
4528 [ ( 'start' , 'start(root)' ) , ( 'start' , 'start(element)' ) ] ,
4529 list ( events ) )
4530 self . assertFalse ( list ( events ) )
4531 parser . feed ( '</element><child>' )
4532 self . assertEqual (
4533 [ ( 'end' , 'end(element)' ) , ( 'start' , 'start(child)' ) ] ,
4534 list ( events ) )
4535 parser . feed ( '</child>' )
4536 self . assertEqual (
4537 [ ( 'end' , 'end(child)' ) ] ,
4538 list ( events ) )
4539 parser . feed ( '</root>' )
4540 self . assertEqual (
4541 [ ( 'end' , 'end(root)' ) ] ,
4542 list ( events ) )
4543 self . assertFalse ( list ( events ) )
4544 self . assertEqual ( 'close()' , parser . close ( ) )
4545
# XMLPullParser with etree.TreeBuilder as the target and 'start' / 'end'
# events requested.  Events are compared through self.assert_event_tags()
# (defined outside this section) against the expected (event, tag) pairs
# after each feed(); parser.close() is expected to return the root element.
4547 parser = self . etree . XMLPullParser (
4548 [ 'start' , 'end' ] , target = etree . TreeBuilder ( ) )
4549 events = parser . read_events ( )
4550
4551 parser . feed ( '<root><element>' )
4552 self . assert_event_tags (
4553 events , [ ( 'start' , 'root' ) , ( 'start' , 'element' ) ] )
4554 self . assertFalse ( list ( events ) )
4555 parser . feed ( '</element><child>' )
4556 self . assert_event_tags (
4557 events , [ ( 'end' , 'element' ) , ( 'start' , 'child' ) ] )
4558 parser . feed ( '</child>' )
4559 self . assert_event_tags (
4560 events , [ ( 'end' , 'child' ) ] )
4561 parser . feed ( '</root>' )
4562 self . assert_event_tags (
4563 events , [ ( 'end' , 'root' ) ] )
4564 self . assertFalse ( list ( events ) )
4565 root = parser . close ( )
4566 self . assertEqual ( 'root' , root . tag )
4567
# XMLPullParser with a TreeBuilder subclass whose end() appends '-huhu' to
# the tag of the element it returns.  'start' events are expected to show
# the original tags, 'end' events and the root returned by parser.close()
# the renamed ones.
4569 class Target ( etree . TreeBuilder ) :
4570 def end ( self , tag ) :
4571 el = super ( Target , self ) . end ( tag )
4572 el . tag += '-huhu'
4573 return el
4574
4575 parser = self . etree . XMLPullParser (
4576 [ 'start' , 'end' ] , target = Target ( ) )
4577 events = parser . read_events ( )
4578
4579 parser . feed ( '<root><element>' )
4580 self . assert_event_tags (
4581 events , [ ( 'start' , 'root' ) , ( 'start' , 'element' ) ] )
4582 self . assertFalse ( list ( events ) )
4583 parser . feed ( '</element><child>' )
4584 self . assert_event_tags (
4585 events , [ ( 'end' , 'element-huhu' ) , ( 'start' , 'child' ) ] )
4586 parser . feed ( '</child>' )
4587 self . assert_event_tags (
4588 events , [ ( 'end' , 'child-huhu' ) ] )
4589 parser . feed ( '</root>' )
4590 self . assert_event_tags (
4591 events , [ ( 'end' , 'root-huhu' ) ] )
4592 self . assertFalse ( list ( events ) )
4593 root = parser . close ( )
4594 self . assertEqual ( 'root-huhu' , root . tag )
4595
4627
4628
# Running this module directly does not execute any tests; it only prints a
# hint pointing to test.py.
4629 if __name__ == '__main__' :
4630 print ( 'to test use test.py %s' % __file__ )
4631