lxml.tests.test_xpathevaluator
1
2
3 """
4 Test cases related to XPath evaluation and the XPath class
5 """
6
7 import unittest , sys , os . path
8
9 this_dir = os . path . dirname ( __file__ )
10 if this_dir not in sys . path :
11 sys . path . insert ( 0 , this_dir )
12
13 from common_imports import etree , HelperTestCase , _bytes , BytesIO
14 from common_imports import doctest , make_doctest
15
17 """XPath tests etree"""
18
20 tree = self . parse ( '<a><b></b><b></b></a>' )
21 self . assertTrue ( tree . xpath ( 'boolean(/a/b)' ) )
22 self . assertTrue ( not tree . xpath ( 'boolean(/a/c)' ) )
23
25 tree = self . parse ( '<a>1</a>' )
26 self . assertEqual ( 1. ,
27 tree . xpath ( 'number(/a)' ) )
28 tree = self . parse ( '<a>A</a>' )
29 actual = str ( tree . xpath ( 'number(/a)' ) )
30 expected = [ 'nan' , '1.#qnan' , 'nanq' ]
31 if not actual . lower ( ) in expected :
32 self . fail ( 'Expected a NAN value, got %s' % actual )
33
35 tree = self . parse ( '<a>Foo</a>' )
36 self . assertEqual ( 'Foo' ,
37 tree . xpath ( 'string(/a/text())' ) )
38
40 tree = self . parse ( '<a><b/></a>' )
41 self . assertEqual ( [ ] ,
42 tree . xpath ( '/' ) )
43
45 tree = self . parse ( '<a xmlns="test" xmlns:p="myURI"/>' )
46 self . assertTrue ( ( None , "test" ) in tree . xpath ( 'namespace::*' ) )
47 self . assertTrue ( ( 'p' , 'myURI' ) in tree . xpath ( 'namespace::*' ) )
48
50 tree = self . parse ( '<a/>' )
51 self . assertEqual ( [ ( 'xml' , 'http://www.w3.org/XML/1998/namespace' ) ] ,
52 tree . xpath ( 'namespace::*' ) )
53
59
61 tree = self . parse ( '<a><b/></a>' )
62 self . assertEqual ( [ ] ,
63 tree . xpath ( '/a/c' ) )
64
65 self . assertEqual ( [ ] ,
66 tree . xpath ( '/a/c/text()' ) )
67
69 tree = self . parse ( '<a><b>Foo</b><b>Bar</b></a>' )
70 root = tree . getroot ( )
71 self . assertEqual ( [ 'Foo' , 'Bar' ] ,
72 tree . xpath ( '/a/b/text()' ) )
73
75 tree = self . parse ( '<a><b>FooBar</b><b>BarFoo</b></a>' )
76 root = tree . getroot ( )
77 self . assertEqual ( [ 'FooBar' , 'BarFoo' ] ,
78 tree . xpath ( '/a/b/text()' ) )
79 self . assertEqual ( [ root [ 0 ] , root [ 1 ] ] ,
80 [ r . getparent ( ) for r in tree . xpath ( '/a/b/text()' ) ] )
81
83 tree = self . parse ( '<a><b>FooBar</b><b>BarFoo</b></a>' )
84 root = tree . getroot ( )
85 self . assertEqual ( [ 'FooBar' , 'BarFoo' ] ,
86 tree . xpath ( '/a/b/text()' , smart_strings = True ) )
87 self . assertEqual ( [ root [ 0 ] , root [ 1 ] ] ,
88 [ r . getparent ( ) for r in
89 tree . xpath ( '/a/b/text()' , smart_strings = True ) ] )
90 self . assertEqual ( [ None , None ] ,
91 [ r . attrname for r in
92 tree . xpath ( '/a/b/text()' , smart_strings = True ) ] )
93
94 self . assertEqual ( [ 'FooBar' , 'BarFoo' ] ,
95 tree . xpath ( '/a/b/text()' , smart_strings = False ) )
96 self . assertEqual ( [ False , False ] ,
97 [ hasattr ( r , 'getparent' ) for r in
98 tree . xpath ( '/a/b/text()' , smart_strings = False ) ] )
99 self . assertEqual ( [ None , None ] ,
100 [ r . attrname for r in
101 tree . xpath ( '/a/b/text()' , smart_strings = True ) ] )
102
104 xml = _bytes ( '<a><b>FooBar\\u0680\\u3120</b><b>BarFoo\\u0680\\u3120</b></a>' ) . decode ( "unicode_escape" )
105 tree = self . parse ( xml . encode ( 'utf-8' ) )
106 root = tree . getroot ( )
107 self . assertEqual ( [ _bytes ( 'FooBar\\u0680\\u3120' ) . decode ( "unicode_escape" ) ,
108 _bytes ( 'BarFoo\\u0680\\u3120' ) . decode ( "unicode_escape" ) ] ,
109 tree . xpath ( '/a/b/text()' ) )
110 self . assertEqual ( [ root [ 0 ] , root [ 1 ] ] ,
111 [ r . getparent ( ) for r in tree . xpath ( '/a/b/text()' ) ] )
112
114 tree = self . parse ( '<a b="B" c="C"/>' )
115 self . assertEqual ( [ 'B' ] ,
116 tree . xpath ( '/a/@b' ) )
117
119 tree = self . parse ( '<a b="BaSdFgHjKl" c="CqWeRtZuI"/>' )
120 results = tree . xpath ( '/a/@c' )
121 self . assertEqual ( 1 , len ( results ) )
122 self . assertEqual ( 'CqWeRtZuI' , results [ 0 ] )
123 self . assertEqual ( tree . getroot ( ) . tag , results [ 0 ] . getparent ( ) . tag )
124
126 tree = self . parse ( '<a b="BaSdFgHjKl" c="CqWeRtZuI"/>' )
127
128 results = tree . xpath ( '/a/@c' , smart_strings = True )
129 self . assertEqual ( 1 , len ( results ) )
130 self . assertEqual ( 'CqWeRtZuI' , results [ 0 ] )
131 self . assertEqual ( 'c' , results [ 0 ] . attrname )
132 self . assertEqual ( tree . getroot ( ) . tag , results [ 0 ] . getparent ( ) . tag )
133
134 results = tree . xpath ( '/a/@c' , smart_strings = False )
135 self . assertEqual ( 1 , len ( results ) )
136 self . assertEqual ( 'CqWeRtZuI' , results [ 0 ] )
137 self . assertEqual ( False , hasattr ( results [ 0 ] , 'getparent' ) )
138 self . assertEqual ( False , hasattr ( results [ 0 ] , 'attrname' ) )
139
141 xml_data = '''
142 <table>
143 <item xml:id="k1"><value>v1</value></item>
144 <item xml:id="k2"><value>v2</value></item>
145 </table>
146 '''
147
def lookup(dummy, id):
    """Resolve ``id`` against a freshly parsed copy of ``xml_data``.

    The first argument is accepted but ignored.  The document held in the
    enclosing ``xml_data`` string is parsed anew on every call, and the
    XPath ``id()`` function is evaluated on it with the ``repr()`` of
    ``id`` as its argument.  The result of that XPath call is returned
    unchanged.
    """
    document_root = etree.XML(xml_data)
    expression = 'id(%r)' % id
    return document_root.xpath(expression)
150 functions = { ( None , 'lookup' ) : lookup }
151
152 root = etree . XML ( '<dummy/>' )
153 values = root . xpath ( "lookup('k1')/value/text()" ,
154 extensions = functions )
155 self . assertEqual ( [ 'v1' ] , values )
156 self . assertEqual ( 'value' , values [ 0 ] . getparent ( ) . tag )
157
162
164 root = etree . XML ( '<a><b><c/></b></a>' )
165 el = root [ 0 ]
166 self . assertTrue ( el . xpath ( 'boolean(c)' ) )
167 self . assertTrue ( not el . xpath ( 'boolean(d)' ) )
168
170 tree = self . parse ( '<a><c><b>Foo</b><b>Bar</b></c><c><b>Hey</b></c></a>' )
171 root = tree . getroot ( )
172 c = root [ 0 ]
173 self . assertEqual ( [ c [ 0 ] , c [ 1 ] ] ,
174 c . xpath ( 'b' ) )
175 self . assertEqual ( [ c [ 0 ] , c [ 1 ] , root [ 1 ] [ 0 ] ] ,
176 c . xpath ( '//b' ) )
177
179 tree = self . parse ( '<a xmlns="uri:a"><b></b></a>' )
180 root = tree . getroot ( )
181 self . assertEqual (
182 [ root [ 0 ] ] ,
183 tree . xpath ( '//foo:b' , namespaces = { 'foo' : 'uri:a' } ) )
184 self . assertEqual (
185 [ ] ,
186 tree . xpath ( '//foo:b' , namespaces = { 'foo' : 'uri:c' } ) )
187 self . assertEqual (
188 [ root [ 0 ] ] ,
189 root . xpath ( '//baz:b' , namespaces = { 'baz' : 'uri:a' } ) )
190
192 tree = self . parse ( '<a xmlns="uri:a"><b></b></a>' )
193 root = tree . getroot ( )
194 self . assertRaises (
195 TypeError ,
196 root . xpath , '//b' , namespaces = { None : 'uri:a' } )
197
199 tree = self . parse ( '<a xmlns="uri:a"><b></b></a>' )
200 root = tree . getroot ( )
201 self . assertRaises (
202 TypeError ,
203 root . xpath , '//b' , namespaces = { '' : 'uri:a' } )
204
208
212
216
221
234
247
255
267
282
290
def foo(evaluator, a):
    """Return a greeting built from ``a``.

    The first argument is accepted but not used; ``a`` is interpolated
    into the ``'hello %s'`` template.
    """
    template = 'hello %s'
    return template % a
294 extension = { ( None , 'foo' ) : foo }
295 tree = self . parse ( '<a><b></b></a>' )
296 e = etree . XPathEvaluator ( tree , extensions = [ extension ] )
297 self . assertEqual (
298 "hello you" , e ( "foo('you')" ) )
299
def foo(evaluator, a, b):
    """Return a greeting that names both ``a`` and ``b``.

    The first argument is accepted but not used.  Both remaining
    arguments are required.
    """
    pair = (a, b)
    return "hello %s and %s" % pair
303 extension = { ( None , 'foo' ) : foo }
304 tree = self . parse ( '<a><b></b></a>' )
305 e = etree . XPathEvaluator ( tree , extensions = [ extension ] )
306 self . assertRaises ( TypeError , e , "foo('you')" )
307
def foo(evaluator, a):
    """Always fail with ``ZeroDivisionError``.

    Neither argument is inspected; the body divides one by zero so that
    every call raises.
    """
    numerator, denominator = 1, 0
    return numerator / denominator
311 extension = { ( None , 'foo' ) : foo }
312 tree = self . parse ( '<a/>' )
313 e = etree . XPathEvaluator ( tree , extensions = [ extension ] )
314 self . assertRaises ( ZeroDivisionError , e , "foo('test')" )
315
324
325 x = self . parse ( '<a/>' )
326 e = etree . XPathEvaluator ( x , extensions = [ { ( None , 'foo' ) : f } ] )
327 r = e ( "foo('World')/result" )
328 self . assertEqual ( 2 , len ( r ) )
329 self . assertEqual ( 'Hoi' , r [ 0 ] . text )
330 self . assertEqual ( 'Dag' , r [ 1 ] . text )
331
340
341 x = self . parse ( '<a/>' )
342 e = etree . XPathEvaluator ( x , extensions = [ { ( None , 'foo' ) : f } ] )
343 r = e ( "foo(/*)/result" )
344 self . assertEqual ( 2 , len ( r ) )
345 self . assertEqual ( 'Hoi' , r [ 0 ] . text )
346 self . assertEqual ( 'Dag' , r [ 1 ] . text )
347
357
358 x = self . parse ( '<result>Honk</result>' )
359 e = etree . XPathEvaluator ( x , extensions = [ { ( None , 'foo' ) : f } ] )
360 r = e ( "foo(/*)/result" )
361 self . assertEqual ( 3 , len ( r ) )
362 self . assertEqual ( 'Hoi' , r [ 0 ] . text )
363 self . assertEqual ( 'Dag' , r [ 1 ] . text )
364 self . assertEqual ( 'Honk' , r [ 2 ] . text )
365
367 tree = self . parse ( '<root><a/><b><c/></b></root>' )
368
369 check_call = [ ]
def check_context(ctxt, nodes):
    """Record the tag of the single node received and check the context node.

    Asserts that exactly one node was passed, appends its tag to the
    enclosing ``check_call`` list, and asserts that ``ctxt.context_node``
    equals that node.  Always returns True.
    """
    self.assertEqual(len(nodes), 1)
    current = nodes[0]
    check_call.append(current.tag)
    self.assertEqual(ctxt.context_node, current)
    return True
375
376 find = etree . XPath ( "//*[p:foo(.)]" ,
377 namespaces = { 'p' : 'ns' } ,
378 extensions = [ { ( 'ns' , 'foo' ) : check_context } ] )
379 find ( tree )
380
381 check_call . sort ( )
382 self . assertEqual ( check_call , [ "a" , "b" , "c" , "root" ] )
383
385 tree = self . parse ( '<root><a/><b><c/></b></root>' )
386
387 check_call = { }
def check_context(ctxt, nodes):
    """Track what ``ctxt.eval_context`` holds under "b" for each node seen.

    Asserts that exactly one node was passed.  Stores in the enclosing
    ``check_call`` dict, keyed by the node's tag, whatever
    ``ctxt.eval_context`` currently holds under ``"b"`` (None if absent),
    then records the tag in ``ctxt.eval_context`` under its own name.
    Always returns True.
    """
    self.assertEqual(len(nodes), 1)
    name = nodes[0].tag

    # Read the "b" entry *before* writing this node's own entry, so the
    # recorded value reflects only what earlier calls left behind.
    seen_b = ctxt.eval_context.get("b")
    check_call[name] = seen_b
    ctxt.eval_context[name] = name
    return True
395
396 find = etree . XPath ( "//b[p:foo(.)]/c[p:foo(.)]" ,
397 namespaces = { 'p' : 'ns' } ,
398 extensions = [ { ( 'ns' , 'foo' ) : check_context } ] )
399 result = find ( tree )
400
401 self . assertEqual ( result , [ tree . getroot ( ) [ 1 ] [ 0 ] ] )
402 self . assertEqual ( check_call , { 'b' : None , 'c' : 'b' } )
403
405 tree = self . parse ( '<root><a/><b><c/></b></root>' )
406
407 check_call = { }
def check_context(ctxt):
    """Flag that the function ran and check ``eval_context`` starts empty.

    Sets ``check_call["done"]`` in the enclosing dict, asserts that
    ``ctxt.eval_context`` has no entries on entry, then stores a
    ``"test"`` entry in it.  Always returns True.
    """
    check_call["done"] = True

    # The context must be empty here even though this function writes to
    # it below.
    entry_count = len(ctxt.eval_context)
    self.assertEqual(entry_count, 0)
    ctxt.eval_context["test"] = True
    return True
414
415 find = etree . XPath ( "//b[p:foo()]" ,
416 namespaces = { 'p' : 'ns' } ,
417 extensions = [ { ( 'ns' , 'foo' ) : check_context } ] )
418 result = find ( tree )
419
420 self . assertEqual ( result , [ tree . getroot ( ) [ 1 ] ] )
421 self . assertEqual ( check_call [ "done" ] , True )
422
423 check_call . clear ( )
424 find = etree . XPath ( "//b[p:foo()]" ,
425 namespaces = { 'p' : 'ns' } ,
426 extensions = [ { ( 'ns' , 'foo' ) : check_context } ] )
427 result = find ( tree )
428
429 self . assertEqual ( result , [ tree . getroot ( ) [ 1 ] ] )
430 self . assertEqual ( check_call [ "done" ] , True )
431
433 x = self . parse ( '<a attr="true"/>' )
434 e = etree . XPathEvaluator ( x )
435
436 expr = "/a[@attr=$aval]"
437 r = e ( expr , aval = 1 )
438 self . assertEqual ( 0 , len ( r ) )
439
440 r = e ( expr , aval = "true" )
441 self . assertEqual ( 1 , len ( r ) )
442 self . assertEqual ( "true" , r [ 0 ] . get ( 'attr' ) )
443
444 r = e ( expr , aval = True )
445 self . assertEqual ( 1 , len ( r ) )
446 self . assertEqual ( "true" , r [ 0 ] . get ( 'attr' ) )
447
459
461 x = self . parse ( '<a attr="true"><test/></a>' )
462
class LocalException(Exception):
    """Marker exception raised by the ``foo`` extension function when its
    ``varval`` argument equals 0."""
465
def foo(evaluator, a, varval):
    """Return a different kind of result depending on ``varval``.

    * ``0`` -- raise ``LocalException``
    * ``1`` -- return an empty tuple
    * ``2`` -- return None
    * ``3`` -- return ``a[0][0]``
    * anything else -- return ``a[0]`` if its ``attr`` attribute equals
      ``str(varval)``, otherwise a newly created ``NODE`` element

    The first argument is accepted but not used.
    """
    # A throw-away element is created on every call.
    # NOTE(review): presumably this exercises element creation inside an
    # extension call -- the reason is not visible in this block.
    etree.Element("DUMMY")

    if varval == 0:
        raise LocalException
    if varval == 1:
        return ()
    if varval == 2:
        return None
    if varval == 3:
        return a[0][0]

    first = a[0]
    if first.get("attr") != str(varval):
        return etree.Element("NODE")
    return first
481
482 extension = { ( None , 'foo' ) : foo }
483 e = etree . XPathEvaluator ( x , extensions = [ extension ] )
484 del x
485
486 self . assertRaises ( LocalException , e , "foo(., 0)" )
487 self . assertRaises ( LocalException , e , "foo(., $value)" , value = 0 )
488
489 r = e ( "foo(., $value)" , value = 1 )
490 self . assertEqual ( len ( r ) , 0 )
491
492 r = e ( "foo(., 1)" )
493 self . assertEqual ( len ( r ) , 0 )
494
495 r = e ( "foo(., $value)" , value = 2 )
496 self . assertEqual ( len ( r ) , 0 )
497
498 r = e ( "foo(., $value)" , value = 3 )
499 self . assertEqual ( len ( r ) , 1 )
500 self . assertEqual ( r [ 0 ] . tag , "test" )
501
502 r = e ( "foo(., $value)" , value = "false" )
503 self . assertEqual ( len ( r ) , 1 )
504 self . assertEqual ( r [ 0 ] . tag , "NODE" )
505
506 r = e ( "foo(., 'false')" )
507 self . assertEqual ( len ( r ) , 1 )
508 self . assertEqual ( r [ 0 ] . tag , "NODE" )
509
510 r = e ( "foo(., 'true')" )
511 self . assertEqual ( len ( r ) , 1 )
512 self . assertEqual ( r [ 0 ] . tag , "a" )
513 self . assertEqual ( r [ 0 ] [ 0 ] . tag , "test" )
514
515 r = e ( "foo(., $value)" , value = "true" )
516 self . assertEqual ( len ( r ) , 1 )
517 self . assertEqual ( r [ 0 ] . tag , "a" )
518
519 self . assertRaises ( LocalException , e , "foo(., 0)" )
520 self . assertRaises ( LocalException , e , "foo(., $value)" , value = 0 )
521
522
524 "Tests for the XPath class"
526 x = self . parse ( '<a attr="true"/>' )
527
528 expr = etree . XPath ( "/a[@attr != 'true']" )
529 r = expr ( x )
530 self . assertEqual ( 0 , len ( r ) )
531
532 expr = etree . XPath ( "/a[@attr = 'true']" )
533 r = expr ( x )
534 self . assertEqual ( 1 , len ( r ) )
535
536 expr = etree . XPath ( expr . path )
537 r = expr ( x )
538 self . assertEqual ( 1 , len ( r ) )
539
541 x = self . parse ( '<a><b/><c/></a>' )
542 root = x . getroot ( )
543
544 expr = etree . XPath ( "./b" )
545 r = expr ( root )
546 self . assertEqual ( 1 , len ( r ) )
547 self . assertEqual ( 'b' , r [ 0 ] . tag )
548
549 expr = etree . XPath ( "./*" )
550 r = expr ( root )
551 self . assertEqual ( 2 , len ( r ) )
552
554 x = self . parse ( '<a attr="true"/>' )
555
556 expr = etree . XPath ( "/a[@attr=$aval]" )
557 r = expr ( x , aval = False )
558 self . assertEqual ( 0 , len ( r ) )
559
560 r = expr ( x , aval = True )
561 self . assertEqual ( 1 , len ( r ) )
562
564 self . assertRaises ( SyntaxError , etree . XPath , '\\fad' )
565
568
569
571 "Tests for the EXSLT support in XPath (requires libxslt 1.1.25+)"
572
# Prefix -> namespace URI mapping for the EXSLT modules; the tests pass it
# as ``namespaces=`` to ``xpath()``.
NSMAP = {
    'date': "http://exslt.org/dates-and-times",
    'math': "http://exslt.org/math",
    'set': "http://exslt.org/sets",
    'str': "http://exslt.org/strings",
}
579
581 tree = self . parse ( '<a><b>2009-11-12</b><b>2008-12-11</b></a>' )
582
583 match_dates = tree . xpath ( '//b[date:year(string()) = 2009]' ,
584 namespaces = self . NSMAP )
585 self . assertTrue ( match_dates , str ( match_dates ) )
586 self . assertEqual ( len ( match_dates ) , 1 , str ( match_dates ) )
587 self . assertEqual ( match_dates [ 0 ] . text , '2009-11-12' )
588
590 tree = self . parse ( '<a><b>2009-11-12</b><b>2008-12-11</b></a>' )
591
592 aligned_date = tree . xpath (
593 'str:align(string(//b[1]), "%s", "center")' % ( '-' * 20 ) ,
594 namespaces = self . NSMAP )
595 self . assertTrue ( aligned_date , str ( aligned_date ) )
596 self . assertEqual ( aligned_date , '-----2009-11-12-----' )
597
598
600 "Tests for the ETXPath class"
602 x = self . parse ( '<a><b xmlns="nsa"/><b xmlns="nsb"/></a>' )
603
604 expr = etree . ETXPath ( "/a/{nsa}b" )
605 r = expr ( x )
606 self . assertEqual ( 1 , len ( r ) )
607 self . assertEqual ( '{nsa}b' , r [ 0 ] . tag )
608
609 expr = etree . ETXPath ( "/a/{nsb}b" )
610 r = expr ( x )
611 self . assertEqual ( 1 , len ( r ) )
612 self . assertEqual ( '{nsb}b' , r [ 0 ] . tag )
613
614
615
617 x = self . parse ( _bytes ( '<a><b xmlns="http://nsa/\\uf8d2"/><b xmlns="http://nsb/\\uf8d1"/></a>'
618 ) . decode ( "unicode_escape" ) )
619
620 expr = etree . ETXPath ( _bytes ( "/a/{http://nsa/\\uf8d2}b" ) . decode ( "unicode_escape" ) )
621 r = expr ( x )
622 self . assertEqual ( 1 , len ( r ) )
623 self . assertEqual ( _bytes ( '{http://nsa/\\uf8d2}b' ) . decode ( "unicode_escape" ) , r [ 0 ] . tag )
624
625 expr = etree . ETXPath ( _bytes ( "/a/{http://nsb/\\uf8d1}b" ) . decode ( "unicode_escape" ) )
626 r = expr ( x )
627 self . assertEqual ( 1 , len ( r ) )
628 self . assertEqual ( _bytes ( '{http://nsb/\\uf8d1}b' ) . decode ( "unicode_escape" ) , r [ 0 ] . tag )
629
630 SAMPLE_XML = etree . parse ( BytesIO ( """
631 <body>
632 <tag>text</tag>
633 <section>
634 <tag>subtext</tag>
635 </section>
636 <tag />
637 <tag />
638 </body>
639 """ ) )
640
643
645 return getattr ( elem , 'tag' , elem )
646
649
651 return [ "Hello " ] + list ( s1 ) + [ "!" ]
652
655
658
661
664
666 return ", " . join ( map ( str , ( s , f , b , list ( map ( tag , st ) ) ) ) )
667
669 st1 . extend ( st2 )
670 return st1
671
674
677
678 uri = "http://www.example.com/"
679
680 extension = { ( None , 'stringTest' ) : stringTest ,
681 ( None , 'stringListTest' ) : stringListTest ,
682 ( None , 'floatTest' ) : floatTest ,
683 ( None , 'booleanTest' ) : booleanTest ,
684 ( None , 'setTest' ) : setTest ,
685 ( None , 'setTest2' ) : setTest2 ,
686 ( None , 'argsTest1' ) : argsTest1 ,
687 ( None , 'argsTest2' ) : argsTest2 ,
688 ( None , 'resultTypesTest' ) : resultTypesTest ,
689 ( None , 'resultTypesTest2' ) : resultTypesTest2 , }
690
692 """
693 Test xpath extension functions.
694
695 >>> root = SAMPLE_XML
696 >>> e = etree.XPathEvaluator(root, extensions=[extension])
697 >>> e("stringTest('you')")
698 'Hello you'
699 >>> e(_bytes("stringTest('\\\\xe9lan')").decode("unicode_escape"))
700 u'Hello \\xe9lan'
701 >>> e("stringTest('you','there')") #doctest: +ELLIPSIS
702 Traceback (most recent call last):
703 ...
704 TypeError: stringTest() takes... 2 ...arguments ...
705 >>> e("floatTest(2)")
706 6.0
707 >>> e("booleanTest(true())")
708 False
709 >>> list(map(tag, e("setTest(/body/tag)")))
710 ['tag']
711 >>> list(map(tag, e("setTest2(/body/*)")))
712 ['tag', 'section']
713 >>> list(map(tag_or_value, e("stringListTest(/body/tag)")))
714 ['Hello ', 'tag', 'tag', 'tag', '!']
715 >>> e("argsTest1('a',1.5,true(),/body/tag)")
716 "a, 1.5, True, ['tag', 'tag', 'tag']"
717 >>> list(map(tag, e("argsTest2(/body/tag, /body/section)")))
718 ['tag', 'section', 'tag', 'tag']
719 >>> e("resultTypesTest()")
720 Traceback (most recent call last):
721 ...
722 XPathResultError: This is not a supported node-set result: None
723 >>> try:
724 ... e("resultTypesTest2()")
725 ... except etree.XPathResultError:
726 ... print("Got error")
727 Got error
728 """
729
if sys.version_info[0] >= 3:
    # Adjust the expected doctest output for Python 3: drop the ``u``
    # string prefix, use the fully qualified exception name, and use the
    # "positional arguments" wording in the TypeError message.
    xpath.__doc__ = (
        xpath.__doc__
        .replace(" u'", " '")
        .replace(" XPathResultError", " lxml.etree.XPathResultError")
        .replace(" exactly 2 arguments", " exactly 2 positional arguments")
    )
736
748
if __name__ == '__main__':
    # This module is not meant to be run directly; point the user at the
    # test runner script instead.
    hint = 'to test use test.py %s' % __file__
    print(hint)
751