lxml.tests.test_dtd
Package lxml :: Package tests :: Module test_dtd
[hide private]
[frames | no frames]

Source Code for Module lxml.tests.test_dtd

  1  # -*- coding: utf-8 -*- 
  2   
  3  """ 
  4  Test cases related to DTD parsing and validation 
  5  """ 
  6   
  7  import unittest, sys, os.path 
  8   
  9  this_dir = os.path.dirname(__file__) 
 10  if this_dir not in sys.path: 
 11      sys.path.insert(0, this_dir)  # needed for Py3 
 12   
 13  from common_imports import etree, html, BytesIO, _bytes, _str 
 14  from common_imports import HelperTestCase, make_doctest, skipIf 
 15  from common_imports import fileInTestDir, fileUrlInTestDir 
16 17 18 -class ETreeDtdTestCase(HelperTestCase):
19 - def test_dtd(self):
20 pass
21
def test_dtd_file(self):
    # Parse test.xml from the test directory and validate its root element
    # against a DTD object built from the test.dtd file path.
    document_root = etree.parse(fileInTestDir("test.xml")).getroot()

    file_dtd = etree.DTD(fileInTestDir("test.dtd"))
    is_valid = file_dtd.validate(document_root)
    self.assertTrue(is_valid)
29
def test_dtd_stringio(self):
    # A DTD can be built from a file-like object; an empty <b/> element
    # must validate against a DTD that declares b as EMPTY.
    empty_b = etree.XML(_bytes("<b/>"))
    dtd_source = BytesIO("<!ELEMENT b EMPTY>")
    stream_dtd = etree.DTD(dtd_source)
    self.assertTrue(stream_dtd.validate(empty_b))
34
def test_dtd_parse_invalid(self):
    # Parsing with dtd_validation=True must raise XMLSyntaxError for a
    # document that references test.dtd but uses <b> as its root.
    validating_parser = etree.XMLParser(dtd_validation=True)
    dtd_path = fileInTestDir("test.dtd")
    document = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' % dtd_path)
    self.assertRaises(
        etree.XMLSyntaxError,
        etree.fromstring, document, parser=validating_parser)
42
44 fromstring = etree.fromstring 45 dtd_filename = fileUrlInTestDir("__nosuch.dtd") 46 parser = etree.XMLParser(dtd_validation=True) 47 xml = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' % dtd_filename) 48 self.assertRaises(etree.XMLSyntaxError, 49 fromstring, xml, parser=parser) 50 errors = None 51 try: 52 fromstring(xml, parser=parser) 53 except etree.XMLSyntaxError: 54 e = sys.exc_info()[1] 55 self.assertTrue(e.error_log) 56 self.assertTrue(parser.error_log) 57 errors = [entry.message for entry in e.error_log 58 if dtd_filename in entry.message] 59 self.assertTrue(errors)
60
def test_dtd_parse_valid(self):
    # The test passes as long as validating parsing does not raise;
    # the parsed result itself is not inspected.
    dtd_url = fileUrlInTestDir("test.dtd")
    document = '<!DOCTYPE a SYSTEM "%s"><a><b/></a>' % dtd_url
    validating_parser = etree.XMLParser(dtd_validation=True)
    etree.fromstring(document, parser=validating_parser)
66
68 parser = etree.XMLParser(dtd_validation=True) 69 xml = ('<!DOCTYPE a SYSTEM "%s"><a><b/></a>' % 70 fileUrlInTestDir("test.dtd")) 71 root = etree.fromstring(xml, parser=parser)
72
74 parser = etree.XMLParser(dtd_validation=True) 75 xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>' 76 root = etree.fromstring( 77 xml, parser=parser, base_url=fileUrlInTestDir("test.xml"))
78
80 parser = etree.XMLParser(dtd_validation=True) 81 xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>' 82 root = etree.fromstring( 83 xml, parser=parser, base_url=fileUrlInTestDir("test.xml"))
84
def test_dtd_invalid(self):
    # <b> contains a child although the DTD declares it EMPTY, so
    # assertValid() is expected to raise DocumentInvalid.
    document_root = etree.XML("<b><a/></b>")
    empty_b_dtd = etree.DTD(BytesIO("<!ELEMENT b EMPTY>"))
    self.assertRaises(
        etree.DocumentInvalid, empty_b_dtd.assertValid, document_root)
89
def test_dtd_assertValid(self):
    # The document matches the DTD (b contains exactly one empty a),
    # so assertValid() must return without raising.
    dtd_source = BytesIO("<!ELEMENT b (a)><!ELEMENT a EMPTY>")
    document_root = etree.XML("<b><a/></b>")
    matching_dtd = etree.DTD(dtd_source)
    matching_dtd.assertValid(document_root)
94
def test_dtd_internal(self):
    # Parse a document carrying an internal DTD subset, fetch that DTD
    # through docinfo.internalDTD and validate the document against it.
    document = _bytes('''
        <!DOCTYPE b SYSTEM "none" [
        <!ELEMENT b (a)>
        <!ELEMENT a EMPTY>
        ]>
        <b><a/></b>
        ''')
    document_root = etree.XML(document)
    internal_dtd = etree.ElementTree(document_root).docinfo.internalDTD
    self.assertTrue(internal_dtd)
    internal_dtd.assertValid(document_root)
106
108 root = etree.XML(_bytes(''' 109 <!DOCTYPE b SYSTEM "none" [ 110 <!ELEMENT b (a)> 111 <!ELEMENT a (c)> 112 <!ELEMENT c EMPTY> 113 ]> 114 <b><a/></b> 115 ''')) 116 dtd = etree.ElementTree(root).docinfo.internalDTD 117 self.assertTrue(dtd) 118 self.assertFalse(dtd.validate(root))
119
121 root = etree.XML(_bytes(''' 122 <a><b id="id1"/><b id="id2"/><b id="id1"/></a> 123 ''')) 124 dtd = etree.DTD(BytesIO(_bytes(""" 125 <!ELEMENT a (b*)> 126 <!ATTLIST b 127 id ID #REQUIRED 128 > 129 <!ELEMENT b EMPTY> 130 """))) 131 self.assertFalse(dtd.validate(root)) 132 self.assertTrue(dtd.error_log) 133 self.assertTrue([error for error in dtd.error_log 134 if 'id1' in error.message])
135
def test_dtd_api_internal(self):
    # Walk the element and attribute declarations of an internal DTD and
    # check the values reported for each declaration.
    document = _bytes('''
        <!DOCTYPE b SYSTEM "none" [
        <!ATTLIST a
          attr1 (x | y | z) "z"
          attr2 CDATA #FIXED "X"
        >
        <!ELEMENT b (a)>
        <!ELEMENT a EMPTY>
        ]>
        <b><a/></b>
        ''')
    document_root = etree.XML(document)
    internal_dtd = etree.ElementTree(document_root).docinfo.internalDTD
    self.assertTrue(internal_dtd)
    internal_dtd.assertValid(document_root)

    element_names = []
    for element_decl in internal_dtd.iterelements():
        if element_decl.name != 'a':
            # The only other declared element is b, which has no attributes.
            self.assertEqual('b', element_decl.name)
            self.assertEqual(0, len(element_decl.attributes()))
        else:
            self.assertEqual(2, len(element_decl.attributes()))
            for attr_decl in element_decl.iterattributes():
                if attr_decl.name != 'attr1':
                    self.assertEqual('attr2', attr_decl.name)
                    self.assertEqual('cdata', attr_decl.type)
                    self.assertEqual('fixed', attr_decl.default)
                    self.assertEqual('X', attr_decl.default_value)
                else:
                    self.assertEqual('enumeration', attr_decl.type)
                    self.assertEqual('none', attr_decl.default)
                    self.assertEqual('z', attr_decl.default_value)
                    # Compare in sorted order rather than declaration order.
                    self.assertEqual(
                        ['x', 'y', 'z'], sorted(attr_decl.values()))
        element_names.append(element_decl.name)
    self.assertEqual(['a', 'b'], sorted(element_names))
    self.assertEqual(2, len(internal_dtd.elements()))
176
def test_internal_dtds(self):
    # Generate internal DTDs with el_count element declarations (2-4) and
    # attr_count enumerated attributes (0-3) on every element, then check
    # what the DTD API reports for each generated combination.
    for el_count in range(2, 5):
        for attr_count in range(4):
            attlist_decls = ''.join(['''
                <!ATTLIST el%d
                  attr%d (x | y | z) "z"
                >
                ''' % (el_no, attr_no)
                for attr_no in range(attr_count)
                for el_no in range(el_count)])
            child_decls = ''.join(['''
                <!ELEMENT el%d EMPTY>
                ''' % el_no for el_no in range(1, el_count)])
            root_decl = '<!ELEMENT el0 (%s)>' % '|'.join(
                ['el%d' % el_no for el_no in range(1, el_count)])
            el1_attrs = ' '.join(
                ['attr%d="x"' % attr_no for attr_no in range(attr_count)])

            # '%' binds tighter than '+', so only the final literal is
            # formatted with el1_attrs.
            document = ('''
                <!DOCTYPE el0 SYSTEM "none" [
                ''' + attlist_decls + child_decls + '''
                ''' + root_decl + '''
                ]>
                <el0><el1 %s /></el0>
                ''' % el1_attrs)

            root = etree.XML(_bytes(document))
            internal_dtd = etree.ElementTree(root).docinfo.internalDTD
            self.assertTrue(internal_dtd)
            internal_dtd.assertValid(root)

            # The "last index" counters start at -1 so that an empty
            # iteration is reported as a count of zero.
            last_el_index = -1
            for last_el_index, el_decl in enumerate(
                    internal_dtd.iterelements()):
                self.assertEqual(attr_count, len(el_decl.attributes()))
                last_attr_index = -1
                for last_attr_index, attr_decl in enumerate(
                        el_decl.iterattributes()):
                    self.assertEqual('enumeration', attr_decl.type)
                    self.assertEqual('none', attr_decl.default)
                    self.assertEqual('z', attr_decl.default_value)
                    allowed = sorted(attr_decl.values())
                    self.assertEqual(['x', 'y', 'z'], allowed)
                self.assertEqual(attr_count - 1, last_attr_index)
            self.assertEqual(el_count - 1, last_el_index)
            self.assertEqual(el_count, len(internal_dtd.elements()))
213
def test_dtd_broken(self):
    # Constructing a DTD from this declaration is expected to raise
    # DTDParseError.
    broken_source = BytesIO("<!ELEMENT b HONKEY>")
    self.assertRaises(etree.DTDParseError, etree.DTD, broken_source)
217
def test_parse_file_dtd(self):
    # With attribute_defaults=True, the parsed root of test.xml and its
    # first child are expected to expose a "default" attribute value.
    defaults_parser = etree.XMLParser(attribute_defaults=True)
    document_root = etree.parse(
        fileInTestDir('test.xml'), defaults_parser).getroot()

    self.assertEqual("valueA", document_root.get("default"))
    first_child = document_root[0]
    self.assertEqual("valueB", first_child.get("default"))
230 231 @skipIf(etree.LIBXML_VERSION == (2, 9, 0), 232 "DTD loading is broken for incremental parsing in libxml2 2.9.0")
234 iterparse = etree.iterparse 235 iterator = iterparse(fileInTestDir("test.xml"), events=('start',), 236 attribute_defaults=True) 237 attributes = [ element.get("default") 238 for event, element in iterator ] 239 self.assertEqual( 240 ["valueA", "valueB"], 241 attributes)
242 243 @skipIf(etree.LIBXML_VERSION == (2, 9, 0), 244 "DTD loading is broken for incremental parsing in libxml2 2.9.0")
246 iterparse = etree.iterparse 247 iterator = iterparse(fileInTestDir("test.xml"), events=('end',), 248 attribute_defaults=True) 249 attributes = [ element.get("default") 250 for event, element in iterator ] 251 self.assertEqual( 252 ["valueB", "valueA"], 253 attributes)
254
def test_dtd_attrs(self):
    external_dtd = etree.DTD(fileUrlInTestDir("test.dtd"))

    # The DTD remembers the URL it was loaded from.
    self.assertTrue(external_dtd.system_url.endswith("test.dtd"))

    # First element declaration: <a>, whose content is a single <b>.
    decl_a = external_dtd.elements()[0]
    self.assertEqual(decl_a.name, "a")
    self.assertEqual(decl_a.type, "element")
    content_a = decl_a.content
    self.assertEqual(content_a.name, "b")
    self.assertEqual(content_a.type, "element")
    self.assertEqual(content_a.occur, "once")

    # First attribute declaration of <a>: an enumeration.
    attr_default = decl_a.attributes()[0]
    self.assertEqual(attr_default.name, "default")
    self.assertEqual(attr_default.type, "enumeration")
    self.assertEqual(attr_default.values(), ["valueA", "valueB"])
    self.assertEqual(attr_default.default_value, "valueA")

    # Second element declaration: <b>, declared empty.
    decl_b = external_dtd.elements()[1]
    self.assertEqual(decl_b.name, "b")
    self.assertEqual(decl_b.type, "empty")
    self.assertEqual(decl_b.content, None)

    # First entity declaration.
    entity_c = external_dtd.entities()[0]
    self.assertEqual(entity_c.name, "c")
    self.assertEqual(entity_c.orig, "&#42;")
    self.assertEqual(entity_c.content, "*")

    # The name of an internal DTD is taken from the DOCTYPE.
    internal_doc = _bytes('''
        <!DOCTYPE a SYSTEM "none" [
        <!ELEMENT a EMPTY>
        ]>
        <a/>
        ''')
    internal_root = etree.XML(internal_doc)
    internal_dtd = etree.ElementTree(internal_root).docinfo.internalDTD
    self.assertEqual(internal_dtd.name, "a")

    # Name and system URL of a DTD referenced through a relative
    # SYSTEM identifier, resolved against base_url.
    validating_parser = etree.XMLParser(dtd_validation=True)
    relative_doc = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>'
    relative_root = etree.fromstring(
        relative_doc, parser=validating_parser,
        base_url=fileUrlInTestDir("test.xml"))

    referenced_dtd = relative_root.getroottree().docinfo.internalDTD
    self.assertEqual(referenced_dtd.name, "a")
    self.assertEqual(referenced_dtd.system_url, "test.dtd")
305
307 # Standard allows quotes in systemliteral, but in that case 308 # systemliteral must be escaped with single quotes. 309 # See http://www.w3.org/TR/REC-xml/#sec-prolog-dtd. 310 root = etree.XML('''<!DOCTYPE a PUBLIC 'foo' '"'><a/>''') 311 doc = root.getroottree() 312 self.assertEqual(doc.docinfo.doctype, 313 '''<!DOCTYPE a PUBLIC "foo" '"'>''') 314 self.assertEqual(etree.tostring(doc), 315 _bytes('''<!DOCTYPE a PUBLIC "foo" '"'>\n<a/>'''))
316
318 root = etree.XML('''<!DOCTYPE a SYSTEM '"'><a/>''') 319 doc = root.getroottree() 320 self.assertEqual(doc.docinfo.doctype, '''<!DOCTYPE a SYSTEM '"'>''') 321 self.assertEqual(etree.tostring(doc), 322 _bytes('''<!DOCTYPE a SYSTEM '"'>\n<a/>'''))
323
def test_declaration_apos(self):
    # A system literal containing an apostrophe is expected to stay
    # wrapped in double quotes in both docinfo.doctype and the
    # serialised output.
    expected_doctype = '''<!DOCTYPE a SYSTEM "'">'''
    tree = etree.XML('''<!DOCTYPE a SYSTEM "'"><a/>''').getroottree()
    self.assertEqual(tree.docinfo.doctype, expected_doctype)
    serialised = etree.tostring(tree)
    self.assertEqual(serialised,
                     _bytes('''<!DOCTYPE a SYSTEM "'">\n<a/>'''))
330
def test_ietf_decl(self):
    # An HTML document with a PUBLIC-only doctype is expected to
    # serialise back to the input; docinfo reports the root name in
    # lower case.
    html_data = '\n'.join([
        '<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">',
        '<html></html>',
    ])
    tree = etree.HTML(html_data).getroottree()
    self.assertEqual(tree.docinfo.doctype,
                     '<!DOCTYPE html PUBLIC "-//IETF//DTD HTML//EN">')
    serialised = etree.tostring(tree, method='html')
    self.assertEqual(serialised, _bytes(html_data))
340
def test_set_decl_public(self):
    # Assigning both public_id and system_url is expected to produce a
    # PUBLIC doctype in docinfo and in the serialised output.
    tree = etree.Element('test').getroottree()
    info = tree.docinfo
    info.public_id = 'bar'
    info.system_url = 'baz'
    self.assertEqual(tree.docinfo.doctype,
                     '<!DOCTYPE test PUBLIC "bar" "baz">')
    expected_output = _bytes('<!DOCTYPE test PUBLIC "bar" "baz">\n<test/>')
    self.assertEqual(etree.tostring(tree), expected_output)
349
def test_html_decl(self):
    # Same checks as for a plain etree element, but starting from an
    # element created through the html module, which does not begin
    # with a blank slate.
    tree = html.Element('html').getroottree()
    info = tree.docinfo
    info.public_id = 'bar'
    info.system_url = 'baz'
    self.assertEqual(tree.docinfo.doctype,
                     '<!DOCTYPE html PUBLIC "bar" "baz">')
    expected_output = _bytes('<!DOCTYPE html PUBLIC "bar" "baz">\n<html/>')
    self.assertEqual(etree.tostring(tree), expected_output)
360
def test_clean_doctype(self):
    # The tree of an html.Element is expected to report a non-empty
    # doctype, and docinfo.clear() is expected to reset it to ''.
    doc = html.Element('html').getroottree()
    # assertNotEqual/assertEqual include the actual doctype in the failure
    # message, which assertTrue on a pre-evaluated comparison cannot do.
    self.assertNotEqual(doc.docinfo.doctype, '')
    doc.docinfo.clear()
    self.assertEqual(doc.docinfo.doctype, '')
366
def test_set_decl_system(self):
    # Assigning only system_url is expected to produce a SYSTEM doctype
    # in docinfo and in the serialised output.
    tree = etree.Element('test').getroottree()
    tree.docinfo.system_url = 'baz'
    reported_doctype = tree.docinfo.doctype
    self.assertEqual(reported_doctype, '<!DOCTYPE test SYSTEM "baz">')
    expected_output = _bytes('<!DOCTYPE test SYSTEM "baz">\n<test/>')
    self.assertEqual(etree.tostring(tree), expected_output)
374
def test_empty_decl(self):
    # Assigning None to public_id is expected to yield a bare doctype
    # that carries neither a public ID nor a system URL.
    tree = etree.Element('test').getroottree()
    tree.docinfo.public_id = None
    info = tree.docinfo
    self.assertEqual(info.doctype, '<!DOCTYPE test>')
    self.assertTrue(tree.docinfo.public_id is None)
    self.assertTrue(tree.docinfo.system_url is None)
    expected_output = _bytes('<!DOCTYPE test>\n<test/>')
    self.assertEqual(etree.tostring(tree), expected_output)
384
def test_invalid_decl_1(self):
    # Assigning either of these values to public_id is expected to raise
    # ValueError. setattr() performs the same attribute assignment as a
    # small setter helper would.
    docinfo = etree.Element('test').getroottree().docinfo

    for rejected_id in (_str('ä'), _str('qwerty ä asdf')):
        self.assertRaises(
            ValueError, setattr, docinfo, 'public_id', rejected_id)
392
def test_invalid_decl_2(self):
    # A system URL containing both quote characters is expected to be
    # rejected with ValueError. setattr() performs the same attribute
    # assignment as a small setter helper would.
    docinfo = etree.Element('test').getroottree().docinfo

    for rejected_url in ('\'"', '"\'', ' " \' '):
        self.assertRaises(
            ValueError, setattr, docinfo, 'system_url', rejected_url)
def test_comment_before_dtd(self):
    # Comments placed before and after the DOCTYPE are expected to
    # survive a parse/serialise round trip unchanged.
    data = '<!--comment--><!DOCTYPE test>\n<!-- --><test/>'
    tree = etree.fromstring(data).getroottree()
    round_tripped = etree.tostring(tree)
    self.assertEqual(round_tripped, _bytes(data))
407
def test_suite():
    """Return a suite with the DTD test case and the validation doctests."""
    suite = unittest.TestSuite()
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in
    # 3.13; loading through the default loader collects the same tests.
    loader = unittest.defaultTestLoader
    suite.addTests([loader.loadTestsFromTestCase(ETreeDtdTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/validation.txt')])
    return suite
if __name__ == '__main__':
    # Running the module directly only prints a hint pointing at test.py.
    usage_hint = 'to test use test.py %s' % __file__
    print(usage_hint)