"""
lxml-based doctest output comparison.

Note: normally, you should just import the `lxml.usedoctest` and
`lxml.html.usedoctest` modules from within a doctest, instead of this
one::

    >>> import lxml.usedoctest # for XML output

    >>> import lxml.html.usedoctest # for HTML output

To use this module directly, you must call ``lxmldoctest.install()``,
which will cause doctest to use this in all subsequent calls.

This changes the way output is checked and comparisons are made for
XML or HTML-like content.

XML or HTML content is noticed because the example starts with ``<``
(it's HTML if it starts with ``<html``).  You can also use the
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.

Some rough wildcard-like things are allowed.  Whitespace is generally
ignored (except in attributes).  In text (attributes and text in the
body) you can use ``...`` as a wildcard.  In an example it also
matches any trailing tags in the element, though it does not match
leading tags.  You may create a tag ``<any>`` or include an ``any``
attribute in the tag.  An ``any`` tag matches any tag, while the
attribute matches any and all attributes.

When a match fails, the reformatted example and gotten text are
displayed (indented), and a rough diff-like output is given.  Anything
marked with ``+`` is in the output but wasn't supposed to be, and
similarly ``-`` means it's in the example but wasn't in the output.

You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
"""

from lxml import etree
import sys
import re
import doctest
try:
    # Prefer html.escape; fall back to cgi.escape where the html module
    # cannot be imported.
    from html import escape as html_escape
except ImportError:
    from cgi import escape as html_escape

__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
           'LHTMLOutputChecker', 'install', 'temp_install']

# "String-like" types for isinstance() checks: the ``basestring`` builtin
# when the interpreter defines it, otherwise the pair (str, bytes).
try:
    _basestring = basestring
except NameError:
    _basestring = (str, bytes)

# True when running on major version 3 or later.
_IS_PYTHON_3 = sys.version_info[0] >= 3

# Extra doctest option flags provided by this module.  They are registered
# in this order; the flag values come from doctest.register_optionflag().
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Module-level reference to doctest's OutputChecker class, bound once at
# import time.
OutputChecker = doctest.OutputChecker

76 77# We use this to distinguish repr()s from elements: 78_repr_re=re.compile(r'^<[^>]+ (at|object) ') 79_norm_whitespace_re=re.compile(r'[ \t\n][ \t\n]+') 80
175ifwant=='any':176returnTrue177if(notisinstance(want,_basestring)178ornotisinstance(got,_basestring)):179returnwant==got180want=wantor''181got=gotor''182ifwant.startswith('{...}'):183# Ellipsis on the namespace184returnwant.split('}')[-1]==got.split('}')[-1]185else:186returnwant==got
222ifnothtml:223returnFalse224ifel.tagnotinself.empty_tags:225returnFalse226ifel.textorlen(el):227# This shouldn't happen (contents in an empty tag)228returnFalse229returnTrue
274attrs=[]275ifisinstance(el,etree.CommentBase):276# FIXME: probably PIs should be handled specially too?277return'<!--'278forname,valueinsorted(el.attrib.items()):279attrs.append('%s="%s"'%(name,self.format_text(value,False)))280ifnotattrs:281return'<%s>'%el.tag282return'<%s %s>'%(el.tag,' '.join(attrs))
375"""376 Install doctestcompare for all future doctests.377378 If html is true, then by default the HTML parser will be used;379 otherwise the XML parser is used.380 """381ifhtml:382doctest.OutputChecker=LHTMLOutputChecker383else:384doctest.OutputChecker=LXMLOutputChecker
387"""388 Use this *inside* a doctest to enable this checker for this389 doctest only.390391 If html is true, then by default the HTML parser will be used;392 otherwise the XML parser is used.393 """394ifhtml:395Checker=LHTMLOutputChecker396else:397Checker=LXMLOutputChecker398frame=_find_doctest_frame()399dt_self=frame.f_locals['self']400checker=Checker()401old_checker=dt_self._checker402dt_self._checker=checker403# The unfortunate thing is that there is a local variable 'check'404# in the function that runs the doctests, that is a bound method405# into the output checker. We have to update that. We can't406# modify the frame, so we have to modify the object in place. The407# only way to do this is to actually change the func_code408# attribute of the method. We change it, and then wait for409# __record_outcome to be run, which signals the end of the __run410# method, at which point we restore the previous check_output411# implementation.412if_IS_PYTHON_3:413check_func=frame.f_locals['check'].__func__414checker_check_func=checker.check_output.__func__415else:416check_func=frame.f_locals['check'].im_func417checker_check_func=checker.check_output.im_func418# Because we can't patch up func_globals, this is the only global419# in check_output that we care about:420doctest.etree=etree421_RestoreChecker(dt_self,old_checker,checker,422check_func,checker_check_func,423del_module)
480importsys481frame=sys._getframe(1)482whileframe:483l=frame.f_locals484if'BOOM'inl:485# Sign of doctest486returnframe487frame=frame.f_back488raiseLookupError(489"Could not find doctest (only use this function *inside* a doctest)")