lxml.sax
Package lxml :: Module sax
[hide private]
[frames] | [no frames]

Source Code for Module lxml.sax

  1  """ 
  2  SAX-based adapter to copy trees from/to the Python standard library. 
  3   
  4  Use the `ElementTreeContentHandler` class to build an ElementTree from 
  5  SAX events. 
  6   
  7  Use the `ElementTreeProducer` class or the `saxify()` function to fire 
  8  the SAX events of an ElementTree against a SAX ContentHandler. 
  9   
 10  See http://codespeak.net/lxml/sax.html 
 11  """ 
 12   
 13  from xml.sax.handler import ContentHandler 
 14  from lxml import etree 
 15  from lxml.etree import ElementTree, SubElement 
 16  from lxml.etree import Comment, ProcessingInstruction 
 17   
 18   
class SaxError(etree.LxmlError):
    """General error raised by the SAX adapter code in this module."""
22 23
def _getNsTag(tag):
    """Split a tag name into a namespace part and a local name.

    A tag that starts with "{" (Clark notation, "{uri}local") is split at
    the first "}" and the pieces are returned as a tuple, normally
    ``(uri, local)``.  Any other tag is returned as ``(None, tag)``.
    """
    if tag[0] != '{':
        # plain, non-namespaced name
        return (None, tag)
    clark_parts = tag[1:].split('}', 1)
    return tuple(clark_parts)
29 30
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed the handler with SAX events (either the namespace-aware
    ``startElementNS``/``endElementNS`` pair or the plain
    ``startElement``/``endElement`` pair) and read the result from the
    ``etree`` property afterwards.

    Processing instructions reported before the root element is started are
    attached as preceding siblings of the root; those reported after the
    root element has been closed are attached as following siblings
    (provided the root element supports ``addprevious`` / ``addnext``).
    """

    def __init__(self, makeelement=None):
        """Set up an empty builder.

        ``makeelement`` is the factory used to create the root element.  It
        is called as ``makeelement(tag, attrib, nsmap)`` and defaults to
        ``etree.Element``.
        """
        ContentHandler.__init__(self)
        self._root = None
        # processing instructions seen before the root element exists
        self._root_siblings = []
        # currently open elements, innermost last
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs bound to it, most recent binding last
        self._ns_mapping = {None: [None]}
        # prefix mappings declared since the last element was started
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        """Accept and ignore the document locator."""

    def startDocument(self):
        """Accept and ignore the start-of-document event."""

    def endDocument(self):
        """Accept and ignore the end-of-document event."""

    def startPrefixMapping(self, prefix, uri):
        """Bind ``prefix`` to ``uri``; a ``None`` prefix sets the default
        namespace.

        The mapping is also remembered so that it can be passed as ``nsmap``
        to the next element that gets created.
        """
        self._new_mappings[prefix] = uri
        self._ns_mapping.setdefault(prefix, []).append(uri)
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the most recent binding of ``prefix``.

        Raises ``KeyError`` if the prefix was never mapped.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # fall back to the previously active default namespace
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ElementTree tag for a SAX ``(ns_uri, local_name)`` pair.

        A name without a namespace is put into the current default
        namespace, if one is set.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            return "{%s}%s" % (ns_uri, local_name)
        if self._default_ns:
            return "{%s}%s" % (self._default_ns, local_name)
        return local_name

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and make it the innermost open element.

        ``ns_name`` is the ``(ns_uri, local_name)`` pair of the element and
        ``attributes`` maps ``(ns_uri, local_name)`` pairs to values.
        ``qname`` is not used.  The first element started becomes the root;
        later ones become children of the innermost open element.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            try:
                iter_attributes = attributes.iteritems()
            except AttributeError:
                iter_attributes = attributes.items()

            for name_tuple, value in iter_attributes:
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        if self._root is None:
            element = self._root = self._makeelement(
                el_name, attrs, self._new_mappings)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # processing instructions that preceded the root element
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(self._element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        self._element_stack.append(element)

        # the pending mappings now belong to the element just created
        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists, the instruction is kept and later
        attached in front of the root.  Inside an open element it is
        appended to that element.  After the root element has been closed
        it is attached behind the root, keeping the order of arrival.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        elif hasattr(self._root, 'addnext'):
            # The root element is closed already, so there is no open
            # element to append to.  Walk to the last node following the
            # root so that several trailing instructions stay in order.
            last = self._root
            following = last.getnext()
            while following is not None:
                last = following
                following = last.getnext()
            last.addnext(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises ``SaxError`` if ``ns_name`` does not match the tag of the
        element that is being closed.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Non-namespace variant of ``startElementNS``.

        Element and attribute names are passed on without a namespace.
        """
        if attributes:
            attributes = dict(
                ((None, k), v) for k, v in attributes.items())
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Non-namespace variant of ``endElementNS``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters
153 154
class ElementTreeProducer(object):
    """Produces SAX events for an element and children.

    Namespace prefixes are generated ("ns00", "ns01", ...) and announced
    through ``startPrefixMapping`` / ``endPrefixMapping`` around the element
    that first needs them.
    """

    def __init__(self, element_or_tree, content_handler):
        """Remember the element to serialise and the handler to call.

        ``element_or_tree`` may be an element or anything with a
        ``getroot()`` method; ``content_handler`` receives the SAX events.
        """
        try:
            element = element_or_tree.getroot()
        except AttributeError:
            element = element_or_tree
        self._element = element
        self._content_handler = content_handler
        from xml.sax.xmlreader import AttributesNSImpl as attr_class
        self._attr_class = attr_class
        # shared attributes object for elements without attributes
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the complete event sequence against the content handler.

        Processing instructions that are direct siblings of the element are
        reported before and after it, in document order.
        """
        self._content_handler.startDocument()

        element = self._element
        if hasattr(element, 'getprevious'):
            siblings = []
            sibling = element.getprevious()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                siblings.append(sibling)
                sibling = sibling.getprevious()
            # collected walking backwards, so replay in reverse
            for sibling in reversed(siblings):
                self._recursive_saxify(sibling, {})

        self._recursive_saxify(element, {})

        if hasattr(element, 'getnext'):
            sibling = element.getnext()
            while getattr(sibling, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(sibling, {})
                sibling = sibling.getnext()

        self._content_handler.endDocument()

    def _recursive_saxify(self, element, prefixes):
        """Fire the SAX events for ``element`` and its subtree.

        ``prefixes`` maps the namespace URIs that are currently in scope to
        their generated prefixes.  Mappings introduced by this element are
        added before its start event and removed again once their
        ``endPrefixMapping`` event has been fired, so that a later element
        using the same namespace declares it again.

        Comments produce no event of their own; only their tail text is
        reported.
        """
        content_handler = self._content_handler
        tag = element.tag
        if tag is Comment or tag is ProcessingInstruction:
            if tag is ProcessingInstruction:
                content_handler.processingInstruction(
                    element.target, element.text)
            if element.tail:
                content_handler.characters(element.tail)
            return

        # (prefix, uri) pairs that this element has to declare
        new_prefixes = []
        build_qname = self._build_qname
        attribs = element.items()
        if attribs:
            attr_values = {}
            attr_qnames = {}
            for attr_ns_name, value in attribs:
                attr_ns_tuple = _getNsTag(attr_ns_name)
                attr_values[attr_ns_tuple] = value
                attr_qnames[attr_ns_tuple] = build_qname(
                    attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
            sax_attributes = self._attr_class(attr_values, attr_qnames)
        else:
            sax_attributes = self._empty_attributes

        ns_uri, local_name = _getNsTag(tag)
        qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

        for prefix, uri in new_prefixes:
            content_handler.startPrefixMapping(prefix, uri)
        content_handler.startElementNS((ns_uri, local_name),
                                       qname, sax_attributes)
        if element.text:
            content_handler.characters(element.text)
        for child in element:
            self._recursive_saxify(child, prefixes)
        content_handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in new_prefixes:
            content_handler.endPrefixMapping(prefix)
            # The mapping is out of scope now.  Forget it, otherwise a
            # following element would use the prefix without declaring it.
            del prefixes[uri]
        if element.tail:
            content_handler.characters(element.tail)

    def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
        """Return the qualified name for a namespace/local-name pair.

        Names without a namespace (``ns_uri`` is None) are returned as they
        are.  Otherwise the prefix registered for ``ns_uri`` in ``prefixes``
        is used; if there is none, a new one is generated, stored in
        ``prefixes`` and appended to ``new_prefixes`` as ``(prefix, uri)``.
        """
        if ns_uri is None:
            return local_name
        try:
            prefix = prefixes[ns_uri]
        except KeyError:
            prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
            new_prefixes.append((prefix, ns_uri))
        return prefix + ':' + local_name
244
def saxify(element_or_tree, content_handler):
    """Convenience wrapper: generate the SAX events of an XML tree and send
    them to a SAX ContentHandler in a single call.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
250