lxml.sax
1 """
2 SAX-based adapter to copy trees from/to the Python standard library.
3
4 Use the `ElementTreeContentHandler` class to build an ElementTree from
5 SAX events.
6
7 Use the `ElementTreeProducer` class or the `saxify()` function to fire
8 the SAX events of an ElementTree against a SAX ContentHandler.
9
10 See http://codespeak.net/lxml/sax.html
11 """
12
13 from xml . sax . handler import ContentHandler
14 from lxml import etree
15 from lxml . etree import ElementTree , SubElement
16 from lxml . etree import Comment , ProcessingInstruction
17
18
20 """General SAX error.
21 """
22
23
def _getNsTag(tag):
    """Split a tag name in ``{namespace}local`` notation into its parts.

    Returns a ``(namespace_uri, local_name)`` tuple.  A tag that does not
    start with ``{`` (including the empty string) has no namespace and is
    returned as ``(None, tag)``.

    Raises ``ValueError`` when the tag opens a namespace with ``{`` but
    never closes it, instead of handing a malformed one-element tuple to
    callers that unpack two values.
    """
    # Slicing (rather than indexing) keeps an empty tag from raising.
    if tag[:1] != '{':
        return (None, tag)
    # Only the first '}' terminates the namespace part.
    ns_uri, closing_brace, local_name = tag[1:].partition('}')
    if not closing_brace:
        raise ValueError("Invalid tag name, missing '}': %r" % (tag,))
    return (ns_uri, local_name)
29
30
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed SAX events into an instance (directly or through a SAX parser)
    and read the result from the ``etree`` property afterwards.

    ``makeelement`` is an optional factory used to create the root
    element; it is called as ``makeelement(tag, attrib, nsmap)`` and
    defaults to ``etree.Element``.  Child elements are always created
    with ``SubElement``.
    """

    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions seen before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the None entry tracks the default namespace.
        self._ns_mapping = {None: [None]}
        # Mappings declared since the last start-element event.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    # NOTE(review): the one-line bodies of the next three methods are not
    # visible in the reviewed extract; they are restored as no-ops --
    # confirm against the upstream file.
    def setDocumentLocator(self, locator):
        pass

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the next element to be started."""
        self._new_mappings[prefix] = uri
        self._ns_mapping.setdefault(prefix, []).append(uri)
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost mapping for ``prefix``.

        For the default namespace (``prefix is None``) the previously
        active default becomes current again.
        """
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{uri}local`` tag for a ``(uri, local_name)`` tuple.

        A name without its own namespace is placed into the current
        default namespace, if there is one.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            return "{%s}%s" % ns_name_tuple
        if self._default_ns:
            return "{%s}%s" % (self._default_ns, local_name)
        return local_name

    def startElementNS(self, ns_name, qname, attributes=None):
        """Create a new element and make it the innermost open element.

        ``ns_name`` is a ``(uri, local_name)`` tuple; ``attributes`` is an
        optional mapping from such tuples to attribute values.  The first
        element becomes the root and is built by the ``makeelement``
        factory; all later ones are appended to the innermost open element.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            for name_tuple, value in attributes.items():
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        # Hand out a copy: the pending mappings are cleared below, and a
        # custom factory may keep a reference to the dict it receives.
        nsmap = dict(self._new_mappings)

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = self._makeelement(el_name, attrs, nsmap)
            if self._root_siblings and hasattr(element, 'addprevious'):
                # Re-attach processing instructions that preceded the root.
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name, attrs, nsmap)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element exists the PI is queued and attached in
        front of the root once it is created.  Inside an open element it
        is appended to that element.  After the root element has been
        closed it is attached behind the root (and behind any siblings
        already following it) when the root supports ``addnext()``.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        elif hasattr(self._root, 'addnext'):
            # The document element is already closed: indexing the empty
            # stack would raise IndexError, so add a trailing sibling
            # instead, keeping document order among several trailing PIs.
            last_sibling = self._root
            following = last_sibling.getnext()
            while following is not None:
                last_sibling = following
                following = last_sibling.getnext()
            last_sibling.addnext(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises ``SaxError`` if the closed name does not match the tag of
        the element that is currently open.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        """Namespace-unaware variant: forwards to ``startElementNS``."""
        if attributes:
            attributes = {(None, k): v for k, v in attributes.items()}
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        """Namespace-unaware variant: forwards to ``endElementNS``."""
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append character data to the innermost open element."""
        parent = self._element_stack[-1]
        try:
            last_child = parent[-1]
        except IndexError:
            # No child yet: the data belongs to the element's own text.
            parent.text = (parent.text or '') + data
        else:
            # Data after a child element is stored as that child's tail.
            last_child.tail = (last_child.tail or '') + data

    ignorableWhitespace = characters
153
154
156 """Produces SAX events for an element and children.
157 """
def __init__(self, element_or_tree, content_handler):
    """Remember what to serialise and where to send the SAX events.

    ``element_or_tree`` may be a tree object offering ``getroot()`` or
    an element; anything without a usable ``getroot()`` is taken to be
    the element itself.  ``content_handler`` receives the events.
    """
    from xml.sax.xmlreader import AttributesNSImpl

    try:
        root = element_or_tree.getroot()
    except AttributeError:
        root = element_or_tree

    self._content_handler = content_handler
    self._element = root
    self._attr_class = AttributesNSImpl
    # Shared attribute set for elements that carry no attributes.
    self._empty_attributes = AttributesNSImpl({}, {})
168
191
def _recursive_saxify(self, element, prefixes):
    """Fire the SAX events for ``element`` and everything below it.

    ``prefixes`` maps namespace URIs to the prefixes that are currently
    in scope.  Prefixes introduced for this element are announced with
    ``startPrefixMapping`` before the element starts, and are ended and
    removed from ``prefixes`` again after it ends.

    Comments produce no event of their own; processing instructions are
    reported through ``processingInstruction``.  For both, only the tail
    text is emitted as character data.
    """
    # NOTE(review): the ``def`` line is missing from the reviewed extract;
    # the signature is reconstructed from the recursive call below.
    content_handler = self._content_handler
    tag = element.tag
    if tag is Comment or tag is ProcessingInstruction:
        if tag is ProcessingInstruction:
            content_handler.processingInstruction(
                element.target, element.text)
        if element.tail:
            content_handler.characters(element.tail)
        return

    new_prefixes = []
    build_qname = self._build_qname
    attribs = element.items()
    if attribs:
        attr_values = {}
        attr_qnames = {}
        for attr_ns_name, value in attribs:
            attr_ns_tuple = _getNsTag(attr_ns_name)
            attr_values[attr_ns_tuple] = value
            attr_qnames[attr_ns_tuple] = build_qname(
                attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
        sax_attributes = self._attr_class(attr_values, attr_qnames)
    else:
        sax_attributes = self._empty_attributes

    ns_uri, local_name = _getNsTag(tag)
    qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

    for prefix, uri in new_prefixes:
        content_handler.startPrefixMapping(prefix, uri)
    content_handler.startElementNS((ns_uri, local_name),
                                   qname, sax_attributes)
    if element.text:
        content_handler.characters(element.text)
    for child in element:
        self._recursive_saxify(child, prefixes)
    content_handler.endElementNS((ns_uri, local_name), qname)
    for prefix, uri in new_prefixes:
        content_handler.endPrefixMapping(prefix)
        # The mapping is out of scope from here on.  Forget it, so that a
        # later element using the same namespace declares it again rather
        # than silently reusing a prefix whose mapping was already ended.
        prefixes.pop(uri, None)
    if element.tail:
        content_handler.characters(element.tail)
234
def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
    """Return the qualified name to report for ``(ns_uri, local_name)``.

    Names without a namespace are returned unchanged.  Otherwise the
    prefix registered for ``ns_uri`` in ``prefixes`` is used; if there
    is none, a new ``nsNN`` prefix is generated, stored in ``prefixes``
    and recorded in ``new_prefixes`` as a ``(prefix, ns_uri)`` pair.
    """
    if ns_uri is None:
        return local_name

    if ns_uri in prefixes:
        prefix = prefixes[ns_uri]
    else:
        # Number the prefix by how many mappings are already known.
        prefix = 'ns%02d' % len(prefixes)
        prefixes[ns_uri] = prefix
        new_prefixes.append((prefix, ns_uri))
    return prefix + ':' + local_name
244
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that creates an ``ElementTreeProducer`` for the
    given element or tree and runs it once, returning whatever its
    ``saxify()`` method returns.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
250