lxml.html.defs
Package lxml :: Package html :: Module defs
[hide private]
[frames | no frames]

Source Code for Module lxml.html.defs

  1  # FIXME: this should all be confirmed against what a DTD says 
  2  # (probably in a test; this may not match the DTD exactly, but we 
  3  # should document just how it differs). 
  4   
  5  # Data taken from http://www.w3.org/TR/html401/index/elements.html 
  6  # and http://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements 
  7  # for html5_tags. 
  8   
  9  empty_tags = frozenset([ 
 10      'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 
 11      'img', 'input', 'isindex', 'link', 'meta', 'param']) 
 12   
 13  deprecated_tags = frozenset([ 
 14      'applet', 'basefont', 'center', 'dir', 'font', 'isindex', 
 15      'menu', 's', 'strike', 'u']) 
 16   
 17  # archive actually takes a space-separated list of URIs 
 18  link_attrs = frozenset([ 
 19      'action', 'archive', 'background', 'cite', 'classid', 
 20      'codebase', 'data', 'href', 'longdesc', 'profile', 'src', 
 21      'usemap', 
 22      # Not standard: 
 23      'dynsrc', 'lowsrc', 
 24      ]) 
 25   
 26  # Not in the HTML 4 spec: 
 27  # onerror, onresize 
 28  event_attrs = frozenset([ 
 29      'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror', 
 30      'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', 
 31      'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', 
 32      'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit', 
 33      'onunload', 
 34      ]) 
 35   
 36  safe_attrs = frozenset([ 
 37      'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 
 38      'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', 
 39      'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', 
 40      'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', 
 41      'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', 
 42      'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', 
 43      'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', 
 44      'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 
 45      'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', 
 46      'type', 'usemap', 'valign', 'value', 'vspace', 'width']) 
 47   
 48  # From http://htmlhelp.com/reference/html40/olist.html 
 49  top_level_tags = frozenset([ 
 50      'html', 'head', 'body', 'frameset', 
 51      ]) 
 52   
 53  head_tags = frozenset([ 
 54      'base', 'isindex', 'link', 'meta', 'script', 'style', 'title', 
 55      ]) 
 56   
 57  general_block_tags = frozenset([ 
 58      'address', 
 59      'blockquote', 
 60      'center', 
 61      'del', 
 62      'div', 
 63      'h1', 
 64      'h2', 
 65      'h3', 
 66      'h4', 
 67      'h5', 
 68      'h6', 
 69      'hr', 
 70      'ins', 
 71      'isindex', 
 72      'noscript', 
 73      'p', 
 74      'pre', 
 75      ]) 
 76   
 77  list_tags = frozenset([ 
 78      'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul', 
 79      ]) 
 80   
 81  table_tags = frozenset([ 
 82      'table', 'caption', 'colgroup', 'col', 
 83      'thead', 'tfoot', 'tbody', 'tr', 'td', 'th', 
 84      ]) 
 85   
 86  # just this one from 
 87  # http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm 
 88  block_tags = general_block_tags | list_tags | table_tags | frozenset([ 
 89      # Partial form tags 
 90      'fieldset', 'form', 'legend', 'optgroup', 'option', 
 91      ]) 
 92   
 93  form_tags = frozenset([ 
 94      'form', 'button', 'fieldset', 'legend', 'input', 'label', 
 95      'select', 'optgroup', 'option', 'textarea', 
 96      ]) 
 97   
 98  special_inline_tags = frozenset([ 
 99      'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe', 
100      'img', 'map', 'area', 'object', 'param', 'q', 'script', 
101      'span', 'sub', 'sup', 
102      ]) 
103   
# Phrase elements.  ``del`` and ``ins`` also appear in
# ``general_block_tags``.
phrase_tags = frozenset((
    'abbr', 'acronym',
    'cite', 'code',
    'del', 'dfn',
    'em',
    'ins',
    'kbd',
    'samp', 'strong',
    'var',
))
108   
# Purely presentational font-style elements.
font_style_tags = frozenset((
    'b',
    'big',
    'i',
    's',
    'small',
    'strike',
    'tt',
    'u',
))
112   
# Frame-related elements.
frame_tags = frozenset(('frameset', 'frame', 'noframes'))
116       
# Elements introduced by HTML5 (see the "New HTML5 Elements" wiki page
# referenced at the top of this module), in alphabetical order.
html5_tags = frozenset((
    'article', 'aside', 'audio',
    'canvas', 'command',
    'datalist', 'details',
    'embed',
    'figcaption', 'figure', 'footer',
    'header', 'hgroup',
    'keygen',
    'mark', 'math', 'meter',
    'nav',
    'output',
    'progress',
    'rp', 'rt', 'ruby',
    'section', 'source', 'summary', 'svg',
    'time', 'track',
    'video',
    'wbr',
))
124   
# Elements that are not part of any HTML standard.
nonstandard_tags = frozenset((
    'blink',
    'marquee',
))
127   
128   
# Union of the individual element sets defined in this module.
# NOTE(review): ``frame_tags`` is not one of the operands, so 'frame' and
# 'noframes' are absent from ``tags`` ('frameset' is still present via
# ``top_level_tags``) -- confirm whether that omission is intentional.
tags = top_level_tags.union(
    head_tags,
    general_block_tags,
    list_tags,
    table_tags,
    form_tags,
    special_inline_tags,
    phrase_tags,
    font_style_tags,
    nonstandard_tags,
    html5_tags,
)
132