parse-html.coffee

   1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
   2 # Copyright 2015 Jason Woofenden
   3 #
   4 # This program is free software: you can redistribute it and/or modify it under
   5 # the terms of the GNU Affero General Public License as published by the Free
   6 # Software Foundation, either version 3 of the License, or (at your option) any
   7 # later version.
   8 #
   9 # This program is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
  12 # details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17
  18 # This file implements a parser for html snippets, meant to be used by a
  19 # WYSIWYG editor.
  20
  21 # The implementation is a pretty direct implementation of the parsing algorithm
  22 # described here:
  23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
  24 #
  25 # Deviations from that spec:
  26 #
  27 #   Purposeful: search this file for "WTAG"
  28 #
  29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
  30
  31
  32 # stacks/lists
  33 #
  34 # the spec uses a many different words do indicate which ends of lists/stacks
  35 # they are talking about (and relative movement within the lists/stacks). This
  36 # section splains. I'm implementing "lists" (afe and open_els) the same way
  37 # (both as stacks)
  38 #
  39 # stacks grow downward (current element is index=0)
  40 #
  41 # example: open_els = [a, b, c, d, e, f, g]
  42 #
  43 # "grows downwards" means it's visualized like this: (index: el, names)
  44 #
  45 #   6: g "start of the list", "topmost", "first"
  46 #   5: f
  47 #   4: e "previous" (to d), "above", "before"
  48 #   3: d   (previous/next are relative to this element)
  49 #   2: c "next", "after", "lower", "below"
  50 #   1: b
  51 #   0: a "end of the list", "current node", "bottommost", "last"
  52
  53
  54 # browser
  55 # note: to get this to run outside a browser, you'll have to write a native
  56 # implementation of decode_named_char_ref()
  57 unless module?.exports?
  58         window.wheic = {}
  59         module = exports: window.wheic
  60
  61 from_code_point = (x) ->
  62         if String.fromCodePoint?
  63                 return String.fromCodePoint x
  64         else
  65                 if x <= 0xffff
  66                         return String.fromCharCode x
  67                 x -= 0x10000
  68                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
  69
  70 # Each node is an obect of the Node class. Here are the Node types:
  71 TYPE_TAG = 0 # name, {attributes}, [children]
  72 TYPE_TEXT = 1 # "text"
  73 TYPE_COMMENT = 2
  74 TYPE_DOCTYPE = 3
  75 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
  76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
  77 TYPE_END_TAG = 5 # name
  78 TYPE_EOF = 6
  79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
  80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
  81
  82 # namespace constants
  83 NS_HTML = 1
  84 NS_MATHML = 2
  85 NS_SVG = 3
  86
  87 g_debug_log = []
  88 debug_log_reset = ->
  89         g_debug_log = []
  90 debug_log = (str) ->
  91         g_debug_log.push str
  92 debug_log_each = (cb) ->
  93         for str in g_debug_log
  94                 cb str
  95
  96 prev_node_id = 0
  97 class Node
  98         constructor: (type, args = {}) ->
  99                 @type = type # one of the TYPE_* constants above
 100                 @name = args.name ? '' # tag name
 101                 @text = args.text ? '' # contents for text/comment nodes
 102                 @attrs = args.attrs ? {}
 103                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
 104                 @children = args.children ? []
 105                 @namespace = args.namespace ? NS_HTML
 106                 @parent = args.parent ? null
 107                 @token = args.token ? null
 108                 @flags = args.flags ? {}
 109                 if args.id?
 110                         @id = "#{args.id}+"
 111                 else
 112                         @id = "#{++prev_node_id}"
 113         acknowledge_self_closing: ->
 114                 if @token?
 115                         @token.flag 'did_self_close'
 116                 else
 117                         @flag 'did_self_close', true
 118         flag: (key, value = null) ->
 119                 if value?
 120                         @flags[key] = value
 121                 else
 122                         return @flags[key]
 123         serialize: (shallow = false, show_ids = false) -> # for unit tests
 124                 ret = ''
 125                 switch @type
 126                         when TYPE_TAG
 127                                 ret += 'tag:'
 128                                 ret += JSON.stringify @name
 129                                 ret += ','
 130                                 if show_ids
 131                                         ret += "##{@id},"
 132                                 if shallow
 133                                         break
 134                                 attr_keys = []
 135                                 for k of @attrs
 136                                         attr_keys.push k
 137                                 attr_keys.sort()
 138                                 ret += '{'
 139                                 sep = ''
 140                                 for k in attr_keys
 141                                         ret += sep
 142                                         sep = ','
 143                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
 144                                 ret += '},['
 145                                 sep = ''
 146                                 for c in @children
 147                                         ret += sep
 148                                         sep = ','
 149                                         ret += c.serialize shallow, show_ids
 150                                 ret += ']'
 151                         when TYPE_TEXT
 152                                 ret += 'text:'
 153                                 ret += JSON.stringify @text
 154                         when TYPE_COMMENT
 155                                 ret += 'comment:'
 156                                 ret += JSON.stringify @text
 157                         when TYPE_DOCTYPE
 158                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
 159                         when TYPE_AFE_MARKER
 160                                 ret += 'marker'
 161                         when TYPE_AAA_BOOKMARK
 162                                 ret += 'aaa_bookmark'
 163                         else
 164                                 ret += 'unknown:'
 165                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
 166                 return ret
 167
 168 # helpers: (only take args that are normally known when parser creates nodes)
 169 new_open_tag = (name) ->
 170         return new Node TYPE_START_TAG, name: name
 171 new_end_tag = (name) ->
 172         return new Node TYPE_END_TAG, name: name
 173 new_element = (name) ->
 174         return new Node TYPE_TAG, name: name
 175 new_text_node = (txt) ->
 176         return new Node TYPE_TEXT, text: txt
 177 new_character_token = new_text_node
 178 new_comment_token = (txt) ->
 179         return new Node TYPE_COMMENT, text: txt
 180 new_doctype_token = (name) ->
 181         return new Node TYPE_DOCTYPE, name: name
 182 new_eof_token = ->
 183         return new Node TYPE_EOF
 184 new_afe_marker = ->
 185         return new Node TYPE_AFE_MARKER
 186 new_aaa_bookmark = ->
 187         return new Node TYPE_AAA_BOOKMARK
 188
 189 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
 190 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 191 digits = "0123456789"
 192 alnum = lc_alpha + uc_alpha + digits
 193 hex_chars = digits + "abcdefABCDEF"
 194
 195 is_uc_alpha = (str) ->
 196         return str.length is 1 and uc_alpha.indexOf(str) > -1
 197 is_lc_alpha = (str) ->
 198         return str.length is 1 and lc_alpha.indexOf(str) > -1
 199
 200 # some SVG elements have dashes in them
 201 tag_name_chars = alnum + "-"
 202
 203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
 204 space_chars = "\u0009\u000a\u000c\u000d\u0020"
 205 is_space = (txt) ->
 206         return txt.length is 1 and space_chars.indexOf(txt) > -1
 207 is_space_tok = (t) ->
 208         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
 209
 210 is_input_hidden_tok = (t) ->
 211         return false unless t.type is TYPE_START_TAG
 212         for a in t.attrs_a
 213                 if a[0] is 'type'
 214                         if a[1].toLowerCase() is 'hidden'
 215                                 return true
 216                         return false
 217         return false
 218
 219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
 220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
 221
 222 unicode_fixes = {}
 223 unicode_fixes[0x00] = "\uFFFD"
 224 unicode_fixes[0x80] = "\u20AC"
 225 unicode_fixes[0x82] = "\u201A"
 226 unicode_fixes[0x83] = "\u0192"
 227 unicode_fixes[0x84] = "\u201E"
 228 unicode_fixes[0x85] = "\u2026"
 229 unicode_fixes[0x86] = "\u2020"
 230 unicode_fixes[0x87] = "\u2021"
 231 unicode_fixes[0x88] = "\u02C6"
 232 unicode_fixes[0x89] = "\u2030"
 233 unicode_fixes[0x8A] = "\u0160"
 234 unicode_fixes[0x8B] = "\u2039"
 235 unicode_fixes[0x8C] = "\u0152"
 236 unicode_fixes[0x8E] = "\u017D"
 237 unicode_fixes[0x91] = "\u2018"
 238 unicode_fixes[0x92] = "\u2019"
 239 unicode_fixes[0x93] = "\u201C"
 240 unicode_fixes[0x94] = "\u201D"
 241 unicode_fixes[0x95] = "\u2022"
 242 unicode_fixes[0x96] = "\u2013"
 243 unicode_fixes[0x97] = "\u2014"
 244 unicode_fixes[0x98] = "\u02DC"
 245 unicode_fixes[0x99] = "\u2122"
 246 unicode_fixes[0x9A] = "\u0161"
 247 unicode_fixes[0x9B] = "\u203A"
 248 unicode_fixes[0x9C] = "\u0153"
 249 unicode_fixes[0x9E] = "\u017E"
 250 unicode_fixes[0x9F] = "\u0178"
 251
 252 # These are the character references that don't need a terminating semicolon
 253 # min length: 2, max: 6, none are a prefix of any other.
 254 legacy_char_refs = {
 255         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
 256         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
 257         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
 258         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
 259         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
 260         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
 261         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
 262         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
 263         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
 264         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
 265         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
 266         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
 267         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
 268         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
 269         shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
 270         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
 271         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
 272         yen: '¥', yuml: 'ÿ'
 273 }
 274
 275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
 276 raw_text_elements = ['script', 'style']
 277 escapable_raw_text_elements = ['textarea', 'title']
 278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
 279 svg_elements = [
 280         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
 281         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
 282         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
 283         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
 284         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
 285         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
 286         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
 287         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
 288         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
 289         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
 290         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
 291         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
 292         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
 293         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
 294         'view', 'vkern'
 295 ]
 296
 297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
 298 mathml_elements = [
 299         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
 300         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
 301         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
 302         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
 303         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
 304         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
 305         'determinant', 'diff', 'divergence', 'divide', 'domain',
 306         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
 307         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
 308         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
 309         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
 310         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
 311         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
 312         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
 313         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
 314         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
 315         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
 316         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
 317         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
 318         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
 319         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
 320         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
 321         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
 322         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
 323         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
 324         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
 325         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
 326         'vectorproduct', 'xor'
 327 ]
 328 # foreign_elements = [svg_elements..., mathml_elements...]
 329 #normal_elements = All other allowed HTML elements are normal elements.
 330
 331 special_elements = {
 332         # HTML:
 333         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
 334         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
 335         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
 336         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
 337         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
 338         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
 339         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
 340         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
 341         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
 342         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
 343         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
 344
 345         menu:NS_HTML,menuitem:NS_HTML, # WATWG adds these
 346
 347         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
 348         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
 349         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
 350         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
 351         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
 352         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
 353         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
 354
 355         # MathML:
 356         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
 357         'annotation-xml':NS_MATHML,
 358
 359         # SVG:
 360         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
 361 }
 362
 363 formatting_elements = {
 364          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
 365          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
 366          u: true
 367 }
 368
 369 mathml_text_integration = {
 370         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
 371 }
 372 is_mathml_text_integration_point = (el) ->
 373         return mathml_text_integration[el.name] is el.namespace
 374 is_html_integration = (el) -> # DON'T PASS A TOKEN
 375         if el.namespace is NS_MATHML
 376                 if el.name is 'annotation-xml'
 377                         if el.attrs.encoding?
 378                                 if el.attrs.encoding.toLowerCase() is 'text/html'
 379                                         return true
 380                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
 381                                         return true
 382                 return false
 383         if el.namespace is NS_SVG
 384                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
 385                         return true
 386         return false
 387
 388 h_tags = {
 389         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
 390 }
 391
 392 foster_parenting_targets = {
 393         table: NS_HTML
 394         tbody: NS_HTML
 395         tfoot: NS_HTML
 396         thead: NS_HTML
 397         tr: NS_HTML
 398 }
 399
 400 end_tag_implied = {
 401         dd: NS_HTML
 402         dt: NS_HTML
 403         li: NS_HTML
 404         option: NS_HTML
 405         optgroup: NS_HTML
 406         p: NS_HTML
 407         rb: NS_HTML
 408         rp: NS_HTML
 409         rt: NS_HTML
 410         rtc: NS_HTML
 411 }
 412
 413 el_is_special = (e) ->
 414         return special_elements[e.name] is e.namespace
 415
 416 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
 417 el_is_special_not_adp = (el) ->
 418         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
 419
 420 svg_name_fixes = {
 421         altglyph: 'altGlyph'
 422         altglyphdef: 'altGlyphDef'
 423         altglyphitem: 'altGlyphItem'
 424         animatecolor: 'animateColor'
 425         animatemotion: 'animateMotion'
 426         animatetransform: 'animateTransform'
 427         clippath: 'clipPath'
 428         feblend: 'feBlend'
 429         fecolormatrix: 'feColorMatrix'
 430         fecomponenttransfer: 'feComponentTransfer'
 431         fecomposite: 'feComposite'
 432         feconvolvematrix: 'feConvolveMatrix'
 433         fediffuselighting: 'feDiffuseLighting'
 434         fedisplacementmap: 'feDisplacementMap'
 435         fedistantlight: 'feDistantLight'
 436         fedropshadow: 'feDropShadow'
 437         feflood: 'feFlood'
 438         fefunca: 'feFuncA'
 439         fefuncb: 'feFuncB'
 440         fefuncg: 'feFuncG'
 441         fefuncr: 'feFuncR'
 442         fegaussianblur: 'feGaussianBlur'
 443         feimage: 'feImage'
 444         femerge: 'feMerge'
 445         femergenode: 'feMergeNode'
 446         femorphology: 'feMorphology'
 447         feoffset: 'feOffset'
 448         fepointlight: 'fePointLight'
 449         fespecularlighting: 'feSpecularLighting'
 450         fespotlight: 'feSpotLight'
 451         fetile: 'feTile'
 452         feturbulence: 'feTurbulence'
 453         foreignobject: 'foreignObject'
 454         glyphref: 'glyphRef'
 455         lineargradient: 'linearGradient'
 456         radialgradient: 'radialGradient'
 457         textpath: 'textPath'
 458 }
 459 svg_attribute_fixes = {
 460         attributename: 'attributeName'
 461         attributetype: 'attributeType'
 462         basefrequency: 'baseFrequency'
 463         baseprofile: 'baseProfile'
 464         calcmode: 'calcMode'
 465         clippathunits: 'clipPathUnits'
 466         contentscripttype: 'contentScriptType'
 467         contentstyletype: 'contentStyleType'
 468         diffuseconstant: 'diffuseConstant'
 469         edgemode: 'edgeMode'
 470         externalresourcesrequired: 'externalResourcesRequired'
 471         filterres: 'filterRes'
 472         filterunits: 'filterUnits'
 473         glyphref: 'glyphRef'
 474         gradienttransform: 'gradientTransform'
 475         gradientunits: 'gradientUnits'
 476         kernelmatrix: 'kernelMatrix'
 477         kernelunitlength: 'kernelUnitLength'
 478         keypoints: 'keyPoints'
 479         keysplines: 'keySplines'
 480         keytimes: 'keyTimes'
 481         lengthadjust: 'lengthAdjust'
 482         limitingconeangle: 'limitingConeAngle'
 483         markerheight: 'markerHeight'
 484         markerunits: 'markerUnits'
 485         markerwidth: 'markerWidth'
 486         maskcontentunits: 'maskContentUnits'
 487         maskunits: 'maskUnits'
 488         numoctaves: 'numOctaves'
 489         pathlength: 'pathLength'
 490         patterncontentunits: 'patternContentUnits'
 491         patterntransform: 'patternTransform'
 492         patternunits: 'patternUnits'
 493         pointsatx: 'pointsAtX'
 494         pointsaty: 'pointsAtY'
 495         pointsatz: 'pointsAtZ'
 496         preservealpha: 'preserveAlpha'
 497         preserveaspectratio: 'preserveAspectRatio'
 498         primitiveunits: 'primitiveUnits'
 499         refx: 'refX'
 500         refy: 'refY'
 501         repeatcount: 'repeatCount'
 502         repeatdur: 'repeatDur'
 503         requiredextensions: 'requiredExtensions'
 504         requiredfeatures: 'requiredFeatures'
 505         specularconstant: 'specularConstant'
 506         specularexponent: 'specularExponent'
 507         spreadmethod: 'spreadMethod'
 508         startoffset: 'startOffset'
 509         stddeviation: 'stdDeviation'
 510         stitchtiles: 'stitchTiles'
 511         surfacescale: 'surfaceScale'
 512         systemlanguage: 'systemLanguage'
 513         tablevalues: 'tableValues'
 514         targetx: 'targetX'
 515         targety: 'targetY'
 516         textlength: 'textLength'
 517         viewbox: 'viewBox'
 518         viewtarget: 'viewTarget'
 519         xchannelselector: 'xChannelSelector'
 520         ychannelselector: 'yChannelSelector'
 521         zoomandpan: 'zoomAndPan'
 522 }
 523 adjust_mathml_attributes = (t) ->
 524         for a in t.attrs_a
 525                 if a[0] is 'definitionurl'
 526                         a[0] = 'definitionURL'
 527         return
 528 adjust_svg_attributes = (t) ->
 529         for a in t.attrs_a
 530                 if svg_attribute_fixes[a[0]]?
 531                         a[0] = svg_attribute_fixes[a[0]]
 532         return
 533 adjust_foreign_attributes = (t) ->
 534         # fixfull
 535         return
 536
 537 # decode_named_char_ref()
 538 #
 539 # The list of named character references is _huge_ so ask the browser to decode
 540 # for us instead of wasting bandwidth/space on including the table here.
 541 #
 542 # Pass without the "&" but with the ";" examples:
 543 #    for "&amp" pass "amp;"
 544 #    for "&#x2032" pass "x2032;"
 545 g_dncr = {
 546         cache: {}
 547         textarea: document.createElement('textarea')
 548 }
 549 # TODO test this in IE8
 550 decode_named_char_ref = (txt) ->
 551         txt = "&#{txt}"
 552         decoded = g_dncr.cache[txt]
 553         return decoded if decoded?
 554         g_dncr.textarea.innerHTML = txt
 555         decoded = g_dncr.textarea.value
 556         return null if decoded is txt
 557         return g_dncr.cache[txt] = decoded
 558
 559 parse_html = (args) ->
 560         txt = null
 561         cur = null # index of next char in txt to be parsed
 562         # declare doc and tokenizer variables so they're in scope below
 563         doc = null
 564         open_els = null # stack of open elements
 565         afe = null # active formatting elements
 566         template_ins_modes = null
 567         ins_mode = null
 568         original_ins_mode = null
 569         tok_state = null
 570         tok_cur_tag = null # partially parsed tag
 571         flag_scripting = null
 572         flag_frameset_ok = null
 573         flag_parsing = null
 574         flag_foster_parenting = null
 575         form_element_pointer = null
 576         temporary_buffer = null
 577         pending_table_character_tokens = null
 578         head_element_pointer = null
 579         flag_fragment_parsing = null
 580         context_element = null
 581
 582         stop_parsing = ->
 583                 flag_parsing = false
 584
 585         parse_error = ->
 586                 if args.error_cb?
 587                         args.error_cb cur
 588                 else
 589                         console.log "Parse error at character #{cur} of #{txt.length}"
 590
 591         afe_push = (new_el) ->
 592                 matches = 0
 593                 for el, i in afe
 594                         if el.name is new_el.name and el.namespace is new_el.namespace
 595                                 for k, v of el.attrs
 596                                         continue unless new_el.attrs[k] is v
 597                                 for k, v of new_el.attrs
 598                                         continue unless el.attrs[k] is v
 599                                 matches += 1
 600                                 if matches is 3
 601                                         afe.splice i, 1
 602                                         break
 603                 afe.unshift new_el
 604         afe_push_marker = ->
 605                 afe.unshift new_afe_marker()
 606
 607         # the functions below impliment the Tree Contstruction algorithm
 608         # http://www.w3.org/TR/html5/syntax.html#tree-construction
 609
 610         # But first... the helpers
 611         template_tag_is_open = ->
 612                 for t in open_els
 613                         if t.name is 'template' and t.namespace is NS_HTML
 614                                 return true
 615                 return false
 616         is_in_scope_x = (tag_name, scope, namespace) ->
 617                 for t in open_els
 618                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 619                                 return true
 620                         if scope[t.name] is t.namespace
 621                                 return false
 622                 return false
 623         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
 624                 for t in open_els
 625                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 626                                 return true
 627                         if scope[t.name] is t.namespace
 628                                 return false
 629                         if scope2[t.name] is t.namespace
 630                                 return false
 631                 return false
 632         standard_scopers = {
 633                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
 634                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
 635                 template: NS_HTML, mi: NS_MATHML,
 636
 637                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
 638                 'annotation-xml': NS_MATHML,
 639
 640                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
 641         }
 642         button_scopers = button: NS_HTML
 643         li_scopers = ol: NS_HTML, ul: NS_HTML
 644         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
 645         is_in_scope = (tag_name, namespace = null) ->
 646                 return is_in_scope_x tag_name, standard_scopers, namespace
 647         is_in_button_scope = (tag_name, namespace = null) ->
 648                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
 649         is_in_table_scope = (tag_name, namespace = null) ->
 650                 return is_in_scope_x tag_name, table_scopers, namespace
 651         # aka is_in_list_item_scope
 652         is_in_li_scope = (tag_name, namespace = null) ->
 653                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
 654         is_in_select_scope = (tag_name, namespace = null) ->
 655                 for t in open_els
 656                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 657                                 return true
 658                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
 659                                 return false
 660                 return false
 661         # this checks for a particular element, not by name
 662         # this requires a namespace match
 663         el_is_in_scope = (needle) ->
 664                 for el in open_els
 665                         if el is needle
 666                                 return true
 667                         if standard_scopers[el.name] is el.namespace
 668                                 return false
 669                 return false
 670
 671         clear_to_table_stopers = {
 672                 'table': true
 673                 'template': true
 674                 'html': true
 675         }
 676         clear_stack_to_table_context = ->
 677                 loop
 678                         if clear_to_table_stopers[open_els[0].name]?
 679                                 break
 680                         open_els.shift()
 681                 return
 682         clear_to_table_body_stopers = {
 683                 tbody: NS_HTML
 684                 tfoot: NS_HTML
 685                 thead: NS_HTML
 686                 template: NS_HTML
 687                 html: NS_HTML
 688         }
 689         clear_stack_to_table_body_context = ->
 690                 loop
 691                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
 692                                 break
 693                         open_els.shift()
 694                 return
 695         clear_to_table_row_stopers = {
 696                 'tr': true
 697                 'template': true
 698                 'html': true
 699         }
 700         clear_stack_to_table_row_context = ->
 701                 loop
 702                         if clear_to_table_row_stopers[open_els[0].name]?
 703                                 break
 704                         open_els.shift()
 705                 return
 706         clear_afe_to_marker = ->
 707                 loop
 708                         return unless afe.length > 0 # this happens in fragment case, ?spec error
 709                         el = afe.shift()
 710                         if el.type is TYPE_AFE_MARKER
 711                                 return
 712                 return
 713
 714         # 8.2.3.1 ...
 715         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
 716         reset_ins_mode = ->
 717                 # 1. Let last be false.
 718                 last = false
 719                 # 2. Let node be the last node in the stack of open elements.
 720                 node_i = 0
 721                 node = open_els[node_i]
 722                 # 3. Loop: If node is the first node in the stack of open elements,
 723                 # then set last to true, and, if the parser was originally created as
 724                 # part of the HTML fragment parsing algorithm (fragment case) set node
 725                 # to the context element.
 726                 loop
 727                         if node_i is open_els.length - 1
 728                                 last = true
 729                                 # fixfull (fragment case)
 730
 731                         # 4. If node is a select element, run these substeps:
 732                         if node.name is 'select' and node.namespace is NS_HTML
 733                                 # 1. If last is true, jump to the step below labeled done.
 734                                 unless last
 735                                         # 2. Let ancestor be node.
 736                                         ancestor_i = node_i
 737                                         ancestor = node
 738                                         # 3. Loop: If ancestor is the first node in the stack of
 739                                         # open elements, jump to the step below labeled done.
 740                                         loop
 741                                                 if ancestor_i is open_els.length - 1
 742                                                         break
 743                                                 # 4. Let ancestor be the node before ancestor in the stack
 744                                                 # of open elements.
 745                                                 ancestor_i += 1
 746                                                 ancestor = open_els[ancestor_i]
 747                                                 # 5. If ancestor is a template node, jump to the step below
 748                                                 # labeled done.
 749                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
 750                                                         break
 751                                                 # 6. If ancestor is a table node, switch the insertion mode
 752                                                 # to "in select in table" and abort these steps.
 753                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
 754                                                         ins_mode = ins_mode_in_select_in_table
 755                                                         return
 756                                                 # 7. Jump back to the step labeled loop.
 757                                 # 8. Done: Switch the insertion mode to "in select" and abort
 758                                 # these steps.
 759                                 ins_mode = ins_mode_in_select
 760                                 return
 761                         # 5. If node is a td or th element and last is false, then switch
 762                         # the insertion mode to "in cell" and abort these steps.
 763                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
 764                                 ins_mode = ins_mode_in_cell
 765                                 return
 766                         # 6. If node is a tr element, then switch the insertion mode to "in
 767                         # row" and abort these steps.
 768                         if node.name is 'tr' and node.namespace is NS_HTML
 769                                 ins_mode = ins_mode_in_row
 770                                 return
 771                         # 7. If node is a tbody, thead, or tfoot element, then switch the
 772                         # insertion mode to "in table body" and abort these steps.
 773                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
 774                                 ins_mode = ins_mode_in_table_body
 775                                 return
 776                         # 8. If node is a caption element, then switch the insertion mode
 777                         # to "in caption" and abort these steps.
 778                         if node.name is 'caption' and node.namespace is NS_HTML
 779                                 ins_mode = ins_mode_in_caption
 780                                 return
 781                         # 9. If node is a colgroup element, then switch the insertion mode
 782                         # to "in column group" and abort these steps.
 783                         if node.name is 'colgroup' and node.namespace is NS_HTML
 784                                 ins_mode = ins_mode_in_column_group
 785                                 return
 786                         # 10. If node is a table element, then switch the insertion mode to
 787                         # "in table" and abort these steps.
 788                         if node.name is 'table' and node.namespace is NS_HTML
 789                                 ins_mode = ins_mode_in_table
 790                                 return
 791                         # 11. If node is a template element, then switch the insertion mode
 792                         # to the current template insertion mode and abort these steps.
 793                         if node.name is 'template' and node.namespace is NS_HTML
 794                                 ins_mode = template_ins_modes[0]
 795                                 return
 796                         # 12. If node is a head element and last is true, then switch the
 797                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
 798                         # these steps. (fragment case)
 799                         if node.name is 'head' and node.namespace is NS_HTML and last
 800                                 ins_mode = ins_mode_in_body
 801                                 return
 802                         # 13. If node is a head element and last is false, then switch the
 803                         # insertion mode to "in head" and abort these steps.
 804                         if node.name is 'head' and node.namespace is NS_HTML and last is false
 805                                 ins_mode = ins_mode_in_head
 806                                 return
 807                         # 14. If node is a body element, then switch the insertion mode to
 808                         # "in body" and abort these steps.
 809                         if node.name is 'body' and node.namespace is NS_HTML
 810                                 ins_mode = ins_mode_in_body
 811                                 return
 812                         # 15. If node is a frameset element, then switch the insertion mode
 813                         # to "in frameset" and abort these steps. (fragment case)
 814                         if node.name is 'frameset' and node.namespace is NS_HTML
 815                                 ins_mode = ins_mode_in_frameset
 816                                 return
 817                         # 16. If node is an html element, run these substeps:
 818                         if node.name is 'html' and node.namespace is NS_HTML
 819                                 # 1. If the head element pointer is null, switch the insertion
 820                                 # mode to "before head" and abort these steps. (fragment case)
 821                                 if head_element_pointer is null
 822                                         ins_mode = ins_mode_before_head
 823                                 else
 824                                         # 2. Otherwise, the head element pointer is not null,
 825                                         # switch the insertion mode to "after head" and abort these
 826                                         # steps.
 827                                         ins_mode = ins_mode_after_head
 828                                 return
 829                         # 17. If last is true, then switch the insertion mode to "in body"
 830                         # and abort these steps. (fragment case)
 831                         if last
 832                                 ins_mode = ins_mode_in_body
 833                                 return
 834                         # 18. Let node now be the node before node in the stack of open
 835                         # elements.
 836                         node_i += 1
 837                         node = open_els[node_i]
 838                         # 19. Return to the step labeled loop.
 839
 840         # 8.2.3.2
 841
 842         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
 843         adjusted_current_node = ->
 844                 if open_els.length is 1 and flag_fragment_parsing
 845                         return context_element
 846                 return open_els[0]
 847
 848         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
 849         # this implementation is structured (mostly) as described at the link above.
 850         # capitalized comments are the "labels" described at the link above.
 851         reconstruct_afe = ->
 852                 return if afe.length is 0
 853                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
 854                         return
 855                 # Rewind
 856                 i = 0
 857                 loop
 858                         if i is afe.length - 1
 859                                 break
 860                         i += 1
 861                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
 862                                 i -= 1 # Advance
 863                                 break
 864                 # Create
 865                 loop
 866                         el = insert_html_element afe[i].token
 867                         afe[i] = el
 868                         break if i is 0
 869                         i -= 1 # Advance
 870
 871         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
 872         # adoption agency algorithm
 873         # overview here:
 874         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
 875         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
 876         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
 877         adoption_agency = (subject) ->
 878                 debug_log "adoption_agency()"
 879                 debug_log "tree: #{serialize_els doc.children, false, true}"
 880                 debug_log "open_els: #{serialize_els open_els, true, true}"
 881                 debug_log "afe: #{serialize_els afe, true, true}"
 882                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
 883                         el = open_els[0]
 884                         open_els.shift()
 885                         # remove it from the list of active formatting elements (if found)
 886                         for t, i in afe
 887                                 if t is el
 888                                         afe.splice i, 1
 889                                         break
 890                         debug_log "aaa: starting off with subject on top of stack, exiting"
 891                         return
 892                 outer = 0
 893                 loop
 894                         if outer >= 8
 895                                 return
 896                         outer += 1
 897                         # 5. Let formatting element be the last element in the list of
 898                         # active formatting elements that: is between the end of the list
 899                         # and the last scope marker in the list, if any, or the start of
 900                         # the list otherwise, and  has the tag name subject.
 901                         fe = null
 902                         for t, fe_of_afe in afe
 903                                 if t.type is TYPE_AFE_MARKER
 904                                         break
 905                                 if t.name is subject
 906                                         fe = t
 907                                         break
 908                         # If there is no such element, then abort these steps and instead
 909                         # act as described in the "any other end tag" entry above.
 910                         if fe is null
 911                                 debug_log "aaa: fe not found in afe"
 912                                 in_body_any_other_end_tag subject
 913                                 return
 914                         # 6. If formatting element is not in the stack of open elements,
 915                         # then this is a parse error; remove the element from the list, and
 916                         # abort these steps.
 917                         in_open_els = false
 918                         for t, fe_of_open_els in open_els
 919                                 if t is fe
 920                                         in_open_els = true
 921                                         break
 922                         unless in_open_els
 923                                 debug_log "aaa: fe not found in open_els"
 924                                 parse_error()
 925                                 # "remove it from the list" must mean afe, since it's not in open_els
 926                                 afe.splice fe_of_afe, 1
 927                                 return
 928                         # 7. If formatting element is in the stack of open elements, but
 929                         # the element is not in scope, then this is a parse error; abort
 930                         # these steps.
 931                         unless el_is_in_scope fe
 932                                 debug_log "aaa: fe not in scope"
 933                                 parse_error()
 934                                 return
 935                         # 8. If formatting element is not the current node, this is a parse
 936                         # error. (But do not abort these steps.)
 937                         unless open_els[0] is fe
 938                                 parse_error()
 939                                 # continue
 940                         # 9. Let furthest block be the topmost node in the stack of open
 941                         # elements that is lower in the stack than formatting element, and
 942                         # is an element in the special category. There might not be one.
 943                         fb = null
 944                         fb_of_open_els = null
 945                         for t, i in open_els
 946                                 if t is fe
 947                                         break
 948                                 if el_is_special t
 949                                         fb = t
 950                                         fb_of_open_els = i
 951                                         # and continue, to see if there's one that's more "topmost"
 952                         # 10. If there is no furthest block, then the UA must first pop all
 953                         # the nodes from the bottom of the stack of open elements, from the
 954                         # current node up to and including formatting element, then remove
 955                         # formatting element from the list of active formatting elements,
 956                         # and finally abort these steps.
 957                         if fb is null
 958                                 debug_log "aaa: no fb"
 959                                 loop
 960                                         t = open_els.shift()
 961                                         if t is fe
 962                                                 afe.splice fe_of_afe, 1
 963                                                 return
 964                         # 11. Let common ancestor be the element immediately above
 965                         # formatting element in the stack of open elements.
 966                         ca = open_els[fe_of_open_els + 1] # common ancestor
 967
 968                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
 969                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
 970                         bookmark = new_aaa_bookmark()
 971                         for t, i in afe
 972                                 if t is fe
 973                                         afe.splice i, 0, bookmark
 974                                         break
 975                         node = last_node = fb
 976                         inner = 0
 977                         loop
 978                                 inner += 1
 979                                 # 3. Let node be the element immediately above node in the
 980                                 # stack of open elements, or if node is no longer in the stack
 981                                 # of open elements (e.g. because it got removed by this
 982                                 # algorithm), the element that was immediately above node in
 983                                 # the stack of open elements before node was removed.
 984                                 node_next = null
 985                                 for t, i in open_els
 986                                         if t is node
 987                                                 node_next = open_els[i + 1]
 988                                                 break
 989                                 node = node_next ? node_above
 990                                 debug_log "inner loop #{inner}"
 991                                 debug_log "tree: #{serialize_els doc.children, false, true}"
 992                                 debug_log "open_els: #{serialize_els open_els, true, true}"
 993                                 debug_log "afe: #{serialize_els afe, true, true}"
 994                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
 995                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
 996                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
 997                                 debug_log "node: #{node.serialize true, true}"
 998                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
 999
1000                                 # 4. If node is formatting element, then go to the next step in
1001                                 # the overall algorithm.
1002                                 if node is fe
1003                                         break
1004                                 debug_log "the meat"
1005                                 # 5. If inner loop counter is greater than three and node is in
1006                                 # the list of active formatting elements, then remove node from
1007                                 # the list of active formatting elements.
1008                                 node_in_afe = false
1009                                 for t, i in afe
1010                                         if t is node
1011                                                 if inner > 3
1012                                                         afe.splice i, 1
1013                                                         debug_log "max out inner"
1014                                                 else
1015                                                         node_in_afe = true
1016                                                         debug_log "in afe"
1017                                                 break
1018                                 # 6. If node is not in the list of active formatting elements,
1019                                 # then remove node from the stack of open elements and then go
1020                                 # back to the step labeled inner loop.
1021                                 unless node_in_afe
1022                                         debug_log "not in afe"
1023                                         for t, i in open_els
1024                                                 if t is node
1025                                                         node_above = open_els[i + 1]
1026                                                         open_els.splice i, 1
1027                                                         break
1028                                         continue
1029                                 debug_log "the bones"
1030                                 # 7. create an element for the token for which the element node
1031                                 # was created, in the HTML namespace, with common ancestor as
1032                                 # the intended parent; replace the entry for node in the list
1033                                 # of active formatting elements with an entry for the new
1034                                 # element, replace the entry for node in the stack of open
1035                                 # elements with an entry for the new element, and let node be
1036                                 # the new element.
1037                                 new_node = token_to_element node.token, NS_HTML, ca
1038                                 for t, i in afe
1039                                         if t is node
1040                                                 afe[i] = new_node
1041                                                 debug_log "replaced in afe"
1042                                                 break
1043                                 for t, i in open_els
1044                                         if t is node
1045                                                 node_above = open_els[i + 1]
1046                                                 open_els[i] = new_node
1047                                                 debug_log "replaced in open_els"
1048                                                 break
1049                                 node = new_node
1050                                 # 8. If last node is furthest block, then move the
1051                                 # aforementioned bookmark to be immediately after the new node
1052                                 # in the list of active formatting elements.
1053                                 if last_node is fb
1054                                         for t, i in afe
1055                                                 if t is bookmark
1056                                                         afe.splice i, 1
1057                                                         debug_log "removed bookmark"
1058                                                         break
1059                                         for t, i in afe
1060                                                 if t is node
1061                                                         # "after" means lower
1062                                                         afe.splice i, 0, bookmark # "after as <-
1063                                                         debug_log "placed bookmark after node"
1064                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1065                                                         break
1066                                 # 9. Insert last node into node, first removing it from its
1067                                 # previous parent node if any.
1068                                 if last_node.parent?
1069                                         debug_log "last_node has parent"
1070                                         for c, i in last_node.parent.children
1071                                                 if c is last_node
1072                                                         debug_log "removing last_node from parent"
1073                                                         last_node.parent.children.splice i, 1
1074                                                         break
1075                                 node.children.push last_node
1076                                 last_node.parent = node
1077                                 # 10. Let last node be node.
1078                                 last_node = node
1079                                 debug_log "at last"
1080                                 # 11. Return to the step labeled inner loop.
1081                         # 14. Insert whatever last node ended up being in the previous step
1082                         # at the appropriate place for inserting a node, but using common
1083                         # ancestor as the override target.
1084
1085                         # In the case where fe is immediately followed by fb:
1086                         #   * inner loop exits out early (node==fe)
1087                         #   * last_node is fb
1088                         #   * last_node is still in the tree (not a duplicate)
1089                         if last_node.parent?
1090                                 debug_log "FEFIRST? last_node has parent"
1091                                 for c, i in last_node.parent.children
1092                                         if c is last_node
1093                                                 debug_log "removing last_node from parent"
1094                                                 last_node.parent.children.splice i, 1
1095                                                 break
1096
1097                         debug_log "after aaa inner loop"
1098                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1099                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1100                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1101                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1102                         debug_log "tree: #{serialize_els doc.children, false, true}"
1103
1104                         debug_log "insert"
1105
1106
1107                         # can't use standard insert token thing, because it's already in
1108                         # open_els and must stay at it's current position in open_els
1109                         dest = adjusted_insertion_location ca
1110                         dest[0].children.splice dest[1], 0, last_node
1111                         last_node.parent = dest[0]
1112
1113
1114                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1115                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1116                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1117                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1118                         debug_log "tree: #{serialize_els doc.children, false, true}"
1119
1120                         # 15. Create an element for the token for which formatting element
1121                         # was created, in the HTML namespace, with furthest block as the
1122                         # intended parent.
1123                         new_element = token_to_element fe.token, NS_HTML, fb
1124                         # 16. Take all of the child nodes of furthest block and append them
1125                         # to the element created in the last step.
1126                         while fb.children.length
1127                                 t = fb.children.shift()
1128                                 t.parent = new_element
1129                                 new_element.children.push t
1130                         # 17. Append that new element to furthest block.
1131                         new_element.parent = fb
1132                         fb.children.push new_element
1133                         # 18. Remove formatting element from the list of active formatting
1134                         # elements, and insert the new element into the list of active
1135                         # formatting elements at the position of the aforementioned
1136                         # bookmark.
1137                         for t, i in afe
1138                                 if t is fe
1139                                         afe.splice i, 1
1140                                         break
1141                         for t, i in afe
1142                                 if t is bookmark
1143                                         afe[i] = new_element
1144                                         break
1145                         # 19. Remove formatting element from the stack of open elements,
1146                         # and insert the new element into the stack of open elements
1147                         # immediately below the position of furthest block in that stack.
1148                         for t, i in open_els
1149                                 if t is fe
1150                                         open_els.splice i, 1
1151                                         break
1152                         for t, i in open_els
1153                                 if t is fb
1154                                         open_els.splice i, 0, new_element
1155                                         break
1156                         # 20. Jump back to the step labeled outer loop.
1157                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1158                         debug_log "tree: #{serialize_els doc.children, false, true}"
1159                         debug_log "open_els: #{serialize_els open_els, true, true}"
1160                         debug_log "afe: #{serialize_els afe, true, true}"
1161                 debug_log "AAA DONE"
1162
1163         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1164         close_p_element = ->
1165                 generate_implied_end_tags 'p' # arg is exception
1166                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1167                         parse_error()
1168                 while open_els.length > 1 # just in case
1169                         el = open_els.shift()
1170                         if el.name is 'p' and el.namespace is NS_HTML
1171                                 return
1172         close_p_if_in_button_scope = ->
1173                 if is_in_button_scope 'p', NS_HTML
1174                         close_p_element()
1175
1176         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1177         # aka insert_a_character = (t) ->
1178         insert_character = (t) ->
1179                 dest = adjusted_insertion_location()
1180                 # fixfull check for Document node
1181                 if dest[1] > 0
1182                         prev = dest[0].children[dest[1] - 1]
1183                         if prev.type is TYPE_TEXT
1184                                 prev.text += t.text
1185                                 return
1186                 dest[0].children.splice dest[1], 0, t
1187
1188
1189         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1190         process_token = (t) ->
1191                 acn = adjusted_current_node()
1192                 unless acn?
1193                         ins_mode t
1194                         return
1195                 if acn.namespace is NS_HTML
1196                         ins_mode t
1197                         return
1198                 if is_mathml_text_integration_point(acn)
1199                         if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
1200                                 ins_mode t
1201                                 return
1202                         if t.type is TYPE_TEXT
1203                                 ins_mode t
1204                                 return
1205                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1206                         ins_mode t
1207                         return
1208                 if is_html_integration acn
1209                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1210                                 ins_mode t
1211                                 return
1212                 if t.type is TYPE_EOF
1213                         ins_mode t
1214                         return
1215                 in_foreign_content t
1216                 return
1217
1218         # 8.2.5.1
1219         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1220         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1221         adjusted_insertion_location = (override_target = null) ->
1222                 # 1. If there was an override target specified, then let target be the
1223                 # override target.
1224                 if override_target?
1225                         target = override_target
1226                 else # Otherwise, let target be the current node.
1227                         target = open_els[0]
1228                 # 2. Determine the adjusted insertion location using the first matching
1229                 # steps from the following list:
1230                 #
1231                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1232                 # thead, or tr element Foster parenting happens when content is
1233                 # misnested in tables.
1234                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1235                         loop # once. this is here so we can ``break`` to "abort these substeps"
1236                                 # 1. Let last template be the last template element in the
1237                                 # stack of open elements, if any.
1238                                 last_template = null
1239                                 last_template_i = null
1240                                 for el, i in open_els
1241                                         if el.name is 'template' and el.namespace is NS_HTML
1242                                                 last_template = el
1243                                                 last_template_i = i
1244                                                 break
1245                                 # 2. Let last table be the last table element in the stack of
1246                                 # open elements, if any.
1247                                 last_table = null
1248                                 last_table_i
1249                                 for el, i in open_els
1250                                         if el.name is 'table' and el.namespace is NS_HTML
1251                                                 last_table = el
1252                                                 last_table_i = i
1253                                                 break
1254                                 # 3. If there is a last template and either there is no last
1255                                 # table, or there is one, but last template is lower (more
1256                                 # recently added) than last table in the stack of open
1257                                 # elements, then: let adjusted insertion location be inside
1258                                 # last template's template contents, after its last child (if
1259                                 # any), and abort these substeps.
1260                                 if last_template and (last_table is null or last_template_i < last_table_i)
1261                                         target = last_template # fixfull should be it's contents
1262                                         target_i = target.children.length
1263                                         break
1264                                 # 4. If there is no last table, then let adjusted insertion
1265                                 # location be inside the first element in the stack of open
1266                                 # elements (the html element), after its last child (if any),
1267                                 # and abort these substeps. (fragment case)
1268                                 if last_table is null
1269                                         # this is odd
1270                                         target = open_els[open_els.length - 1]
1271                                         target_i = target.children.length
1272                                         break
1273                                 # 5. If last table has a parent element, then let adjusted
1274                                 # insertion location be inside last table's parent element,
1275                                 # immediately before last table, and abort these substeps.
1276                                 if last_table.parent?
1277                                         for c, i in last_table.parent.children
1278                                                 if c is last_table
1279                                                         target = last_table.parent
1280                                                         target_i = i
1281                                                         break
1282                                         break
1283                                 # 6. Let previous element be the element immediately above last
1284                                 # table in the stack of open elements.
1285                                 #
1286                                 # huh? how could it not have a parent?
1287                                 previous_element = open_els[last_table_i + 1]
1288                                 # 7. Let adjusted insertion location be inside previous
1289                                 # element, after its last child (if any).
1290                                 target = previous_element
1291                                 target_i = target.children.length
1292                                 # Note: These steps are involved in part because it's possible
1293                                 # for elements, the table element in this case in particular,
1294                                 # to have been moved by a script around in the DOM, or indeed
1295                                 # removed from the DOM entirely, after the element was inserted
1296                                 # by the parser.
1297                                 break # don't really loop
1298                 else
1299                         # Otherwise Let adjusted insertion location be inside target, after
1300                         # its last child (if any).
1301                         target_i = target.children.length
1302
1303                 # 3. If the adjusted insertion location is inside a template element,
1304                 # let it instead be inside the template element's template contents,
1305                 # after its last child (if any).
1306                 # fixfull (template)
1307
1308                 # 4. Return the adjusted insertion location.
1309                 return [target, target_i]
1310
1311         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1312         # aka create_an_element_for_token
1313         token_to_element = (t, namespace, intended_parent) ->
1314                 # convert attributes into a hash
1315                 attrs = {}
1316                 for a in t.attrs_a
1317                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1318                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1319
1320                 # TODO 2. If the newly created element has an xmlns attribute in the
1321                 # XMLNS namespace whose value is not exactly the same as the element's
1322                 # namespace, that is a parse error. Similarly, if the newly created
1323                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1324                 # value is not the XLink Namespace, that is a parse error.
1325
1326                 # fixfull: the spec says stuff about form pointers and ownerDocument
1327
1328                 return el
1329
1330         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1331         insert_foreign_element = (token, namespace) ->
1332                 ail = adjusted_insertion_location()
1333                 ail_el = ail[0]
1334                 ail_i = ail[1]
1335                 el = token_to_element token, namespace, ail_el
1336                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1337                 el.parent = ail_el
1338                 ail_el.children.splice ail_i, 0, el
1339                 open_els.unshift el
1340                 return el
1341         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1342         insert_html_element = (token) ->
1343                 insert_foreign_element token, NS_HTML
1344
1345         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1346         # position should be [node, index_within_children]
1347         insert_comment = (t, position = null) ->
1348                 position ?= adjusted_insertion_location()
1349                 position[0].children.splice position[1], 0, t
1350
1351         # 8.2.5.2
1352         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1353         parse_generic_raw_text = (t) ->
1354                 insert_html_element t
1355                 tok_state = tok_state_rawtext
1356                 original_ins_mode = ins_mode
1357                 ins_mode = ins_mode_text
1358         parse_generic_rcdata_text = (t) ->
1359                 insert_html_element t
1360                 tok_state = tok_state_rcdata
1361                 original_ins_mode = ins_mode
1362                 ins_mode = ins_mode_text
1363
1364         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1365         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1366         generate_implied_end_tags = (except = null) ->
1367                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1368                         open_els.shift()
1369
1370         # 8.2.5.4 The rules for parsing tokens in HTML content
1371         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1372
1373         # 8.2.5.4.1 The "initial" insertion mode
1374         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1375         ins_mode_initial = (t) ->
1376                 if is_space_tok t
1377                         return
1378                 if t.type is TYPE_COMMENT
1379                         # ?fixfull
1380                         doc.children.push t
1381                         return
1382                 if t.type is TYPE_DOCTYPE
1383                         # FIXME check identifiers, set quirks, etc
1384                         # fixfull
1385                         doc.children.push t
1386                         ins_mode = ins_mode_before_html
1387                         return
1388                 # Anything else
1389                 #fixfull (iframe, quirks)
1390                 ins_mode = ins_mode_before_html
1391                 process_token t
1392                 return
1393
1394         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1395         ins_mode_before_html = (t) ->
1396                 if t.type is TYPE_DOCTYPE
1397                         parse_error()
1398                         return
1399                 if t.type is TYPE_COMMENT
1400                         doc.children.push t
1401                         return
1402                 if is_space_tok t
1403                         return
1404                 if t.type is TYPE_START_TAG and t.name is 'html'
1405                         el = token_to_element t, NS_HTML, doc
1406                         doc.children.push el
1407                         open_els.unshift(el)
1408                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1409                         ins_mode = ins_mode_before_head
1410                         return
1411                 if t.type is TYPE_END_TAG
1412                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1413                                 # fall through to "anything else"
1414                         else
1415                                 parse_error()
1416                                 return
1417                 # Anything else
1418                 html_tok = new_open_tag 'html'
1419                 el = token_to_element html_tok, NS_HTML, doc
1420                 doc.children.push el
1421                 open_els.unshift el
1422                 # ?fixfull browsing context
1423                 ins_mode = ins_mode_before_head
1424                 process_token t
1425                 return
1426
1427         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1428         ins_mode_before_head = (t) ->
1429                 if is_space_tok t
1430                         return
1431                 if t.type is TYPE_COMMENT
1432                         insert_comment t
1433                         return
1434                 if t.type is TYPE_DOCTYPE
1435                         parse_error()
1436                         return
1437                 if t.type is TYPE_START_TAG and t.name is 'html'
1438                         ins_mode_in_body t
1439                         return
1440                 if t.type is TYPE_START_TAG and t.name is 'head'
1441                         el = insert_html_element t
1442                         head_element_pointer = el
1443                         ins_mode = ins_mode_in_head
1444                         return
1445                 if t.type is TYPE_END_TAG
1446                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1447                                 # fall through to Anything else below
1448                         else
1449                                 parse_error()
1450                                 return
1451                 # Anything else
1452                 head_tok = new_open_tag 'head'
1453                 el = insert_html_element head_tok
1454                 head_element_pointer = el
1455                 ins_mode = ins_mode_in_head
1456                 process_token t
1457
1458         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1459         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1460                 open_els.shift() # spec says this will be a 'head' node
1461                 ins_mode = ins_mode_after_head
1462                 process_token t
1463         ins_mode_in_head = (t) ->
1464                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1465                         insert_character t
1466                         return
1467                 if t.type is TYPE_COMMENT
1468                         insert_comment t
1469                         return
1470                 if t.type is TYPE_DOCTYPE
1471                         parse_error()
1472                         return
1473                 if t.type is TYPE_START_TAG and t.name is 'html'
1474                         ins_mode_in_body t
1475                         return
1476                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1477                         el = insert_html_element t
1478                         open_els.shift()
1479                         t.acknowledge_self_closing()
1480                         return
1481                 if t.type is TYPE_START_TAG and t.name is 'meta'
1482                         el = insert_html_element t
1483                         open_els.shift()
1484                         t.acknowledge_self_closing()
1485                         # fixfull encoding stuff
1486                         return
1487                 if t.type is TYPE_START_TAG and t.name is 'title'
1488                         parse_generic_rcdata_text t
1489                         return
1490                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1491                         parse_generic_raw_text t
1492                         return
1493                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1494                         insert_html_element t
1495                         ins_mode = ins_mode_in_head_noscript
1496                         return
1497                 if t.type is TYPE_START_TAG and t.name is 'script'
1498                         ail = adjusted_insertion_location()
1499                         el = token_to_element t, NS_HTML, ail
1500                         el.flag 'parser-inserted', true
1501                         # fixfull frament case
1502                         ail[0].children.splice ail[1], 0, el
1503                         open_els.unshift el
1504                         tok_state = tok_state_script_data
1505                         original_ins_mode = ins_mode # make sure orig... is defined
1506                         ins_mode = ins_mode_text
1507                         return
1508                 if t.type is TYPE_END_TAG and t.name is 'head'
1509                         open_els.shift() # will be a head element... spec says so
1510                         ins_mode = ins_mode_after_head
1511                         return
1512                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1513                         ins_mode_in_head_else t
1514                         return
1515                 if t.type is TYPE_START_TAG and t.name is 'template'
1516                         insert_html_element t
1517                         afe_push_marker()
1518                         flag_frameset_ok = false
1519                         ins_mode = ins_mode_in_template
1520                         template_ins_modes.unshift ins_mode_in_template
1521                         return
1522                 if t.type is TYPE_END_TAG and t.name is 'template'
1523                         if template_tag_is_open()
1524                                 generate_implied_end_tags
1525                                 if open_els[0].name isnt 'template'
1526                                         parse_error()
1527                                 loop
1528                                         el = open_els.shift()
1529                                         if el.name is 'template' and el.namespace is NS_HTML
1530                                                 break
1531                                 clear_afe_to_marker()
1532                                 template_ins_modes.shift()
1533                                 reset_ins_mode()
1534                         else
1535                                 parse_error()
1536                         return
1537                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1538                         parse_error()
1539                         return
1540                 ins_mode_in_head_else t
1541
1542         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1543         ins_mode_in_head_noscript_else = (t) ->
1544                 parse_error()
1545                 open_els.shift()
1546                 ins_mode = ins_mode_in_head
1547                 process_token t
1548         ins_mode_in_head_noscript = (t) ->
1549                 if t.type is TYPE_DOCTYPE
1550                         parse_error()
1551                         return
1552                 if t.type is TYPE_START_TAG and t.name is 'html'
1553                         ins_mode_in_body t
1554                         return
1555                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1556                         open_els.shift()
1557                         ins_mode = ins_mode_in_head
1558                         return
1559                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1560                         ins_mode_in_head t
1561                         return
1562                 if t.type is TYPE_END_TAG and t.name is 'br'
1563                         ins_mode_in_head_noscript_else t
1564                         return
1565                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1566                         parse_error()
1567                         return
1568                 # Anything else
1569                 ins_mode_in_head_noscript_else t
1570                 return
1571
1572
1573
1574         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1575         ins_mode_after_head_else = (t) ->
1576                 body_tok = new_open_tag 'body'
1577                 insert_html_element body_tok
1578                 ins_mode = ins_mode_in_body
1579                 process_token t
1580                 return
1581         ins_mode_after_head = (t) ->
1582                 if is_space_tok t
1583                         insert_character t
1584                         return
1585                 if t.type is TYPE_COMMENT
1586                         insert_comment t
1587                         return
1588                 if t.type is TYPE_DOCTYPE
1589                         parse_error()
1590                         return
1591                 if t.type is TYPE_START_TAG and t.name is 'html'
1592                         ins_mode_in_body t
1593                         return
1594                 if t.type is TYPE_START_TAG and t.name is 'body'
1595                         insert_html_element t
1596                         flag_frameset_ok = false
1597                         ins_mode = ins_mode_in_body
1598                         return
1599                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1600                         insert_html_element t
1601                         ins_mode = ins_mode_in_frameset
1602                         return
1603                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1604                         parse_error()
1605                         open_els.unshift head_element_pointer
1606                         ins_mode_in_head t
1607                         for el, i of open_els
1608                                 if el is head_element_pointer
1609                                         open_els.splice i, 1
1610                                         return
1611                         console.log "warning: 23904 couldn't find head element in open_els"
1612                         return
1613                 if t.type is TYPE_END_TAG and t.name is 'template'
1614                         ins_mode_in_head t
1615                         return
1616                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1617                         ins_mode_after_head_else t
1618                         return
1619                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1620                         parse_error()
1621                         return
1622                 # Anything else
1623                 ins_mode_after_head_else t
1624
1625         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1626         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1627                 for el, i in open_els
1628                         if el.name is name and el.namespace is NS_HTML
1629                                 generate_implied_end_tags name # arg is exception
1630                                 parse_error() unless i is 0
1631                                 while i >= 0
1632                                         open_els.shift()
1633                                         i -= 1
1634                                 return
1635                         if special_elements[el.name] is el.namespace
1636                                 parse_error()
1637                                 return
1638                 return
1639         ins_mode_in_body = (t) ->
1640                 if t.type is TYPE_TEXT and t.text is "\u0000"
1641                         parse_error()
1642                         return
1643                 if is_space_tok t
1644                         reconstruct_afe()
1645                         insert_character t
1646                         return
1647                 if t.type is TYPE_TEXT
1648                         reconstruct_afe()
1649                         insert_character t
1650                         flag_frameset_ok = false
1651                         return
1652                 if t.type is TYPE_COMMENT
1653                         insert_comment t
1654                         return
1655                 if t.type is TYPE_DOCTYPE
1656                         parse_error()
1657                         return
1658                 if t.type is TYPE_START_TAG and t.name is 'html'
1659                         parse_error()
1660                         return if template_tag_is_open()
1661                         root_attrs = open_els[open_els.length - 1].attrs
1662                         for a of t.attrs_a
1663                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1664                         return
1665
1666                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1667                         ins_mode_in_head t
1668                         return
1669                 if t.type is TYPE_START_TAG and t.name is 'body'
1670                         parse_error()
1671                         return if open_els.length < 2
1672                         second = open_els[open_els.length - 2]
1673                         return unless second.namespace is NS_HTML
1674                         return unless second.name is 'body'
1675                         return if template_tag_is_open()
1676                         flag_frameset_ok = false
1677                         for a of t.attrs_a
1678                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1679                         return
1680                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1681                         parse_error()
1682                         return if open_els.length < 2
1683                         second_i = open_els.length - 2
1684                         second = open_els[second_i]
1685                         return unless second.namespace is NS_HTML
1686                         return unless second.name is 'body'
1687                         if flag_frameset_ok is false
1688                                 return
1689                         if second.parent?
1690                                 for el, i in second.parent.children
1691                                         if el is second
1692                                                 second.parent.children.splice i, 1
1693                                                 break
1694                         open_els.splice second_i, 1
1695                         # pop everything except the "root html element"
1696                         while open_els.length > 1
1697                                 open_els.shift()
1698                         insert_html_element t
1699                         ins_mode = ins_mode_in_frameset
1700                         return
1701                 if t.type is TYPE_EOF
1702                         ok_tags = {
1703                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1704                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1705                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1706                         }
1707                         for el in open_els
1708                                 unless ok_tags[t.name] is el.namespace
1709                                         parse_error()
1710                                         break
1711                         if template_ins_modes.length > 0
1712                                 ins_mode_in_template t
1713                         else
1714                                 stop_parsing()
1715                         return
1716                 if t.type is TYPE_END_TAG and t.name is 'body'
1717                         unless is_in_scope 'body', NS_HTML
1718                                 parse_error()
1719                                 return
1720                         ok_tags = {
1721                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1722                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1723                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1724                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1725                                 html:NS_HTML
1726                         }
1727                         for el in open_els
1728                                 unless ok_tags[t.name] is el.namespace
1729                                         parse_error()
1730                                         break
1731                         ins_mode = ins_mode_after_body
1732                         return
1733                 if t.type is TYPE_END_TAG and t.name is 'html'
1734                         unless is_in_scope 'body', NS_HTML
1735                                 parse_error()
1736                                 return
1737                         ok_tags = {
1738                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1739                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1740                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1741                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1742                                 html:NS_HTML
1743                         }
1744                         for el in open_els
1745                                 unless ok_tags[t.name] is el.namespace
1746                                         parse_error()
1747                                         break
1748                         ins_mode = ins_mode_after_body
1749                         process_token t
1750                         return
1751                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1752                         close_p_if_in_button_scope()
1753                         insert_html_element t
1754                         return
1755                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1756                         close_p_if_in_button_scope()
1757                         if h_tags[open_els[0].name] is open_els[0].namespace
1758                                 parse_error()
1759                                 open_els.shift()
1760                         insert_html_element t
1761                         return
1762                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1763                         close_p_if_in_button_scope()
1764                         insert_html_element t
1765                         # spec: If the next token is a "LF" (U+000A) character token, then
1766                         # ignore that token and move on to the next one. (Newlines at the
1767                         # start of pre blocks are ignored as an authoring convenience.)
1768                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1769                                 cur += 1
1770                         flag_frameset_ok = false
1771                         return
1772                 if t.type is TYPE_START_TAG and t.name is 'form'
1773                         unless form_element_pointer is null or template_tag_is_open()
1774                                 parse_error()
1775                                 return
1776                         close_p_if_in_button_scope()
1777                         el = insert_html_element t
1778                         unless template_tag_is_open()
1779                                 form_element_pointer = el
1780                         return
1781                 if t.type is TYPE_START_TAG and t.name is 'li'
1782                         flag_frameset_ok = false
1783                         for node in open_els
1784                                 if node.name is 'li' and node.namespace is NS_HTML
1785                                         generate_implied_end_tags 'li' # arg is exception
1786                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1787                                                 parse_error()
1788                                         loop
1789                                                 el = open_els.shift()
1790                                                 if el.name is 'li' and el.namespace is NS_HTML
1791                                                         break
1792                                         break
1793                                 if el_is_special_not_adp node
1794                                                 break
1795                         close_p_if_in_button_scope()
1796                         insert_html_element t
1797                         return
1798                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1799                         flag_frameset_ok = false
1800                         for node in open_els
1801                                 if node.name is 'dd' and node.namespace is NS_HTML
1802                                         generate_implied_end_tags 'dd' # arg is exception
1803                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1804                                                 parse_error()
1805                                         loop
1806                                                 el = open_els.shift()
1807                                                 if el.name is 'dd' and el.namespace is NS_HTML
1808                                                         break
1809                                         break
1810                                 if node.name is 'dt' and node.namespace is NS_HTML
1811                                         generate_implied_end_tags 'dt' # arg is exception
1812                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1813                                                 parse_error()
1814                                         loop
1815                                                 el = open_els.shift()
1816                                                 if el.name is 'dt' and el.namespace is NS_HTML
1817                                                         break
1818                                         break
1819                                 if el_is_special_not_adp node
1820                                         break
1821                         close_p_if_in_button_scope()
1822                         insert_html_element t
1823                         return
1824                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1825                         close_p_if_in_button_scope()
1826                         insert_html_element t
1827                         tok_state = tok_state_plaintext
1828                         return
1829                 if t.type is TYPE_START_TAG and t.name is 'button'
1830                         if is_in_scope 'button', NS_HTML
1831                                 parse_error()
1832                                 generate_implied_end_tags()
1833                                 loop
1834                                         el = open_els.shift()
1835                                         if el.name is 'button' and el.namespace is NS_HTML
1836                                                 break
1837                         reconstruct_afe()
1838                         insert_html_element t
1839                         flag_frameset_ok = false
1840                         return
1841                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1842                         unless is_in_scope t.name, NS_HTML
1843                                 parse_error()
1844                                 return
1845                         generate_implied_end_tags()
1846                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1847                                 parse_error()
1848                         loop
1849                                 el = open_els.shift()
1850                                 if el.name is t.name and el.namespace is NS_HTML
1851                                         return
1852                         return
1853                 if t.type is TYPE_END_TAG and t.name is 'form'
1854                         unless template_tag_is_open()
1855                                 node = form_element_pointer
1856                                 form_element_pointer = null
1857                                 if node is null or not el_is_in_scope node
1858                                         parse_error()
1859                                         return
1860                                 generate_implied_end_tags()
1861                                 if open_els[0] isnt node
1862                                         parse_error()
1863                                 for el, i in open_els
1864                                         if el is node
1865                                                 open_els.splice i, 1
1866                                                 break
1867                         else
1868                                 unless is_in_scope 'form', NS_HTML
1869                                         parse_error()
1870                                         return
1871                                 generate_implied_end_tags()
1872                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1873                                         parse_error()
1874                                 loop
1875                                         el = open_els.shift()
1876                                         if el.name is 'form' and el.namespace is NS_HTML
1877                                                 break
1878                         return
1879                 if t.type is TYPE_END_TAG and t.name is 'p'
1880                         unless is_in_button_scope 'p', NS_HTML
1881                                 parse_error()
1882                                 insert_html_element new_open_tag 'p'
1883                         close_p_element()
1884                         return
1885                 if t.type is TYPE_END_TAG and t.name is 'li'
1886                         unless is_in_li_scope 'li', NS_HTML
1887                                 parse_error()
1888                                 return
1889                         generate_implied_end_tags 'li' # arg is exception
1890                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1891                                 parse_error()
1892                         loop
1893                                 el = open_els.shift()
1894                                 if el.name is 'li' and el.namespace is NS_HTML
1895                                         break
1896                         return
1897                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1898                         unless is_in_scope t.name, NS_HTML
1899                                 parse_error()
1900                                 return
1901                         generate_implied_end_tags t.name # arg is exception
1902                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1903                                 parse_error()
1904                         loop
1905                                 el = open_els.shift()
1906                                 if el.name is t.name and el.namespace is NS_HTML
1907                                         break
1908                         return
1909                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1910                         h_in_scope = false
1911                         for el in open_els
1912                                 if h_tags[el.name] is el.namespace
1913                                         h_in_scope = true
1914                                         break
1915                                 if standard_scopers[el.name] is el.namespace
1916                                         break
1917                         unless h_in_scope
1918                                 parse_error()
1919                                 return
1920                         generate_implied_end_tags()
1921                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1922                                 parse_error()
1923                         loop
1924                                 el = open_els.shift()
1925                                 if h_tags[el.name] is el.namespace
1926                                         break
1927                         return
1928                 # deep breath!
1929                 if t.type is TYPE_START_TAG and t.name is 'a'
1930                         # If the list of active formatting elements contains an a element
1931                         # between the end of the list and the last marker on the list (or
1932                         # the start of the list if there is no marker on the list), then
1933                         # this is a parse error; run the adoption agency algorithm for the
1934                         # tag name "a", then remove that element from the list of active
1935                         # formatting elements and the stack of open elements if the
1936                         # adoption agency algorithm didn't already remove it (it might not
1937                         # have if the element is not in table scope).
1938                         found = false
1939                         for el in afe
1940                                 if el.type is TYPE_AFE_MARKER
1941                                         break
1942                                 if el.name is 'a' and el.namespace is NS_HTML
1943                                         found = el
1944                         if found?
1945                                 parse_error()
1946                                 adoption_agency 'a'
1947                                 for el, i in afe
1948                                         if el is found
1949                                                 afe.splice i, 1
1950                                 for el, i in open_els
1951                                         if el is found
1952                                                 open_els.splice i, 1
1953                         reconstruct_afe()
1954                         el = insert_html_element t
1955                         afe_push el
1956                         return
1957                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1958                         reconstruct_afe()
1959                         el = insert_html_element t
1960                         afe_push el
1961                         return
1962                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1963                         reconstruct_afe()
1964                         el = insert_html_element t
1965                         afe_push el
1966                         return
1967                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1968                         adoption_agency t.name
1969                         return
1970                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1971                         reconstruct_afe()
1972                         insert_html_element t
1973                         afe_push_marker()
1974                         flag_frameset_ok = false
1975                         return
1976                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1977                         unless is_in_scope t.name, NS_HTML
1978                                 parse_error()
1979                                 return
1980                         generate_implied_end_tags()
1981                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1982                                 parse_error()
1983                         loop
1984                                 el = open_els.shift()
1985                                 if el.name is t.name and el.namespace is NS_HTML
1986                                         break
1987                         clear_afe_to_marker()
1988                         return
1989                 if t.type is TYPE_START_TAG and t.name is 'table'
1990                         close_p_if_in_button_scope() # fixfull quirksmode thing
1991                         insert_html_element t
1992                         flag_frameset_ok = false
1993                         ins_mode = ins_mode_in_table
1994                         return
1995                 if t.type is TYPE_END_TAG and t.name is 'br'
1996                         parse_error()
1997                         t.type is TYPE_START_TAG
1998                         # fall through
1999                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2000                         reconstruct_afe()
2001                         insert_html_element t
2002                         open_els.shift()
2003                         t.acknowledge_self_closing()
2004                         flag_frameset_ok = false
2005                         return
2006                 if t.type is TYPE_START_TAG and t.name is 'input'
2007                         reconstruct_afe()
2008                         insert_html_element t
2009                         open_els.shift()
2010                         t.acknowledge_self_closing()
2011                         unless is_input_hidden_tok t
2012                                 flag_frameset_ok = false
2013                         return
2014                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2015                         insert_html_element t
2016                         open_els.shift()
2017                         t.acknowledge_self_closing()
2018                         return
2019                 if t.type is TYPE_START_TAG and t.name is 'hr'
2020                         close_p_if_in_button_scope()
2021                         insert_html_element t
2022                         open_els.shift()
2023                         t.acknowledge_self_closing()
2024                         flag_frameset_ok = false
2025                         return
2026                 if t.type is TYPE_START_TAG and t.name is 'image'
2027                         parse_error()
2028                         t.name = 'img'
2029                         process_token t
2030                         return
2031                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2032                         parse_error()
2033                         if template_tag_is_open() is false and form_element_pointer isnt null
2034                                 return
2035                         t.acknowledge_self_closing()
2036                         flag_frameset_ok = false
2037                         close_p_if_in_button_scope()
2038                         el = insert_html_element new_open_tag 'form'
2039                         unless template_tag_is_open()
2040                                 form_element_pointer = el
2041                         for a in t.attrs_a
2042                                 if a[0] is 'action'
2043                                         el.attrs['action'] = a[1]
2044                                         break
2045                         insert_html_element new_open_tag 'hr'
2046                         open_els.shift()
2047                         reconstruct_afe()
2048                         insert_html_element new_open_tag 'label'
2049                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2050                         input_el = new_open_tag 'input'
2051                         prompt = null
2052                         for a in t.attrs_a
2053                                 if a[0] is 'prompt'
2054                                         prompt = a[1]
2055                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2056                                         input_el.attrs_a.push [a[0], a[1]]
2057                         input_el.attrs_a.push ['name', 'isindex']
2058                         # fixfull this next bit is in english... internationalize?
2059                         prompt ?= "This is a searchable index. Enter search keywords: "
2060                         insert_character new_character_token prompt # fixfull split
2061                         # TODO submit typo "balue" in spec
2062                         insert_html_element input_el
2063                         open_els.shift()
2064                         # insert_character '' # you can put chars here if promt attr missing
2065                         open_els.shift()
2066                         insert_html_element new_open_tag 'hr'
2067                         open_els.shift()
2068                         open_els.shift()
2069                         unless template_tag_is_open()
2070                                 form_element_pointer = null
2071                         return
2072                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2073                         insert_html_element t
2074                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2075                                 cur += 1
2076                         tok_state = tok_state_rcdata
2077                         original_ins_mode = ins_mode
2078                         flag_frameset_ok = false
2079                         ins_mode = ins_mode_text
2080                         return
2081                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2082                         close_p_if_in_button_scope()
2083                         reconstruct_afe()
2084                         flag_frameset_ok = false
2085                         parse_generic_raw_text t
2086                         return
2087                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2088                         flag_frameset_ok = false
2089                         parse_generic_raw_text t
2090                         return
2091                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2092                         parse_generic_raw_text t
2093                         return
2094                 if t.type is TYPE_START_TAG and t.name is 'select'
2095                         reconstruct_afe()
2096                         insert_html_element t
2097                         flag_frameset_ok = false
2098                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2099                                 ins_mode = ins_mode_in_select_in_table
2100                         else
2101                                 ins_mode = ins_mode_in_select
2102                         return
2103                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2104                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2105                                 open_els.shift()
2106                         reconstruct_afe()
2107                         insert_html_element t
2108                         return
2109 # this comment block implements the W3C spec
2110 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2111 #                       if is_in_scope 'ruby', NS_HTML
2112 #                               generate_implied_end_tags()
2113 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2114 #                                       parse_error()
2115 #                       insert_html_element t
2116 #                       return
2117 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2118 #                       if is_in_scope 'ruby', NS_HTML
2119 #                               generate_implied_end_tags 'rtc' # arg is exception
2120 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2121 #                                       parse_error()
2122 #                       insert_html_element t
2123 #                       return
2124 # below implements the WATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2125                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2126                         if is_in_scope 'ruby', NS_HTML
2127                                 generate_implied_end_tags()
2128                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2129                                         parse_error()
2130                         insert_html_element t
2131                         return
2132                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2133                         if is_in_scope 'ruby', NS_HTML
2134                                 generate_implied_end_tags 'rtc'
2135                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2136                                         parse_error()
2137                         insert_html_element t
2138                         return
2139 # end WATWG chunk
2140                 if t.type is TYPE_START_TAG and t.name is 'math'
2141                         reconstruct_afe()
2142                         adjust_mathml_attributes t
2143                         adjust_foreign_attributes t
2144                         insert_foreign_element t, NS_MATHML
2145                         if t.flag 'self-closing'
2146                                 open_els.shift()
2147                                 t.acknowledge_self_closing()
2148                         return
2149                 if t.type is TYPE_START_TAG and t.name is 'svg'
2150                         reconstruct_afe()
2151                         adjust_svg_attributes t
2152                         adjust_foreign_attributes t
2153                         insert_foreign_element t, NS_SVG
2154                         if t.flag 'self-closing'
2155                                 open_els.shift()
2156                                 t.acknowledge_self_closing()
2157                         return
2158                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2159                         parse_error()
2160                         return
2161                 if t.type is TYPE_START_TAG # any other start tag
2162                         reconstruct_afe()
2163                         insert_html_element t
2164                         return
2165                 if t.type is TYPE_END_TAG # any other end tag
2166                         in_body_any_other_end_tag t.name
2167                         return
2168                 return
2169
2170         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2171         ins_mode_text = (t) ->
2172                 if t.type is TYPE_TEXT
2173                         insert_character t
2174                         return
2175                 if t.type is TYPE_EOF
2176                         parse_error()
2177                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2178                                 open_els[0].flag 'already started', true
2179                         open_els.shift()
2180                         ins_mode = original_ins_mode
2181                         process_token t
2182                         return
2183                 if t.type is TYPE_END_TAG and t.name is 'script'
2184                         open_els.shift()
2185                         ins_mode = original_ins_mode
2186                         # fixfull the spec seems to assume that I'm going to run the script
2187                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2188                         return
2189                 if t.type is TYPE_END_TAG
2190                         open_els.shift()
2191                         ins_mode = original_ins_mode
2192                         return
2193                 console.log 'warning: end of ins_mode_text reached'
2194
2195         # the functions below implement the tokenizer stats described here:
2196         # http://www.w3.org/TR/html5/syntax.html#tokenization
2197
2198         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2199         ins_mode_in_table_else = (t) ->
2200                 parse_error()
2201                 flag_foster_parenting = true
2202                 ins_mode_in_body t
2203                 flag_foster_parenting = false
2204                 return
2205         ins_mode_in_table = (t) ->
2206                 switch t.type
2207                         when TYPE_TEXT
2208                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2209                                         pending_table_character_tokens = []
2210                                         original_ins_mode = ins_mode
2211                                         ins_mode = ins_mode_in_table_text
2212                                         process_token t
2213                                 else
2214                                         ins_mode_in_table_else t
2215                         when TYPE_COMMENT
2216                                 insert_comment t
2217                         when TYPE_DOCTYPE
2218                                 parse_error()
2219                         when TYPE_START_TAG
2220                                 switch t.name
2221                                         when 'caption'
2222                                                 clear_stack_to_table_context()
2223                                                 afe_push_marker()
2224                                                 insert_html_element t
2225                                                 ins_mode = ins_mode_in_caption
2226                                         when 'colgroup'
2227                                                 clear_stack_to_table_context()
2228                                                 insert_html_element t
2229                                                 ins_mode = ins_mode_in_column_group
2230                                         when 'col'
2231                                                 clear_stack_to_table_context()
2232                                                 insert_html_element new_open_tag 'colgroup'
2233                                                 ins_mode = ins_mode_in_column_group
2234                                                 process_token t
2235                                         when 'tbody', 'tfoot', 'thead'
2236                                                 clear_stack_to_table_context()
2237                                                 insert_html_element t
2238                                                 ins_mode = ins_mode_in_table_body
2239                                         when 'td', 'th', 'tr'
2240                                                 clear_stack_to_table_context()
2241                                                 insert_html_element new_open_tag 'tbody'
2242                                                 ins_mode = ins_mode_in_table_body
2243                                                 process_token t
2244                                         when 'table'
2245                                                 parse_error()
2246                                                 if is_in_table_scope 'table', NS_HTML
2247                                                         loop
2248                                                                 el = open_els.shift()
2249                                                                 if el.name is 'table' and el.namespace is NS_HTML
2250                                                                         break
2251                                                         reset_ins_mode()
2252                                                         process_token t
2253                                         when 'style', 'script', 'template'
2254                                                 ins_mode_in_head t
2255                                         when 'input'
2256                                                 unless is_input_hidden_tok t
2257                                                         ins_mode_in_table_else t
2258                                                 else
2259                                                         parse_error()
2260                                                         el = insert_html_element t
2261                                                         open_els.shift()
2262                                                         t.acknowledge_self_closing()
2263                                         when 'form'
2264                                                 parse_error()
2265                                                 if form_element_pointer?
2266                                                         return
2267                                                 if template_tag_is_open()
2268                                                         return
2269                                                 form_element_pointer = insert_html_element t
2270                                                 open_els.shift()
2271                                         else
2272                                                 ins_mode_in_table_else t
2273                         when TYPE_END_TAG
2274                                 switch t.name
2275                                         when 'table'
2276                                                 if is_in_table_scope 'table', NS_HTML
2277                                                         loop
2278                                                                 el = open_els.shift()
2279                                                                 if el.name is 'table' and el.namespace is NS_HTML
2280                                                                         break
2281                                                         reset_ins_mode()
2282                                                 else
2283                                                         parse_error()
2284                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2285                                                 parse_error()
2286                                         when 'template'
2287                                                 ins_mode_in_head t
2288                                         else
2289                                                 ins_mode_in_table_else t
2290                         when TYPE_EOF
2291                                 ins_mode_in_body t
2292                         else
2293                                 ins_mode_in_table_else t
2294
2295
2296         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2297         ins_mode_in_table_text = (t) ->
2298                 if t.type is TYPE_TEXT and t.text is "\u0000"
2299                         # from javascript?
2300                         parse_error()
2301                         return
2302                 if t.type is TYPE_TEXT
2303                         pending_table_character_tokens.push t
2304                         return
2305                 # Anything else
2306                 all_space = true
2307                 for old in pending_table_character_tokens
2308                         unless is_space_tok old
2309                                 all_space = false
2310                                 break
2311                 if all_space
2312                         for old in pending_table_character_tokens
2313                                 insert_character old
2314                 else
2315                         for old in pending_table_character_tokens
2316                                 ins_mode_in_table_else old
2317                 pending_table_character_tokens = []
2318                 ins_mode = original_ins_mode
2319                 process_token t
2320
2321         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2322         ins_mode_in_caption = (t) ->
2323                 if t.type is TYPE_END_TAG and t.name is 'caption'
2324                         if is_in_table_scope 'caption', NS_HTML
2325                                 generate_implied_end_tags()
2326                                 if open_els[0].name isnt 'caption'
2327                                         parse_error()
2328                                 loop
2329                                         el = open_els.shift()
2330                                         if el.name is 'caption' and el.namespace is NS_HTML
2331                                                 break
2332                                 clear_afe_to_marker()
2333                                 ins_mode = ins_mode_in_table
2334                         else
2335                                 parse_error()
2336                                 # fragment case
2337                         return
2338                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2339                         parse_error()
2340                         if is_in_table_scope 'caption', NS_HTML
2341                                 loop
2342                                         el = open_els.shift()
2343                                         if el.name is 'caption' and el.namespace is NS_HTML
2344                                                 break
2345                                 clear_afe_to_marker()
2346                                 ins_mode = ins_mode_in_table
2347                                 process_token t
2348                         # else fragment case
2349                         return
2350                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2351                         parse_error()
2352                         return
2353                 # Anything else
2354                 ins_mode_in_body t
2355
2356         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2357         ins_mode_in_column_group = (t) ->
2358                 if is_space_tok t
2359                         insert_character t
2360                         return
2361                 if t.type is TYPE_COMMENT
2362                         insert_comment t
2363                         return
2364                 if t.type is TYPE_DOCTYPE
2365                         parse_error()
2366                         return
2367                 if t.type is TYPE_START_TAG and t.name is 'html'
2368                         ins_mode_in_body t
2369                         return
2370                 if t.type is TYPE_START_TAG and t.name is 'col'
2371                         el = insert_html_element t
2372                         open_els.shift()
2373                         t.acknowledge_self_closing()
2374                         return
2375                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2376                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2377                                 open_els.shift()
2378                                 ins_mode = ins_mode_in_table
2379                         else
2380                                 parse_error()
2381                         return
2382                 if t.type is TYPE_END_TAG and t.name is 'col'
2383                         parse_error()
2384                         return
2385                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2386                         ins_mode_in_head t
2387                         return
2388                 if t.type is TYPE_EOF
2389                         ins_mode_in_body t
2390                         return
2391                 # Anything else
2392                 if open_els[0].name isnt 'colgroup'
2393                         parse_error()
2394                         return
2395                 open_els.shift()
2396                 ins_mode = ins_mode_in_table
2397                 process_token t
2398                 return
2399
2400         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2401         ins_mode_in_table_body = (t) ->
2402                 if t.type is TYPE_START_TAG and t.name is 'tr'
2403                         clear_stack_to_table_body_context()
2404                         insert_html_element t
2405                         ins_mode = ins_mode_in_row
2406                         return
2407                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2408                         parse_error()
2409                         clear_stack_to_table_body_context()
2410                         insert_html_element new_open_tag 'tr'
2411                         ins_mode = ins_mode_in_row
2412                         process_token t
2413                         return
2414                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2415                         unless is_in_table_scope t.name, NS_HTML
2416                                 parse_error()
2417                                 return
2418                         clear_stack_to_table_body_context()
2419                         open_els.shift()
2420                         ins_mode = ins_mode_in_table
2421                         return
2422                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2423                         has = false
2424                         for el in open_els
2425                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2426                                         has = true
2427                                         break
2428                                 if table_scopers[el.name] is el.namespace
2429                                         break
2430                         if !has
2431                                 parse_error()
2432                                 return
2433                         clear_stack_to_table_body_context()
2434                         open_els.shift()
2435                         ins_mode = ins_mode_in_table
2436                         process_token t
2437                         return
2438                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2439                         parse_error()
2440                         return
2441                 # Anything else
2442                 ins_mode_in_table t
2443
2444         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2445         ins_mode_in_row = (t) ->
2446                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2447                         clear_stack_to_table_row_context()
2448                         insert_html_element t
2449                         ins_mode = ins_mode_in_cell
2450                         afe_push_marker()
2451                         return
2452                 if t.type is TYPE_END_TAG and t.name is 'tr'
2453                         if is_in_table_scope 'tr', NS_HTML
2454                                 clear_stack_to_table_row_context()
2455                                 open_els.shift()
2456                                 ins_mode = ins_mode_in_table_body
2457                         else
2458                                 parse_error()
2459                         return
2460                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2461                         if is_in_table_scope 'tr', NS_HTML
2462                                 clear_stack_to_table_row_context()
2463                                 open_els.shift()
2464                                 ins_mode = ins_mode_in_table_body
2465                                 process_token t
2466                         else
2467                                 parse_error()
2468                         return
2469                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2470                         if is_in_table_scope t.name, NS_HTML
2471                                 if is_in_table_scope 'tr', NS_HTML
2472                                         clear_stack_to_table_row_context()
2473                                         open_els.shift()
2474                                         ins_mode = ins_mode_in_table_body
2475                                         process_token t
2476                         else
2477                                 parse_error()
2478                         return
2479                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2480                         parse_error()
2481                         return
2482                 # Anything else
2483                 ins_mode_in_table t
2484
2485         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2486         close_the_cell = ->
2487                 generate_implied_end_tags()
2488                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2489                         parse_error()
2490                 loop
2491                         el = open_els.shift()
2492                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2493                                 break
2494                 clear_afe_to_marker()
2495                 ins_mode = ins_mode_in_row
2496
2497         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2498         ins_mode_in_cell = (t) ->
2499                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2500                         if is_in_table_scope t.name, NS_HTML
2501                                 generate_implied_end_tags()
2502                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2503                                         parse_error()
2504                                 loop
2505                                         el = open_els.shift()
2506                                         if el.name is t.name and el.namespace is NS_HTML
2507                                                 break
2508                                 clear_afe_to_marker()
2509                                 ins_mode = ins_mode_in_row
2510                         else
2511                                 parse_error()
2512                         return
2513                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2514                         has = false
2515                         for el in open_els
2516                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2517                                         has = true
2518                                         break
2519                                 if table_scopers[el.name] is el.namespace
2520                                         break
2521                         if !has
2522                                 parse_error()
2523                                 return
2524                         close_the_cell()
2525                         process_token t
2526                         return
2527                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2528                         parse_error()
2529                         return
2530                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2531                         if is_in_table_scope t.name, NS_HTML
2532                                 close_the_cell()
2533                                 process_token t
2534                         else
2535                                 parse_error()
2536                         return
2537                 # Anything Else
2538                 ins_mode_in_body t
2539
2540         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2541         ins_mode_in_select = (t) ->
2542                 if t.type is TYPE_TEXT and t.text is "\u0000"
2543                         parse_error()
2544                         return
2545                 if t.type is TYPE_TEXT
2546                         insert_character t
2547                         return
2548                 if t.type is TYPE_COMMENT
2549                         insert_comment t
2550                         return
2551                 if t.type is TYPE_DOCTYPE
2552                         parse_error()
2553                         return
2554                 if t.type is TYPE_START_TAG and t.name is 'html'
2555                         ins_mode_in_body t
2556                         return
2557                 if t.type is TYPE_START_TAG and t.name is 'option'
2558                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2559                                 open_els.shift()
2560                         insert_html_element t
2561                         return
2562                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2563                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2564                                 open_els.shift()
2565                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2566                                 open_els.shift()
2567                         insert_html_element t
2568                         return
2569                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2570                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2571                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2572                                         open_els.shift()
2573                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2574                                 open_els.shift()
2575                         else
2576                                 parse_error()
2577                         return
2578                 if t.type is TYPE_END_TAG and t.name is 'option'
2579                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2580                                 open_els.shift()
2581                         else
2582                                 parse_error()
2583                         return
2584                 if t.type is TYPE_END_TAG and t.name is 'select'
2585                         if is_in_select_scope 'select', NS_HTML
2586                                 loop
2587                                         el = open_els.shift()
2588                                         if el.name is 'select' and el.namespace is NS_HTML
2589                                                 break
2590                                 reset_ins_mode()
2591                         else
2592                                 parse_error()
2593                         return
2594                 if t.type is TYPE_START_TAG and t.name is 'select'
2595                         parse_error()
2596                         loop
2597                                 el = open_els.shift()
2598                                 if el.name is 'select' and el.namespace is NS_HTML
2599                                         break
2600                         reset_ins_mode()
2601                         # spec says that this is the same as </select> but it doesn't say
2602                         # to check scope first
2603                         return
2604                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2605                         parse_error()
2606                         if is_in_select_scope 'select', NS_HTML
2607                                 return
2608                         loop
2609                                 el = open_els.shift()
2610                                 if el.name is 'select' and el.namespace is NS_HTML
2611                                         break
2612                         reset_ins_mode()
2613                         process_token t
2614                         return
2615                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2616                         ins_mode_in_head t
2617                         return
2618                 if t.type is TYPE_EOF
2619                         ins_mode_in_body t
2620                         return
2621                 # Anything else
2622                 parse_error()
2623                 return
2624
2625         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2626         ins_mode_in_select_in_table = (t) ->
2627                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2628                         parse_error()
2629                         loop
2630                                 el = open_els.shift()
2631                                 if el.name is 'select' and el.namespace is NS_HTML
2632                                         break
2633                         reset_ins_mode()
2634                         process_token t
2635                         return
2636                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2637                         parse_error()
2638                         unless is_in_table_scope t.name, NS_HTML
2639                                 return
2640                         loop
2641                                 el = open_els.shift()
2642                                 if el.name is 'select' and el.namespace is NS_HTML
2643                                         break
2644                         reset_ins_mode()
2645                         process_token t
2646                         return
2647                 # Anything else
2648                 ins_mode_in_select t
2649                 return
2650
2651         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2652         ins_mode_in_template = (t) ->
2653                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2654                         ins_mode_in_body t
2655                         return
2656                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2657                         ins_mode_in_head t
2658                         return
2659                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2660                         template_ins_modes.shift()
2661                         template_ins_modes.unshift ins_mode_in_table
2662                         ins_mode = ins_mode_in_table
2663                         process_token t
2664                         return
2665                 if t.type is TYPE_START_TAG and t.name is 'col'
2666                         template_ins_modes.shift()
2667                         template_ins_modes.unshift ins_mode_in_column_group
2668                         ins_mode = ins_mode_in_column_group
2669                         process_token t
2670                         return
2671                 if t.type is TYPE_START_TAG and t.name is 'tr'
2672                         template_ins_modes.shift()
2673                         template_ins_modes.unshift ins_mode_in_table_body
2674                         ins_mode = ins_mode_in_table_body
2675                         process_token t
2676                         return
2677                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2678                         template_ins_modes.shift()
2679                         template_ins_modes.unshift ins_mode_in_row
2680                         ins_mode = ins_mode_in_row
2681                         process_token t
2682                         return
2683                 if t.type is TYPE_START_TAG
2684                         template_ins_modes.shift()
2685                         template_ins_modes.unshift ins_mode_in_body
2686                         ins_mode = ins_mode_in_body
2687                         process_token t
2688                         return
2689                 if t.type is TYPE_END_TAG
2690                         parse_error()
2691                         return
2692                 if t.type is TYPE_EOF
2693                         unless template_tag_is_open()
2694                                 stop_parsing()
2695                                 return
2696                         parse_error()
2697                         loop
2698                                 el = open_els.shift()
2699                                 if el.name is 'template' and el.namespace is NS_HTML
2700                                         break
2701                         clear_afe_to_marker()
2702                         template_ins_modes.shift()
2703                         reset_ins_mode()
2704                         process_token t
2705
2706         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2707         ins_mode_after_body = (t) ->
2708                 if is_space_tok t
2709                         ins_mode_in_body t
2710                         return
2711                 if t.type is TYPE_COMMENT
2712                         insert_comment t, [open_els[0], open_els[0].children.length]
2713                         return
2714                 if t.type is TYPE_DOCTYPE
2715                         parse_error()
2716                         return
2717                 if t.type is TYPE_START_TAG and t.name is 'html'
2718                         ins_mode_in_body t
2719                         return
2720                 if t.type is TYPE_END_TAG and t.name is 'html'
2721                         # fixfull fragment case
2722                         ins_mode = ins_mode_after_after_body
2723                         return
2724                 if t.type is TYPE_EOF
2725                         stop_parsing()
2726                         return
2727                 # Anything ELse
2728                 parse_error()
2729                 ins_mode = ins_mode_in_body
2730                 process_token t
2731
2732         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2733         ins_mode_in_frameset = (t) ->
2734                 if is_space_tok t
2735                         insert_character t
2736                         return
2737                 if t.type is TYPE_COMMENT
2738                         insert_comment t
2739                         return
2740                 if t.type is TYPE_DOCTYPE
2741                         parse_error()
2742                         return
2743                 if t.type is TYPE_START_TAG and t.name is 'html'
2744                         ins_mode_in_body t
2745                         return
2746                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2747                         insert_html_element t
2748                         return
2749                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2750                         if open_els.length is 1
2751                                 parse_error()
2752                                 return # fragment case
2753                         open_els.shift()
2754                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2755                                 ins_mode = ins_mode_after_frameset
2756                         return
2757                 if t.type is TYPE_START_TAG and t.name is 'frame'
2758                         insert_html_element t
2759                         open_els.shift()
2760                         t.acknowledge_self_closing()
2761                         return
2762                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2763                         ins_mode_in_head t
2764                         return
2765                 if t.type is TYPE_EOF
2766                         if open_els.length isnt 1
2767                                 parse_error()
2768                         stop_parsing()
2769                         return
2770                 # Anything else
2771                 parse_error()
2772                 return
2773
2774         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2775         ins_mode_after_frameset = (t) ->
2776                 if is_space_tok t
2777                         insert_character t
2778                         return
2779                 if t.type is TYPE_COMMENT
2780                         insert_comment t
2781                         return
2782                 if t.type is TYPE_DOCTYPE
2783                         parse_error()
2784                         return
2785                 if t.type is TYPE_START_TAG and t.name is 'html'
2786                         ins_mode_in_body t
2787                         return
2788                 if t.type is TYPE_END_TAG and t.name is 'html'
2789                         insert_mode = ins_mode_after_after_frameset
2790                         return
2791                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2792                         ins_mode_in_head t
2793                         return
2794                 if t.type is TYPE_EOF
2795                         stop_parsing()
2796                         return
2797                 # Anything else
2798                 parse_error()
2799                 return
2800
2801         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2802         ins_mode_after_after_body = (t) ->
2803                 if t.type is TYPE_COMMENT
2804                         insert_comment t, [doc, doc.children.length]
2805                         return
2806                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2807                         ins_mode_in_body t
2808                         return
2809                 if t.type is TYPE_EOF
2810                         stop_parsing()
2811                         return
2812                 # Anything else
2813                 parse_error()
2814                 ins_mode = ins_mode_in_body
2815                 return
2816
2817         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2818         ins_mode_after_after_frameset = (t) ->
2819                 if t.type is TYPE_COMMENT
2820                         insert_comment t, [doc, doc.children.length]
2821                         return
2822                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2823                         ins_mode_in_body t
2824                         return
2825                 if t.type is TYPE_EOF
2826                         stop_parsing()
2827                         return
2828                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2829                         ins_mode_in_head t
2830                         return
2831                 # Anything else
2832                 parse_error()
2833                 return
2834
2835         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2836         has_color_face_or_size = (t) ->
2837                 for a in t.attrs_a
2838                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2839                                 return true
2840                 return false
2841         in_foreign_content_end_script = ->
2842                 open_els.shift()
2843                 # fixfull
2844                 return
2845         in_foreign_content_other_start = (t) ->
2846                 acn = adjusted_current_node()
2847                 if acn.namespace is NS_MATHML
2848                         adjust_mathml_attributes t
2849                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2850                         t.name = svg_name_fixes[t.name]
2851                 if acn.namespace is NS_SVG
2852                         adjust_svg_attributes t
2853                 adjust_foreign_attributes t
2854                 insert_foreign_element t, acn.namespace
2855                 if t.flag 'self-closing'
2856                         if t.name is 'script'
2857                                 t.acknowledge_self_closing()
2858                                 in_foreign_content_end_script()
2859                         else
2860                                 open_els.shift()
2861                                 t.acknowledge_self_closing()
2862                 return
2863         in_foreign_content = (t) ->
2864                 if t.type is TYPE_TEXT and t.text is "\u0000"
2865                         parse_error()
2866                         insert_character new_character_token "\ufffd"
2867                         return
2868                 if is_space_tok t
2869                         insert_character t
2870                         return
2871                 if t.type is TYPE_TEXT
2872                         flag_frameset_ok = false
2873                         insert_character t
2874                         return
2875                 if t.type is TYPE_COMMENT
2876                         insert_comment t
2877                         return
2878                 if t.type is TYPE_DOCTYPE
2879                         parse_error()
2880                         return
2881                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2882                         parse_error()
2883                         if flag_fragment_parsing
2884                                 in_foreign_content_other_start t
2885                                 return
2886                         loop # is this safe?
2887                                 open_els.shift()
2888                                 cn = open_els[0]
2889                                 if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
2890                                         break
2891                         process_token t
2892                         return
2893                 if t.type is TYPE_START_TAG
2894                         in_foreign_content_other_start t
2895                         return
2896                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2897                         in_foreign_content_end_script()
2898                         return
2899                 if t.type is TYPE_END_TAG
2900                         if open_els[0].name.toLowerCase() isnt t.name
2901                                 parse_error()
2902                         for node in open_els
2903                                 if node is open_els[open_els.length - 1]
2904                                         return
2905                                 if node.name.toLowerCase() is t.name
2906                                         loop
2907                                                 el = open_els.shift()
2908                                                 if el is node
2909                                                         return
2910                                 if node.namespace is NS_HTML
2911                                         break
2912                         ins_mode t # explicitly call HTML insertion mode
2913
2914
2915         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2916         tok_state_data = ->
2917                 switch c = txt.charAt(cur++)
2918                         when '&'
2919                                 return new_text_node parse_character_reference()
2920                         when '<'
2921                                 tok_state = tok_state_tag_open
2922                         when "\u0000"
2923                                 parse_error()
2924                                 return new_text_node "\ufffd"
2925                         when '' # EOF
2926                                 return new_eof_token()
2927                         else
2928                                 return new_text_node c
2929                 return null
2930
2931         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2932         # not needed: tok_state_character_reference_in_data = ->
2933         # just call parse_character_reference()
2934
2935         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2936         tok_state_rcdata = ->
2937                 switch c = txt.charAt(cur++)
2938                         when '&'
2939                                 return new_text_node parse_character_reference()
2940                         when '<'
2941                                 tok_state = tok_state_rcdata_less_than_sign
2942                         when "\u0000"
2943                                 parse_error()
2944                                 return new_character_token "\ufffd"
2945                         when '' # EOF
2946                                 return new_eof_token()
2947                         else
2948                                 return new_character_token c
2949                 return null
2950
2951         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2952         # not needed: tok_state_character_reference_in_rcdata = ->
2953         # just call parse_character_reference()
2954
2955         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2956         tok_state_rawtext = ->
2957                 switch c = txt.charAt(cur++)
2958                         when '<'
2959                                 tok_state = tok_state_rawtext_less_than_sign
2960                         when "\u0000"
2961                                 parse_error()
2962                                 return new_character_token "\ufffd"
2963                         when '' # EOF
2964                                 return new_eof_token()
2965                         else
2966                                 return new_character_token c
2967                 return null
2968
2969         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2970         tok_state_script_data = ->
2971                 switch c = txt.charAt(cur++)
2972                         when '<'
2973                                 tok_state = tok_state_script_data_less_than_sign
2974                         when "\u0000"
2975                                 parse_error()
2976                                 return new_character_token "\ufffd"
2977                         when '' # EOF
2978                                 return new_eof_token()
2979                         else
2980                                 return new_character_token c
2981                 return null
2982
2983         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
2984         tok_state_plaintext = ->
2985                 switch c = txt.charAt(cur++)
2986                         when "\u0000"
2987                                 parse_error()
2988                                 return new_character_token "\ufffd"
2989                         when '' # EOF
2990                                 return new_eof_token()
2991                         else
2992                                 return new_character_token c
2993                 return null
2994
2995
2996         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
2997         tok_state_tag_open = ->
2998                 switch c = txt.charAt(cur++)
2999                         when '!'
3000                                 tok_state = tok_state_markup_declaration_open
3001                         when '/'
3002                                 tok_state = tok_state_end_tag_open
3003                         when '?'
3004                                 parse_error()
3005                                 tok_cur_tag = new_comment_token '?'
3006                                 tok_state = tok_state_bogus_comment
3007                         else
3008                                 if is_lc_alpha(c)
3009                                         tok_cur_tag = new_open_tag c
3010                                         tok_state = tok_state_tag_name
3011                                 else if is_uc_alpha(c)
3012                                         tok_cur_tag = new_open_tag c.toLowerCase()
3013                                         tok_state = tok_state_tag_name
3014                                 else
3015                                         parse_error()
3016                                         tok_state = tok_state_data
3017                                         cur -= 1 # we didn't parse/handle the char after <
3018                                         return new_text_node '<'
3019                 return null
3020
3021         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3022         tok_state_end_tag_open = ->
3023                 switch c = txt.charAt(cur++)
3024                         when '>'
3025                                 parse_error()
3026                                 tok_state = tok_state_data
3027                         when '' # EOF
3028                                 parse_error()
3029                                 tok_state = tok_state_data
3030                                 return new_text_node '</'
3031                         else
3032                                 if is_uc_alpha(c)
3033                                         tok_cur_tag = new_end_tag c.toLowerCase()
3034                                         tok_state = tok_state_tag_name
3035                                 else if is_lc_alpha(c)
3036                                         tok_cur_tag = new_end_tag c
3037                                         tok_state = tok_state_tag_name
3038                                 else
3039                                         parse_error()
3040                                         tok_cur_tag = new_comment_token '/'
3041                                         tok_state = tok_state_bogus_comment
3042                 return null
3043
3044         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3045         tok_state_tag_name = ->
3046                 switch c = txt.charAt(cur++)
3047                         when "\t", "\n", "\u000c", ' '
3048                                 tok_state = tok_state_before_attribute_name
3049                         when '/'
3050                                 tok_state = tok_state_self_closing_start_tag
3051                         when '>'
3052                                 tok_state = tok_state_data
3053                                 tmp = tok_cur_tag
3054                                 tok_cur_tag = null
3055                                 return tmp
3056                         when "\u0000"
3057                                 parse_error()
3058                                 tok_cur_tag.name += "\ufffd"
3059                         when '' # EOF
3060                                 parse_error()
3061                                 tok_state = tok_state_data
3062                         else
3063                                 if is_uc_alpha(c)
3064                                         tok_cur_tag.name += c.toLowerCase()
3065                                 else
3066                                         tok_cur_tag.name += c
3067                 return null
3068
3069         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3070         tok_state_rcdata_less_than_sign = ->
3071                 c = txt.charAt(cur++)
3072                 if c is '/'
3073                         temporary_buffer = ''
3074                         tok_state = tok_state_rcdata_end_tag_open
3075                         return null
3076                 # Anything else
3077                 tok_state = tok_state_rcdata
3078                 cur -= 1 # reconsume the input character
3079                 return new_character_token '<'
3080
3081         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3082         tok_state_rcdata_end_tag_open = ->
3083                 c = txt.charAt(cur++)
3084                 if is_uc_alpha(c)
3085                         tok_cur_tag = new_end_tag c.toLowerCase()
3086                         temporary_buffer += c
3087                         tok_state = tok_state_rcdata_end_tag_name
3088                         return null
3089                 if is_lc_alpha(c)
3090                         tok_cur_tag = new_end_tag c
3091                         temporary_buffer += c
3092                         tok_state = tok_state_rcdata_end_tag_name
3093                         return null
3094                 # Anything else
3095                 tok_state = tok_state_rcdata
3096                 cur -= 1 # reconsume the input character
3097                 return new_character_token "</" # fixfull separate these
3098
3099         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3100         is_appropriate_end_tag = (t) ->
3101                 # spec says to check against "the tag name of the last start tag to
3102                 # have been emitted from this tokenizer", but this is only called from
3103                 # the various "raw" states, so it's hopefully ok to assume that
3104                 # open_els[0].name will work instead TODO: verify this after the script
3105                 # data states are implemented
3106                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3107                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3108
3109         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3110         tok_state_rcdata_end_tag_name = ->
3111                 c = txt.charAt(cur++)
3112                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3113                         if is_appropriate_end_tag tok_cur_tag
3114                                 tok_state = tok_state_before_attribute_name
3115                                 return
3116                         # else fall through to "Anything else"
3117                 if c is '/'
3118                         if is_appropriate_end_tag tok_cur_tag
3119                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3120                                 return
3121                         # else fall through to "Anything else"
3122                 if c is '>'
3123                         if is_appropriate_end_tag tok_cur_tag
3124                                 tok_state = tok_state_data
3125                                 return tok_cur_tag
3126                         # else fall through to "Anything else"
3127                 if is_uc_alpha(c)
3128                         tok_cur_tag.name += c.toLowerCase()
3129                         temporary_buffer += c
3130                         return null
3131                 if is_lc_alpha(c)
3132                         tok_cur_tag.name += c
3133                         temporary_buffer += c
3134                         return null
3135                 # Anything else
3136                 tok_state = tok_state_rcdata
3137                 cur -= 1 # reconsume the input character
3138                 return new_character_token '</' + temporary_buffer # fixfull separate these
3139
3140         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3141         tok_state_rawtext_less_than_sign = ->
3142                 c = txt.charAt(cur++)
3143                 if c is '/'
3144                         temporary_buffer = ''
3145                         tok_state = tok_state_rawtext_end_tag_open
3146                         return null
3147                 # Anything else
3148                 tok_state = tok_state_rawtext
3149                 cur -= 1 # reconsume the input character
3150                 return new_character_token '<'
3151
3152         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3153         tok_state_rawtext_end_tag_open = ->
3154                 c = txt.charAt(cur++)
3155                 if is_uc_alpha(c)
3156                         tok_cur_tag = new_end_tag c.toLowerCase()
3157                         temporary_buffer += c
3158                         tok_state = tok_state_rawtext_end_tag_name
3159                         return null
3160                 if is_lc_alpha(c)
3161                         tok_cur_tag = new_end_tag c
3162                         temporary_buffer += c
3163                         tok_state = tok_state_rawtext_end_tag_name
3164                         return null
3165                 # Anything else
3166                 tok_state = tok_state_rawtext
3167                 cur -= 1 # reconsume the input character
3168                 return new_character_token "</" # fixfull separate these
3169
3170         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3171         tok_state_rawtext_end_tag_name = ->
3172                 c = txt.charAt(cur++)
3173                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3174                         if is_appropriate_end_tag tok_cur_tag
3175                                 tok_state = tok_state_before_attribute_name
3176                                 return
3177                         # else fall through to "Anything else"
3178                 if c is '/'
3179                         if is_appropriate_end_tag tok_cur_tag
3180                                 tok_state = tok_state_self_closing_start_tag
3181                                 return
3182                         # else fall through to "Anything else"
3183                 if c is '>'
3184                         if is_appropriate_end_tag tok_cur_tag
3185                                 tok_state = tok_state_data
3186                                 return tok_cur_tag
3187                         # else fall through to "Anything else"
3188                 if is_uc_alpha(c)
3189                         tok_cur_tag.name += c.toLowerCase()
3190                         temporary_buffer += c
3191                         return null
3192                 if is_lc_alpha(c)
3193                         tok_cur_tag.name += c
3194                         temporary_buffer += c
3195                         return null
3196                 # Anything else
3197                 tok_state = tok_state_rawtext
3198                 cur -= 1 # reconsume the input character
3199                 return new_character_token '</' + temporary_buffer # fixfull separate these
3200
3201         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3202         tok_state_script_data_less_than_sign = ->
3203                 c = txt.charAt(cur++)
3204                 if c is '/'
3205                         temporary_buffer = ''
3206                         tok_state = tok_state_script_data_end_tag_open
3207                         return
3208                 if c is '!'
3209                         tok_state = tok_state_script_data_escape_start
3210                         return new_character_token '<!' # fixfull split
3211                 # Anything else
3212                 tok_state = tok_state_script_data
3213                 cur -= 1 # Reconsume
3214                 return new_character_token '<'
3215
3216         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3217         tok_state_script_data_end_tag_open = ->
3218                 c = txt.charAt(cur++)
3219                 if is_uc_alpha(c)
3220                         tok_cur_tag = new_end_tag c.toLowerCase()
3221                         temporary_buffer += c
3222                         tok_state = tok_state_script_data_end_tag_name
3223                         return
3224                 if is_lc_alpha(c)
3225                         tok_cur_tag = new_end_tag c
3226                         temporary_buffer += c
3227                         tok_state = tok_state_script_data_end_tag_name
3228                         return
3229                 # Anything else
3230                 tok_state = tok_state_script_data
3231                 cur -= 1 # Reconsume
3232                 return new_character_token '</'
3233
3234         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3235         tok_state_script_data_end_tag_name = ->
3236                 c = txt.charAt(cur++)
3237                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3238                         if is_appropriate_end_tag tok_cur_tag
3239                                 tok_state = tok_state_before_attribute_name
3240                                 return
3241                         # fall through
3242                 if c is '/'
3243                         if is_appropriate_end_tag tok_cur_tag
3244                                 tok_state = tok_state_self_closing_start_tag
3245                                 return
3246                         # fall through
3247                 if c is '>'
3248                         if is_appropriate_end_tag tok_cur_tag
3249                                 tok_state = tok_state_data
3250                                 return tok_cur_tag
3251                         # fall through
3252                 if is_uc_alpha(c)
3253                         tok_cur_tag.name += c.toLowerCase()
3254                         temporary_buffer += c
3255                         return
3256                 if is_lc_alpha(c)
3257                         tok_cur_tag.name += c
3258                         temporary_buffer += c
3259                         return
3260                 # Anything else
3261                 tok_state = tok_state_script_data
3262                 cur -= 1 # Reconsume
3263                 return new_character_token "</#{temporary_buffer}" # fixfull split
3264
3265         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3266         tok_state_script_data_escape_start = ->
3267                 c = txt.charAt(cur++)
3268                 if c is '-'
3269                         tok_state = tok_state_script_data_escape_start_dash
3270                         return new_character_token '-'
3271                 # Anything else
3272                 tok_state = tok_state_script_data
3273                 cur -= 1 # Reconsume
3274                 return
3275
3276         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3277         tok_state_script_data_escape_start_dash = ->
3278                 c = txt.charAt(cur++)
3279                 if c is '-'
3280                         tok_state = tok_state_script_data_escaped_dash_dash
3281                         return new_character_token '-'
3282                 # Anything else
3283                 tok_state = tok_state_script_data
3284                 cur -= 1 # Reconsume
3285                 return
3286
3287         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3288         tok_state_script_data_escaped = ->
3289                 c = txt.charAt(cur++)
3290                 if c is '-'
3291                         tok_state = tok_state_script_data_escaped_dash
3292                         return new_character_token '-'
3293                 if c is '<'
3294                         tok_state = tok_state_script_data_escaped_less_than_sign
3295                         return
3296                 if c is "\u0000"
3297                         parse_error()
3298                         return new_character_token "\ufffd"
3299                 if c is '' # EOF
3300                         tok_state = tok_state_data
3301                         parse_error()
3302                         cur -= 1 # Reconsume
3303                         return
3304                 # Anything else
3305                 return new_character_token c
3306
3307         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3308         tok_state_script_data_escaped_dash = ->
3309                 c = txt.charAt(cur++)
3310                 if c is '-'
3311                         tok_state = tok_state_script_data_escaped_dash_dash
3312                         return new_character_token '-'
3313                 if c is '<'
3314                         tok_state = tok_state_script_data_escaped_less_than_sign
3315                         return
3316                 if c is "\u0000"
3317                         parse_error()
3318                         tok_state = tok_state_script_data_escaped
3319                         return new_character_token "\ufffd"
3320                 if c is '' # EOF
3321                         tok_state = tok_state_data
3322                         parse_error()
3323                         cur -= 1 # Reconsume
3324                         return
3325                 # Anything else
3326                 tok_state = tok_state_script_data_escaped
3327                 return new_character_token c
3328
3329         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3330         tok_state_script_data_escaped_dash_dash = ->
3331                 c = txt.charAt(cur++)
3332                 if c is '-'
3333                         return new_character_token '-'
3334                 if c is '<'
3335                         tok_state = tok_state_script_data_escaped_less_than_sign
3336                         return
3337                 if c is '>'
3338                         tok_state = tok_state_script_data
3339                         return new_character_token '>'
3340                 if c is "\u0000"
3341                         parse_error()
3342                         tok_state = tok_state_script_data_escaped
3343                         return new_character_token "\ufffd"
3344                 if c is '' # EOF
3345                         parse_error()
3346                         tok_state = tok_state_data
3347                         cur -= 1 # Reconsume
3348                         return
3349                 # Anything else
3350                 tok_state = tok_state_script_data_escaped
3351                 return new_character_token c
3352
3353         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3354         tok_state_script_data_escaped_less_than_sign = ->
3355                 c = txt.charAt(cur++)
3356                 if c is '/'
3357                         temporary_buffer = ''
3358                         tok_state = tok_state_script_data_escaped_end_tag_open
3359                         return
3360                 if is_uc_alpha(c)
3361                         temporary_buffer = c.toLowerCase() # yes, really
3362                         tok_state = tok_state_script_data_double_escape_start
3363                         return new_character_token "<#{c}" # fixfull split
3364                 if is_lc_alpha(c)
3365                         temporary_buffer = c
3366                         tok_state = tok_state_script_data_double_escape_start
3367                         return new_character_token "<#{c}" # fixfull split
3368                 # Anything else
3369                 tok_state = tok_state_script_data_escaped
3370                 cur -= 1 # Reconsume
3371                 return new_character_token c
3372
3373         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3374         tok_state_script_data_escaped_end_tag_open = ->
3375                 c = txt.charAt(cur++)
3376                 if is_uc_alpha(c)
3377                         tok_cur_tag = new_end_tag c.toLowerCase()
3378                         temporary_buffer += c
3379                         tok_state = tok_state_script_data_escaped_end_tag_name
3380                         return
3381                 if is_lc_alpha(c)
3382                         tok_cur_tag = new_end_tag c
3383                         temporary_buffer += c
3384                         tok_state = tok_state_script_data_escaped_end_tag_name
3385                         return
3386                 # Anything else
3387                 tok_state = tok_state_script_data_escaped
3388                 cur -= 1 # Reconsume
3389                 return new_character_token '</' # fixfull split
3390
3391         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3392         tok_state_script_data_escaped_end_tag_name = ->
3393                 c = txt.charAt(cur++)
3394                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3395                         if is_appropriate_end_tag tok_cur_tag
3396                                 tok_state = tok_state_before_attribute_name
3397                                 return
3398                         # fall through
3399                 if c is '/'
3400                         if is_appropriate_end_tag tok_cur_tag
3401                                 tok_state = tok_state_self_closing_start_tag
3402                                 return
3403                         # fall through
3404                 if c is '>'
3405                         if is_appropriate_end_tag tok_cur_tag
3406                                 tok_state = tok_state_data
3407                                 return tok_cur_tag
3408                         # fall through
3409                 if is_uc_alpha(c)
3410                         tok_cur_tag.name += c.toLowerCase()
3411                         temporary_buffer += c.toLowerCase()
3412                         return
3413                 if is_lc_alpha(c)
3414                         tok_cur_tag.name += c
3415                         temporary_buffer += c.toLowerCase()
3416                         return
3417                 # Anything else
3418                 tok_state = tok_state_script_data_escaped
3419                 cur -= 1 # Reconsume
3420                 return new_character_token "</#{temporary_buffer}" # fixfull split
3421
3422         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3423         tok_state_script_data_double_escape_start = ->
3424                 c = txt.charAt(cur++)
3425                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3426                         if temporary_buffer is 'script'
3427                                 tok_state = tok_state_script_data_double_escaped
3428                         else
3429                                 tok_state = tok_state_script_data_escaped
3430                         return new_character_token c
3431                 if is_uc_alpha(c)
3432                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3433                         return new_character_token c
3434                 if is_lc_alpha(c)
3435                         temporary_buffer += c
3436                         return new_character_token c
3437                 # Anything else
3438                 tok_state = tok_state_script_data_escaped
3439                 cur -= 1 # Reconsume
3440                 return
3441
3442         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3443         tok_state_script_data_double_escaped = ->
3444                 c = txt.charAt(cur++)
3445                 if c is '-'
3446                         tok_state = tok_state_script_data_double_escaped_dash
3447                         return new_character_token '-'
3448                 if c is '<'
3449                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3450                         return new_character_token '<'
3451                 if c is "\u0000"
3452                         parse_error()
3453                         return new_character_token "\ufffd"
3454                 if c is '' # EOF
3455                         parse_error()
3456                         tok_state = tok_state_data
3457                         cur -= 1 # Reconsume
3458                         return
3459                 # Anything else
3460                 return new_character_token c
3461
3462         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3463         tok_state_script_data_double_escaped_dash = ->
3464                 c = txt.charAt(cur++)
3465                 if c is '-'
3466                         tok_state = tok_state_script_data_double_escaped_dash_dash
3467                         return new_character_token '-'
3468                 if c is '<'
3469                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3470                         return new_character_token '<'
3471                 if c is "\u0000"
3472                         parse_error()
3473                         tok_state = tok_state_script_data_double_escaped
3474                         return new_character_token "\ufffd"
3475                 if c is '' # EOF
3476                         parse_error()
3477                         tok_state = tok_state_data
3478                         cur -= 1 # Reconsume
3479                         return
3480                 # Anything else
3481                 tok_state = tok_state_script_data_double_escaped
3482                 return new_character_token c
3483
3484         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3485         tok_state_script_data_double_escaped_dash_dash = ->
3486                 c = txt.charAt(cur++)
3487                 if c is '-'
3488                         return new_character_token '-'
3489                 if c is '<'
3490                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3491                         return new_character_token '<'
3492                 if c is '>'
3493                         tok_state = tok_state_script_data
3494                         return new_character_token '>'
3495                 if c is "\u0000"
3496                         parse_error()
3497                         tok_state = tok_state_script_data_double_escaped
3498                         return new_character_token "\ufffd"
3499                 if c is '' # EOF
3500                         parse_error()
3501                         tok_state = tok_state_data
3502                         cur -= 1 # Reconsume
3503                         return
3504                 # Anything else
3505                 tok_state = tok_state_script_data_double_escaped
3506                 return new_character_token c
3507
3508         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3509         tok_state_script_data_double_escaped_less_than_sign = ->
3510                 c = txt.charAt(cur++)
3511                 if c is '/'
3512                         temporary_buffer = ''
3513                         tok_state = tok_state_script_data_double_escape_end
3514                         return new_character_token '/'
3515                 # Anything else
3516                 tok_state = tok_state_script_data_double_escaped
3517                 cur -= 1 # Reconsume
3518                 return
3519
3520         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3521         tok_state_script_data_double_escape_end = ->
3522                 c = txt.charAt(cur++)
3523                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3524                         if temporary_buffer is 'script'
3525                                 tok_state = tok_state_script_data_escaped
3526                         else
3527                                 tok_state = tok_state_script_data_double_escaped
3528                         return new_character_token c
3529                 if is_uc_alpha(c)
3530                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3531                         return new_character_token c
3532                 if is_lc_alpha(c)
3533                         temporary_buffer += c
3534                         return new_character_token c
3535                 # Anything else
3536                 tok_state = tok_state_script_data_double_escaped
3537                 cur -= 1 # Reconsume
3538                 return
3539
3540         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3541         tok_state_before_attribute_name = ->
3542                 attr_name = null
3543                 switch c = txt.charAt(cur++)
3544                         when "\t", "\n", "\u000c", ' '
3545                                 return null
3546                         when '/'
3547                                 tok_state = tok_state_self_closing_start_tag
3548                                 return null
3549                         when '>'
3550                                 tok_state = tok_state_data
3551                                 tmp = tok_cur_tag
3552                                 tok_cur_tag = null
3553                                 return tmp
3554                         when "\u0000"
3555                                 parse_error()
3556                                 attr_name = "\ufffd"
3557                         when '"', "'", '<', '='
3558                                 parse_error()
3559                                 attr_name = c
3560                         when '' # EOF
3561                                 parse_error()
3562                                 tok_state = tok_state_data
3563                         else
3564                                 if is_uc_alpha(c)
3565                                         attr_name = c.toLowerCase()
3566                                 else
3567                                         attr_name = c
3568                 if attr_name?
3569                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3570                         tok_state = tok_state_attribute_name
3571                 return null
3572
3573         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3574         tok_state_attribute_name = ->
3575                 switch c = txt.charAt(cur++)
3576                         when "\t", "\n", "\u000c", ' '
3577                                 tok_state = tok_state_after_attribute_name
3578                         when '/'
3579                                 tok_state = tok_state_self_closing_start_tag
3580                         when '='
3581                                 tok_state = tok_state_before_attribute_value
3582                         when '>'
3583                                 tok_state = tok_state_data
3584                                 tmp = tok_cur_tag
3585                                 tok_cur_tag = null
3586                                 return tmp
3587                         when "\u0000"
3588                                 parse_error()
3589                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3590                         when '"', "'", '<'
3591                                 parse_error()
3592                                 tok_cur_tag.attrs_a[0][0] += c
3593                         when '' # EOF
3594                                 parse_error()
3595                                 tok_state = tok_state_data
3596                         else
3597                                 if is_uc_alpha(c)
3598                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3599                                 else
3600                                         tok_cur_tag.attrs_a[0][0] += c
3601                 return null
3602
3603         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3604         tok_state_after_attribute_name = ->
3605                 c = txt.charAt(cur++)
3606                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3607                         return
3608                 if c is '/'
3609                         tok_state = tok_state_self_closing_start_tag
3610                         return
3611                 if c is '='
3612                         tok_state = tok_state_before_attribute_value
3613                         return
3614                 if c is '>'
3615                         tok_state = tok_state_data
3616                         return
3617                 if is_uc_alpha(c)
3618                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3619                         tok_state = tok_state_attribute_name
3620                         return
3621                 if c is "\u0000"
3622                         parse_error()
3623                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3624                         tok_state = tok_state_attribute_name
3625                         return
3626                 if c is '' # EOF
3627                         parse_error()
3628                         tok_state = tok_state_data
3629                         cur -= 1 # reconsume
3630                         return
3631                 if c is '"' or c is "'" or c is '<'
3632                         parse_error()
3633                         # fall through to Anything else
3634                 # Anything else
3635                 tok_cur_tag.attrs_a.unshift [c, '']
3636                 tok_state = tok_state_attribute_name
3637
3638         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3639         tok_state_before_attribute_value = ->
3640                 switch c = txt.charAt(cur++)
3641                         when "\t", "\n", "\u000c", ' '
3642                                 return null
3643                         when '"'
3644                                 tok_state = tok_state_attribute_value_double_quoted
3645                         when '&'
3646                                 tok_state = tok_state_attribute_value_unquoted
3647                                 cur -= 1
3648                         when "'"
3649                                 tok_state = tok_state_attribute_value_single_quoted
3650                         when "\u0000"
3651                                 # Parse error
3652                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3653                                 tok_state = tok_state_attribute_value_unquoted
3654                         when '>'
3655                                 # Parse error
3656                                 tok_state = tok_state_data
3657                                 tmp = tok_cur_tag
3658                                 tok_cur_tag = null
3659                                 return tmp
3660                         when '' # EOF
3661                                 parse_error()
3662                                 tok_state = tok_state_data
3663                         else
3664                                 tok_cur_tag.attrs_a[0][1] += c
3665                                 tok_state = tok_state_attribute_value_unquoted
3666                 return null
3667
3668         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3669         tok_state_attribute_value_double_quoted = ->
3670                 switch c = txt.charAt(cur++)
3671                         when '"'
3672                                 tok_state = tok_state_after_attribute_value_quoted
3673                         when '&'
3674                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3675                         when "\u0000"
3676                                 # Parse error
3677                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3678                         when '' # EOF
3679                                 parse_error()
3680                                 tok_state = tok_state_data
3681                         else
3682                                 tok_cur_tag.attrs_a[0][1] += c
3683                 return null
3684
3685         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3686         tok_state_attribute_value_single_quoted = ->
3687                 switch c = txt.charAt(cur++)
3688                         when "'"
3689                                 tok_state = tok_state_after_attribute_value_quoted
3690                         when '&'
3691                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3692                         when "\u0000"
3693                                 # Parse error
3694                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3695                         when '' # EOF
3696                                 parse_error()
3697                                 tok_state = tok_state_data
3698                         else
3699                                 tok_cur_tag.attrs_a[0][1] += c
3700                 return null
3701
3702         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3703         tok_state_attribute_value_unquoted = ->
3704                 switch c = txt.charAt(cur++)
3705                         when "\t", "\n", "\u000c", ' '
3706                                 tok_state = tok_state_before_attribute_name
3707                         when '&'
3708                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3709                         when '>'
3710                                 tok_state = tok_state_data
3711                                 tmp = tok_cur_tag
3712                                 tok_cur_tag = null
3713                                 return tmp
3714                         when "\u0000"
3715                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3716                         when '' # EOF
3717                                 parse_error()
3718                                 tok_state = tok_state_data
3719                         else
3720                                 # Parse Error if ', <, = or ` (backtick)
3721                                 tok_cur_tag.attrs_a[0][1] += c
3722                 return null
3723
3724         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3725         tok_state_after_attribute_value_quoted = ->
3726                 switch c = txt.charAt(cur++)
3727                         when "\t", "\n", "\u000c", ' '
3728                                 tok_state = tok_state_before_attribute_name
3729                         when '/'
3730                                 tok_state = tok_state_self_closing_start_tag
3731                         when '>'
3732                                 tok_state = tok_state_data
3733                                 tmp = tok_cur_tag
3734                                 tok_cur_tag = null
3735                                 return tmp
3736                         when '' # EOF
3737                                 parse_error()
3738                                 tok_state = tok_state_data
3739                         else
3740                                 # Parse Error
3741                                 tok_state = tok_state_before_attribute_name
3742                                 cur -= 1 # we didn't handle that char
3743                 return null
3744
3745         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3746         tok_state_self_closing_start_tag = ->
3747                 c = txt.charAt(cur++)
3748                 if c is '>'
3749                         tok_cur_tag.flag 'self-closing'
3750                         tok_state = tok_state_data
3751                         return tok_cur_tag
3752                 if c is ''
3753                         parse_error()
3754                         tok_state = tok_state_data
3755                         cur -= 1 # Reconsume
3756                         return
3757                 # Anything else
3758                 parse_error()
3759                 tok_state = tok_state_before_attribute_name
3760                 cur -= 1 # Reconsume
3761                 return
3762
3763         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3764         # WARNING: put a comment token in tok_cur_tag before setting this state
3765         tok_state_bogus_comment = ->
3766                 next_gt = txt.indexOf '>', cur
3767                 if next_gt is -1
3768                         val = txt.substr cur
3769                         cur = txt.length
3770                 else
3771                         val = txt.substr cur, (next_gt - cur)
3772                         cur = next_gt + 1
3773                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3774                 tok_cur_tag.text += val
3775                 tok_state = tok_state_data
3776                 return tok_cur_tag
3777
3778         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3779         tok_state_markup_declaration_open = ->
3780                 if txt.substr(cur, 2) is '--'
3781                         cur += 2
3782                         tok_cur_tag = new_comment_token ''
3783                         tok_state = tok_state_comment_start
3784                         return
3785                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3786                         cur += 7
3787                         tok_state = tok_state_doctype
3788                         return
3789                 acn = adjusted_current_node()
3790                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3791                         cur += 7
3792                         tok_state = tok_state_cdata_section
3793                         return
3794                 # Otherwise
3795                 parse_error()
3796                 tok_cur_tag = new_comment_token ''
3797                 tok_state = tok_state_bogus_comment
3798                 return
3799
3800         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3801         tok_state_comment_start = ->
3802                 switch c = txt.charAt(cur++)
3803                         when '-'
3804                                 tok_state = tok_state_comment_start_dash
3805                         when "\u0000"
3806                                 parse_error()
3807                                 tok_state = tok_state_comment
3808                                 return new_character_token "\ufffd"
3809                         when '>'
3810                                 parse_error()
3811                                 tok_state = tok_state_data
3812                                 return tok_cur_tag
3813                         when '' # EOF
3814                                 parse_error()
3815                                 tok_state = tok_state_data
3816                                 cur -= 1 # Reconsume
3817                                 return tok_cur_tag
3818                         else
3819                                 tok_cur_tag.text += c
3820                                 tok_state = tok_state_comment
3821                 return null
3822
3823         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3824         tok_state_comment_start_dash = ->
3825                 switch c = txt.charAt(cur++)
3826                         when '-'
3827                                 tok_state = tok_state_comment_end
3828                         when "\u0000"
3829                                 parse_error()
3830                                 tok_cur_tag.text += "-\ufffd"
3831                                 tok_state = tok_state_comment
3832                         when '>'
3833                                 parse_error()
3834                                 tok_state = tok_state_data
3835                                 return tok_cur_tag
3836                         when '' # EOF
3837                                 parse_error()
3838                                 tok_state = tok_state_data
3839                                 cur -= 1 # Reconsume
3840                                 return tok_cur_tag
3841                         else
3842                                 tok_cur_tag.text += "-#{c}"
3843                                 tok_state = tok_state_comment
3844                 return null
3845
3846         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3847         tok_state_comment = ->
3848                 switch c = txt.charAt(cur++)
3849                         when '-'
3850                                 tok_state = tok_state_comment_end_dash
3851                         when "\u0000"
3852                                 parse_error()
3853                                 tok_cur_tag.text += "\ufffd"
3854                         when '' # EOF
3855                                 parse_error()
3856                                 tok_state = tok_state_data
3857                                 cur -= 1 # Reconsume
3858                                 return tok_cur_tag
3859                         else
3860                                 tok_cur_tag.text += c
3861                 return null
3862
3863         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3864         tok_state_comment_end_dash = ->
3865                 switch c = txt.charAt(cur++)
3866                         when '-'
3867                                 tok_state = tok_state_comment_end
3868                         when "\u0000"
3869                                 parse_error()
3870                                 tok_cur_tag.text += "-\ufffd"
3871                                 tok_state = tok_state_comment
3872                         when '' # EOF
3873                                 parse_error()
3874                                 tok_state = tok_state_data
3875                                 cur -= 1 # Reconsume
3876                                 return tok_cur_tag
3877                         else
3878                                 tok_cur_tag.text += "-#{c}"
3879                                 tok_state = tok_state_comment
3880                 return null
3881
3882         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3883         tok_state_comment_end = ->
3884                 switch c = txt.charAt(cur++)
3885                         when '>'
3886                                 tok_state = tok_state_data
3887                                 return tok_cur_tag
3888                         when "\u0000"
3889                                 parse_error()
3890                                 tok_cur_tag.text += "--\ufffd"
3891                                 tok_state = tok_state_comment
3892                         when '!'
3893                                 parse_error()
3894                                 tok_state = tok_state_comment_end_bang
3895                         when '-'
3896                                 parse_error()
3897                                 tok_cur_tag.text += '-'
3898                         when '' # EOF
3899                                 parse_error()
3900                                 tok_state = tok_state_data
3901                                 cur -= 1 # Reconsume
3902                                 return tok_cur_tag
3903                         else
3904                                 parse_error()
3905                                 tok_cur_tag.text += "--#{c}"
3906                                 tok_state = tok_state_comment
3907                 return null
3908
3909         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3910         tok_state_comment_end_bang = ->
3911                 switch c = txt.charAt(cur++)
3912                         when '-'
3913                                 tok_cur_tag.text += "--!#{c}"
3914                                 tok_state = tok_state_comment_end_dash
3915                         when '>'
3916                                 tok_state = tok_state_data
3917                                 return tok_cur_tag
3918                         when "\u0000"
3919                                 parse_error()
3920                                 tok_cur_tag.text += "--!\ufffd"
3921                                 tok_state = tok_state_comment
3922                         when '' # EOF
3923                                 parse_error()
3924                                 tok_state = tok_state_data
3925                                 cur -= 1 # Reconsume
3926                                 return tok_cur_tag
3927                         else
3928                                 tok_cur_tag.text += "--!#{c}"
3929                                 tok_state = tok_state_comment
3930                 return null
3931
3932         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3933         tok_state_doctype = ->
3934                 switch c = txt.charAt(cur++)
3935                         when "\t", "\u000a", "\u000c", ' '
3936                                 tok_state = tok_state_before_doctype_name
3937                         when '' # EOF
3938                                 parse_error()
3939                                 tok_state = tok_state_data
3940                                 el = new_doctype_token ''
3941                                 el.flag 'force-quirks', true
3942                                 cur -= 1 # Reconsume
3943                                 return el
3944                         else
3945                                 parse_error()
3946                                 tok_state = tok_state_before_doctype_name
3947                                 cur -= 1 # Reconsume
3948                 return null
3949
3950         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3951         tok_state_before_doctype_name = ->
3952                 c = txt.charAt(cur++)
3953                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3954                         return
3955                 if is_uc_alpha(c)
3956                         tok_cur_tag = new_doctype_token c.toLowerCase()
3957                         tok_state = tok_state_doctype_name
3958                         return
3959                 if c is "\u0000"
3960                         parse_error()
3961                         tok_cur_tag = new_doctype_token "\ufffd"
3962                         tok_state = tok_state_doctype_name
3963                         return
3964                 if c is '>'
3965                         parse_error()
3966                         el = new_doctype_token ''
3967                         el.flag 'force-quirks', true
3968                         tok_state = tok_state_data
3969                         return el
3970                 if c is '' # EOF
3971                         parse_error()
3972                         tok_state = tok_state_data
3973                         el = new_doctype_token ''
3974                         el.flag 'force-quirks', true
3975                         cur -= 1 # Reconsume
3976                         return el
3977                 # Anything else
3978                 tok_cur_tag = new_doctype_token c
3979                 tok_state = tok_state_doctype_name
3980                 return null
3981
3982         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
3983         tok_state_doctype_name = ->
3984                 c = txt.charAt(cur++)
3985                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3986                         tok_state = tok_state_after_doctype_name
3987                         return
3988                 if c is '>'
3989                         tok_state = tok_state_data
3990                         return tok_cur_tag
3991                 if is_uc_alpha(c)
3992                         tok_cur_tag.name += c.toLowerCase()
3993                         return
3994                 if c is "\u0000"
3995                         parse_error()
3996                         tok_cur_tag.name += "\ufffd"
3997                         return
3998                 if c is '' # EOF
3999                         parse_error()
4000                         tok_state = tok_state_data
4001                         tok_cur_tag.flag 'force-quirks', true
4002                         cur -= 1 # Reconsume
4003                         return tok_cur_tag
4004                 # Anything else
4005                 tok_cur_tag.name += c
4006                 return null
4007
4008         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4009         tok_state_after_doctype_name = ->
4010                 c = txt.charAt(cur++)
4011                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4012                         return
4013                 if c is '>'
4014                         tok_state = tok_state_data
4015                         return tok_cur_tag
4016                 if c is '' # EOF
4017                         parse_error()
4018                         tok_state = tok_state_data
4019                         tok_cur_tag.flag 'force-quirks', true
4020                         cur -= 1 # Reconsume
4021                         return tok_cur_tag
4022                 # Anything else
4023                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4024                         cur += 5
4025                         tok_state = tok_state_after_doctype_public_keyword
4026                         return
4027                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4028                         cur += 5
4029                         tok_state = tok_state_after_doctype_system_keyword
4030                         return
4031                 parse_error()
4032                 tok_cur_tag.flag 'force-quirks', true
4033                 tok_state = tok_state_bogus_doctype
4034                 return null
4035
4036         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4037         tok_state_after_doctype_public_keyword = ->
4038                 c = txt.charAt(cur++)
4039                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4040                         tok_state = tok_state_before_doctype_public_identifier
4041                         return
4042                 if c is '"'
4043                         parse_error()
4044                         tok_cur_tag.public_identifier = ''
4045                         tok_state = tok_state_doctype_public_identifier_double_quoted
4046                         return
4047                 if c is "'"
4048                         parse_error()
4049                         tok_cur_tag.public_identifier = ''
4050                         tok_state = tok_state_doctype_public_identifier_single_quoted
4051                         return
4052                 if c is '>'
4053                         parse_error()
4054                         tok_cur_tag.flag 'force-quirks', true
4055                         tok_state = tok_state_data
4056                         return tok_cur_tag
4057                 if c is '' # EOF
4058                         parse_error()
4059                         tok_state = tok_state_data
4060                         tok_cur_tag.flag 'force-quirks', true
4061                         cur -= 1 # Reconsume
4062                         return tok_cur_tag
4063                 # Anything else
4064                 parse_error()
4065                 tok_cur_tag.flag 'force-quirks', true
4066                 tok_state = tok_state_bogus_doctype
4067                 return null
4068
4069         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4070         tok_state_before_doctype_public_identifier = ->
4071                 c = txt.charAt(cur++)
4072                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4073                         return
4074                 if c is '"'
4075                         parse_error()
4076                         tok_cur_tag.public_identifier = ''
4077                         tok_state = tok_state_doctype_public_identifier_double_quoted
4078                         return
4079                 if c is "'"
4080                         parse_error()
4081                         tok_cur_tag.public_identifier = ''
4082                         tok_state = tok_state_doctype_public_identifier_single_quoted
4083                         return
4084                 if c is '>'
4085                         parse_error()
4086                         tok_cur_tag.flag 'force-quirks', true
4087                         tok_state = tok_state_data
4088                         return tok_cur_tag
4089                 if c is '' # EOF
4090                         parse_error()
4091                         tok_state = tok_state_data
4092                         tok_cur_tag.flag 'force-quirks', true
4093                         cur -= 1 # Reconsume
4094                         return tok_cur_tag
4095                 # Anything else
4096                 parse_error()
4097                 tok_cur_tag.flag 'force-quirks', true
4098                 tok_state = tok_state_bogus_doctype
4099                 return null
4100
4101
4102         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4103         tok_state_doctype_public_identifier_double_quoted = ->
4104                 c = txt.charAt(cur++)
4105                 if c is '"'
4106                         tok_state = tok_state_after_doctype_public_identifier
4107                         return
4108                 if c is "\u0000"
4109                         parse_error()
4110                         tok_cur_tag.public_identifier += "\ufffd"
4111                         return
4112                 if c is '>'
4113                         parse_error()
4114                         tok_cur_tag.flag 'force-quirks', true
4115                         tok_state = tok_state_data
4116                         return tok_cur_tag
4117                 if c is '' # EOF
4118                         parse_error()
4119                         tok_state = tok_state_data
4120                         tok_cur_tag.flag 'force-quirks', true
4121                         cur -= 1 # Reconsume
4122                         return tok_cur_tag
4123                 # Anything else
4124                 tok_cur_tag.public_identifier += c
4125                 return null
4126
4127         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4128         tok_state_doctype_public_identifier_single_quoted = ->
4129                 c = txt.charAt(cur++)
4130                 if c is "'"
4131                         tok_state = tok_state_after_doctype_public_identifier
4132                         return
4133                 if c is "\u0000"
4134                         parse_error()
4135                         tok_cur_tag.public_identifier += "\ufffd"
4136                         return
4137                 if c is '>'
4138                         parse_error()
4139                         tok_cur_tag.flag 'force-quirks', true
4140                         tok_state = tok_state_data
4141                         return tok_cur_tag
4142                 if c is '' # EOF
4143                         parse_error()
4144                         tok_state = tok_state_data
4145                         tok_cur_tag.flag 'force-quirks', true
4146                         cur -= 1 # Reconsume
4147                         return tok_cur_tag
4148                 # Anything else
4149                 tok_cur_tag.public_identifier += c
4150                 return null
4151
4152         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4153         tok_state_after_doctype_public_identifier = ->
4154                 c = txt.charAt(cur++)
4155                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4156                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4157                         return
4158                 if c is '>'
4159                         tok_state = tok_state_data
4160                         return tok_cur_tag
4161                 if c is '"'
4162                         parse_error()
4163                         tok_cur_tag.system_identifier = ''
4164                         tok_state = tok_state_doctype_system_identifier_double_quoted
4165                         return
4166                 if c is "'"
4167                         parse_error()
4168                         tok_cur_tag.system_identifier = ''
4169                         tok_state = tok_state_doctype_system_identifier_single_quoted
4170                         return
4171                 if c is '' # EOF
4172                         parse_error()
4173                         tok_state = tok_state_data
4174                         tok_cur_tag.flag 'force-quirks', true
4175                         cur -= 1 # Reconsume
4176                         return tok_cur_tag
4177                 # Anything else
4178                 parse_error()
4179                 tok_cur_tag.flag 'force-quirks', true
4180                 tok_state = tok_state_bogus_doctype
4181                 return null
4182
4183         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4184         tok_state_between_doctype_public_and_system_identifiers = ->
4185                 c = txt.charAt(cur++)
4186                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4187                         return
4188                 if c is '>'
4189                         tok_state = tok_state_data
4190                         return tok_cur_tag
4191                 if c is '"'
4192                         parse_error()
4193                         tok_cur_tag.system_identifier = ''
4194                         tok_state = tok_state_doctype_system_identifier_double_quoted
4195                         return
4196                 if c is "'"
4197                         parse_error()
4198                         tok_cur_tag.system_identifier = ''
4199                         tok_state = tok_state_doctype_system_identifier_single_quoted
4200                         return
4201                 if c is '' # EOF
4202                         parse_error()
4203                         tok_state = tok_state_data
4204                         tok_cur_tag.flag 'force-quirks', true
4205                         cur -= 1 # Reconsume
4206                         return tok_cur_tag
4207                 # Anything else
4208                 parse_error()
4209                 tok_cur_tag.flag 'force-quirks', true
4210                 tok_state = tok_state_bogus_doctype
4211                 return null
4212
4213         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4214         tok_state_after_doctype_system_keyword = ->
4215                 c = txt.charAt(cur++)
4216                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4217                         tok_state = tok_state_before_doctype_system_identifier
4218                         return
4219                 if c is '"'
4220                         parse_error()
4221                         tok_cur_tag.system_identifier = ''
4222                         tok_state = tok_state_doctype_system_identifier_double_quoted
4223                         return
4224                 if c is "'"
4225                         parse_error()
4226                         tok_cur_tag.system_identifier = ''
4227                         tok_state = tok_state_doctype_system_identifier_single_quoted
4228                         return
4229                 if c is '>'
4230                         parse_error()
4231                         tok_cur_tag.flag 'force-quirks', true
4232                         tok_state = tok_state_data
4233                         return tok_cur_tag
4234                 if c is '' # EOF
4235                         parse_error()
4236                         tok_state = tok_state_data
4237                         tok_cur_tag.flag 'force-quirks', true
4238                         cur -= 1 # Reconsume
4239                         return tok_cur_tag
4240                 # Anything else
4241                 parse_error()
4242                 tok_cur_tag.flag 'force-quirks', true
4243                 tok_state = tok_state_bogus_doctype
4244                 return null
4245
4246         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4247         tok_state_before_doctype_system_identifier = ->
4248                 c = txt.charAt(cur++)
4249                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4250                         return
4251                 if c is '"'
4252                         tok_cur_tag.system_identifier = ''
4253                         tok_state = tok_state_doctype_system_identifier_double_quoted
4254                         return
4255                 if c is "'"
4256                         tok_cur_tag.system_identifier = ''
4257                         tok_state = tok_state_doctype_system_identifier_single_quoted
4258                         return
4259                 if c is '>'
4260                         parse_error()
4261                         tok_cur_tag.flag 'force-quirks', true
4262                         tok_state = tok_state_data
4263                         return tok_cur_tag
4264                 if c is '' # EOF
4265                         parse_error()
4266                         tok_state = tok_state_data
4267                         tok_cur_tag.flag 'force-quirks', true
4268                         cur -= 1 # Reconsume
4269                         return tok_cur_tag
4270                 # Anything else
4271                 parse_error()
4272                 tok_cur_tag.flag 'force-quirks', true
4273                 tok_state = tok_state_bogus_doctype
4274                 return null
4275
4276         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4277         tok_state_doctype_system_identifier_double_quoted = ->
4278                 c = txt.charAt(cur++)
4279                 if c is '"'
4280                         tok_state = tok_state_after_doctype_system_identifier
4281                         return
4282                 if c is "\u0000"
4283                         parse_error()
4284                         tok_cur_tag.system_identifier += "\ufffd"
4285                         return
4286                 if c is '>'
4287                         parse_error()
4288                         tok_cur_tag.flag 'force-quirks', true
4289                         tok_state = tok_state_data
4290                         return tok_cur_tag
4291                 if c is '' # EOF
4292                         parse_error()
4293                         tok_state = tok_state_data
4294                         tok_cur_tag.flag 'force-quirks', true
4295                         cur -= 1 # Reconsume
4296                         return tok_cur_tag
4297                 # Anything else
4298                 tok_cur_tag.system_identifier += c
4299                 return null
4300
4301         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4302         tok_state_doctype_system_identifier_single_quoted = ->
4303                 c = txt.charAt(cur++)
4304                 if c is "'"
4305                         tok_state = tok_state_after_doctype_system_identifier
4306                         return
4307                 if c is "\u0000"
4308                         parse_error()
4309                         tok_cur_tag.system_identifier += "\ufffd"
4310                         return
4311                 if c is '>'
4312                         parse_error()
4313                         tok_cur_tag.flag 'force-quirks', true
4314                         tok_state = tok_state_data
4315                         return tok_cur_tag
4316                 if c is '' # EOF
4317                         parse_error()
4318                         tok_state = tok_state_data
4319                         tok_cur_tag.flag 'force-quirks', true
4320                         cur -= 1 # Reconsume
4321                         return tok_cur_tag
4322                 # Anything else
4323                 tok_cur_tag.system_identifier += c
4324                 return null
4325
4326         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4327         tok_state_after_doctype_system_identifier = ->
4328                 c = txt.charAt(cur++)
4329                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4330                         return
4331                 if c is '>'
4332                         tok_state = tok_state_data
4333                         return tok_cur_tag
4334                 if c is '' # EOF
4335                         parse_error()
4336                         tok_state = tok_state_data
4337                         tok_cur_tag.flag 'force-quirks', true
4338                         cur -= 1 # Reconsume
4339                         return tok_cur_tag
4340                 # Anything else
4341                 parse_error()
4342                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4343                 tok_state = tok_state_bogus_doctype
4344                 return null
4345
4346         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4347         tok_state_bogus_doctype = ->
4348                 c = txt.charAt(cur++)
4349                 if c is '>'
4350                         tok_state = tok_state_data
4351                         return tok_cur_tag
4352                 if c is '' # EOF
4353                         tok_state = tok_state_data
4354                         cur -= 1 # Reconsume
4355                         return tok_cur_tag
4356                 # Anything else
4357                 return null
4358
4359         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4360         tok_state_cdata_section = ->
4361                 tok_state = tok_state_data
4362                 next_gt = txt.indexOf ']]>', cur
4363                 if next_gt is -1
4364                         val = txt.substr cur
4365                         cur = txt.length
4366                 else
4367                         val = txt.substr cur, (next_gt - cur)
4368                         cur = next_gt + 3
4369                 return new_character_token val # fixfull split
4370
4371         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4372         # Don't set this as a state, just call it
4373         # returns a string (NOT a text node)
4374         parse_character_reference = (allowed_char = null, in_attr = false) ->
4375                 if cur >= txt.length
4376                         return '&'
4377                 switch c = txt.charAt(cur)
4378                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4379                                 # explicitly not a parse error
4380                                 return '&'
4381                         when ';'
4382                                 # there has to be "one or more" alnums between & and ; to be a parse error
4383                                 return '&'
4384                         when '#'
4385                                 if cur + 1 >= txt.length
4386                                         return '&'
4387                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4388                                         base = 16
4389                                         charset = hex_chars
4390                                         start = cur + 2
4391                                 else
4392                                         charset = digits
4393                                         start = cur + 1
4394                                         base = 10
4395                                 i = 0
4396                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4397                                         i += 1
4398                                 if i is 0
4399                                         return '&'
4400                                 cur = start + i
4401                                 if txt.charAt(start + i) is ';'
4402                                         cur += 1
4403                                 else
4404                                         parse_error()
4405                                 code_point = txt.substr(start, i)
4406                                 while code_point.charAt(0) is '0' and code_point.length > 1
4407                                         code_point = code_point.substr 1
4408                                 code_point = parseInt(code_point, base)
4409                                 if unicode_fixes[code_point]?
4410                                         parse_error()
4411                                         return unicode_fixes[code_point]
4412                                 else
4413                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4414                                                 parse_error()
4415                                                 return "\ufffd"
4416                                         else
4417                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4418                                                         parse_error()
4419                                                 return from_code_point code_point
4420                                 return
4421                         else
4422                                 for i in [0...31]
4423                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4424                                                 break
4425                                 if i is 0
4426                                         # exit early, because parse_error() below needs at least one alnum
4427                                         return '&'
4428                                 if txt.charAt(cur + i) is ';'
4429                                         i += 1 # include ';' terminator in value
4430                                         decoded = decode_named_char_ref txt.substr(cur, i)
4431                                         if decoded?
4432                                                 cur += i
4433                                                 return decoded
4434                                         parse_error()
4435                                         return '&'
4436                                 else
4437                                         # no ';' terminator (only legacy char refs)
4438                                         max = i
4439                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4440                                                 c = legacy_char_refs[txt.substr(cur, i)]
4441                                                 if c?
4442                                                         if in_attr
4443                                                                 if txt.charAt(cur + i) is '='
4444                                                                         # "because some legacy user agents will
4445                                                                         # misinterpret the markup in those cases"
4446                                                                         parse_error()
4447                                                                         return '&'
4448                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4449                                                                         # this makes attributes forgiving about url args
4450                                                                         return '&'
4451                                                         # ok, and besides the weird exceptions for attributes...
4452                                                         # return the matching char
4453                                                         cur += i # consume entity chars
4454                                                         parse_error() # because no terminating ";"
4455                                                         return c
4456                                         parse_error()
4457                                         return '&'
4458                 return # never reached
4459
4460         # tree constructor initialization
4461         # see comments on TYPE_TAG/etc for the structure of this data
4462         txt = args.html
4463         cur = 0
4464         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4465         open_els = []
4466         afe = [] # active formatting elements
4467         template_ins_modes = []
4468         ins_mode = ins_mode_initial
4469         original_ins_mode = ins_mode # TODO check spec
4470         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4471         flag_frameset_ok = true
4472         flag_parsing = true
4473         flag_foster_parenting = false
4474         form_element_pointer = null
4475         temporary_buffer = null
4476         pending_table_character_tokens = []
4477         head_element_pointer = null
4478         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4479         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4480
4481         # tokenizer initialization
4482         tok_state = tok_state_data
4483
4484         # text pre-processing
4485         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4486         txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4487         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4488         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4489
4490         if args.name is "plain-text-unsafe.dat #4"
4491                 console.log "hi"
4492         # proccess input
4493         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4494         while flag_parsing
4495                 t = tok_state()
4496                 if t?
4497                         process_token t
4498                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4499         return doc.children
4500
4501 serialize_els = (els, shallow, show_ids) ->
4502         serialized = ''
4503         sep = ''
4504         for t in els
4505                 serialized += sep
4506                 sep = ','
4507                 serialized += t.serialize shallow, show_ids
4508         return serialized
4509
4510 module.exports.parse_html = parse_html
4511 module.exports.debug_log_reset = debug_log_reset
4512 module.exports.debug_log_each = debug_log_each
4513 module.exports.TYPE_TAG = TYPE_TAG
4514 module.exports.TYPE_TEXT = TYPE_TEXT
4515 module.exports.TYPE_COMMENT = TYPE_COMMENT
4516 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4517 module.exports.NS_HTML = NS_HTML
4518 module.exports.NS_MATHML = NS_MATHML
4519 module.exports.NS_SVG = NS_SVG