parser.coffee

   1 # Copyright 2015 Jason Woofenden
   2 # This file implements an HTML5 parser
   3 #
   4 # This program is free software: you can redistribute it and/or modify it under
   5 # the terms of the GNU Affero General Public License as published by the Free
   6 # Software Foundation, either version 3 of the License, or (at your option) any
   7 # later version.
   8 #
   9 # This program is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
  12 # details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17
  18 # This file implements a thorough parser for html5, meant to be used by a
  19 # WYSIWYG editor.
  20
  21 # The implementation is a pretty direct implementation of the parsing algorithm
  22 # described here:
  23 #
  24 #     http://www.w3.org/TR/html5/syntax.html
  25 #
  26 # except for some places marked "WHATWG" that are implemented as described here:
  27 #
  28 #     https://html.spec.whatwg.org/multipage/syntax.html
  29 #
  30 # This code passes all of the tests in the .dat files at:
  31 #
  32 #     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
  33
  34
  35 ##################################
  36 ## how to use this code
  37 ##################################
  38 #
  39 # See README.md for how to run this file in the browser or in node.js.
  40 #
# This file exports a single useful function: parse_html, and some constants
# (see the bottom of this file for those.)
  43 #
  44 # Call it like this:
  45 #
  46 #     wheic_parser.parse("<p><b>hi</p>")
  47 #
  48 # Or, if you don't want <html><head><body>/etc, do this:
  49 #
  50 #     wheic_parser.parse("<p><b>hi</p>", {fragment: "body"})
  51 #
  52 # return value is an array of Nodes, see "class Node" below.
  53
# This code is a work in progress, e.g. try searching this file for "fixfull",
# "TODO" and "FIXME"
  56
  57
  58 # Notes:  stacks/lists
  59 #
  60 # Jason was frequently confused by the terminology used to refer to different
  61 # parts of the stacks and lists in the spec, so he made this chart to help keep
  62 # his head straight:
  63 #
  64 # stacks grow downward (current element is index=0)
  65 #
  66 # example: open_els = [a, b, c, d, e, f, g]
  67 #
  68 # "grows downwards" means it's visualized like this: (index: el "names")
  69 #
  70 #   6: g "start of the list", "topmost", "first"
  71 #   5: f
  72 #   4: e "previous" (to d), "above", "before"
  73 #   3: d   (previous/next are relative to this element)
  74 #   2: c "next", "after", "lower", "below"
  75 #   1: b
  76 #   0: a "end of the list", "current node", "bottommost", "last"
  77
  78 if (typeof module) isnt 'undefined' and module.exports?
  79         context = 'module'
  80         exports = module.exports
  81 else
  82         context = 'browser'
  83         window.wheic_parser = {}
  84         exports = window.wheic_parser
  85
  86 from_code_point = (x) ->
  87         if String.fromCodePoint?
  88                 return String.fromCodePoint x
  89         else
  90                 if x <= 0xffff
  91                         return String.fromCharCode x
  92                 x -= 0x10000
  93                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
  94
  95 # Each node is an obect of the Node class. Here are the Node types:
  96 TYPE_TAG = 0 # name, {attributes}, [children]
  97 TYPE_TEXT = 1 # "text"
  98 TYPE_COMMENT = 2
  99 TYPE_DOCTYPE = 3
 100 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
 101 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
 102 TYPE_END_TAG = 5 # name
 103 TYPE_EOF = 6
 104 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
 105 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
 106
 107 # namespace constants
 108 NS_HTML = 1
 109 NS_MATHML = 2
 110 NS_SVG = 3
 111
 112 # quirks mode constants
 113 QUIRKS_NO = 1
 114 QUIRKS_LIMITED = 2
 115 QUIRKS_YES = 3
 116
 117 # queue up debug logs, so eg they can be shown only for tests that fail
 118 g_debug_log = []
 119 debug_log_reset = ->
 120         g_debug_log = []
 121         return
 122 debug_log = (str) ->
 123         g_debug_log.push str
 124         return
 125 debug_log_each = (cb) ->
 126         for str in g_debug_log
 127                 cb str
 128         return
 129
 130 prev_node_id = 0
 131 class Node
 132         constructor: (type, args = {}) ->
 133                 @type = type # one of the TYPE_* constants above
 134                 @name = args.name ? '' # tag name
 135                 @text = args.text ? '' # contents for text/comment nodes
 136                 @attrs = args.attrs ? {}
 137                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
 138                 @children = args.children ? []
 139                 @namespace = args.namespace ? NS_HTML
 140                 @parent = args.parent ? null
 141                 @token = args.token ? null
 142                 @flags = args.flags ? {}
 143                 if args.id?
 144                         @id = "#{args.id}+"
 145                 else
 146                         @id = "#{++prev_node_id}"
 147         acknowledge_self_closing: ->
 148                 if @token?
 149                         @token.flag 'did_self_close', true
 150                 else
 151                         @flag 'did_self_close', true
 152                 return
 153         flag: (key, value = null) ->
 154                 if value?
 155                         @flags[key] = value
 156                 else
 157                         return @flags[key]
 158                 return
 159
 160 # helpers: (only take args that are normally known when parser creates nodes)
 161 new_open_tag = (name) ->
 162         return new Node TYPE_START_TAG, name: name
 163 new_end_tag = (name) ->
 164         return new Node TYPE_END_TAG, name: name
 165 new_element = (name) ->
 166         return new Node TYPE_TAG, name: name
 167 new_text_node = (txt) ->
 168         return new Node TYPE_TEXT, text: txt
 169 new_character_token = new_text_node
 170 new_comment_token = (txt) ->
 171         return new Node TYPE_COMMENT, text: txt
 172 new_doctype_token = (name) ->
 173         return new Node TYPE_DOCTYPE, name: name
 174 new_eof_token = ->
 175         return new Node TYPE_EOF
 176 new_afe_marker = ->
 177         return new Node TYPE_AFE_MARKER
 178 new_aaa_bookmark = ->
 179         return new Node TYPE_AAA_BOOKMARK
 180
 181 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
 182 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 183 digits = "0123456789"
 184 alnum = lc_alpha + uc_alpha + digits
 185 hex_chars = digits + "abcdefABCDEF"
 186
 187 is_uc_alpha = (str) ->
 188         return str.length is 1 and uc_alpha.indexOf(str) > -1
 189 is_lc_alpha = (str) ->
 190         return str.length is 1 and lc_alpha.indexOf(str) > -1
 191
 192 # some SVG elements have dashes in them
 193 tag_name_chars = alnum + "-"
 194
 195 # http://www.w3.org/TR/html5/infrastructure.html#space-character
 196 space_chars = "\u0009\u000a\u000c\u000d\u0020"
 197 is_space = (txt) ->
 198         return txt.length is 1 and space_chars.indexOf(txt) > -1
 199 is_space_tok = (t) ->
 200         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
 201
 202 is_input_hidden_tok = (t) ->
 203         return false unless t.type is TYPE_START_TAG
 204         for a in t.attrs_a
 205                 if a[0] is 'type'
 206                         if a[1].toLowerCase() is 'hidden'
 207                                 return true
 208                         return false
 209         return false
 210
 211 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
 212 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
 213
 214 unicode_fixes = {}
 215 unicode_fixes[0x00] = "\uFFFD"
 216 unicode_fixes[0x80] = "\u20AC"
 217 unicode_fixes[0x82] = "\u201A"
 218 unicode_fixes[0x83] = "\u0192"
 219 unicode_fixes[0x84] = "\u201E"
 220 unicode_fixes[0x85] = "\u2026"
 221 unicode_fixes[0x86] = "\u2020"
 222 unicode_fixes[0x87] = "\u2021"
 223 unicode_fixes[0x88] = "\u02C6"
 224 unicode_fixes[0x89] = "\u2030"
 225 unicode_fixes[0x8A] = "\u0160"
 226 unicode_fixes[0x8B] = "\u2039"
 227 unicode_fixes[0x8C] = "\u0152"
 228 unicode_fixes[0x8E] = "\u017D"
 229 unicode_fixes[0x91] = "\u2018"
 230 unicode_fixes[0x92] = "\u2019"
 231 unicode_fixes[0x93] = "\u201C"
 232 unicode_fixes[0x94] = "\u201D"
 233 unicode_fixes[0x95] = "\u2022"
 234 unicode_fixes[0x96] = "\u2013"
 235 unicode_fixes[0x97] = "\u2014"
 236 unicode_fixes[0x98] = "\u02DC"
 237 unicode_fixes[0x99] = "\u2122"
 238 unicode_fixes[0x9A] = "\u0161"
 239 unicode_fixes[0x9B] = "\u203A"
 240 unicode_fixes[0x9C] = "\u0153"
 241 unicode_fixes[0x9E] = "\u017E"
 242 unicode_fixes[0x9F] = "\u0178"
 243
 244 quirks_yes_pi_prefixes = [
 245         "+//silmaril//dtd html pro v0r11 19970101//"
 246         "-//as//dtd html 3.0 aswedit + extensions//"
 247         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
 248         "-//ietf//dtd html 2.0 level 1//"
 249         "-//ietf//dtd html 2.0 level 2//"
 250         "-//ietf//dtd html 2.0 strict level 1//"
 251         "-//ietf//dtd html 2.0 strict level 2//"
 252         "-//ietf//dtd html 2.0 strict//"
 253         "-//ietf//dtd html 2.0//"
 254         "-//ietf//dtd html 2.1e//"
 255         "-//ietf//dtd html 3.0//"
 256         "-//ietf//dtd html 3.2 final//"
 257         "-//ietf//dtd html 3.2//"
 258         "-//ietf//dtd html 3//"
 259         "-//ietf//dtd html level 0//"
 260         "-//ietf//dtd html level 1//"
 261         "-//ietf//dtd html level 2//"
 262         "-//ietf//dtd html level 3//"
 263         "-//ietf//dtd html strict level 0//"
 264         "-//ietf//dtd html strict level 1//"
 265         "-//ietf//dtd html strict level 2//"
 266         "-//ietf//dtd html strict level 3//"
 267         "-//ietf//dtd html strict//"
 268         "-//ietf//dtd html//"
 269         "-//metrius//dtd metrius presentational//"
 270         "-//microsoft//dtd internet explorer 2.0 html strict//"
 271         "-//microsoft//dtd internet explorer 2.0 html//"
 272         "-//microsoft//dtd internet explorer 2.0 tables//"
 273         "-//microsoft//dtd internet explorer 3.0 html strict//"
 274         "-//microsoft//dtd internet explorer 3.0 html//"
 275         "-//microsoft//dtd internet explorer 3.0 tables//"
 276         "-//netscape comm. corp.//dtd html//"
 277         "-//netscape comm. corp.//dtd strict html//"
 278         "-//o'reilly and associates//dtd html 2.0//"
 279         "-//o'reilly and associates//dtd html extended 1.0//"
 280         "-//o'reilly and associates//dtd html extended relaxed 1.0//"
 281         "-//sq//dtd html 2.0 hotmetal + extensions//"
 282         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
 283         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
 284         "-//spyglass//dtd html 2.0 extended//"
 285         "-//sun microsystems corp.//dtd hotjava html//"
 286         "-//sun microsystems corp.//dtd hotjava strict html//"
 287         "-//w3c//dtd html 3 1995-03-24//"
 288         "-//w3c//dtd html 3.2 draft//"
 289         "-//w3c//dtd html 3.2 final//"
 290         "-//w3c//dtd html 3.2//"
 291         "-//w3c//dtd html 3.2s draft//"
 292         "-//w3c//dtd html 4.0 frameset//"
 293         "-//w3c//dtd html 4.0 transitional//"
 294         "-//w3c//dtd html experimental 19960712//"
 295         "-//w3c//dtd html experimental 970421//"
 296         "-//w3c//dtd w3 html//"
 297         "-//w3o//dtd w3 html 3.0//"
 298         "-//webtechs//dtd mozilla html 2.0//"
 299         "-//webtechs//dtd mozilla html//"
 300 ]
 301
 302 # These are the character references that don't need a terminating semicolon
 303 # min length: 2, max: 6, none are a prefix of any other.
 304 legacy_char_refs = {
 305         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
 306         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
 307         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
 308         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
 309         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
 310         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
 311         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
 312         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
 313         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
 314         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
 315         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
 316         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
 317         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
 318         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
 319         shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
 320         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
 321         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
 322         yen: '¥', yuml: 'ÿ'
 323 }
 324
 325 #void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
 326 #raw_text_elements = ['script', 'style']
 327 #escapable_raw_text_elements = ['textarea', 'title']
 328 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
 329 svg_elements = [
 330         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
 331         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
 332         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
 333         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
 334         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
 335         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
 336         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
 337         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
 338         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
 339         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
 340         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
 341         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
 342         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
 343         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
 344         'view', 'vkern'
 345 ]
 346
 347 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
 348 mathml_elements = [
 349         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
 350         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
 351         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
 352         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
 353         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
 354         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
 355         'determinant', 'diff', 'divergence', 'divide', 'domain',
 356         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
 357         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
 358         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
 359         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
 360         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
 361         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
 362         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
 363         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
 364         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
 365         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
 366         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
 367         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
 368         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
 369         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
 370         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
 371         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
 372         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
 373         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
 374         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
 375         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
 376         'vectorproduct', 'xor'
 377 ]
 378 # foreign_elements = [svg_elements..., mathml_elements...]
 379 #normal_elements = All other allowed HTML elements are normal elements.
 380
 381 special_elements = {
 382         # HTML:
 383         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
 384         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
 385         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
 386         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
 387         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
 388         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
 389         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
 390         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
 391         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
 392         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
 393         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
 394
 395         menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
 396
 397         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
 398         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
 399         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
 400         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
 401         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
 402         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
 403         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
 404
 405         # MathML:
 406         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
 407         'annotation-xml':NS_MATHML,
 408
 409         # SVG:
 410         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
 411 }
 412
 413 formatting_elements = {
 414          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
 415          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
 416          u: true
 417 }
 418
 419 mathml_text_integration = {
 420         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
 421 }
 422 is_mathml_text_integration_point = (el) ->
 423         return mathml_text_integration[el.name] is el.namespace
 424 is_html_integration = (el) -> # DON'T PASS A TOKEN
 425         if el.namespace is NS_MATHML
 426                 if el.name is 'annotation-xml'
 427                         if el.attrs.encoding?
 428                                 if el.attrs.encoding.toLowerCase() is 'text/html'
 429                                         return true
 430                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
 431                                         return true
 432                 return false
 433         if el.namespace is NS_SVG
 434                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
 435                         return true
 436         return false
 437
 438 h_tags = {
 439         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
 440 }
 441
 442 foster_parenting_targets = {
 443         table: NS_HTML
 444         tbody: NS_HTML
 445         tfoot: NS_HTML
 446         thead: NS_HTML
 447         tr: NS_HTML
 448 }
 449
 450 end_tag_implied = {
 451         dd: NS_HTML
 452         dt: NS_HTML
 453         li: NS_HTML
 454         option: NS_HTML
 455         optgroup: NS_HTML
 456         p: NS_HTML
 457         rb: NS_HTML
 458         rp: NS_HTML
 459         rt: NS_HTML
 460         rtc: NS_HTML
 461 }
 462
 463 el_is_special = (e) ->
 464         return special_elements[e.name] is e.namespace
 465
 466 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
 467 el_is_special_not_adp = (el) ->
 468         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
 469
 470 svg_name_fixes = {
 471         altglyph: 'altGlyph'
 472         altglyphdef: 'altGlyphDef'
 473         altglyphitem: 'altGlyphItem'
 474         animatecolor: 'animateColor'
 475         animatemotion: 'animateMotion'
 476         animatetransform: 'animateTransform'
 477         clippath: 'clipPath'
 478         feblend: 'feBlend'
 479         fecolormatrix: 'feColorMatrix'
 480         fecomponenttransfer: 'feComponentTransfer'
 481         fecomposite: 'feComposite'
 482         feconvolvematrix: 'feConvolveMatrix'
 483         fediffuselighting: 'feDiffuseLighting'
 484         fedisplacementmap: 'feDisplacementMap'
 485         fedistantlight: 'feDistantLight'
 486         fedropshadow: 'feDropShadow'
 487         feflood: 'feFlood'
 488         fefunca: 'feFuncA'
 489         fefuncb: 'feFuncB'
 490         fefuncg: 'feFuncG'
 491         fefuncr: 'feFuncR'
 492         fegaussianblur: 'feGaussianBlur'
 493         feimage: 'feImage'
 494         femerge: 'feMerge'
 495         femergenode: 'feMergeNode'
 496         femorphology: 'feMorphology'
 497         feoffset: 'feOffset'
 498         fepointlight: 'fePointLight'
 499         fespecularlighting: 'feSpecularLighting'
 500         fespotlight: 'feSpotLight'
 501         fetile: 'feTile'
 502         feturbulence: 'feTurbulence'
 503         foreignobject: 'foreignObject'
 504         glyphref: 'glyphRef'
 505         lineargradient: 'linearGradient'
 506         radialgradient: 'radialGradient'
 507         textpath: 'textPath'
 508 }
 509 svg_attribute_fixes = {
 510         attributename: 'attributeName'
 511         attributetype: 'attributeType'
 512         basefrequency: 'baseFrequency'
 513         baseprofile: 'baseProfile'
 514         calcmode: 'calcMode'
 515         clippathunits: 'clipPathUnits'
 516         contentscripttype: 'contentScriptType'
 517         contentstyletype: 'contentStyleType'
 518         diffuseconstant: 'diffuseConstant'
 519         edgemode: 'edgeMode'
 520         externalresourcesrequired: 'externalResourcesRequired'
 521         # WHATWG removes this: filterres: 'filterRes'
 522         filterunits: 'filterUnits'
 523         glyphref: 'glyphRef'
 524         gradienttransform: 'gradientTransform'
 525         gradientunits: 'gradientUnits'
 526         kernelmatrix: 'kernelMatrix'
 527         kernelunitlength: 'kernelUnitLength'
 528         keypoints: 'keyPoints'
 529         keysplines: 'keySplines'
 530         keytimes: 'keyTimes'
 531         lengthadjust: 'lengthAdjust'
 532         limitingconeangle: 'limitingConeAngle'
 533         markerheight: 'markerHeight'
 534         markerunits: 'markerUnits'
 535         markerwidth: 'markerWidth'
 536         maskcontentunits: 'maskContentUnits'
 537         maskunits: 'maskUnits'
 538         numoctaves: 'numOctaves'
 539         pathlength: 'pathLength'
 540         patterncontentunits: 'patternContentUnits'
 541         patterntransform: 'patternTransform'
 542         patternunits: 'patternUnits'
 543         pointsatx: 'pointsAtX'
 544         pointsaty: 'pointsAtY'
 545         pointsatz: 'pointsAtZ'
 546         preservealpha: 'preserveAlpha'
 547         preserveaspectratio: 'preserveAspectRatio'
 548         primitiveunits: 'primitiveUnits'
 549         refx: 'refX'
 550         refy: 'refY'
 551         repeatcount: 'repeatCount'
 552         repeatdur: 'repeatDur'
 553         requiredextensions: 'requiredExtensions'
 554         requiredfeatures: 'requiredFeatures'
 555         specularconstant: 'specularConstant'
 556         specularexponent: 'specularExponent'
 557         spreadmethod: 'spreadMethod'
 558         startoffset: 'startOffset'
 559         stddeviation: 'stdDeviation'
 560         stitchtiles: 'stitchTiles'
 561         surfacescale: 'surfaceScale'
 562         systemlanguage: 'systemLanguage'
 563         tablevalues: 'tableValues'
 564         targetx: 'targetX'
 565         targety: 'targetY'
 566         textlength: 'textLength'
 567         viewbox: 'viewBox'
 568         viewtarget: 'viewTarget'
 569         xchannelselector: 'xChannelSelector'
 570         ychannelselector: 'yChannelSelector'
 571         zoomandpan: 'zoomAndPan'
 572 }
 573 foreign_attr_fixes = {
 574         'xlink:actuate': 'xlink actuate'
 575         'xlink:arcrole': 'xlink arcrole'
 576         'xlink:href': 'xlink href'
 577         'xlink:role': 'xlink role'
 578         'xlink:show': 'xlink show'
 579         'xlink:title': 'xlink title'
 580         'xlink:type': 'xlink type'
 581         'xml:base': 'xml base'
 582         'xml:lang': 'xml lang'
 583         'xml:space': 'xml space'
 584         'xmlns': 'xmlns'
 585         'xmlns:xlink': 'xmlns xlink'
 586 }
 587 adjust_mathml_attributes = (t) ->
 588         for a in t.attrs_a
 589                 if a[0] is 'definitionurl'
 590                         a[0] = 'definitionURL'
 591         return
 592 adjust_svg_attributes = (t) ->
 593         for a in t.attrs_a
 594                 if svg_attribute_fixes[a[0]]?
 595                         a[0] = svg_attribute_fixes[a[0]]
 596         return
 597 adjust_foreign_attributes = (t) ->
 598         # fixfull
 599         for a in t.attrs_a
 600                 if foreign_attr_fixes[a[0]]?
 601                         a[0] = foreign_attr_fixes[a[0]]
 602         return
 603
 604 # decode_named_char_ref()
 605 #
 606 # The list of named character references is _huge_ so if we're running in a
 607 # browser, we get the browser to decode them, rather than increasing the code
 608 # size to include the table.
 609 if context is 'module'
 610         _decode_named_char_ref = require './parser_no_browser_helper.coffee'
 611 else
 612         # TODO test this in IE8
 613         decode_named_char_ref_el = document.createElement('textarea')
 614         _decode_named_char_ref = (txt) ->
 615                 txt = "&#{txt};"
 616                 decode_named_char_ref_el.innerHTML = txt
 617                 decoded = decode_named_char_ref_el.value
 618                 return null if decoded is txt
 619                 return decoded
 620 # Pass the name of a named entity _that has a terminating semicolon_
 621 # Entities without terminating semicolons should use legacy_char_refs[]
 622 # Do not include the "&" or ";" in your argument, eg pass "alpha"
 623 decode_named_char_ref_cache = {}
 624 decode_named_char_ref = (txt) ->
 625         decoded = decode_named_char_ref_cache[txt]
 626         return decoded if decoded?
 627         decoded = _decode_named_char_ref txt
 628         return decode_named_char_ref_cache[txt] = decoded
 629
 630 parse_html = (args_html, args = {}) ->
 631         txt = null
 632         cur = null # index of next char in txt to be parsed
 633         # declare doc and tokenizer variables so they're in scope below
 634         doc = null
 635         open_els = null # stack of open elements
 636         afe = null # active formatting elements
 637         template_ins_modes = null
 638         ins_mode = null
 639         original_ins_mode = null
 640         tok_state = null
 641         tok_cur_tag = null # partially parsed tag
 642         flag_scripting = null
 643         flag_frameset_ok = null
 644         flag_parsing = null
 645         flag_foster_parenting = null
 646         form_element_pointer = null
 647         temporary_buffer = null
 648         pending_table_character_tokens = null
 649         head_element_pointer = null
 650         flag_fragment_parsing = null
 651         context_element = null
 652
 653         stop_parsing = ->
 654                 flag_parsing = false
 655                 return
 656
 657         parse_error = ->
 658                 if args.error_cb?
 659                         args.error_cb cur
 660                 return
 661
 662         # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
 663         # "Noah's Ark clause" but with three
 664         afe_push = (new_el) ->
 665                 matches = 0
 666                 for el, i in afe
 667                         if el.type is TYPE_AFE_MARKER
 668                                 break
 669                         if el.name is new_el.name and el.namespace is new_el.namespace
 670                                 attrs_match = true
 671                                 for k, v of el.attrs
 672                                         unless new_el.attrs[k] is v
 673                                                 attrs_match = false
 674                                                 break
 675                                 if attrs_match
 676                                         for k, v of new_el.attrs
 677                                                 unless el.attrs[k] is v
 678                                                         attrs_match = false
 679                                                         break
 680                                 if attrs_match
 681                                         matches += 1
 682                                         if matches is 3
 683                                                 afe.splice i, 1
 684                                                 break
 685                 afe.unshift new_el
 686                 return
 687
 688         afe_push_marker = ->
 689                 afe.unshift new_afe_marker()
 690                 return
 691
        # the functions below implement the Tree Construction algorithm
 693         # http://www.w3.org/TR/html5/syntax.html#tree-construction
 694
 695         # But first... the helpers
 696         template_tag_is_open = ->
 697                 for el in open_els
 698                         if el.name is 'template' and el.namespace is NS_HTML
 699                                 return true
 700                 return false
 701         is_in_scope_x = (tag_name, scope, namespace) ->
 702                 # walk open_els from index 0. `scope` maps element name -> namespace
 703                 # for the elements that end the search; a null namespace matches any.
 704                 for el in open_els
 705                         return true if el.name is tag_name and (namespace is null or el.namespace is namespace)
 706                         return false if scope[el.name] is el.namespace
 707                 return false
 708         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
 709                 # walk open_els from index 0 looking for an element named tag_name
 710                 # (a null namespace matches any namespace). An element listed in
 711                 # either `scope` or `scope2` (element name -> namespace) ends the
 712                 # search with false, as does running out of elements.
 713                 for el in open_els
 714                         return true if el.name is tag_name and (namespace is null or el.namespace is namespace)
 715                         return false if scope[el.name] is el.namespace or scope2[el.name] is el.namespace
 716                 return false
 717         standard_scopers = {
 718                 # entries whose value is the HTML namespace
 719                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML, td: NS_HTML,
 720                 th: NS_HTML, marquee: NS_HTML, object: NS_HTML, template: NS_HTML,
 721                 # entries whose value is the MathML namespace
 722                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
 723                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
 724                 # entries whose value is the SVG namespace
 725                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
 726         }
 727         button_scopers = {button: NS_HTML}
 728         li_scopers = {ol: NS_HTML, ul: NS_HTML}
 729         table_scopers = {html: NS_HTML, table: NS_HTML, template: NS_HTML}
 730         # scope check bounded by standard_scopers; namespace defaults to null (any namespace)
 731         is_in_scope = (tag_name, namespace = null) -> is_in_scope_x tag_name, standard_scopers, namespace
 732         # scope check bounded by standard_scopers plus button_scopers
 733         is_in_button_scope = (tag_name, namespace = null) -> is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
 734         # scope check bounded by table_scopers only
 735         is_in_table_scope = (tag_name, namespace = null) -> is_in_scope_x tag_name, table_scopers, namespace
 736         # aka is_in_list_item_scope
 737         # scope check bounded by standard_scopers plus li_scopers
 738         is_in_li_scope = (tag_name, namespace = null) -> is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
 739         is_in_select_scope = (tag_name, namespace = null) ->
 740                 # "in select scope": every element type ends this scope except HTML
 741                 # optgroup and HTML option, so only those two may be walked past.
 742                 for t in open_els
 743                         return true if t.name is tag_name and (namespace is null or namespace is t.namespace)
 744                         return false unless t.namespace is NS_HTML and (t.name is 'optgroup' or t.name is 'option')
 745                 return false
 746         # this checks for a particular element, not by name
 747         # this requires a namespace match
 748         el_is_in_scope = (needle) ->
 749                 # identity search for `needle`, walking open_els from index 0; the
 750                 # walk gives up at the first element listed in standard_scopers
 751                 for el in open_els
 752                         return true if el is needle
 753                         return false if standard_scopers[el.name] is el.namespace
 754                 return false
 755
 756         clear_to_table_stopers = {
 757                 'table': true
 758                 'template': true
 759                 'html': true
 760         }
 761         clear_stack_to_table_context = ->
 762                 # pop until open_els[0] is an HTML table, template or html element; a
 763                 # foreign element sharing one of those names (e.g. SVG template) is popped too
 764                 until open_els[0].namespace is NS_HTML and clear_to_table_stopers[open_els[0].name]?
 765                         open_els.shift()
 766                 return
 767         # element name -> namespace of the nodes that end clear_stack_to_table_body_context
 768         clear_to_table_body_stopers =
 769                 tbody: NS_HTML
 770                 tfoot: NS_HTML
 771                 thead: NS_HTML
 772                 template: NS_HTML
 773                 html: NS_HTML
 774         clear_stack_to_table_body_context = ->
 775                 # drop entries from the front of open_els until open_els[0] matches a
 776                 # stopper in both name and namespace
 777                 until clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
 778                         open_els.shift()
 779                 return
 780         clear_to_table_row_stopers = {
 781                 'tr': true
 782                 'template': true
 783                 'html': true
 784         }
 785         clear_stack_to_table_row_context = ->
 786                 # pop until open_els[0] is an HTML tr, template or html element; a
 787                 # foreign (non-HTML) element sharing one of those names is popped too
 788                 until open_els[0].namespace is NS_HTML and clear_to_table_row_stopers[open_els[0].name]?
 789                         open_els.shift()
 790                 return
 791         clear_afe_to_marker = ->
 792                 # remove entries from the front of afe up to and including the first
 793                 # marker. afe can run out before a marker is found (this happens in
 794                 # the fragment case, possibly a spec error), so stop once it is empty.
 795                 while afe.length > 0
 796                         break if afe.shift().type is TYPE_AFE_MARKER
 797                 return
 798
 799         # 8.2.3.1 ...
 800         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
 801         reset_ins_mode = ->
 802                 # Chooses ins_mode from the contents of open_els, scanning from the
 803                 # current node (index 0) toward the root. Reads flag_fragment_parsing,
 804                 # context_element, template_ins_modes and head_element_pointer; the
 805                 # only state it writes is ins_mode. The numbered comments quote the
 806                 # steps of the spec algorithm.
 807                 #
 808                 # 1. Let last be false.
 809                 last = false
 810                 # 2. Let node be the last node in the stack of open elements.
 811                 node_i = 0
 812                 node = open_els[node_i]
 813                 loop
 814                         # 3. Loop: If node is the first node in the stack of open elements,
 815                         # then set last to true, and, if the parser was originally created
 816                         # as part of the HTML fragment parsing algorithm (fragment case) set
 817                         # node to the context element.
 818                         if node_i is open_els.length - 1
 819                                 last = true
 820                                 node = context_element if flag_fragment_parsing
 821                         # steps 4 through 16 only ever match elements in the HTML namespace,
 822                         # so test the namespace once and dispatch on the tag name
 823                         if node.namespace is NS_HTML
 824                                 switch node.name
 825                                         # 4. If node is a select element, run these substeps:
 826                                         when 'select'
 827                                                 # 1. If last is true, jump to the step below labeled done.
 828                                                 unless last
 829                                                         # 2-4, 7. Let ancestor be each node before node in the
 830                                                         # stack of open elements in turn; running out of nodes
 831                                                         # jumps to the step below labeled done.
 832                                                         for ancestor in open_els[(node_i + 1)..]
 833                                                                 # 5. If ancestor is a template node, jump to the
 834                                                                 # step below labeled done.
 835                                                                 break if ancestor.name is 'template' and ancestor.namespace is NS_HTML
 836                                                                 # 6. If ancestor is a table node, switch the
 837                                                                 # insertion mode to "in select in table" and abort
 838                                                                 # these steps.
 839                                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
 840                                                                         ins_mode = ins_mode_in_select_in_table
 841                                                                         return
 842                                                 # 8. Done: Switch the insertion mode to "in select" and
 843                                                 # abort these steps.
 844                                                 ins_mode = ins_mode_in_select
 845                                                 return
 846                                         # 5. If node is a td or th element and last is false, then
 847                                         # switch the insertion mode to "in cell" and abort these steps.
 848                                         # (when last is true nothing is done here and step 17 applies)
 849                                         when 'td', 'th'
 850                                                 unless last
 851                                                         ins_mode = ins_mode_in_cell
 852                                                         return
 853                                         # 6. If node is a tr element, then switch the insertion mode to
 854                                         # "in row" and abort these steps.
 855                                         when 'tr'
 856                                                 ins_mode = ins_mode_in_row
 857                                                 return
 858                                         # 7. If node is a tbody, thead, or tfoot element, then switch
 859                                         # the insertion mode to "in table body" and abort these steps.
 860                                         when 'tbody', 'thead', 'tfoot'
 861                                                 ins_mode = ins_mode_in_table_body
 862                                                 return
 863                                         # 8. If node is a caption element, then switch the insertion
 864                                         # mode to "in caption" and abort these steps.
 865                                         when 'caption'
 866                                                 ins_mode = ins_mode_in_caption
 867                                                 return
 868                                         # 9. If node is a colgroup element, then switch the insertion
 869                                         # mode to "in column group" and abort these steps.
 870                                         when 'colgroup'
 871                                                 ins_mode = ins_mode_in_column_group
 872                                                 return
 873                                         # 10. If node is a table element, then switch the insertion
 874                                         # mode to "in table" and abort these steps.
 875                                         when 'table'
 876                                                 ins_mode = ins_mode_in_table
 877                                                 return
 878                                         # 11. If node is a template element, then switch the insertion
 879                                         # mode to the current template insertion mode and abort these
 880                                         # steps.
 881                                         when 'template'
 882                                                 ins_mode = template_ins_modes[0]
 883                                                 return
 884                                         # 12. If node is a head element and last is true, then switch
 885                                         # the insertion mode to "in body" ("in body"! not "in head"!)
 886                                         # and abort these steps. (fragment case)
 887                                         # 13. If node is a head element and last is false, then switch
 888                                         # the insertion mode to "in head" and abort these steps.
 889                                         when 'head'
 890                                                 ins_mode = if last then ins_mode_in_body else ins_mode_in_head
 891                                                 return
 892                                         # 14. If node is a body element, then switch the insertion mode
 893                                         # to "in body" and abort these steps.
 894                                         when 'body'
 895                                                 ins_mode = ins_mode_in_body
 896                                                 return
 897                                         # 15. If node is a frameset element, then switch the insertion
 898                                         # mode to "in frameset" and abort these steps. (fragment case)
 899                                         when 'frameset'
 900                                                 ins_mode = ins_mode_in_frameset
 901                                                 return
 902                                         # 16. If node is an html element, run these substeps:
 903                                         when 'html'
 904                                                 # 1. If the head element pointer is null, switch the
 905                                                 # insertion mode to "before head" and abort these steps.
 906                                                 # (fragment case)
 907                                                 # 2. Otherwise, the head element pointer is not null,
 908                                                 # switch the insertion mode to "after head" and abort
 909                                                 # these steps.
 910                                                 ins_mode = if head_element_pointer is null then ins_mode_before_head else ins_mode_after_head
 911                                                 return
 912                         # 17. If last is true, then switch the insertion mode to "in body"
 913                         # and abort these steps. (fragment case)
 914                         if last
 915                                 ins_mode = ins_mode_in_body
 916                                 return
 917                         # 18. Let node now be the node before node in the stack of open
 918                         # elements.
 919                         # (open_els is stored current node first, so "before" is the next
 920                         # higher index.)
 921                         node_i += 1
 922                         node = open_els[node_i]
 923                         # 19. Return to the step labeled loop.
 924                 return
 925
 926         # 8.2.3.2
 927
 928         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
 929         adjusted_current_node = ->
 930                 # fragment parsing with a single open element: the context element stands in
 931                 return context_element if flag_fragment_parsing and open_els.length is 1
 932                 return open_els[0]
 933
 934         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
 935         # this implementation is structured (mostly) as described at the link above.
 936         # capitalized comments are the "labels" described at the link above.
 937         reconstruct_afe = ->
 938                 # nothing to reconstruct when afe is empty, or when the entry at
 939                 # index 0 is a marker or is already in open_els
 940                 return unless afe.length > 0
 941                 return if afe[0].type is TYPE_AFE_MARKER or open_els.indexOf(afe[0]) isnt -1
 942                 # Rewind: move i toward the end of afe for as long as the entry
 943                 # after i is neither a marker nor in open_els. Stopping just short
 944                 # of such an entry is the step the spec labels "Advance".
 945                 i = 0
 946                 while i < afe.length - 1
 947                         break if afe[i + 1].type is TYPE_AFE_MARKER or open_els.indexOf(afe[i + 1]) isnt -1
 948                         i += 1
 949                 # Create: for entries i down to 0, insert an element built from the
 950                 # entry's token and store that element in place of the entry
 951                 loop
 952                         afe[i] = insert_html_element afe[i].token
 953                         break if i is 0
 954                         # Advance
 955                         i -= 1
 956                 return
 957
 958         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
 959         # adoption agency algorithm
 960         # overview here:
 961         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
 962         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
 963         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
 964         adoption_agency = (subject) ->
                     # Adoption agency algorithm for the tag name `subject`. Mutates
                     # open_els, afe and the parent/children links of the affected nodes.
                     # Throughout, index 0 of open_els is the current node, and afe is
                     # searched from index 0 when the spec asks for the "last" entry.
 965 # this block implements the W3C spec (kept for reference, not executed)
 966 #               # 1. If the current node is an HTML element whose tag name is subject,
 967 #               # then run these substeps:
 968 #               #
 969 #               # 1. Let element be the current node.
 970 #               #
 971 #               # 2. Pop element off the stack of open elements.
 972 #               #
 973 #               # 3. If element is also in the list of active formatting elements,
 974 #               # remove the element from the list.
 975 #               #
 976 #               # 4. Abort the adoption agency algorithm.
 977 #               if open_els[0].name is subject and open_els[0].namespace is NS_HTML
 978 #                       el = open_els.shift()
 979 #                       # remove it from the list of active formatting elements (if found)
 980 #                       for t, i in afe
 981 #                               if t is el
 982 #                                       afe.splice i, 1
 983 #                                       break
 984 #                       return
 985 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
 986                 # If the current node is an HTML element whose tag name is subject, and
 987                 # the current node is not in the list of active formatting elements,
 988                 # then pop the current node off the stack of open elements, and abort
 989                 # these steps.
 990                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
 991                         # check whether the current node is in the list of active formatting elements
 992                         in_afe = false
 993                         for el, i in afe
 994                                 if el is open_els[0]
 995                                         in_afe = true
 996                                         break
 997                         unless in_afe
 998                                 open_els.shift()
 999                                 return
1000                         # fall through
1001 # END WHATWG
                     # outer loop counter; the loop body below runs at most 8 times
1002                 outer = 0
1003                 loop
1004                         if outer >= 8
1005                                 return
1006                         outer += 1
1007                         # 5. Let formatting element be the last element in the list of
1008                         # active formatting elements that: is between the end of the list
1009                         # and the last scope marker in the list, if any, or the start of
1010                         # the list otherwise, and  has the tag name subject.
                             # (fe_of_afe keeps the index of fe in afe for the removals below)
1011                         fe = null
1012                         for t, fe_of_afe in afe
1013                                 if t.type is TYPE_AFE_MARKER
1014                                         break
1015                                 if t.name is subject
1016                                         fe = t
1017                                         break
1018                         # If there is no such element, then abort these steps and instead
1019                         # act as described in the "any other end tag" entry above.
1020                         if fe is null
1021                                 in_body_any_other_end_tag subject
1022                                 return
1023                         # 6. If formatting element is not in the stack of open elements,
1024                         # then this is a parse error; remove the element from the list, and
1025                         # abort these steps.
                             # (fe_of_open_els keeps the index of fe in open_els when found)
1026                         in_open_els = false
1027                         for t, fe_of_open_els in open_els
1028                                 if t is fe
1029                                         in_open_els = true
1030                                         break
1031                         unless in_open_els
1032                                 parse_error()
1033                                 # "remove it from the list" must mean afe, since it's not in open_els
1034                                 afe.splice fe_of_afe, 1
1035                                 return
1036                         # 7. If formatting element is in the stack of open elements, but
1037                         # the element is not in scope, then this is a parse error; abort
1038                         # these steps.
1039                         unless el_is_in_scope fe
1040                                 parse_error()
1041                                 return
1042                         # 8. If formatting element is not the current node, this is a parse
1043                         # error. (But do not abort these steps.)
1044                         unless open_els[0] is fe
1045                                 parse_error()
1046                                 # continue
1047                         # 9. Let furthest block be the topmost node in the stack of open
1048                         # elements that is lower in the stack than formatting element, and
1049                         # is an element in the special category. There might not be one.
                             # (scans from the current node up to, but not including, fe and
                             # keeps the last special element seen)
1050                         fb = null
1051                         fb_of_open_els = null
1052                         for t, i in open_els
1053                                 if t is fe
1054                                         break
1055                                 if el_is_special t
1056                                         fb = t
1057                                         fb_of_open_els = i
1058                                         # and continue, to see if there's one that's more "topmost"
1059                         # 10. If there is no furthest block, then the UA must first pop all
1060                         # the nodes from the bottom of the stack of open elements, from the
1061                         # current node up to and including formatting element, then remove
1062                         # formatting element from the list of active formatting elements,
1063                         # and finally abort these steps.
1064                         if fb is null
1065                                 loop
1066                                         t = open_els.shift()
1067                                         if t is fe
1068                                                 afe.splice fe_of_afe, 1
1069                                                 return
1070                         # 11. Let common ancestor be the element immediately above
1071                         # formatting element in the stack of open elements.
1072                         ca = open_els[fe_of_open_els + 1] # common ancestor
1073
1074                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1075                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
                             # (the bookmark is spliced into afe at the index fe occupied, which
                             # moves fe to the next index)
1076                         bookmark = new_aaa_bookmark()
1077                         for t, i in afe
1078                                 if t is fe
1079                                         afe.splice i, 0, bookmark
1080                                         break
1081                         node = last_node = fb
                             # inner loop counter
1082                         inner = 0
1083                         loop
1084                                 inner += 1
1085                                 # 3. Let node be the element immediately above node in the
1086                                 # stack of open elements, or if node is no longer in the stack
1087                                 # of open elements (e.g. because it got removed by this
1088                                 # algorithm), the element that was immediately above node in
1089                                 # the stack of open elements before node was removed.
1090                                 node_next = null
1091                                 for t, i in open_els
1092                                         if t is node
1093                                                 node_next = open_els[i + 1]
1094                                                 break
1095                                 node = node_next ? node_above
1096                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1097
1098                                 # 4. If node is formatting element, then go to the next step in
1099                                 # the overall algorithm.
1100                                 if node is fe
1101                                         break
1102                                 # 5. If inner loop counter is greater than three and node is in
1103                                 # the list of active formatting elements, then remove node from
1104                                 # the list of active formatting elements.
                                     # (node_in_afe is left false when node was just removed here)
1105                                 node_in_afe = false
1106                                 for t, i in afe
1107                                         if t is node
1108                                                 if inner > 3
1109                                                         afe.splice i, 1
1110                                                 else
1111                                                         node_in_afe = true
1112                                                 break
1113                                 # 6. If node is not in the list of active formatting elements,
1114                                 # then remove node from the stack of open elements and then go
1115                                 # back to the step labeled inner loop.
1116                                 unless node_in_afe
1117                                         for t, i in open_els
1118                                                 if t is node
                                                             # remember what was above node before it is removed
1119                                                         node_above = open_els[i + 1]
1120                                                         open_els.splice i, 1
1121                                                         break
1122                                         continue
1123                                 # 7. create an element for the token for which the element node
1124                                 # was created, in the HTML namespace, with common ancestor as
1125                                 # the intended parent; replace the entry for node in the list
1126                                 # of active formatting elements with an entry for the new
1127                                 # element, replace the entry for node in the stack of open
1128                                 # elements with an entry for the new element, and let node be
1129                                 # the new element.
1130                                 new_node = token_to_element node.token, NS_HTML, ca
1131                                 for t, i in afe
1132                                         if t is node
1133                                                 afe[i] = new_node
1134                                                 break
1135                                 for t, i in open_els
1136                                         if t is node
1137                                                 node_above = open_els[i + 1]
1138                                                 open_els[i] = new_node
1139                                                 break
1140                                 node = new_node
1141                                 # 8. If last node is furthest block, then move the
1142                                 # aforementioned bookmark to be immediately after the new node
1143                                 # in the list of active formatting elements.
1144                                 if last_node is fb
1145                                         for t, i in afe
1146                                                 if t is bookmark
1147                                                         afe.splice i, 1
1148                                                         break
1149                                         for t, i in afe
1150                                                 if t is node
1151                                                         # "after" means lower
1152                                                         afe.splice i, 0, bookmark # inserted at i, i.e. just before node's entry in the array
1153                                                         break
1154                                 # 9. Insert last node into node, first removing it from its
1155                                 # previous parent node if any.
1156                                 if last_node.parent?
1157                                         for c, i in last_node.parent.children
1158                                                 if c is last_node
1159                                                         last_node.parent.children.splice i, 1
1160                                                         break
1161                                 node.children.push last_node
1162                                 last_node.parent = node
1163                                 # 10. Let last node be node.
1164                                 last_node = node
1165                                 # 11. Return to the step labeled inner loop.
1166                         # 14. Insert whatever last node ended up being in the previous step
1167                         # at the appropriate place for inserting a node, but using common
1168                         # ancestor as the override target.
1169
1170                         # In the case where fe is immediately followed by fb:
1171                         #   * inner loop exits out early (node==fe)
1172                         #   * last_node is fb
1173                         #   * last_node is still in the tree (not a duplicate)
1174                         if last_node.parent?
1175                                 for c, i in last_node.parent.children
1176                                         if c is last_node
1177                                                 last_node.parent.children.splice i, 1
1178                                                 break
1179                         # can't use standard insert token thing, because it's already in
1180                         # open_els and must stay at its current position in open_els
                             # (dest is used below as [parent node, index into parent.children])
1181                         dest = adjusted_insertion_location ca
1182                         dest[0].children.splice dest[1], 0, last_node
1183                         last_node.parent = dest[0]
1184                         # 15. Create an element for the token for which formatting element
1185                         # was created, in the HTML namespace, with furthest block as the
1186                         # intended parent.
1187                         new_element = token_to_element fe.token, NS_HTML, fb
1188                         # 16. Take all of the child nodes of furthest block and append them
1189                         # to the element created in the last step.
1190                         while fb.children.length
1191                                 t = fb.children.shift()
1192                                 t.parent = new_element
1193                                 new_element.children.push t
1194                         # 17. Append that new element to furthest block.
1195                         new_element.parent = fb
1196                         fb.children.push new_element
1197                         # 18. Remove formatting element from the list of active formatting
1198                         # elements, and insert the new element into the list of active
1199                         # formatting elements at the position of the aforementioned
1200                         # bookmark.
1201                         for t, i in afe
1202                                 if t is fe
1203                                         afe.splice i, 1
1204                                         break
1205                         for t, i in afe
1206                                 if t is bookmark
1207                                         afe[i] = new_element
1208                                         break
1209                         # 19. Remove formatting element from the stack of open elements,
1210                         # and insert the new element into the stack of open elements
1211                         # immediately below the position of furthest block in that stack.
                             # (splicing at fb's index puts new_element just before fb in the
                             # array, i.e. on the current-node side of it)
1212                         for t, i in open_els
1213                                 if t is fe
1214                                         open_els.splice i, 1
1215                                         break
1216                         for t, i in open_els
1217                                 if t is fb
1218                                         open_els.splice i, 0, new_element
1219                                         break
1220                         # 20. Jump back to the step labeled outer loop.
1221                 return
1222
1223         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1224         close_p_element = ->
1225                 generate_implied_end_tags 'p' # arg is exception
1226                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1227                         parse_error()
1228                 while open_els.length > 1 # just in case
1229                         el = open_els.shift()
1230                         if el.name is 'p' and el.namespace is NS_HTML
1231                                 return
1232                 return
1233         close_p_if_in_button_scope = ->
1234                 if is_in_button_scope 'p', NS_HTML
1235                         close_p_element()
1236                 return
1237
1238         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1239         # aka insert_a_character = (t) ->
1240         insert_character = (t) ->
1241                 dest = adjusted_insertion_location()
1242                 # fixfull check for Document node
1243                 if dest[1] > 0
1244                         prev = dest[0].children[dest[1] - 1]
1245                         if prev.type is TYPE_TEXT
1246                                 prev.text += t.text
1247                                 return
1248                 dest[0].children.splice dest[1], 0, t
1249                 return
1250
1251         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1252         process_token = (t) ->
1253                 acn = adjusted_current_node()
1254                 unless acn?
1255                         ins_mode t
1256                         return
1257                 if acn.namespace is NS_HTML
1258                         ins_mode t
1259                         return
1260                 if is_mathml_text_integration_point(acn)
1261                         if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
1262                                 ins_mode t
1263                                 return
1264                         if t.type is TYPE_TEXT
1265                                 ins_mode t
1266                                 return
1267                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1268                         ins_mode t
1269                         return
1270                 if is_html_integration acn
1271                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1272                                 ins_mode t
1273                                 return
1274                 if t.type is TYPE_EOF
1275                         ins_mode t
1276                         return
1277                 in_foreign_content t
1278                 return
1279
1280         # 8.2.5.1
1281         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1282         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1283         adjusted_insertion_location = (override_target = null) ->
1284                 # 1. If there was an override target specified, then let target be the
1285                 # override target.
1286                 if override_target?
1287                         target = override_target
1288                 else # Otherwise, let target be the current node.
1289                         target = open_els[0]
1290                 # 2. Determine the adjusted insertion location using the first matching
1291                 # steps from the following list:
1292                 #
1293                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1294                 # thead, or tr element Foster parenting happens when content is
1295                 # misnested in tables.
1296                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1297                         loop # once. this is here so we can ``break`` to "abort these substeps"
1298                                 # 1. Let last template be the last template element in the
1299                                 # stack of open elements, if any.
1300                                 last_template = null
1301                                 last_template_i = null
1302                                 for el, i in open_els
1303                                         if el.name is 'template' and el.namespace is NS_HTML
1304                                                 last_template = el
1305                                                 last_template_i = i
1306                                                 break
1307                                 # 2. Let last table be the last table element in the stack of
1308                                 # open elements, if any.
1309                                 last_table = null
1310                                 last_table_i
1311                                 for el, i in open_els
1312                                         if el.name is 'table' and el.namespace is NS_HTML
1313                                                 last_table = el
1314                                                 last_table_i = i
1315                                                 break
1316                                 # 3. If there is a last template and either there is no last
1317                                 # table, or there is one, but last template is lower (more
1318                                 # recently added) than last table in the stack of open
1319                                 # elements, then: let adjusted insertion location be inside
1320                                 # last template's template contents, after its last child (if
1321                                 # any), and abort these substeps.
1322                                 if last_template and (last_table is null or last_template_i < last_table_i)
1323                                         target = last_template # fixfull should be it's contents
1324                                         target_i = target.children.length
1325                                         break
1326                                 # 4. If there is no last table, then let adjusted insertion
1327                                 # location be inside the first element in the stack of open
1328                                 # elements (the html element), after its last child (if any),
1329                                 # and abort these substeps. (fragment case)
1330                                 if last_table is null
1331                                         # this is odd
1332                                         target = open_els[open_els.length - 1]
1333                                         target_i = target.children.length
1334                                         break
1335                                 # 5. If last table has a parent element, then let adjusted
1336                                 # insertion location be inside last table's parent element,
1337                                 # immediately before last table, and abort these substeps.
1338                                 if last_table.parent?
1339                                         for c, i in last_table.parent.children
1340                                                 if c is last_table
1341                                                         target = last_table.parent
1342                                                         target_i = i
1343                                                         break
1344                                         break
1345                                 # 6. Let previous element be the element immediately above last
1346                                 # table in the stack of open elements.
1347                                 #
1348                                 # huh? how could it not have a parent?
1349                                 previous_element = open_els[last_table_i + 1]
1350                                 # 7. Let adjusted insertion location be inside previous
1351                                 # element, after its last child (if any).
1352                                 target = previous_element
1353                                 target_i = target.children.length
1354                                 # Note: These steps are involved in part because it's possible
1355                                 # for elements, the table element in this case in particular,
1356                                 # to have been moved by a script around in the DOM, or indeed
1357                                 # removed from the DOM entirely, after the element was inserted
1358                                 # by the parser.
1359                                 break # don't really loop
1360                 else
1361                         # Otherwise Let adjusted insertion location be inside target, after
1362                         # its last child (if any).
1363                         target_i = target.children.length
1364
1365                 # 3. If the adjusted insertion location is inside a template element,
1366                 # let it instead be inside the template element's template contents,
1367                 # after its last child (if any).
1368                 # fixfull (template)
1369
1370                 # 4. Return the adjusted insertion location.
1371                 return [target, target_i]
1372
1373         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1374         # aka create_an_element_for_token
1375         token_to_element = (t, namespace, intended_parent) ->
1376                 # convert attributes into a hash
1377                 attrs = {}
1378                 for a in t.attrs_a
1379                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1380                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1381
1382                 # TODO 2. If the newly created element has an xmlns attribute in the
1383                 # XMLNS namespace whose value is not exactly the same as the element's
1384                 # namespace, that is a parse error. Similarly, if the newly created
1385                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1386                 # value is not the XLink Namespace, that is a parse error.
1387
1388                 # fixfull: the spec says stuff about form pointers and ownerDocument
1389
1390                 return el
1391
1392         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1393         insert_foreign_element = (token, namespace) ->
1394                 ail = adjusted_insertion_location()
1395                 ail_el = ail[0]
1396                 ail_i = ail[1]
1397                 el = token_to_element token, namespace, ail_el
1398                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1399                 el.parent = ail_el
1400                 ail_el.children.splice ail_i, 0, el
1401                 open_els.unshift el
1402                 return el
1403         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1404         insert_html_element = (token) ->
1405                 return insert_foreign_element token, NS_HTML
1406
1407         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1408         # position should be [node, index_within_children]
1409         insert_comment = (t, position = null) ->
1410                 position ?= adjusted_insertion_location()
1411                 position[0].children.splice position[1], 0, t
1412                 return
1413
1414         # 8.2.5.2
1415         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1416         parse_generic_raw_text = (t) ->
1417                 insert_html_element t
1418                 tok_state = tok_state_rawtext
1419                 original_ins_mode = ins_mode
1420                 ins_mode = ins_mode_text
1421                 return
1422         parse_generic_rcdata_text = (t) ->
1423                 insert_html_element t
1424                 tok_state = tok_state_rcdata
1425                 original_ins_mode = ins_mode
1426                 ins_mode = ins_mode_text
1427                 return
1428
1429         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1430         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1431         generate_implied_end_tags = (except = null) ->
1432                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1433                         open_els.shift()
1434                 return
1435
1436         # 8.2.5.4 The rules for parsing tokens in HTML content
1437         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1438
1439         # 8.2.5.4.1 The "initial" insertion mode
1440         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1441         is_quirks_yes_doctype = (t) ->
1442                 if t.flag 'force-quirks'
1443                         return true
1444                 if t.name isnt 'html'
1445                         return true
1446                 if t.public_identifier?
1447                         pi = t.public_identifier.toLowerCase()
1448                         for p in quirks_yes_pi_prefixes
1449                                 if pi.substr(0, p.length) is p
1450                                         return true
1451                         if pi is '-//w3o//dtd w3 html strict 3.0//en//' or pi is '-/w3c/dtd html 4.0 transitional/en' or pi is 'html'
1452                                 return true
1453                 if t.system_identifier?
1454                         if t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
1455                                 return true
1456                 else if t.public_identifier?
1457                         # already did this: pi = t.public_identifier.toLowerCase()
1458                         if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1459                                 return true
1460                 return false
1461         is_quirks_limited_doctype = (t) ->
1462                 if t.public_identifier?
1463                         pi = t.public_identifier.toLowerCase()
1464                         if pi.substr(0, 32) is '-//w3c//dtd xhtml 1.0 frameset//' or pi.substr(0, 36) is '-//w3c//dtd xhtml 1.0 transitional//'
1465                                 return true
1466                         if t.system_identifier?
1467                                 if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1468                                         return true
1469                 return false
1470         ins_mode_initial = (t) ->
1471                 if is_space_tok t
1472                         return
1473                 if t.type is TYPE_COMMENT
1474                         # ?fixfull
1475                         doc.children.push t
1476                         return
1477                 if t.type is TYPE_DOCTYPE
1478                         # fixfull syntax error from first paragraph and following bullets
1479                         # fixfull set doc.doctype
1480                         # fixfull is the "not an iframe srcdoc" thing relevant?
1481                         if is_quirks_yes_doctype t
1482                                 doc.flag 'quirks mode', QUIRKS_YES
1483                         else if is_quirks_limited_doctype t
1484                                 doc.flag 'quirks mode', QUIRKS_LIMITED
1485                         doc.children.push t
1486                         ins_mode = ins_mode_before_html
1487                         return
1488                 # Anything else
1489                 # fixfull not iframe srcdoc?
1490                 parse_error()
1491                 doc.flag 'quirks mode', QUIRKS_YES
1492                 ins_mode = ins_mode_before_html
1493                 process_token t
1494                 return
1495
1496         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1497         ins_mode_before_html = (t) ->
1498                 if t.type is TYPE_DOCTYPE
1499                         parse_error()
1500                         return
1501                 if t.type is TYPE_COMMENT
1502                         doc.children.push t
1503                         return
1504                 if is_space_tok t
1505                         return
1506                 if t.type is TYPE_START_TAG and t.name is 'html'
1507                         el = token_to_element t, NS_HTML, doc
1508                         doc.children.push el
1509                         el.document = doc
1510                         open_els.unshift(el)
1511                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1512                         ins_mode = ins_mode_before_head
1513                         return
1514                 if t.type is TYPE_END_TAG
1515                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1516                                 # fall through to "anything else"
1517                         else
1518                                 parse_error()
1519                                 return
1520                 # Anything else
1521                 el = token_to_element new_open_tag('html'), NS_HTML, doc
1522                 doc.children.push el
1523                 el.document = doc
1524                 open_els.unshift el
1525                 # ?fixfull browsing context
1526                 ins_mode = ins_mode_before_head
1527                 process_token t
1528                 return
1529
1530         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1531         ins_mode_before_head = (t) ->
1532                 if is_space_tok t
1533                         return
1534                 if t.type is TYPE_COMMENT
1535                         insert_comment t
1536                         return
1537                 if t.type is TYPE_DOCTYPE
1538                         parse_error()
1539                         return
1540                 if t.type is TYPE_START_TAG and t.name is 'html'
1541                         ins_mode_in_body t
1542                         return
1543                 if t.type is TYPE_START_TAG and t.name is 'head'
1544                         el = insert_html_element t
1545                         head_element_pointer = el
1546                         ins_mode = ins_mode_in_head
1547                         return
1548                 if t.type is TYPE_END_TAG
1549                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1550                                 # fall through to Anything else below
1551                         else
1552                                 parse_error()
1553                                 return
1554                 # Anything else
1555                 el = insert_html_element new_open_tag 'head'
1556                 head_element_pointer = el
1557                 ins_mode = ins_mode_in_head
1558                 process_token t
1559                 return
1560
1561         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1562         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1563                 open_els.shift() # spec says this will be a 'head' node
1564                 ins_mode = ins_mode_after_head
1565                 process_token t
1566                 return
1567         ins_mode_in_head = (t) ->
1568                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1569                         insert_character t
1570                         return
1571                 if t.type is TYPE_COMMENT
1572                         insert_comment t
1573                         return
1574                 if t.type is TYPE_DOCTYPE
1575                         parse_error()
1576                         return
1577                 if t.type is TYPE_START_TAG and t.name is 'html'
1578                         ins_mode_in_body t
1579                         return
1580                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1581                         el = insert_html_element t
1582                         open_els.shift()
1583                         t.acknowledge_self_closing()
1584                         return
1585                 if t.type is TYPE_START_TAG and t.name is 'meta'
1586                         el = insert_html_element t
1587                         open_els.shift()
1588                         t.acknowledge_self_closing()
1589                         # fixfull encoding stuff
1590                         return
1591                 if t.type is TYPE_START_TAG and t.name is 'title'
1592                         parse_generic_rcdata_text t
1593                         return
1594                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1595                         parse_generic_raw_text t
1596                         return
1597                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1598                         insert_html_element t
1599                         ins_mode = ins_mode_in_head_noscript
1600                         return
1601                 if t.type is TYPE_START_TAG and t.name is 'script'
1602                         ail = adjusted_insertion_location()
1603                         el = token_to_element t, NS_HTML, ail
1604                         el.flag 'parser-inserted', true
1605                         # fixfull frament case
1606                         ail[0].children.splice ail[1], 0, el
1607                         open_els.unshift el
1608                         tok_state = tok_state_script_data
1609                         original_ins_mode = ins_mode # make sure orig... is defined
1610                         ins_mode = ins_mode_text
1611                         return
1612                 if t.type is TYPE_END_TAG and t.name is 'head'
1613                         open_els.shift() # will be a head element... spec says so
1614                         ins_mode = ins_mode_after_head
1615                         return
1616                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1617                         ins_mode_in_head_else t
1618                         return
1619                 if t.type is TYPE_START_TAG and t.name is 'template'
1620                         insert_html_element t
1621                         afe_push_marker()
1622                         flag_frameset_ok = false
1623                         ins_mode = ins_mode_in_template
1624                         template_ins_modes.unshift ins_mode_in_template
1625                         return
1626                 if t.type is TYPE_END_TAG and t.name is 'template'
1627                         if template_tag_is_open()
1628                                 generate_implied_end_tags
1629                                 if open_els[0].name isnt 'template'
1630                                         parse_error()
1631                                 loop
1632                                         el = open_els.shift()
1633                                         if el.name is 'template' and el.namespace is NS_HTML
1634                                                 break
1635                                 clear_afe_to_marker()
1636                                 template_ins_modes.shift()
1637                                 reset_ins_mode()
1638                         else
1639                                 parse_error()
1640                         return
1641                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1642                         parse_error()
1643                         return
1644                 ins_mode_in_head_else t
1645                 return
1646
1647         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1648         ins_mode_in_head_noscript_else = (t) ->
1649                 parse_error()
1650                 open_els.shift()
1651                 ins_mode = ins_mode_in_head
1652                 process_token t
1653                 return
1654         ins_mode_in_head_noscript = (t) ->
1655                 if t.type is TYPE_DOCTYPE
1656                         parse_error()
1657                         return
1658                 if t.type is TYPE_START_TAG and t.name is 'html'
1659                         ins_mode_in_body t
1660                         return
1661                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1662                         open_els.shift()
1663                         ins_mode = ins_mode_in_head
1664                         return
1665                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1666                         ins_mode_in_head t
1667                         return
1668                 if t.type is TYPE_END_TAG and t.name is 'br'
1669                         ins_mode_in_head_noscript_else t
1670                         return
1671                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1672                         parse_error()
1673                         return
1674                 # Anything else
1675                 ins_mode_in_head_noscript_else t
1676                 return
1677
1678         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1679         ins_mode_after_head_else = (t) ->
1680                 body_tok = new_open_tag 'body'
1681                 insert_html_element body_tok
1682                 ins_mode = ins_mode_in_body
1683                 process_token t
1684                 return
1685         ins_mode_after_head = (t) ->
1686                 if is_space_tok t
1687                         insert_character t
1688                         return
1689                 if t.type is TYPE_COMMENT
1690                         insert_comment t
1691                         return
1692                 if t.type is TYPE_DOCTYPE
1693                         parse_error()
1694                         return
1695                 if t.type is TYPE_START_TAG and t.name is 'html'
1696                         ins_mode_in_body t
1697                         return
1698                 if t.type is TYPE_START_TAG and t.name is 'body'
1699                         insert_html_element t
1700                         flag_frameset_ok = false
1701                         ins_mode = ins_mode_in_body
1702                         return
1703                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1704                         insert_html_element t
1705                         ins_mode = ins_mode_in_frameset
1706                         return
1707                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1708                         parse_error()
1709                         open_els.unshift head_element_pointer
1710                         ins_mode_in_head t
1711                         for el, i in open_els
1712                                 if el is head_element_pointer
1713                                         open_els.splice i, 1
1714                                         return
1715                         return
1716                 if t.type is TYPE_END_TAG and t.name is 'template'
1717                         ins_mode_in_head t
1718                         return
1719                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1720                         ins_mode_after_head_else t
1721                         return
1722                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1723                         parse_error()
1724                         return
1725                 # Anything else
1726                 ins_mode_after_head_else t
1727                 return
1728
1729         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1730         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1731                 node = open_els[0]
1732                 loop
1733                         if node.name is name and node.namespace is NS_HTML
1734                                 generate_implied_end_tags name # arg is exception
1735                                 unless node is open_els[0]
1736                                         parse_error()
1737                                 loop
1738                                         el = open_els.shift()
1739                                         if el is node
1740                                                 return
1741                         if special_elements[node.name] is node.namespace
1742                                 parse_error()
1743                                 return
1744                         for el, i in open_els
1745                                 if node is el
1746                                         node = open_els[i + 1]
1747                                         break
1748                 return
1749         ins_mode_in_body = (t) -> # "in body" insertion mode: handles one token t; every case returns when done, except the </br> rewrite which falls through
1750                 if t.type is TYPE_TEXT and t.text is "\u0000" # U+0000 text: parse error, token dropped
1751                         parse_error()
1752                         return
1753                 if is_space_tok t # inserted like other text, but leaves flag_frameset_ok alone
1754                         reconstruct_afe()
1755                         insert_character t
1756                         return
1757                 if t.type is TYPE_TEXT # any other text also clears flag_frameset_ok
1758                         reconstruct_afe()
1759                         insert_character t
1760                         flag_frameset_ok = false
1761                         return
1762                 if t.type is TYPE_COMMENT
1763                         insert_comment t
1764                         return
1765                 if t.type is TYPE_DOCTYPE # parse error, token ignored
1766                         parse_error()
1767                         return
1768                 if t.type is TYPE_START_TAG and t.name is 'html'
1769                         parse_error()
1770                         return if template_tag_is_open()
1771                         root_attrs = open_els[open_els.length - 1].attrs # element at the bottom of the stack
1772                         for a in t.attrs_a
1773                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]? # only add attributes that are not already set
1774                         return
1775
1776                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1777                         ins_mode_in_head t # delegate to the "in head" handler
1778                         return
1779                 if t.type is TYPE_START_TAG and t.name is 'body'
1780                         parse_error()
1781                         return if open_els.length < 2
1782                         second = open_els[open_els.length - 2] # second element from the bottom of the stack
1783                         return unless second.namespace is NS_HTML
1784                         return unless second.name is 'body'
1785                         return if template_tag_is_open()
1786                         flag_frameset_ok = false
1787                         for a in t.attrs_a
1788                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]? # only add attributes that are not already set
1789                         return
1790                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1791                         parse_error()
1792                         return if open_els.length < 2
1793                         second_i = open_els.length - 2
1794                         second = open_els[second_i]
1795                         return unless second.namespace is NS_HTML
1796                         return unless second.name is 'body'
1797                         if flag_frameset_ok is false
1798                                 return
1799                         if second.parent? # detach the body element from its parent node
1800                                 for el, i in second.parent.children
1801                                         if el is second
1802                                                 second.parent.children.splice i, 1
1803                                                 break
1804                         open_els.splice second_i, 1
1805                         # pop everything except the "root html element"
1806                         while open_els.length > 1
1807                                 open_els.shift()
1808                         insert_html_element t
1809                         ins_mode = ins_mode_in_frameset
1810                         return
1811                 if t.type is TYPE_EOF
1812                         ok_tags = {
1813                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1814                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1815                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1816                         }
1817                         for el in open_els
1818                                 unless ok_tags[el.name] is el.namespace # look up the open element's own name, not the token's
1819                                         parse_error()
1820                                         break
1821                         if template_ins_modes.length > 0
1822                                 ins_mode_in_template t
1823                         else
1824                                 stop_parsing()
1825                         return
1826                 if t.type is TYPE_END_TAG and t.name is 'body'
1827                         unless is_in_scope 'body', NS_HTML
1828                                 parse_error()
1829                                 return
1830                         ok_tags = {
1831                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1832                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1833                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1834                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1835                                 html:NS_HTML
1836                         }
1837                         for el in open_els
1838                                 unless ok_tags[el.name] is el.namespace # look up the open element's own name, not the token's
1839                                         parse_error()
1840                                         break
1841                         ins_mode = ins_mode_after_body
1842                         return
1843                 if t.type is TYPE_END_TAG and t.name is 'html'
1844                         unless is_in_scope 'body', NS_HTML
1845                                 parse_error()
1846                                 return
1847                         ok_tags = {
1848                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1849                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1850                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1851                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1852                                 html:NS_HTML
1853                         }
1854                         for el in open_els
1855                                 unless ok_tags[el.name] is el.namespace # look up the open element's own name, not the token's
1856                                         parse_error()
1857                                         break
1858                         ins_mode = ins_mode_after_body
1859                         process_token t # same token is handled again under the new insertion mode
1860                         return
1861                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1862                         close_p_if_in_button_scope()
1863                         insert_html_element t
1864                         return
1865                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1866                         close_p_if_in_button_scope()
1867                         if h_tags[open_els[0].name] is open_els[0].namespace # a heading is the current node: parse error, pop it
1868                                 parse_error()
1869                                 open_els.shift()
1870                         insert_html_element t
1871                         return
1872                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1873                         close_p_if_in_button_scope()
1874                         insert_html_element t
1875                         eat_next_token_if_newline()
1876                         flag_frameset_ok = false
1877                         return
1878                 if t.type is TYPE_START_TAG and t.name is 'form'
1879                         unless form_element_pointer is null or template_tag_is_open() # a form is already being tracked and no template is open
1880                                 parse_error()
1881                                 return
1882                         close_p_if_in_button_scope()
1883                         el = insert_html_element t
1884                         unless template_tag_is_open()
1885                                 form_element_pointer = el
1886                         return
1887                 if t.type is TYPE_START_TAG and t.name is 'li'
1888                         flag_frameset_ok = false
1889                         for node in open_els # walk down from the current node
1890                                 if node.name is 'li' and node.namespace is NS_HTML
1891                                         generate_implied_end_tags 'li' # arg is exception
1892                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1893                                                 parse_error()
1894                                         loop # pop up to and including the li
1895                                                 el = open_els.shift()
1896                                                 if el.name is 'li' and el.namespace is NS_HTML
1897                                                         break
1898                                         break
1899                                 if el_is_special_not_adp node # stop looking once el_is_special_not_adp matches
1900                                         break
1901                         close_p_if_in_button_scope()
1902                         insert_html_element t
1903                         return
1904                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1905                         flag_frameset_ok = false
1906                         for node in open_els # walk down from the current node
1907                                 if node.name is 'dd' and node.namespace is NS_HTML
1908                                         generate_implied_end_tags 'dd' # arg is exception
1909                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1910                                                 parse_error()
1911                                         loop # pop up to and including the dd
1912                                                 el = open_els.shift()
1913                                                 if el.name is 'dd' and el.namespace is NS_HTML
1914                                                         break
1915                                         break
1916                                 if node.name is 'dt' and node.namespace is NS_HTML
1917                                         generate_implied_end_tags 'dt' # arg is exception
1918                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1919                                                 parse_error()
1920                                         loop # pop up to and including the dt
1921                                                 el = open_els.shift()
1922                                                 if el.name is 'dt' and el.namespace is NS_HTML
1923                                                         break
1924                                         break
1925                                 if el_is_special_not_adp node # stop looking once el_is_special_not_adp matches
1926                                         break
1927                         close_p_if_in_button_scope()
1928                         insert_html_element t
1929                         return
1930                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1931                         close_p_if_in_button_scope()
1932                         insert_html_element t
1933                         tok_state = tok_state_plaintext
1934                         return
1935                 if t.type is TYPE_START_TAG and t.name is 'button'
1936                         if is_in_scope 'button', NS_HTML # a button is already open: parse error, close it first
1937                                 parse_error()
1938                                 generate_implied_end_tags()
1939                                 loop
1940                                         el = open_els.shift()
1941                                         if el.name is 'button' and el.namespace is NS_HTML
1942                                                 break
1943                         reconstruct_afe()
1944                         insert_html_element t
1945                         flag_frameset_ok = false
1946                         return
1947                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1948                         unless is_in_scope t.name, NS_HTML
1949                                 parse_error()
1950                                 return
1951                         generate_implied_end_tags()
1952                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1953                                 parse_error()
1954                         loop # pop up to and including the matching element
1955                                 el = open_els.shift()
1956                                 if el.name is t.name and el.namespace is NS_HTML
1957                                         return
1958                         return
1959                 if t.type is TYPE_END_TAG and t.name is 'form'
1960                         unless template_tag_is_open()
1961                                 node = form_element_pointer
1962                                 form_element_pointer = null
1963                                 if node is null or not el_is_in_scope node
1964                                         parse_error()
1965                                         return
1966                                 generate_implied_end_tags()
1967                                 if open_els[0] isnt node
1968                                         parse_error()
1969                                 for el, i in open_els # remove only the form itself; elements above it stay open
1970                                         if el is node
1971                                                 open_els.splice i, 1
1972                                                 break
1973                         else # a template is open: form_element_pointer is left untouched
1974                                 unless is_in_scope 'form', NS_HTML
1975                                         parse_error()
1976                                         return
1977                                 generate_implied_end_tags()
1978                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1979                                         parse_error()
1980                                 loop
1981                                         el = open_els.shift()
1982                                         if el.name is 'form' and el.namespace is NS_HTML
1983                                                 break
1984                         return
1985                 if t.type is TYPE_END_TAG and t.name is 'p'
1986                         unless is_in_button_scope 'p', NS_HTML # no p to close: parse error, insert one so there is
1987                                 parse_error()
1988                                 insert_html_element new_open_tag 'p'
1989                         close_p_element()
1990                         return
1991                 if t.type is TYPE_END_TAG and t.name is 'li'
1992                         unless is_in_li_scope 'li', NS_HTML
1993                                 parse_error()
1994                                 return
1995                         generate_implied_end_tags 'li' # arg is exception
1996                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1997                                 parse_error()
1998                         loop
1999                                 el = open_els.shift()
2000                                 if el.name is 'li' and el.namespace is NS_HTML
2001                                         break
2002                         return
2003                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
2004                         unless is_in_scope t.name, NS_HTML
2005                                 parse_error()
2006                                 return
2007                         generate_implied_end_tags t.name # arg is exception
2008                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2009                                 parse_error()
2010                         loop
2011                                 el = open_els.shift()
2012                                 if el.name is t.name and el.namespace is NS_HTML
2013                                         break
2014                         return
2015                 if t.type is TYPE_END_TAG and h_tags[t.name]?
2016                         h_in_scope = false
2017                         for el in open_els # any heading counts, not only one named t.name
2018                                 if h_tags[el.name] is el.namespace
2019                                         h_in_scope = true
2020                                         break
2021                                 if standard_scopers[el.name] is el.namespace
2022                                         break
2023                         unless h_in_scope
2024                                 parse_error()
2025                                 return
2026                         generate_implied_end_tags()
2027                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2028                                 parse_error()
2029                         loop # pop up to and including the nearest heading of any level
2030                                 el = open_els.shift()
2031                                 if h_tags[el.name] is el.namespace
2032                                         break
2033                         return
2034                 # deep breath!
2035                 if t.type is TYPE_START_TAG and t.name is 'a'
2036                         # If the list of active formatting elements contains an a element
2037                         # between the end of the list and the last marker on the list (or
2038                         # the start of the list if there is no marker on the list), then
2039                         # this is a parse error; run the adoption agency algorithm for the
2040                         # tag name "a", then remove that element from the list of active
2041                         # formatting elements and the stack of open elements if the
2042                         # adoption agency algorithm didn't already remove it (it might not
2043                         # have if the element is not in table scope).
2044                         found = null # must start as null: `found?` below is true for any non-null value, including false
2045                         for el in afe
2046                                 if el.type is TYPE_AFE_MARKER
2047                                         break
2048                                 if el.name is 'a' and el.namespace is NS_HTML
2049                                         found = el
2050                         if found?
2051                                 parse_error()
2052                                 adoption_agency 'a'
2053                                 for el, i in afe
2054                                         if el is found
2055                                                 afe.splice i, 1
2056                                 for el, i in open_els
2057                                         if el is found
2058                                                 open_els.splice i, 1
2059                         reconstruct_afe()
2060                         el = insert_html_element t
2061                         afe_push el
2062                         return
2063                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2064                         reconstruct_afe()
2065                         el = insert_html_element t
2066                         afe_push el
2067                         return
2068                 if t.type is TYPE_START_TAG and t.name is 'nobr'
2069                         reconstruct_afe()
2070                         if is_in_scope 'nobr', NS_HTML
2071                                 parse_error()
2072                                 adoption_agency 'nobr'
2073                                 reconstruct_afe()
2074                         el = insert_html_element t
2075                         afe_push el
2076                         return
2077                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2078                         adoption_agency t.name
2079                         return
2080                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2081                         reconstruct_afe()
2082                         insert_html_element t
2083                         afe_push_marker()
2084                         flag_frameset_ok = false
2085                         return
2086                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2087                         unless is_in_scope t.name, NS_HTML
2088                                 parse_error()
2089                                 return
2090                         generate_implied_end_tags()
2091                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2092                                 parse_error()
2093                         loop
2094                                 el = open_els.shift()
2095                                 if el.name is t.name and el.namespace is NS_HTML
2096                                         break
2097                         clear_afe_to_marker()
2098                         return
2099                 if t.type is TYPE_START_TAG and t.name is 'table'
2100                         unless doc.flag('quirks mode') is QUIRKS_YES
2101                                 close_p_if_in_button_scope() # test
2102                         insert_html_element t
2103                         flag_frameset_ok = false
2104                         ins_mode = ins_mode_in_table
2105                         return
2106                 if t.type is TYPE_END_TAG and t.name is 'br'
2107                         parse_error()
2108                         # W3C: t.type = TYPE_START_TAG
2109                         t = new_open_tag 'br' # WHATWG
2110                         # fall through
2111                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2112                         reconstruct_afe()
2113                         insert_html_element t
2114                         open_els.shift() # pop it straight back off the stack
2115                         t.acknowledge_self_closing()
2116                         flag_frameset_ok = false
2117                         return
2118                 if t.type is TYPE_START_TAG and t.name is 'input'
2119                         reconstruct_afe()
2120                         insert_html_element t
2121                         open_els.shift() # pop it straight back off the stack
2122                         t.acknowledge_self_closing()
2123                         unless is_input_hidden_tok t
2124                                 flag_frameset_ok = false
2125                         return
2126                 if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
2127                         # WHATWG adds 'menuitem' for this block
2128                         insert_html_element t
2129                         open_els.shift() # pop it straight back off the stack
2130                         t.acknowledge_self_closing()
2131                         return
2132                 if t.type is TYPE_START_TAG and t.name is 'hr'
2133                         close_p_if_in_button_scope()
2134                         insert_html_element t
2135                         open_els.shift() # pop it straight back off the stack
2136                         t.acknowledge_self_closing()
2137                         flag_frameset_ok = false
2138                         return
2139                 if t.type is TYPE_START_TAG and t.name is 'image'
2140                         parse_error()
2141                         t.name = 'img' # rename the token, then process it again as an img
2142                         process_token t
2143                         return
2144                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2145                         parse_error()
2146                         if template_tag_is_open() is false and form_element_pointer isnt null
2147                                 return
2148                         t.acknowledge_self_closing()
2149                         flag_frameset_ok = false
2150                         close_p_if_in_button_scope()
2151                         el = insert_html_element new_open_tag 'form'
2152                         unless template_tag_is_open()
2153                                 form_element_pointer = el
2154                         for a in t.attrs_a # the token's action attribute goes on the form
2155                                 if a[0] is 'action'
2156                                         el.attrs['action'] = a[1]
2157                                         break
2158                         insert_html_element new_open_tag 'hr'
2159                         open_els.shift()
2160                         reconstruct_afe()
2161                         insert_html_element new_open_tag 'label'
2162                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2163                         input_el = new_open_tag 'input'
2164                         prompt = null
2165                         for a in t.attrs_a
2166                                 if a[0] is 'prompt'
2167                                         prompt = a[1]
2168                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2169                                         input_el.attrs_a.push [a[0], a[1]]
2170                         input_el.attrs_a.push ['name', 'isindex']
2171                         # fixfull this next bit is in english... internationalize?
2172                         prompt ?= "This is a searchable index. Enter search keywords: "
2173                         insert_character new_character_token prompt # fixfull split
2174                         # TODO submit typo "balue" in spec
2175                         insert_html_element input_el
2176                         open_els.shift()
2177                         # insert_character '' # you can put chars here if prompt attr missing
2178                         open_els.shift() # presumably pops the label inserted above
2179                         insert_html_element new_open_tag 'hr'
2180                         open_els.shift()
2181                         open_els.shift() # presumably pops the form inserted above
2182                         unless template_tag_is_open()
2183                                 form_element_pointer = null
2184                         return
2185                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2186                         insert_html_element t
2187                         eat_next_token_if_newline()
2188                         tok_state = tok_state_rcdata
2189                         original_ins_mode = ins_mode # remembered so the text mode can switch back
2190                         flag_frameset_ok = false
2191                         ins_mode = ins_mode_text
2192                         return
2193                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2194                         close_p_if_in_button_scope()
2195                         reconstruct_afe()
2196                         flag_frameset_ok = false
2197                         parse_generic_raw_text t
2198                         return
2199                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2200                         flag_frameset_ok = false
2201                         parse_generic_raw_text t
2202                         return
2203                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2204                         parse_generic_raw_text t
2205                         return
2206                 if t.type is TYPE_START_TAG and t.name is 'select'
2207                         reconstruct_afe()
2208                         insert_html_element t
2209                         flag_frameset_ok = false
2210                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2211                                 ins_mode = ins_mode_in_select_in_table
2212                         else
2213                                 ins_mode = ins_mode_in_select
2214                         return
2215                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2216                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML # close an option that is still open
2217                                 open_els.shift()
2218                         reconstruct_afe()
2219                         insert_html_element t
2220                         return
2221 # this comment block implements the W3C spec
2222 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2223 #                       if is_in_scope 'ruby', NS_HTML
2224 #                               generate_implied_end_tags()
2225 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2226 #                                       parse_error()
2227 #                       insert_html_element t
2228 #                       return
2229 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2230 #                       if is_in_scope 'ruby', NS_HTML
2231 #                               generate_implied_end_tags 'rtc' # arg is exception
2232 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2233 #                                       parse_error()
2234 #                       insert_html_element t
2235 #                       return
2236 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2237                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2238                         if is_in_scope 'ruby', NS_HTML
2239                                 generate_implied_end_tags()
2240                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2241                                         parse_error()
2242                         insert_html_element t
2243                         return
2244                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2245                         if is_in_scope 'ruby', NS_HTML
2246                                 generate_implied_end_tags 'rtc' # arg is exception
2247                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2248                                         parse_error()
2249                         insert_html_element t
2250                         return
2251 # end WHATWG chunk
2252                 if t.type is TYPE_START_TAG and t.name is 'math'
2253                         reconstruct_afe()
2254                         adjust_mathml_attributes t
2255                         adjust_foreign_attributes t
2256                         insert_foreign_element t, NS_MATHML
2257                         if t.flag 'self-closing'
2258                                 open_els.shift()
2259                                 t.acknowledge_self_closing()
2260                         return
2261                 if t.type is TYPE_START_TAG and t.name is 'svg'
2262                         reconstruct_afe()
2263                         adjust_svg_attributes t
2264                         adjust_foreign_attributes t
2265                         insert_foreign_element t, NS_SVG
2266                         if t.flag 'self-closing'
2267                                 open_els.shift()
2268                                 t.acknowledge_self_closing()
2269                         return
2270                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2271                         parse_error() # these start tags are ignored in this mode
2272                         return
2273                 if t.type is TYPE_START_TAG # any other start tag
2274                         reconstruct_afe()
2275                         insert_html_element t
2276                         return
2277                 if t.type is TYPE_END_TAG # any other end tag
2278                         in_body_any_other_end_tag t.name
2279                         return
2280                 return
2281
2282         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2283         # "text" insertion mode: text tokens are appended; EOF and every end tag
2284         # pop the current node and switch back to original_ins_mode.
2285         ins_mode_text = (t) ->
2286                 switch t.type
2287                         when TYPE_TEXT
2288                                 insert_character t
2289                         when TYPE_EOF
2290                                 parse_error()
2291                                 if open_els[0].namespace is NS_HTML and open_els[0].name is 'script'
2292                                         open_els[0].flag 'already started', true
2293                                 open_els.shift()
2294                                 ins_mode = original_ins_mode
2295                                 # reprocess the EOF token now that the mode has been switched back
2296                                 process_token t
2297                         when TYPE_END_TAG
2298                                 # fixfull the spec seems to assume that I'm going to run the script
2299                                 # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2300                                 # so for now </script> is treated exactly like any other end tag
2301                                 open_els.shift()
2302                                 ins_mode = original_ins_mode
2303                 return
2306
2307         # the functions below implement the tokenizer states described here:
2308         # http://www.w3.org/TR/html5/syntax.html#tokenization
2309
2310         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
             # Fallback used by the in-table modes for tokens they do not handle
             # themselves: reports a parse error and runs the token through
             # ins_mode_in_body with flag_foster_parenting raised for that one call.
2311         ins_mode_in_table_else = (t) ->
2312                 parse_error()
2313                 flag_foster_parenting = true
2314                 ins_mode_in_body t
2315                 flag_foster_parenting = false
2316                 return
2317         ins_mode_in_table = (t) ->
                     # "in table" insertion mode. Text directly inside table structure is
                     # diverted to ins_mode_in_table_text, table-level start/end tags are
                     # handled here, and whatever is left goes to ins_mode_in_table_else.
2318                 switch t.type
2319                         when TYPE_TEXT
2320                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
                                             # start a fresh text buffer and remember the current mode,
                                             # then let ins_mode_in_table_text look at this same token
2321                                         pending_table_character_tokens = []
2322                                         original_ins_mode = ins_mode
2323                                         ins_mode = ins_mode_in_table_text
2324                                         process_token t
2325                                 else
2326                                         ins_mode_in_table_else t
2327                         when TYPE_COMMENT
2328                                 insert_comment t
2329                         when TYPE_DOCTYPE
2330                                 parse_error()
2331                         when TYPE_START_TAG
2332                                 switch t.name
2333                                         when 'caption'
2334                                                 clear_stack_to_table_context()
2335                                                 afe_push_marker()
2336                                                 insert_html_element t
2337                                                 ins_mode = ins_mode_in_caption
2338                                         when 'colgroup'
2339                                                 clear_stack_to_table_context()
2340                                                 insert_html_element t
2341                                                 ins_mode = ins_mode_in_column_group
2342                                         when 'col'
                                                     # a bare <col> gets an implied <colgroup> and is then
                                                     # reprocessed in the column group mode
2343                                                 clear_stack_to_table_context()
2344                                                 insert_html_element new_open_tag 'colgroup'
2345                                                 ins_mode = ins_mode_in_column_group
2346                                                 process_token t
2347                                         when 'tbody', 'tfoot', 'thead'
2348                                                 clear_stack_to_table_context()
2349                                                 insert_html_element t
2350                                                 ins_mode = ins_mode_in_table_body
2351                                         when 'td', 'th', 'tr'
                                                     # cells and rows get an implied <tbody> and are then
                                                     # reprocessed in the table body mode
2352                                                 clear_stack_to_table_context()
2353                                                 insert_html_element new_open_tag 'tbody'
2354                                                 ins_mode = ins_mode_in_table_body
2355                                                 process_token t
2356                                         when 'table'
                                                     # a nested <table> start tag closes the open table (when one
                                                     # is in table scope) and is then reprocessed
2357                                                 parse_error()
2358                                                 if is_in_table_scope 'table', NS_HTML
2359                                                         loop
2360                                                                 el = open_els.shift()
2361                                                                 if el.name is 'table' and el.namespace is NS_HTML
2362                                                                         break
2363                                                         reset_ins_mode()
2364                                                         process_token t
2365                                         when 'style', 'script', 'template'
2366                                                 ins_mode_in_head t
2367                                         when 'input'
                                                     # only inputs accepted by is_input_hidden_tok are inserted
                                                     # here (and popped again right away); others take the fallback
2368                                                 unless is_input_hidden_tok t
2369                                                         ins_mode_in_table_else t
2370                                                 else
2371                                                         parse_error()
2372                                                         el = insert_html_element t
2373                                                         open_els.shift()
2374                                                         t.acknowledge_self_closing()
2375                                         when 'form'
                                                     # ignored when a form is already recorded or a template is
                                                     # open; otherwise the form is recorded in form_element_pointer
                                                     # but does not stay on the stack of open elements
2376                                                 parse_error()
2377                                                 if form_element_pointer?
2378                                                         return
2379                                                 if template_tag_is_open()
2380                                                         return
2381                                                 form_element_pointer = insert_html_element t
2382                                                 open_els.shift()
2383                                         else
2384                                                 ins_mode_in_table_else t
2385                         when TYPE_END_TAG
2386                                 switch t.name
2387                                         when 'table'
2388                                                 if is_in_table_scope 'table', NS_HTML
2389                                                         loop
2390                                                                 el = open_els.shift()
2391                                                                 if el.name is 'table' and el.namespace is NS_HTML
2392                                                                         break
2393                                                         reset_ins_mode()
2394                                                 else
2395                                                         parse_error()
2396                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
                                                     # these end tags are reported and otherwise ignored
2397                                                 parse_error()
2398                                         when 'template'
2399                                                 ins_mode_in_head t
2400                                         else
2401                                                 ins_mode_in_table_else t
2402                         when TYPE_EOF
2403                                 ins_mode_in_body t
2404                         else
2405                                 ins_mode_in_table_else t
2406                 return
2407
2408
2409         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2410         # Buffers text tokens in pending_table_character_tokens; the first token that
2411         # is not text flushes the buffer and is reprocessed after ins_mode is restored.
2412         ins_mode_in_table_text = (t) ->
2413                 if t.type is TYPE_TEXT
2414                         if t.text is "\u0000"
2415                                 # from javascript?
2416                                 parse_error()
2417                         else
2418                                 pending_table_character_tokens.push t
2419                         return
2420                 # Anything else: decide how to flush, based on whether every buffered token is space
2421                 all_space = true
2422                 for old in pending_table_character_tokens when not is_space_tok old
2423                         all_space = false
2424                         break
2425                 for old in pending_table_character_tokens
2426                         if all_space
2427                                 insert_character old
2428                         else
2429                                 ins_mode_in_table_else old
2430                 pending_table_character_tokens = []
2431                 ins_mode = original_ins_mode
2432                 process_token t
2433                 return
2434
2435         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2436         # "in caption" insertion mode: pops through the open caption on </caption>,
2437         # </table> and table-structure start tags; everything else goes to in body.
2438         ins_mode_in_caption = (t) ->
2439                 if t.type is TYPE_END_TAG and t.name is 'caption'
2440                         unless is_in_table_scope 'caption', NS_HTML
2441                                 parse_error()
2442                                 # fragment case
2443                                 return
2444                         generate_implied_end_tags()
2445                         parse_error() unless open_els[0].name is 'caption'
2446                         loop
2447                                 el = open_els.shift()
2448                                 break if el.namespace is NS_HTML and el.name is 'caption'
2449                         clear_afe_to_marker()
2450                         ins_mode = ins_mode_in_table
2451                         return
2452                 if (t.type is TYPE_END_TAG and t.name is 'table') or (t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'])
2453                         parse_error()
2454                         # without a caption in table scope this is the fragment case: ignore
2455                         return unless is_in_table_scope 'caption', NS_HTML
2456                         loop
2457                                 el = open_els.shift()
2458                                 break if el.namespace is NS_HTML and el.name is 'caption'
2459                         clear_afe_to_marker()
2460                         ins_mode = ins_mode_in_table
2461                         process_token t
2462                         return
2463                 if t.type is TYPE_END_TAG and t.name in ['body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
2464                         parse_error()
2465                         return
2466                 # Anything else
2467                 ins_mode_in_body t
2468                 return
2470
2471         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
             # "in column group" insertion mode: inserts <col> elements and tokens
             # accepted by is_space_tok into the open colgroup; a token without a rule
             # of its own closes the colgroup and is reprocessed by ins_mode_in_table.
2472         ins_mode_in_column_group = (t) ->
2473                 if is_space_tok t
2474                         insert_character t
2475                         return
2476                 if t.type is TYPE_COMMENT
2477                         insert_comment t
2478                         return
2479                 if t.type is TYPE_DOCTYPE
2480                         parse_error()
2481                         return
2482                 if t.type is TYPE_START_TAG and t.name is 'html'
2483                         ins_mode_in_body t
2484                         return
2485                 if t.type is TYPE_START_TAG and t.name is 'col'
                             # col is popped again right after it has been inserted
2486                         el = insert_html_element t
2487                         open_els.shift()
2488                         t.acknowledge_self_closing()
2489                         return
2490                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
                             # the current node (open_els[0]) must be an HTML colgroup; the
                             # namespace lives on that node, not on the open_els stack itself
2491                         if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
2492                                 open_els.shift()
2493                                 ins_mode = ins_mode_in_table
2494                         else
2495                                 parse_error()
2496                         return
2497                 if t.type is TYPE_END_TAG and t.name is 'col'
2498                         parse_error()
2499                         return
2500                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2501                         ins_mode_in_head t
2502                         return
2503                 if t.type is TYPE_EOF
2504                         ins_mode_in_body t
2505                         return
2506                 # Anything else
2507                 if open_els[0].name isnt 'colgroup'
2508                         parse_error()
2509                         return
                     # close the colgroup and let the in-table mode handle the token
2510                 open_els.shift()
2511                 ins_mode = ins_mode_in_table
2512                 process_token t
2513                 return
2514
2515         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2516         # "in table body" insertion mode: opens rows (implying a <tr> for stray cells),
2517         # closes the open tbody/tfoot/thead, and defers the rest to ins_mode_in_table.
2518         ins_mode_in_table_body = (t) ->
2519                 if t.type is TYPE_START_TAG
2520                         if t.name is 'tr'
2521                                 clear_stack_to_table_body_context()
2522                                 insert_html_element t
2523                                 ins_mode = ins_mode_in_row
2524                                 return
2525                         if t.name in ['th', 'td']
2526                                 parse_error()
2527                                 clear_stack_to_table_body_context()
2528                                 insert_html_element new_open_tag 'tr'
2529                                 ins_mode = ins_mode_in_row
2530                                 process_token t
2531                                 return
2532                 if t.type is TYPE_END_TAG and t.name in ['tbody', 'tfoot', 'thead']
2533                         if is_in_table_scope t.name, NS_HTML
2534                                 clear_stack_to_table_body_context()
2535                                 open_els.shift()
2536                                 ins_mode = ins_mode_in_table
2537                         else
2538                                 parse_error()
2539                         return
2540                 if (t.type is TYPE_END_TAG and t.name is 'table') or (t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'])
2541                         has = false # is a tbody/tfoot/thead open before the table scope boundary?
2542                         for el in open_els
2543                                 has = el.namespace is NS_HTML and el.name in ['tbody', 'tfoot', 'thead']
2544                                 break if has or table_scopers[el.name] is el.namespace
2545                         unless has
2546                                 parse_error()
2547                                 return
2548                         clear_stack_to_table_body_context()
2549                         open_els.shift()
2550                         ins_mode = ins_mode_in_table
2551                         process_token t
2552                         return
2553                 if t.type is TYPE_END_TAG and t.name in ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr']
2554                         parse_error()
2555                         return
2556                 # Anything else
2557                 ins_mode_in_table t
2558                 return
2559
2560         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2561         # "in row" insertion mode: opens cells, closes the open row, defers the rest to in table.
2562         ins_mode_in_row = (t) ->
2563                 if t.type is TYPE_START_TAG and t.name in ['th', 'td']
2564                         clear_stack_to_table_row_context()
2565                         insert_html_element t
2566                         ins_mode = ins_mode_in_cell
2567                         afe_push_marker()
2568                         return
2569                 if t.type is TYPE_END_TAG and t.name is 'tr'
2570                         unless is_in_table_scope 'tr', NS_HTML
2571                                 parse_error()
2572                                 return
2573                         clear_stack_to_table_row_context()
2574                         open_els.shift()
2575                         ins_mode = ins_mode_in_table_body
2576                         return
2577                 if (t.type is TYPE_END_TAG and t.name is 'table') or (t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'])
2578                         unless is_in_table_scope 'tr', NS_HTML
2579                                 parse_error()
2580                                 return
2581                         clear_stack_to_table_row_context()
2582                         open_els.shift()
2583                         ins_mode = ins_mode_in_table_body
2584                         process_token t
2585                         return
2586                 if t.type is TYPE_END_TAG and t.name in ['tbody', 'tfoot', 'thead']
2587                         unless is_in_table_scope t.name, NS_HTML
2588                                 parse_error()
2589                                 return
2590                         return unless is_in_table_scope 'tr', NS_HTML # no open row: dropped silently
2591                         clear_stack_to_table_row_context()
2592                         open_els.shift()
2593                         ins_mode = ins_mode_in_table_body
2594                         process_token t
2595                         return
2596                 if t.type is TYPE_END_TAG and t.name in ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th']
2597                         parse_error()
2598                         return
2599                 ins_mode_in_table t # Anything else
2600                 return
2601
2602         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
             # Closes the open td/th: pops the stack of open elements through that
             # cell, calls clear_afe_to_marker and switches to ins_mode_in_row.
2603         close_the_cell = ->
2604                 generate_implied_end_tags()
                     # after the implied end tags the current node should be the cell itself;
                     # compare its name (not the element object) against 'td' / 'th'
2605                 unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
2606                         parse_error()
2607                 loop
2608                         el = open_els.shift()
2609                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2610                                 break
2611                 clear_afe_to_marker()
2612                 ins_mode = ins_mode_in_row
2613                 return
2614
2615         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2616         # "in cell" insertion mode: closes the open cell on its own end tag or on
2617         # table-structure tags; everything else is handled by ins_mode_in_body.
2618         ins_mode_in_cell = (t) ->
2619                 if t.type is TYPE_END_TAG and t.name in ['td', 'th']
2620                         unless is_in_table_scope t.name, NS_HTML
2621                                 parse_error()
2622                                 return
2623                         generate_implied_end_tags()
2624                         parse_error() unless open_els[0].namespace is NS_HTML and open_els[0].name is t.name
2625                         loop
2626                                 el = open_els.shift()
2627                                 break if el.namespace is NS_HTML and el.name is t.name
2628                         clear_afe_to_marker()
2629                         ins_mode = ins_mode_in_row
2630                         return
2631                 if t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
2632                         has = false # is a td/th open before the table scope boundary?
2633                         for el in open_els
2634                                 has = el.namespace is NS_HTML and el.name in ['td', 'th']
2635                                 break if has or table_scopers[el.name] is el.namespace
2636                         unless has
2637                                 parse_error()
2638                                 return
2639                         close_the_cell()
2640                         process_token t
2641                         return
2642                 if t.type is TYPE_END_TAG and t.name in ['body', 'caption', 'col', 'colgroup', 'html']
2643                         parse_error()
2644                         return
2645                 if t.type is TYPE_END_TAG and t.name in ['table', 'tbody', 'tfoot', 'thead', 'tr']
2646                         unless is_in_table_scope t.name, NS_HTML
2647                                 parse_error()
2648                                 return
2649                         close_the_cell()
2650                         process_token t
2651                         return
2652                 # Anything Else
2653                 ins_mode_in_body t
2654                 return
2658
2659         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
             # "in select" insertion mode: manages option/optgroup nesting inside an open
             # <select>, closes the select on </select>, <select>, <input>, <keygen> and
             # <textarea>, and reports a parse error for every token without a rule.
2660         ins_mode_in_select = (t) ->
2661                 if t.type is TYPE_TEXT and t.text is "\u0000"
2662                         parse_error()
2663                         return
2664                 if t.type is TYPE_TEXT
2665                         insert_character t
2666                         return
2667                 if t.type is TYPE_COMMENT
2668                         insert_comment t
2669                         return
2670                 if t.type is TYPE_DOCTYPE
2671                         parse_error()
2672                         return
2673                 if t.type is TYPE_START_TAG and t.name is 'html'
2674                         ins_mode_in_body t
2675                         return
2676                 if t.type is TYPE_START_TAG and t.name is 'option'
                             # a new option implicitly closes the one that is still open
2677                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2678                                 open_els.shift()
2679                         insert_html_element t
2680                         return
2681                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
                             # a new optgroup implicitly closes an open option and an open optgroup
2682                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2683                                 open_els.shift()
2684                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2685                                 open_els.shift()
2686                         insert_html_element t
2687                         return
2688                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2689                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
                                     # the node right below the option must itself be an HTML optgroup,
                                     # so both the name and the namespace are read from open_els[1]
2690                                 if open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
2691                                         open_els.shift()
2692                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2693                                 open_els.shift()
2694                         else
2695                                 parse_error()
2696                         return
2697                 if t.type is TYPE_END_TAG and t.name is 'option'
2698                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2699                                 open_els.shift()
2700                         else
2701                                 parse_error()
2702                         return
2703                 if t.type is TYPE_END_TAG and t.name is 'select'
2704                         if is_in_select_scope 'select', NS_HTML
2705                                 loop
2706                                         el = open_els.shift()
2707                                         if el.name is 'select' and el.namespace is NS_HTML
2708                                                 break
2709                                 reset_ins_mode()
2710                         else
2711                                 parse_error()
2712                         return
2713                 if t.type is TYPE_START_TAG and t.name is 'select'
2714                         parse_error()
2715                         loop
2716                                 el = open_els.shift()
2717                                 if el.name is 'select' and el.namespace is NS_HTML
2718                                         break
2719                         reset_ins_mode()
2720                         # spec says that this is the same as </select> but it doesn't say
2721                         # to check scope first
2722                         return
2723                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
                             # these close the select (when one is in select scope) and are reprocessed
2724                         parse_error()
2725                         unless is_in_select_scope 'select', NS_HTML
2726                                 return
2727                         loop
2728                                 el = open_els.shift()
2729                                 if el.name is 'select' and el.namespace is NS_HTML
2730                                         break
2731                         reset_ins_mode()
2732                         process_token t
2733                         return
                     # <script>, <template> and </template> follow the "in head" rules, as they
                     # do in ins_mode_in_table and ins_mode_in_column_group
2734                 if (t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')) or (t.type is TYPE_END_TAG and t.name is 'template')
2735                         ins_mode_in_head t
2736                         return
2737                 if t.type is TYPE_EOF
2738                         ins_mode_in_body t
2739                         return
2740                 # Anything else
2741                 parse_error()
2742                 return
2743
2744         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2745         # "in select in table" insertion mode: table-structure tags pop through the open
2746         # <select> and are reprocessed; everything else is handled by ins_mode_in_select.
2747         ins_mode_in_select_in_table = (t) ->
2748                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name in ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th']
2749                         parse_error()
2750                         # an end tag is ignored unless its element is open in table scope
2751                         if t.type is TYPE_END_TAG
2752                                 return unless is_in_table_scope t.name, NS_HTML
2753                         loop
2754                                 el = open_els.shift()
2755                                 break if el.namespace is NS_HTML and el.name is 'select'
2756                         reset_ins_mode()
2757                         process_token t
2758                         return
2759                 # Anything else
2760                 ins_mode_in_select t
2761                 return
2769
2770         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2771         # "in template" insertion mode: a start tag replaces the top entry of
2772         # template_ins_modes with the mode chosen for that tag and is reprocessed.
2773         ins_mode_in_template = (t) ->
2774                 if t.type in [TYPE_TEXT, TYPE_COMMENT, TYPE_DOCTYPE]
2775                         ins_mode_in_body t
2776                         return
2777                 if (t.type is TYPE_END_TAG and t.name is 'template') or (t.type is TYPE_START_TAG and t.name in ['base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title'])
2778                         ins_mode_in_head t
2779                         return
2780                 if t.type is TYPE_START_TAG
2781                         template_ins_modes.shift()
2782                         ins_mode = switch t.name
2783                                 when 'caption', 'colgroup', 'tbody', 'tfoot', 'thead' then ins_mode_in_table
2784                                 when 'col' then ins_mode_in_column_group
2785                                 when 'tr' then ins_mode_in_table_body
2786                                 when 'td', 'th' then ins_mode_in_row
2787                                 else ins_mode_in_body
2788                         template_ins_modes.unshift ins_mode
2789                         process_token t
2790                         return
2791                 if t.type is TYPE_END_TAG
2792                         parse_error()
2793                         return
2794                 if t.type is TYPE_EOF
2795                         unless template_tag_is_open()
2796                                 stop_parsing()
2797                                 return
2798                         # EOF inside an open template: pop through it and reprocess the EOF
2799                         parse_error()
2800                         loop
2801                                 el = open_els.shift()
2802                                 break if el.namespace is NS_HTML and el.name is 'template'
2803                         clear_afe_to_marker()
2804                         template_ins_modes.shift()
2805                         reset_ins_mode()
2806                         process_token t
2807                 return
2825
2826         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2827         ins_mode_after_body = (t) ->
2828                 if is_space_tok t
2829                         ins_mode_in_body t
2830                         return
2831                 if t.type is TYPE_COMMENT
2832                         first = open_els[open_els.length - 1]
2833                         insert_comment t, [first, first.children.length]
2834                         return
2835                 if t.type is TYPE_DOCTYPE
2836                         parse_error()
2837                         return
2838                 if t.type is TYPE_START_TAG and t.name is 'html'
2839                         ins_mode_in_body t
2840                         return
2841                 if t.type is TYPE_END_TAG and t.name is 'html'
2842                         if flag_fragment_parsing
2843                                 parse_error()
2844                                 return
2845                         ins_mode = ins_mode_after_after_body
2846                         return
2847                 if t.type is TYPE_EOF
2848                         stop_parsing()
2849                         return
2850                 # Anything ELse
2851                 parse_error()
2852                 ins_mode = ins_mode_in_body
2853                 process_token t
2854                 return
2855
2856         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2857         ins_mode_in_frameset = (t) ->
2858                 if is_space_tok t
2859                         insert_character t
2860                         return
2861                 if t.type is TYPE_COMMENT
2862                         insert_comment t
2863                         return
2864                 if t.type is TYPE_DOCTYPE
2865                         parse_error()
2866                         return
2867                 if t.type is TYPE_START_TAG and t.name is 'html'
2868                         ins_mode_in_body t
2869                         return
2870                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2871                         insert_html_element t
2872                         return
2873                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2874                         if open_els.length is 1
2875                                 parse_error()
2876                                 return # fragment case
2877                         open_els.shift()
2878                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2879                                 ins_mode = ins_mode_after_frameset
2880                         return
2881                 if t.type is TYPE_START_TAG and t.name is 'frame'
2882                         insert_html_element t
2883                         open_els.shift()
2884                         t.acknowledge_self_closing()
2885                         return
2886                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2887                         ins_mode_in_head t
2888                         return
2889                 if t.type is TYPE_EOF
2890                         if open_els.length isnt 1
2891                                 parse_error()
2892                         stop_parsing()
2893                         return
2894                 # Anything else
2895                 parse_error()
2896                 return
2897
2898         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2899         ins_mode_after_frameset = (t) ->
2900                 if is_space_tok t
2901                         insert_character t
2902                         return
2903                 if t.type is TYPE_COMMENT
2904                         insert_comment t
2905                         return
2906                 if t.type is TYPE_DOCTYPE
2907                         parse_error()
2908                         return
2909                 if t.type is TYPE_START_TAG and t.name is 'html'
2910                         ins_mode_in_body t
2911                         return
2912                 if t.type is TYPE_END_TAG and t.name is 'html'
2913                         ins_mode = ins_mode_after_after_frameset
2914                         return
2915                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2916                         ins_mode_in_head t
2917                         return
2918                 if t.type is TYPE_EOF
2919                         stop_parsing()
2920                         return
2921                 # Anything else
2922                 parse_error()
2923                 return
2924
2925         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2926         ins_mode_after_after_body = (t) ->
2927                 if t.type is TYPE_COMMENT
2928                         insert_comment t, [doc, doc.children.length]
2929                         return
2930                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2931                         ins_mode_in_body t
2932                         return
2933                 if t.type is TYPE_EOF
2934                         stop_parsing()
2935                         return
2936                 # Anything else
2937                 parse_error()
2938                 ins_mode = ins_mode_in_body
2939                 process_token t
2940                 return
2941
2942         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2943         ins_mode_after_after_frameset = (t) ->
2944                 if t.type is TYPE_COMMENT
2945                         insert_comment t, [doc, doc.children.length]
2946                         return
2947                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2948                         ins_mode_in_body t
2949                         return
2950                 if t.type is TYPE_EOF
2951                         stop_parsing()
2952                         return
2953                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2954                         ins_mode_in_head t
2955                         return
2956                 # Anything else
2957                 parse_error()
2958                 return
2959
2960         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2961         has_color_face_or_size = (t) ->
2962                 for a in t.attrs_a
2963                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2964                                 return true
2965                 return false
2966         in_foreign_content_end_script = ->
2967                 open_els.shift()
2968                 # fixfull
2969                 return
2970         in_foreign_content_other_start = (t) ->
2971                 acn = adjusted_current_node()
2972                 if acn.namespace is NS_MATHML
2973                         adjust_mathml_attributes t
2974                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2975                         t.name = svg_name_fixes[t.name]
2976                 if acn.namespace is NS_SVG
2977                         adjust_svg_attributes t
2978                 adjust_foreign_attributes t
2979                 insert_foreign_element t, acn.namespace
2980                 if t.flag 'self-closing'
2981                         if t.name is 'script'
2982                                 t.acknowledge_self_closing()
2983                                 in_foreign_content_end_script()
2984                                 # fixfull
2985                         else
2986                                 open_els.shift()
2987                                 t.acknowledge_self_closing()
2988                 return
2989         in_foreign_content = (t) ->
2990                 if t.type is TYPE_TEXT and t.text is "\u0000"
2991                         parse_error()
2992                         insert_character new_character_token "\ufffd"
2993                         return
2994                 if is_space_tok t
2995                         insert_character t
2996                         return
2997                 if t.type is TYPE_TEXT
2998                         flag_frameset_ok = false
2999                         insert_character t
3000                         return
3001                 if t.type is TYPE_COMMENT
3002                         insert_comment t
3003                         return
3004                 if t.type is TYPE_DOCTYPE
3005                         parse_error()
3006                         return
3007                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
3008                         parse_error()
3009                         if flag_fragment_parsing
3010                                 in_foreign_content_other_start t
3011                                 return
3012                         loop # is this safe?
3013                                 open_els.shift()
3014                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
3015                                         break
3016                         process_token t
3017                         return
3018                 if t.type is TYPE_START_TAG
3019                         in_foreign_content_other_start t
3020                         return
3021                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
3022                         in_foreign_content_end_script()
3023                         return
3024                 if t.type is TYPE_END_TAG
3025                         i = 0
3026                         node = open_els[i]
3027                         if node.name.toLowerCase() isnt t.name
3028                                 parse_error()
3029                         loop
3030                                 if node is open_els[open_els.length - 1]
3031                                         return
3032                                 if node.name.toLowerCase() is t.name
3033                                         loop
3034                                                 el = open_els.shift()
3035                                                 if el is node
3036                                                         return
3037                                 i += 1
3038                                 node = open_els[i]
3039                                 if node.namespace is NS_HTML
3040                                         break
3041                         ins_mode t # explicitly call HTML insertion mode
3042                 return
3043
3044
3045         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
3046         tok_state_data = ->
3047                 switch c = txt.charAt(cur++)
3048                         when '&'
3049                                 return new_text_node parse_character_reference()
3050                         when '<'
3051                                 tok_state = tok_state_tag_open
3052                         when "\u0000"
3053                                 parse_error()
3054                                 return new_text_node c
3055                         when '' # EOF
3056                                 return new_eof_token()
3057                         else
3058                                 return new_text_node c
3059                 return null
3060
3061         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3062         # not needed: tok_state_character_reference_in_data = ->
3063         # just call parse_character_reference()
3064
3065         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
3066         tok_state_rcdata = ->
3067                 switch c = txt.charAt(cur++)
3068                         when '&'
3069                                 return new_text_node parse_character_reference()
3070                         when '<'
3071                                 tok_state = tok_state_rcdata_less_than_sign
3072                         when "\u0000"
3073                                 parse_error()
3074                                 return new_character_token "\ufffd"
3075                         when '' # EOF
3076                                 return new_eof_token()
3077                         else
3078                                 return new_character_token c
3079                 return null
3080
3081         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3082         # not needed: tok_state_character_reference_in_rcdata = ->
3083         # just call parse_character_reference()
3084
3085         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3086         tok_state_rawtext = ->
3087                 switch c = txt.charAt(cur++)
3088                         when '<'
3089                                 tok_state = tok_state_rawtext_less_than_sign
3090                         when "\u0000"
3091                                 parse_error()
3092                                 return new_character_token "\ufffd"
3093                         when '' # EOF
3094                                 return new_eof_token()
3095                         else
3096                                 return new_character_token c
3097                 return null
3098
3099         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3100         tok_state_script_data = ->
3101                 switch c = txt.charAt(cur++)
3102                         when '<'
3103                                 tok_state = tok_state_script_data_less_than_sign
3104                         when "\u0000"
3105                                 parse_error()
3106                                 return new_character_token "\ufffd"
3107                         when '' # EOF
3108                                 return new_eof_token()
3109                         else
3110                                 return new_character_token c
3111                 return null
3112
3113         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3114         tok_state_plaintext = ->
3115                 switch c = txt.charAt(cur++)
3116                         when "\u0000"
3117                                 parse_error()
3118                                 return new_character_token "\ufffd"
3119                         when '' # EOF
3120                                 return new_eof_token()
3121                         else
3122                                 return new_character_token c
3123                 return null
3124
3125
3126         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3127         tok_state_tag_open = ->
3128                 c = txt.charAt(cur++)
3129                 if c is '!'
3130                         tok_state = tok_state_markup_declaration_open
3131                         return
3132                 if c is '/'
3133                         tok_state = tok_state_end_tag_open
3134                         return
3135                 if is_uc_alpha(c)
3136                         tok_cur_tag = new_open_tag c.toLowerCase()
3137                         tok_state = tok_state_tag_name
3138                         return
3139                 if is_lc_alpha(c)
3140                         tok_cur_tag = new_open_tag c
3141                         tok_state = tok_state_tag_name
3142                         return
3143                 if c is '?'
3144                         parse_error()
3145                         tok_cur_tag = new_comment_token '?' # FIXME right?
3146                         tok_state = tok_state_bogus_comment
3147                         return
3148                 # Anything else
3149                 parse_error()
3150                 tok_state = tok_state_data
3151                 cur -= 1 # we didn't parse/handle the char after <
3152                 return new_text_node '<'
3153
3154         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3155         tok_state_end_tag_open = ->
3156                 c = txt.charAt(cur++)
3157                 if is_uc_alpha(c)
3158                         tok_cur_tag = new_end_tag c.toLowerCase()
3159                         tok_state = tok_state_tag_name
3160                         return
3161                 if is_lc_alpha(c)
3162                         tok_cur_tag = new_end_tag c
3163                         tok_state = tok_state_tag_name
3164                         return
3165                 if c is '>'
3166                         parse_error()
3167                         tok_state = tok_state_data
3168                         return
3169                 if c is '' # EOF
3170                         parse_error()
3171                         tok_state = tok_state_data
3172                         return new_text_node '</'
3173                 # Anything else
3174                 parse_error()
3175                 tok_cur_tag = new_comment_token c
3176                 tok_state = tok_state_bogus_comment
3177                 return null
3178
3179         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3180         tok_state_tag_name = ->
3181                 switch c = txt.charAt(cur++)
3182                         when "\t", "\n", "\u000c", ' '
3183                                 tok_state = tok_state_before_attribute_name
3184                         when '/'
3185                                 tok_state = tok_state_self_closing_start_tag
3186                         when '>'
3187                                 tok_state = tok_state_data
3188                                 tmp = tok_cur_tag
3189                                 tok_cur_tag = null
3190                                 return tmp
3191                         when "\u0000"
3192                                 parse_error()
3193                                 tok_cur_tag.name += "\ufffd"
3194                         when '' # EOF
3195                                 parse_error()
3196                                 tok_state = tok_state_data
3197                         else
3198                                 if is_uc_alpha(c)
3199                                         tok_cur_tag.name += c.toLowerCase()
3200                                 else
3201                                         tok_cur_tag.name += c
3202                 return null
3203
3204         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3205         tok_state_rcdata_less_than_sign = ->
3206                 c = txt.charAt(cur++)
3207                 if c is '/'
3208                         temporary_buffer = ''
3209                         tok_state = tok_state_rcdata_end_tag_open
3210                         return null
3211                 # Anything else
3212                 tok_state = tok_state_rcdata
3213                 cur -= 1 # reconsume the input character
3214                 return new_character_token '<'
3215
3216         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3217         tok_state_rcdata_end_tag_open = ->
3218                 c = txt.charAt(cur++)
3219                 if is_uc_alpha(c)
3220                         tok_cur_tag = new_end_tag c.toLowerCase()
3221                         temporary_buffer += c
3222                         tok_state = tok_state_rcdata_end_tag_name
3223                         return null
3224                 if is_lc_alpha(c)
3225                         tok_cur_tag = new_end_tag c
3226                         temporary_buffer += c
3227                         tok_state = tok_state_rcdata_end_tag_name
3228                         return null
3229                 # Anything else
3230                 tok_state = tok_state_rcdata
3231                 cur -= 1 # reconsume the input character
3232                 return new_character_token "</" # fixfull separate these
3233
3234         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3235         is_appropriate_end_tag = (t) ->
3236                 # fixfull: this assumes that open_els[0].name is "the tag name of the last
3237                 # start tag to have been emitted from this tokenizer"
3238                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3239
3240         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3241         tok_state_rcdata_end_tag_name = ->
3242                 c = txt.charAt(cur++)
3243                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3244                         if is_appropriate_end_tag tok_cur_tag
3245                                 tok_state = tok_state_before_attribute_name
3246                                 return
3247                         # else fall through to "Anything else"
3248                 if c is '/'
3249                         if is_appropriate_end_tag tok_cur_tag
3250                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3251                                 return
3252                         # else fall through to "Anything else"
3253                 if c is '>'
3254                         if is_appropriate_end_tag tok_cur_tag
3255                                 tok_state = tok_state_data
3256                                 return tok_cur_tag
3257                         # else fall through to "Anything else"
3258                 if is_uc_alpha(c)
3259                         tok_cur_tag.name += c.toLowerCase()
3260                         temporary_buffer += c
3261                         return null
3262                 if is_lc_alpha(c)
3263                         tok_cur_tag.name += c
3264                         temporary_buffer += c
3265                         return null
3266                 # Anything else
3267                 tok_state = tok_state_rcdata
3268                 cur -= 1 # reconsume the input character
3269                 return new_character_token '</' + temporary_buffer # fixfull separate these
3270
3271         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3272         tok_state_rawtext_less_than_sign = ->
3273                 c = txt.charAt(cur++)
3274                 if c is '/'
3275                         temporary_buffer = ''
3276                         tok_state = tok_state_rawtext_end_tag_open
3277                         return null
3278                 # Anything else
3279                 tok_state = tok_state_rawtext
3280                 cur -= 1 # reconsume the input character
3281                 return new_character_token '<'
3282
3283         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3284         tok_state_rawtext_end_tag_open = ->
3285                 c = txt.charAt(cur++)
3286                 if is_uc_alpha(c)
3287                         tok_cur_tag = new_end_tag c.toLowerCase()
3288                         temporary_buffer += c
3289                         tok_state = tok_state_rawtext_end_tag_name
3290                         return null
3291                 if is_lc_alpha(c)
3292                         tok_cur_tag = new_end_tag c
3293                         temporary_buffer += c
3294                         tok_state = tok_state_rawtext_end_tag_name
3295                         return null
3296                 # Anything else
3297                 tok_state = tok_state_rawtext
3298                 cur -= 1 # reconsume the input character
3299                 return new_character_token "</" # fixfull separate these
3300
3301         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3302         tok_state_rawtext_end_tag_name = ->
3303                 c = txt.charAt(cur++)
3304                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3305                         if is_appropriate_end_tag tok_cur_tag
3306                                 tok_state = tok_state_before_attribute_name
3307                                 return
3308                         # else fall through to "Anything else"
3309                 if c is '/'
3310                         if is_appropriate_end_tag tok_cur_tag
3311                                 tok_state = tok_state_self_closing_start_tag
3312                                 return
3313                         # else fall through to "Anything else"
3314                 if c is '>'
3315                         if is_appropriate_end_tag tok_cur_tag
3316                                 tok_state = tok_state_data
3317                                 return tok_cur_tag
3318                         # else fall through to "Anything else"
3319                 if is_uc_alpha(c)
3320                         tok_cur_tag.name += c.toLowerCase()
3321                         temporary_buffer += c
3322                         return null
3323                 if is_lc_alpha(c)
3324                         tok_cur_tag.name += c
3325                         temporary_buffer += c
3326                         return null
3327                 # Anything else
3328                 tok_state = tok_state_rawtext
3329                 cur -= 1 # reconsume the input character
3330                 return new_character_token '</' + temporary_buffer # fixfull separate these
3331
3332         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3333         tok_state_script_data_less_than_sign = ->
3334                 c = txt.charAt(cur++)
3335                 if c is '/'
3336                         temporary_buffer = ''
3337                         tok_state = tok_state_script_data_end_tag_open
3338                         return
3339                 if c is '!'
3340                         tok_state = tok_state_script_data_escape_start
3341                         return new_character_token '<!' # fixfull split
3342                 # Anything else
3343                 tok_state = tok_state_script_data
3344                 cur -= 1 # Reconsume
3345                 return new_character_token '<'
3346
3347         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3348         tok_state_script_data_end_tag_open = ->
3349                 c = txt.charAt(cur++)
3350                 if is_uc_alpha(c)
3351                         tok_cur_tag = new_end_tag c.toLowerCase()
3352                         temporary_buffer += c
3353                         tok_state = tok_state_script_data_end_tag_name
3354                         return
3355                 if is_lc_alpha(c)
3356                         tok_cur_tag = new_end_tag c
3357                         temporary_buffer += c
3358                         tok_state = tok_state_script_data_end_tag_name
3359                         return
3360                 # Anything else
3361                 tok_state = tok_state_script_data
3362                 cur -= 1 # Reconsume
3363                 return new_character_token '</'
3364
        # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
3366         tok_state_script_data_end_tag_name = ->
3367                 c = txt.charAt(cur++)
3368                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3369                         if is_appropriate_end_tag tok_cur_tag
3370                                 tok_state = tok_state_before_attribute_name
3371                                 return
3372                         # fall through
3373                 if c is '/'
3374                         if is_appropriate_end_tag tok_cur_tag
3375                                 tok_state = tok_state_self_closing_start_tag
3376                                 return
3377                         # fall through
3378                 if c is '>'
3379                         if is_appropriate_end_tag tok_cur_tag
3380                                 tok_state = tok_state_data
3381                                 return tok_cur_tag
3382                         # fall through
3383                 if is_uc_alpha(c)
3384                         tok_cur_tag.name += c.toLowerCase()
3385                         temporary_buffer += c
3386                         return
3387                 if is_lc_alpha(c)
3388                         tok_cur_tag.name += c
3389                         temporary_buffer += c
3390                         return
3391                 # Anything else
3392                 tok_state = tok_state_script_data
3393                 cur -= 1 # Reconsume
3394                 return new_character_token "</#{temporary_buffer}" # fixfull split
3395
3396         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3397         tok_state_script_data_escape_start = ->
                     # Script data escape start state: a '-' is returned as a character
                     # token and selects the escape start dash state; any other
                     # character is put back and handled by the script data state.
                     switch c = txt.charAt(cur++)
                             when '-'
                                     tok_state = tok_state_script_data_escape_start_dash
                                     return new_character_token '-'
                             else
                                     # Anything else
                                     tok_state = tok_state_script_data
                                     cur -= 1 # Reconsume
                                     return
3406
3407         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3408         tok_state_script_data_escape_start_dash = ->
                     # Script data escape start dash state: a second '-' is returned as
                     # a character token and selects the escaped dash dash state; any
                     # other character is put back for the script data state.
                     c = txt.charAt(cur++)
                     unless c is '-'
                             # Anything else
                             tok_state = tok_state_script_data
                             cur -= 1 # Reconsume
                             return
                     tok_state = tok_state_script_data_escaped_dash_dash
                     return new_character_token '-'
3417
3418         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3419         tok_state_script_data_escaped = ->
                     # Script data escaped state: returns one character token per input
                     # character, except '<' (no token, state change only) and EOF (parse
                     # error, back to the data state with the EOF put back).
                     switch c = txt.charAt(cur++)
                             when '-'
                                     tok_state = tok_state_script_data_escaped_dash
                                     return new_character_token '-'
                             when '<'
                                     tok_state = tok_state_script_data_escaped_less_than_sign
                                     return
                             when "\u0000"
                                     # NUL is reported and replaced by U+FFFD
                                     parse_error()
                                     return new_character_token "\ufffd"
                             when '' # EOF
                                     tok_state = tok_state_data
                                     parse_error()
                                     cur -= 1 # Reconsume
                                     return
                             else
                                     # Anything else
                                     return new_character_token c
3437
3438         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3439         tok_state_script_data_escaped_dash = ->
                     # Script data escaped dash state (one '-' seen): another '-' moves
                     # to the dash dash state, '<' to the less-than sign state, and
                     # every other character except EOF returns to the escaped state.
                     switch c = txt.charAt(cur++)
                             when '-'
                                     tok_state = tok_state_script_data_escaped_dash_dash
                                     return new_character_token '-'
                             when '<'
                                     tok_state = tok_state_script_data_escaped_less_than_sign
                                     return
                             when "\u0000"
                                     # NUL is reported and replaced by U+FFFD
                                     parse_error()
                                     tok_state = tok_state_script_data_escaped
                                     return new_character_token "\ufffd"
                             when '' # EOF
                                     tok_state = tok_state_data
                                     parse_error()
                                     cur -= 1 # Reconsume
                                     return
                             else
                                     # Anything else
                                     tok_state = tok_state_script_data_escaped
                                     return new_character_token c
3459
3460         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3461         tok_state_script_data_escaped_dash_dash = ->
                     # Script data escaped dash dash state ("--" seen): further dashes
                     # stay here, '>' goes back to plain script data, '<' starts a
                     # possible tag, and anything else except EOF returns to the
                     # escaped state.
                     switch c = txt.charAt(cur++)
                             when '-'
                                     # state is left unchanged
                                     return new_character_token '-'
                             when '<'
                                     tok_state = tok_state_script_data_escaped_less_than_sign
                                     return
                             when '>'
                                     tok_state = tok_state_script_data
                                     return new_character_token '>'
                             when "\u0000"
                                     # NUL is reported and replaced by U+FFFD
                                     parse_error()
                                     tok_state = tok_state_script_data_escaped
                                     return new_character_token "\ufffd"
                             when '' # EOF
                                     parse_error()
                                     tok_state = tok_state_data
                                     cur -= 1 # Reconsume
                                     return
                             else
                                     # Anything else
                                     tok_state = tok_state_script_data_escaped
                                     return new_character_token c
3483
3484         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3485         tok_state_script_data_escaped_less_than_sign = ->
                     # Script data escaped less-than sign state ('<' seen while escaped).
                     # '/' may begin an end tag; a letter may begin a nested "<script"
                     # (double escape) and is returned together with the pending '<';
                     # anything else returns just the '<' and is read again.
                     c = txt.charAt(cur++)
                     switch
                             when c is '/'
                                     temporary_buffer = ''
                                     tok_state = tok_state_script_data_escaped_end_tag_open
                                     return
                             when is_uc_alpha(c)
                                     # buffer holds the lowercase form, the token the original
                                     temporary_buffer = c.toLowerCase() # yes, really
                                     tok_state = tok_state_script_data_double_escape_start
                                     return new_character_token "<#{c}" # fixfull split
                             when is_lc_alpha(c)
                                     temporary_buffer = c
                                     tok_state = tok_state_script_data_double_escape_start
                                     return new_character_token "<#{c}" # fixfull split
                             else
                                     # Anything else
                                     tok_state = tok_state_script_data_escaped
                                     cur -= 1 # Reconsume
                                     return new_character_token '<'
3503
3504         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3505         tok_state_script_data_escaped_end_tag_open = ->
                     # Script data escaped end tag open state ("</" seen while escaped).
                     # A letter starts a new end tag token (name lowercased) and is also
                     # appended to temporary_buffer; anything else returns "</" as
                     # character data and is read again in the escaped state.
                     c = txt.charAt(cur++)
                     if is_uc_alpha(c)
                             tok_cur_tag = new_end_tag c.toLowerCase()
                     else if is_lc_alpha(c)
                             tok_cur_tag = new_end_tag c
                     else
                             # Anything else
                             tok_state = tok_state_script_data_escaped
                             cur -= 1 # Reconsume
                             return new_character_token '</' # fixfull split
                     # only letters get here
                     temporary_buffer += c
                     tok_state = tok_state_script_data_escaped_end_tag_name
                     return
3521
3522         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3523         tok_state_script_data_escaped_end_tag_name = ->
                     # Script data escaped end tag name state. Reads one character of a
                     # candidate end tag name inside escaped script data.
                     #
                     # Whitespace, '/' and '>' finish the tag only when
                     # is_appropriate_end_tag accepts tok_cur_tag; otherwise they fall
                     # through to "Anything else". Letters extend the tag name
                     # (lowercased) and temporary_buffer (unchanged).
                     #
                     # Returns tok_cur_tag on an appropriate '>', a character token
                     # holding "</" plus temporary_buffer in the "Anything else" case,
                     # and nothing otherwise.
3524                 c = txt.charAt(cur++)
3525                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3526                         if is_appropriate_end_tag tok_cur_tag
3527                                 tok_state = tok_state_before_attribute_name
3528                                 return
3529                         # fall through
3530                 if c is '/'
3531                         if is_appropriate_end_tag tok_cur_tag
3532                                 tok_state = tok_state_self_closing_start_tag
3533                                 return
3534                         # fall through
3535                 if c is '>'
3536                         if is_appropriate_end_tag tok_cur_tag
3537                                 tok_state = tok_state_data
3538                                 return tok_cur_tag
3539                         # fall through
3540                 if is_uc_alpha(c)
3541                         tok_cur_tag.name += c.toLowerCase()
                             # The buffer is re-emitted as text when this turns out not to
                             # be an end tag, so it must keep the character exactly as it
                             # appeared in the input (not lowercased).
3542                         temporary_buffer += c
3543                         return
3544                 if is_lc_alpha(c)
3545                         tok_cur_tag.name += c
3546                         temporary_buffer += c
3547                         return
3548                 # Anything else
3549                 tok_state = tok_state_script_data_escaped
3550                 cur -= 1 # Reconsume
3551                 return new_character_token "</#{temporary_buffer}" # fixfull split
3552
3553         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3554         tok_state_script_data_double_escape_start = ->
                     # Script data double escape start state. Letters are returned as
                     # character tokens and collected (lowercased) in temporary_buffer.
                     # A terminator (whitespace, '/' or '>') enters the double escaped
                     # state only if the buffer spells "script". Anything else is put
                     # back for the escaped state.
                     c = txt.charAt(cur++)
                     switch c
                             when "\t", "\u000a", "\u000c", ' ', '/', '>'
                                     tok_state = if temporary_buffer is 'script'
                                             tok_state_script_data_double_escaped
                                     else
                                             tok_state_script_data_escaped
                                     return new_character_token c
                     if is_uc_alpha(c)
                             temporary_buffer += c.toLowerCase() # yes, really lowercase
                             return new_character_token c
                     if is_lc_alpha(c)
                             temporary_buffer += c
                             return new_character_token c
                     # Anything else
                     tok_state = tok_state_script_data_escaped
                     cur -= 1 # Reconsume
                     return
3572
3573         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3574         tok_state_script_data_double_escaped = ->
                     # Script data double escaped state: every character except EOF is
                     # returned as a character token (NUL as U+FFFD); '-' and '<'
                     # additionally select the matching follow-up state.
                     switch c = txt.charAt(cur++)
                             when '-'
                                     tok_state = tok_state_script_data_double_escaped_dash
                                     return new_character_token '-'
                             when '<'
                                     tok_state = tok_state_script_data_double_escaped_less_than_sign
                                     return new_character_token '<'
                             when "\u0000"
                                     parse_error()
                                     return new_character_token "\ufffd"
                             when '' # EOF
                                     parse_error()
                                     tok_state = tok_state_data
                                     cur -= 1 # Reconsume
                                     return
                             else
                                     # Anything else
                                     return new_character_token c
3592
3593         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3594         tok_state_script_data_double_escaped_dash = ->
                     # Script data double escaped dash state (one '-' seen): a second
                     # '-' moves to the dash dash state, '<' to the less-than sign
                     # state, and everything else except EOF back to double escaped.
                     switch c = txt.charAt(cur++)
                             when '-'
                                     tok_state = tok_state_script_data_double_escaped_dash_dash
                                     return new_character_token '-'
                             when '<'
                                     tok_state = tok_state_script_data_double_escaped_less_than_sign
                                     return new_character_token '<'
                             when "\u0000"
                                     # NUL is reported and replaced by U+FFFD
                                     parse_error()
                                     tok_state = tok_state_script_data_double_escaped
                                     return new_character_token "\ufffd"
                             when '' # EOF
                                     parse_error()
                                     tok_state = tok_state_data
                                     cur -= 1 # Reconsume
                                     return
                             else
                                     # Anything else
                                     tok_state = tok_state_script_data_double_escaped
                                     return new_character_token c
3614
3615         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3616         tok_state_script_data_double_escaped_dash_dash = ->
                     # Script data double escaped dash dash state ("--" seen): further
                     # dashes stay here, '>' returns to plain script data, '<' starts a
                     # possible "</script", and anything else except EOF goes back to
                     # the double escaped state.
                     switch c = txt.charAt(cur++)
                             when '-'
                                     # state is left unchanged
                                     return new_character_token '-'
                             when '<'
                                     tok_state = tok_state_script_data_double_escaped_less_than_sign
                                     return new_character_token '<'
                             when '>'
                                     tok_state = tok_state_script_data
                                     return new_character_token '>'
                             when "\u0000"
                                     # NUL is reported and replaced by U+FFFD
                                     parse_error()
                                     tok_state = tok_state_script_data_double_escaped
                                     return new_character_token "\ufffd"
                             when '' # EOF
                                     parse_error()
                                     tok_state = tok_state_data
                                     cur -= 1 # Reconsume
                                     return
                             else
                                     # Anything else
                                     tok_state = tok_state_script_data_double_escaped
                                     return new_character_token c
3638
3639         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3640         tok_state_script_data_double_escaped_less_than_sign = ->
                     # Script data double escaped less-than sign state: '/' clears
                     # temporary_buffer, is returned as a character token and selects
                     # the double escape end state; any other character is put back for
                     # the double escaped state.
                     c = txt.charAt(cur++)
                     unless c is '/'
                             # Anything else
                             tok_state = tok_state_script_data_double_escaped
                             cur -= 1 # Reconsume
                             return
                     temporary_buffer = ''
                     tok_state = tok_state_script_data_double_escape_end
                     return new_character_token '/'
3650
3651         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3652         tok_state_script_data_double_escape_end = ->
                     # Script data double escape end state. Letters are returned as
                     # character tokens and collected (lowercased) in temporary_buffer.
                     # A terminator (whitespace, '/' or '>') drops back to the singly
                     # escaped state only if the buffer spells "script"; otherwise the
                     # double escaped state continues.
                     c = txt.charAt(cur++)
                     if c in ["\t", "\u000a", "\u000c", ' ', '/', '>']
                             if temporary_buffer is 'script'
                                     tok_state = tok_state_script_data_escaped
                             else
                                     tok_state = tok_state_script_data_double_escaped
                             return new_character_token c
                     else if is_uc_alpha(c)
                             temporary_buffer += c.toLowerCase() # yes, really lowercase
                             return new_character_token c
                     else if is_lc_alpha(c)
                             temporary_buffer += c
                             return new_character_token c
                     # Anything else
                     tok_state = tok_state_script_data_double_escaped
                     cur -= 1 # Reconsume
                     return
3670
3671         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3672         tok_state_before_attribute_name = ->
                     # Before attribute name state. Skips whitespace, moves to the
                     # self-closing state on '/', finishes the tag on '>', and otherwise
                     # starts a new attribute: a [name, ''] pair is put at the FRONT of
                     # tok_cur_tag.attrs_a, so the newest attribute is attrs_a[0].
                     #
                     # Returns the finished tag token on '>' (tok_cur_tag is cleared),
                     # null in every other case.
3673                 attr_name = null
3674                 switch c = txt.charAt(cur++)
3675                         when "\t", "\n", "\u000c", ' '
3676                                 return null
3677                         when '/'
3678                                 tok_state = tok_state_self_closing_start_tag
3679                                 return null
3680                         when '>'
3681                                 tok_state = tok_state_data
3682                                 tmp = tok_cur_tag
3683                                 tok_cur_tag = null
3684                                 return tmp
3685                         when "\u0000"
3686                                 parse_error()
3687                                 attr_name = "\ufffd"
3688                         when '"', "'", '<', '='
                                     # reported, but still used as the first name character
3689                                 parse_error()
3690                                 attr_name = c
3691                         when '' # EOF
3692                                 parse_error()
3693                                 tok_state = tok_state_data
                                     # Put the EOF back (as the other states in this file do)
                                     # so cur does not run past the end of the input.
                                     cur -= 1 # Reconsume
3694                         else
3695                                 if is_uc_alpha(c)
3696                                         attr_name = c.toLowerCase()
3697                                 else
3698                                         attr_name = c
                     # attr_name stays null only in the EOF case
3699                 if attr_name?
3700                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3701                         tok_state = tok_state_attribute_name
3702                 return null
3703
3704         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3705         tok_state_attribute_name = ->
                     # Attribute name state. Appends characters (uppercase letters
                     # lowercased, NUL as U+FFFD) to the name of the attribute at
                     # tok_cur_tag.attrs_a[0] until whitespace, '/', '=' or '>' ends it.
                     #
                     # Returns the finished tag token on '>' (tok_cur_tag is cleared),
                     # null in every other case.
3706                 switch c = txt.charAt(cur++)
3707                         when "\t", "\n", "\u000c", ' '
3708                                 tok_state = tok_state_after_attribute_name
3709                         when '/'
3710                                 tok_state = tok_state_self_closing_start_tag
3711                         when '='
3712                                 tok_state = tok_state_before_attribute_value
3713                         when '>'
3714                                 tok_state = tok_state_data
3715                                 tmp = tok_cur_tag
3716                                 tok_cur_tag = null
3717                                 return tmp
3718                         when "\u0000"
3719                                 parse_error()
3720                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3721                         when '"', "'", '<'
                                     # reported, but still becomes part of the name
3722                                 parse_error()
3723                                 tok_cur_tag.attrs_a[0][0] += c
3724                         when '' # EOF
3725                                 parse_error()
3726                                 tok_state = tok_state_data
                                     # Put the EOF back (as the other states in this file do)
                                     # so cur does not run past the end of the input.
                                     cur -= 1 # Reconsume
3727                         else
3728                                 if is_uc_alpha(c)
3729                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3730                                 else
3731                                         tok_cur_tag.attrs_a[0][0] += c
3732                 return null
3733
3734         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3735         tok_state_after_attribute_name = ->
                     # After attribute name state. Skips whitespace, then either ends
                     # the tag ('>' returns tok_cur_tag), goes to the self-closing or
                     # before-attribute-value state, or starts another attribute by
                     # putting a [name, ''] pair at the front of tok_cur_tag.attrs_a.
                     switch c = txt.charAt(cur++)
                             when "\t", "\n", "\u000c", ' '
                                     return
                             when '/'
                                     tok_state = tok_state_self_closing_start_tag
                                     return
                             when '='
                                     tok_state = tok_state_before_attribute_value
                                     return
                             when '>'
                                     tok_state = tok_state_data
                                     return tok_cur_tag
                             when "\u0000"
                                     parse_error()
                                     tok_cur_tag.attrs_a.unshift ["\ufffd", '']
                                     tok_state = tok_state_attribute_name
                                     return
                             when '' # EOF
                                     parse_error()
                                     tok_state = tok_state_data
                                     cur -= 1 # reconsume
                                     return
                             when '"', "'", '<'
                                     # reported, then handled as "Anything else" below
                                     parse_error()
                             else
                                     if is_uc_alpha(c)
                                             tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
                                             tok_state = tok_state_attribute_name
                                             return
                     # Anything else
                     tok_cur_tag.attrs_a.unshift [c, '']
                     tok_state = tok_state_attribute_name
                     return
3769
3770         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3771         tok_state_before_attribute_value = ->
                     # Before attribute value state (just after the '='). Skips
                     # whitespace, then picks the double-quoted, single-quoted or
                     # unquoted value state. Value text is appended to
                     # tok_cur_tag.attrs_a[0][1].
                     #
                     # Returns the finished tag token on '>' (tok_cur_tag is cleared),
                     # null in every other case.
3772                 switch c = txt.charAt(cur++)
3773                         when "\t", "\n", "\u000c", ' '
3774                                 return null
3775                         when '"'
3776                                 tok_state = tok_state_attribute_value_double_quoted
3777                         when '&'
                                     # put the '&' back so the unquoted state handles it
3778                                 tok_state = tok_state_attribute_value_unquoted
3779                                 cur -= 1
3780                         when "'"
3781                                 tok_state = tok_state_attribute_value_single_quoted
3782                         when "\u0000"
                                     # NUL is a parse error and is replaced by U+FFFD
                                     parse_error()
3784                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3785                                 tok_state = tok_state_attribute_value_unquoted
3786                         when '>'
                                     # '=' followed directly by '>' is a parse error
                                     parse_error()
3788                                 tok_state = tok_state_data
3789                                 tmp = tok_cur_tag
3790                                 tok_cur_tag = null
3791                                 return tmp
3792                         when '' # EOF
3793                                 parse_error()
3794                                 tok_state = tok_state_data
                                     # Put the EOF back (as the other states in this file do)
                                     # so cur does not run past the end of the input.
                                     cur -= 1 # Reconsume
3795                         else
                                     # '<', '=' and '`' are parse errors, but are still
                                     # taken as the first character of the value
                                     parse_error() if c is '<' or c is '=' or c is '`'
3796                                 tok_cur_tag.attrs_a[0][1] += c
3797                                 tok_state = tok_state_attribute_value_unquoted
3798                 return null
3799
3800         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3801         tok_state_attribute_value_double_quoted = ->
                     # Attribute value (double-quoted) state. Appends characters to the
                     # value of the attribute at tok_cur_tag.attrs_a[0] until the
                     # closing '"'. '&' is handed to parse_character_reference and its
                     # result appended. Always returns null.
3802                 switch c = txt.charAt(cur++)
3803                         when '"'
3804                                 tok_state = tok_state_after_attribute_value_quoted
3805                         when '&'
3806                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3807                         when "\u0000"
                                     # NUL is a parse error and is replaced by U+FFFD
                                     parse_error()
3809                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3810                         when '' # EOF
3811                                 parse_error()
3812                                 tok_state = tok_state_data
                                     # Put the EOF back (as the other states in this file do)
                                     # so cur does not run past the end of the input.
                                     cur -= 1 # Reconsume
3813                         else
3814                                 tok_cur_tag.attrs_a[0][1] += c
3815                 return null
3816
3817         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3818         tok_state_attribute_value_single_quoted = ->
                     # Attribute value (single-quoted) state. Appends characters to the
                     # value of the attribute at tok_cur_tag.attrs_a[0] until the
                     # closing "'". '&' is handed to parse_character_reference and its
                     # result appended. Always returns null.
3819                 switch c = txt.charAt(cur++)
3820                         when "'"
3821                                 tok_state = tok_state_after_attribute_value_quoted
3822                         when '&'
3823                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3824                         when "\u0000"
                                     # NUL is a parse error and is replaced by U+FFFD
                                     parse_error()
3826                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3827                         when '' # EOF
3828                                 parse_error()
3829                                 tok_state = tok_state_data
                                     # Put the EOF back (as the other states in this file do)
                                     # so cur does not run past the end of the input.
                                     cur -= 1 # Reconsume
3830                         else
3831                                 tok_cur_tag.attrs_a[0][1] += c
3832                 return null
3833
3834         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3835         tok_state_attribute_value_unquoted = ->
                     # Attribute value (unquoted) state. Appends characters to the value
                     # of the attribute at tok_cur_tag.attrs_a[0] until whitespace or
                     # '>' ends it. '&' is handed to parse_character_reference and its
                     # result appended.
                     #
                     # Returns the finished tag token on '>' (tok_cur_tag is cleared),
                     # null in every other case.
3836                 switch c = txt.charAt(cur++)
3837                         when "\t", "\n", "\u000c", ' '
3838                                 tok_state = tok_state_before_attribute_name
3839                         when '&'
3840                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3841                         when '>'
3842                                 tok_state = tok_state_data
3843                                 tmp = tok_cur_tag
3844                                 tok_cur_tag = null
3845                                 return tmp
3846                         when "\u0000"
                                     # NUL is a parse error and is replaced by U+FFFD
                                     parse_error()
3847                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3848                         when '' # EOF
3849                                 parse_error()
3850                                 tok_state = tok_state_data
                                     # Put the EOF back (as the other states in this file do)
                                     # so cur does not run past the end of the input.
                                     cur -= 1 # Reconsume
3851                         else
3852                                 # ", ', <, = and ` (backtick) are parse errors, but are still appended
                                     parse_error() if c is '"' or c is "'" or c is '<' or c is '=' or c is '`'
3853                                 tok_cur_tag.attrs_a[0][1] += c
3854                 return null
3855
3856         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3857         tok_state_after_attribute_value_quoted = ->
                     # After attribute value (quoted) state: decides what follows a
                     # closing quote. Whitespace leads to the next attribute, '/' to the
                     # self-closing state and '>' ends the tag. Any other character is a
                     # parse error and is read again as the start of an attribute name.
                     #
                     # Returns the finished tag token on '>' (tok_cur_tag is cleared),
                     # null in every other case.
3858                 switch c = txt.charAt(cur++)
3859                         when "\t", "\n", "\u000c", ' '
3860                                 tok_state = tok_state_before_attribute_name
3861                         when '/'
3862                                 tok_state = tok_state_self_closing_start_tag
3863                         when '>'
3864                                 tok_state = tok_state_data
3865                                 tmp = tok_cur_tag
3866                                 tok_cur_tag = null
3867                                 return tmp
3868                         when '' # EOF
3869                                 parse_error()
3870                                 tok_state = tok_state_data
                                     # Put the EOF back (as the other states in this file do)
                                     # so cur does not run past the end of the input.
                                     cur -= 1 # Reconsume
3871                         else
                                     # no whitespace between the closing quote and what follows
                                     parse_error()
3873                                 tok_state = tok_state_before_attribute_name
3874                                 cur -= 1 # we didn't handle that char
3875                 return null
3876
3877         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3878         tok_state_self_closing_start_tag = ->
                     # Self-closing start tag state ('/' seen inside a tag). '>' flags
                     # tok_cur_tag as self-closing and returns it. Everything else is a
                     # parse error and is put back: EOF for the data state, any other
                     # character for the before attribute name state.
                     c = txt.charAt(cur++)
                     if c is '>'
                             tok_cur_tag.flag 'self-closing', true
                             tok_state = tok_state_data
                             return tok_cur_tag
                     parse_error()
                     if c is '' # EOF
                             tok_state = tok_state_data
                     else
                             # Anything else
                             tok_state = tok_state_before_attribute_name
                     cur -= 1 # Reconsume
                     return
3894
3895         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3896         # WARNING: put a comment token in tok_cur_tag before setting this state
3897         tok_state_bogus_comment = ->
                     # Bogus comment state: takes everything from cur up to (not
                     # including) the next '>', or to the end of the input when there is
                     # none, appends it to tok_cur_tag.text with NULs replaced by
                     # U+FFFD, and returns tok_cur_tag. cur ends up just past the '>'
                     # (or at the end of the input).
                     next_gt = txt.indexOf '>', cur
                     if next_gt >= 0
                             val = txt.substring cur, next_gt
                             cur = next_gt + 1
                     else
                             # no closing '>': the comment runs to the end of the input
                             val = txt.substring cur
                             cur = txt.length
                     val = val.replace /\u0000/g, "\ufffd"
                     tok_cur_tag.text += val
                     tok_state = tok_state_data
                     return tok_cur_tag
3909
3910         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3911         tok_state_markup_declaration_open = ->
                     # Markup declaration open state ("<!" seen). Looks ahead without
                     # reading a character first: "--" opens a comment, "doctype" (any
                     # case) a doctype, and "[CDATA[" a CDATA section when the adjusted
                     # current node exists and is outside the HTML namespace. Anything
                     # else is a parse error and becomes a bogus comment. Never returns
                     # a token.
                     if txt.substr(cur, 2) is '--'
                             cur += 2
                             tok_cur_tag = new_comment_token ''
                             tok_state = tok_state_comment_start
                     else if txt.substr(cur, 7).toLowerCase() is 'doctype'
                             cur += 7
                             tok_state = tok_state_doctype
                     else
                             acn = adjusted_current_node()
                             if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
                                     cur += 7
                                     tok_state = tok_state_cdata_section
                             else
                                     # Otherwise
                                     parse_error()
                                     tok_cur_tag = new_comment_token ''
                                     tok_state = tok_state_bogus_comment
                     return
3931
3932         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3933         tok_state_comment_start = ->
                     # Comment start state (just after "<!--"). The comment token being
                     # built is tok_cur_tag; its data is accumulated in tok_cur_tag.text.
                     #
                     # Returns the comment token when the comment ends here ('>' or
                     # EOF, both parse errors), null otherwise.
3934                 switch c = txt.charAt(cur++)
3935                         when '-'
3936                                 tok_state = tok_state_comment_start_dash
3937                         when "\u0000"
3938                                 parse_error()
                                     # The replacement character belongs to the comment's
                                     # data; it must not be emitted as a separate character
                                     # token ahead of the comment.
                                     tok_cur_tag.text += "\ufffd"
3939                                 tok_state = tok_state_comment
3941                         when '>'
                                     # "<!-->" is an (empty) comment
3942                                 parse_error()
3943                                 tok_state = tok_state_data
3944                                 return tok_cur_tag
3945                         when '' # EOF
3946                                 parse_error()
3947                                 tok_state = tok_state_data
3948                                 cur -= 1 # Reconsume
3949                                 return tok_cur_tag
3950                         else
3951                                 tok_cur_tag.text += c
3952                                 tok_state = tok_state_comment
3953                 return null
3954
3955         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3956         tok_state_comment_start_dash = ->
                     # Comment start dash state ("<!--" plus one '-'). A second '-'
                     # moves to the comment end state; '>' and EOF end the comment with
                     # a parse error and return it; anything else is added to the
                     # comment text together with the pending '-'.
                     c = txt.charAt(cur++)
                     if c is '-'
                             tok_state = tok_state_comment_end
                     else if c is "\u0000"
                             parse_error()
                             tok_cur_tag.text += "-\ufffd"
                             tok_state = tok_state_comment
                     else if c is '>'
                             parse_error()
                             tok_state = tok_state_data
                             return tok_cur_tag
                     else if c is '' # EOF
                             parse_error()
                             tok_state = tok_state_data
                             cur -= 1 # Reconsume
                             return tok_cur_tag
                     else
                             tok_cur_tag.text += "-#{c}"
                             tok_state = tok_state_comment
                     return null
3977
3978         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3979         tok_state_comment = ->
                     # Comment state: adds characters to tok_cur_tag.text (NUL as
                     # U+FFFD) until a '-' hints at the end of the comment. EOF is a
                     # parse error and returns the comment as it stands.
                     c = txt.charAt(cur++)
                     if c is '-'
                             tok_state = tok_state_comment_end_dash
                     else if c is "\u0000"
                             parse_error()
                             tok_cur_tag.text += "\ufffd"
                     else if c is '' # EOF
                             parse_error()
                             tok_state = tok_state_data
                             cur -= 1 # Reconsume
                             return tok_cur_tag
                     else
                             tok_cur_tag.text += c
                     return null
3994
3995         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3996         tok_state_comment_end_dash = ->
                     # Comment end dash state (one '-' seen inside a comment). A second
                     # '-' moves to the comment end state; EOF is a parse error and
                     # returns the comment; anything else is added to the comment text
                     # together with the pending '-'.
                     c = txt.charAt(cur++)
                     if c is '-'
                             tok_state = tok_state_comment_end
                     else if c is "\u0000"
                             parse_error()
                             tok_cur_tag.text += "-\ufffd"
                             tok_state = tok_state_comment
                     else if c is '' # EOF
                             parse_error()
                             tok_state = tok_state_data
                             cur -= 1 # Reconsume
                             return tok_cur_tag
                     else
                             tok_cur_tag.text += "-#{c}"
                             tok_state = tok_state_comment
                     return null
4013
4014         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
4015         tok_state_comment_end = ->
4016                 switch c = txt.charAt(cur++)
4017                         when '>'
4018                                 tok_state = tok_state_data
4019                                 return tok_cur_tag
4020                         when "\u0000"
4021                                 parse_error()
4022                                 tok_cur_tag.text += "--\ufffd"
4023                                 tok_state = tok_state_comment
4024                         when '!'
4025                                 parse_error()
4026                                 tok_state = tok_state_comment_end_bang
4027                         when '-'
4028                                 parse_error()
4029                                 tok_cur_tag.text += '-'
4030                         when '' # EOF
4031                                 parse_error()
4032                                 tok_state = tok_state_data
4033                                 cur -= 1 # Reconsume
4034                                 return tok_cur_tag
4035                         else
4036                                 parse_error()
4037                                 tok_cur_tag.text += "--#{c}"
4038                                 tok_state = tok_state_comment
4039                 return null
4040
4041         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
4042         tok_state_comment_end_bang = ->
4043                 switch c = txt.charAt(cur++)
4044                         when '-'
4045                                 tok_cur_tag.text += "--!#{c}"
4046                                 tok_state = tok_state_comment_end_dash
4047                         when '>'
4048                                 tok_state = tok_state_data
4049                                 return tok_cur_tag
4050                         when "\u0000"
4051                                 parse_error()
4052                                 tok_cur_tag.text += "--!\ufffd"
4053                                 tok_state = tok_state_comment
4054                         when '' # EOF
4055                                 parse_error()
4056                                 tok_state = tok_state_data
4057                                 cur -= 1 # Reconsume
4058                                 return tok_cur_tag
4059                         else
4060                                 tok_cur_tag.text += "--!#{c}"
4061                                 tok_state = tok_state_comment
4062                 return null
4063
4064         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
4065         tok_state_doctype = ->
4066                 switch c = txt.charAt(cur++)
4067                         when "\t", "\u000a", "\u000c", ' '
4068                                 tok_state = tok_state_before_doctype_name
4069                         when '' # EOF
4070                                 parse_error()
4071                                 tok_state = tok_state_data
4072                                 el = new_doctype_token ''
4073                                 el.flag 'force-quirks', true
4074                                 cur -= 1 # Reconsume
4075                                 return el
4076                         else
4077                                 parse_error()
4078                                 tok_state = tok_state_before_doctype_name
4079                                 cur -= 1 # Reconsume
4080                 return null
4081
        # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
4083         tok_state_before_doctype_name = ->
4084                 c = txt.charAt(cur++)
4085                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4086                         return
4087                 if is_uc_alpha(c)
4088                         tok_cur_tag = new_doctype_token c.toLowerCase()
4089                         tok_state = tok_state_doctype_name
4090                         return
4091                 if c is "\u0000"
4092                         parse_error()
4093                         tok_cur_tag = new_doctype_token "\ufffd"
4094                         tok_state = tok_state_doctype_name
4095                         return
4096                 if c is '>'
4097                         parse_error()
4098                         el = new_doctype_token ''
4099                         el.flag 'force-quirks', true
4100                         tok_state = tok_state_data
4101                         return el
4102                 if c is '' # EOF
4103                         parse_error()
4104                         tok_state = tok_state_data
4105                         el = new_doctype_token ''
4106                         el.flag 'force-quirks', true
4107                         cur -= 1 # Reconsume
4108                         return el
4109                 # Anything else
4110                 tok_cur_tag = new_doctype_token c
4111                 tok_state = tok_state_doctype_name
4112                 return null
4113
4114         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4115         tok_state_doctype_name = ->
4116                 c = txt.charAt(cur++)
4117                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4118                         tok_state = tok_state_after_doctype_name
4119                         return
4120                 if c is '>'
4121                         tok_state = tok_state_data
4122                         return tok_cur_tag
4123                 if is_uc_alpha(c)
4124                         tok_cur_tag.name += c.toLowerCase()
4125                         return
4126                 if c is "\u0000"
4127                         parse_error()
4128                         tok_cur_tag.name += "\ufffd"
4129                         return
4130                 if c is '' # EOF
4131                         parse_error()
4132                         tok_state = tok_state_data
4133                         tok_cur_tag.flag 'force-quirks', true
4134                         cur -= 1 # Reconsume
4135                         return tok_cur_tag
4136                 # Anything else
4137                 tok_cur_tag.name += c
4138                 return null
4139
4140         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4141         tok_state_after_doctype_name = ->
4142                 c = txt.charAt(cur++)
4143                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4144                         return
4145                 if c is '>'
4146                         tok_state = tok_state_data
4147                         return tok_cur_tag
4148                 if c is '' # EOF
4149                         parse_error()
4150                         tok_state = tok_state_data
4151                         tok_cur_tag.flag 'force-quirks', true
4152                         cur -= 1 # Reconsume
4153                         return tok_cur_tag
4154                 # Anything else
4155                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4156                         cur += 5
4157                         tok_state = tok_state_after_doctype_public_keyword
4158                         return
4159                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4160                         cur += 5
4161                         tok_state = tok_state_after_doctype_system_keyword
4162                         return
4163                 parse_error()
4164                 tok_cur_tag.flag 'force-quirks', true
4165                 tok_state = tok_state_bogus_doctype
4166                 return null
4167
4168         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4169         tok_state_after_doctype_public_keyword = ->
4170                 c = txt.charAt(cur++)
4171                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4172                         tok_state = tok_state_before_doctype_public_identifier
4173                         return
4174                 if c is '"'
4175                         parse_error()
4176                         tok_cur_tag.public_identifier = ''
4177                         tok_state = tok_state_doctype_public_identifier_double_quoted
4178                         return
4179                 if c is "'"
4180                         parse_error()
4181                         tok_cur_tag.public_identifier = ''
4182                         tok_state = tok_state_doctype_public_identifier_single_quoted
4183                         return
4184                 if c is '>'
4185                         parse_error()
4186                         tok_cur_tag.flag 'force-quirks', true
4187                         tok_state = tok_state_data
4188                         return tok_cur_tag
4189                 if c is '' # EOF
4190                         parse_error()
4191                         tok_state = tok_state_data
4192                         tok_cur_tag.flag 'force-quirks', true
4193                         cur -= 1 # Reconsume
4194                         return tok_cur_tag
4195                 # Anything else
4196                 parse_error()
4197                 tok_cur_tag.flag 'force-quirks', true
4198                 tok_state = tok_state_bogus_doctype
4199                 return null
4200
4201         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4202         tok_state_before_doctype_public_identifier = ->
4203                 c = txt.charAt(cur++)
4204                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4205                         return
4206                 if c is '"'
4207                         parse_error()
4208                         tok_cur_tag.public_identifier = ''
4209                         tok_state = tok_state_doctype_public_identifier_double_quoted
4210                         return
4211                 if c is "'"
4212                         parse_error()
4213                         tok_cur_tag.public_identifier = ''
4214                         tok_state = tok_state_doctype_public_identifier_single_quoted
4215                         return
4216                 if c is '>'
4217                         parse_error()
4218                         tok_cur_tag.flag 'force-quirks', true
4219                         tok_state = tok_state_data
4220                         return tok_cur_tag
4221                 if c is '' # EOF
4222                         parse_error()
4223                         tok_state = tok_state_data
4224                         tok_cur_tag.flag 'force-quirks', true
4225                         cur -= 1 # Reconsume
4226                         return tok_cur_tag
4227                 # Anything else
4228                 parse_error()
4229                 tok_cur_tag.flag 'force-quirks', true
4230                 tok_state = tok_state_bogus_doctype
4231                 return null
4232
4233
4234         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4235         tok_state_doctype_public_identifier_double_quoted = ->
4236                 c = txt.charAt(cur++)
4237                 if c is '"'
4238                         tok_state = tok_state_after_doctype_public_identifier
4239                         return
4240                 if c is "\u0000"
4241                         parse_error()
4242                         tok_cur_tag.public_identifier += "\ufffd"
4243                         return
4244                 if c is '>'
4245                         parse_error()
4246                         tok_cur_tag.flag 'force-quirks', true
4247                         tok_state = tok_state_data
4248                         return tok_cur_tag
4249                 if c is '' # EOF
4250                         parse_error()
4251                         tok_state = tok_state_data
4252                         tok_cur_tag.flag 'force-quirks', true
4253                         cur -= 1 # Reconsume
4254                         return tok_cur_tag
4255                 # Anything else
4256                 tok_cur_tag.public_identifier += c
4257                 return null
4258
4259         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4260         tok_state_doctype_public_identifier_single_quoted = ->
4261                 c = txt.charAt(cur++)
4262                 if c is "'"
4263                         tok_state = tok_state_after_doctype_public_identifier
4264                         return
4265                 if c is "\u0000"
4266                         parse_error()
4267                         tok_cur_tag.public_identifier += "\ufffd"
4268                         return
4269                 if c is '>'
4270                         parse_error()
4271                         tok_cur_tag.flag 'force-quirks', true
4272                         tok_state = tok_state_data
4273                         return tok_cur_tag
4274                 if c is '' # EOF
4275                         parse_error()
4276                         tok_state = tok_state_data
4277                         tok_cur_tag.flag 'force-quirks', true
4278                         cur -= 1 # Reconsume
4279                         return tok_cur_tag
4280                 # Anything else
4281                 tok_cur_tag.public_identifier += c
4282                 return null
4283
4284         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4285         tok_state_after_doctype_public_identifier = ->
4286                 c = txt.charAt(cur++)
4287                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4288                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4289                         return
4290                 if c is '>'
4291                         tok_state = tok_state_data
4292                         return tok_cur_tag
4293                 if c is '"'
4294                         parse_error()
4295                         tok_cur_tag.system_identifier = ''
4296                         tok_state = tok_state_doctype_system_identifier_double_quoted
4297                         return
4298                 if c is "'"
4299                         parse_error()
4300                         tok_cur_tag.system_identifier = ''
4301                         tok_state = tok_state_doctype_system_identifier_single_quoted
4302                         return
4303                 if c is '' # EOF
4304                         parse_error()
4305                         tok_state = tok_state_data
4306                         tok_cur_tag.flag 'force-quirks', true
4307                         cur -= 1 # Reconsume
4308                         return tok_cur_tag
4309                 # Anything else
4310                 parse_error()
4311                 tok_cur_tag.flag 'force-quirks', true
4312                 tok_state = tok_state_bogus_doctype
4313                 return null
4314
4315         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4316         tok_state_between_doctype_public_and_system_identifiers = ->
4317                 c = txt.charAt(cur++)
4318                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4319                         return
4320                 if c is '>'
4321                         tok_state = tok_state_data
4322                         return tok_cur_tag
4323                 if c is '"'
4324                         parse_error()
4325                         tok_cur_tag.system_identifier = ''
4326                         tok_state = tok_state_doctype_system_identifier_double_quoted
4327                         return
4328                 if c is "'"
4329                         parse_error()
4330                         tok_cur_tag.system_identifier = ''
4331                         tok_state = tok_state_doctype_system_identifier_single_quoted
4332                         return
4333                 if c is '' # EOF
4334                         parse_error()
4335                         tok_state = tok_state_data
4336                         tok_cur_tag.flag 'force-quirks', true
4337                         cur -= 1 # Reconsume
4338                         return tok_cur_tag
4339                 # Anything else
4340                 parse_error()
4341                 tok_cur_tag.flag 'force-quirks', true
4342                 tok_state = tok_state_bogus_doctype
4343                 return null
4344
4345         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4346         tok_state_after_doctype_system_keyword = ->
4347                 c = txt.charAt(cur++)
4348                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4349                         tok_state = tok_state_before_doctype_system_identifier
4350                         return
4351                 if c is '"'
4352                         parse_error()
4353                         tok_cur_tag.system_identifier = ''
4354                         tok_state = tok_state_doctype_system_identifier_double_quoted
4355                         return
4356                 if c is "'"
4357                         parse_error()
4358                         tok_cur_tag.system_identifier = ''
4359                         tok_state = tok_state_doctype_system_identifier_single_quoted
4360                         return
4361                 if c is '>'
4362                         parse_error()
4363                         tok_cur_tag.flag 'force-quirks', true
4364                         tok_state = tok_state_data
4365                         return tok_cur_tag
4366                 if c is '' # EOF
4367                         parse_error()
4368                         tok_state = tok_state_data
4369                         tok_cur_tag.flag 'force-quirks', true
4370                         cur -= 1 # Reconsume
4371                         return tok_cur_tag
4372                 # Anything else
4373                 parse_error()
4374                 tok_cur_tag.flag 'force-quirks', true
4375                 tok_state = tok_state_bogus_doctype
4376                 return null
4377
4378         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4379         tok_state_before_doctype_system_identifier = ->
4380                 c = txt.charAt(cur++)
4381                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4382                         return
4383                 if c is '"'
4384                         tok_cur_tag.system_identifier = ''
4385                         tok_state = tok_state_doctype_system_identifier_double_quoted
4386                         return
4387                 if c is "'"
4388                         tok_cur_tag.system_identifier = ''
4389                         tok_state = tok_state_doctype_system_identifier_single_quoted
4390                         return
4391                 if c is '>'
4392                         parse_error()
4393                         tok_cur_tag.flag 'force-quirks', true
4394                         tok_state = tok_state_data
4395                         return tok_cur_tag
4396                 if c is '' # EOF
4397                         parse_error()
4398                         tok_state = tok_state_data
4399                         tok_cur_tag.flag 'force-quirks', true
4400                         cur -= 1 # Reconsume
4401                         return tok_cur_tag
4402                 # Anything else
4403                 parse_error()
4404                 tok_cur_tag.flag 'force-quirks', true
4405                 tok_state = tok_state_bogus_doctype
4406                 return null
4407
4408         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4409         tok_state_doctype_system_identifier_double_quoted = ->
4410                 c = txt.charAt(cur++)
4411                 if c is '"'
4412                         tok_state = tok_state_after_doctype_system_identifier
4413                         return
4414                 if c is "\u0000"
4415                         parse_error()
4416                         tok_cur_tag.system_identifier += "\ufffd"
4417                         return
4418                 if c is '>'
4419                         parse_error()
4420                         tok_cur_tag.flag 'force-quirks', true
4421                         tok_state = tok_state_data
4422                         return tok_cur_tag
4423                 if c is '' # EOF
4424                         parse_error()
4425                         tok_state = tok_state_data
4426                         tok_cur_tag.flag 'force-quirks', true
4427                         cur -= 1 # Reconsume
4428                         return tok_cur_tag
4429                 # Anything else
4430                 tok_cur_tag.system_identifier += c
4431                 return null
4432
4433         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4434         tok_state_doctype_system_identifier_single_quoted = ->
4435                 c = txt.charAt(cur++)
4436                 if c is "'"
4437                         tok_state = tok_state_after_doctype_system_identifier
4438                         return
4439                 if c is "\u0000"
4440                         parse_error()
4441                         tok_cur_tag.system_identifier += "\ufffd"
4442                         return
4443                 if c is '>'
4444                         parse_error()
4445                         tok_cur_tag.flag 'force-quirks', true
4446                         tok_state = tok_state_data
4447                         return tok_cur_tag
4448                 if c is '' # EOF
4449                         parse_error()
4450                         tok_state = tok_state_data
4451                         tok_cur_tag.flag 'force-quirks', true
4452                         cur -= 1 # Reconsume
4453                         return tok_cur_tag
4454                 # Anything else
4455                 tok_cur_tag.system_identifier += c
4456                 return null
4457
4458         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4459         tok_state_after_doctype_system_identifier = ->
4460                 c = txt.charAt(cur++)
4461                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4462                         return
4463                 if c is '>'
4464                         tok_state = tok_state_data
4465                         return tok_cur_tag
4466                 if c is '' # EOF
4467                         parse_error()
4468                         tok_state = tok_state_data
4469                         tok_cur_tag.flag 'force-quirks', true
4470                         cur -= 1 # Reconsume
4471                         return tok_cur_tag
4472                 # Anything else
4473                 parse_error()
4474                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4475                 tok_state = tok_state_bogus_doctype
4476                 return null
4477
4478         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4479         tok_state_bogus_doctype = ->
4480                 c = txt.charAt(cur++)
4481                 if c is '>'
4482                         tok_state = tok_state_data
4483                         return tok_cur_tag
4484                 if c is '' # EOF
4485                         tok_state = tok_state_data
4486                         cur -= 1 # Reconsume
4487                         return tok_cur_tag
4488                 # Anything else
4489                 return null
4490
4491         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4492         tok_state_cdata_section = ->
4493                 tok_state = tok_state_data
4494                 next_gt = txt.indexOf ']]>', cur
4495                 if next_gt is -1
4496                         val = txt.substr cur
4497                         cur = txt.length
4498                 else
4499                         val = txt.substr cur, (next_gt - cur)
4500                         cur = next_gt + 3
4501                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4502                 if val.length > 0
4503                         return new_character_token val # fixfull split
4504                 return null
4505
4506         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4507         # Don't set this as a state, just call it
4508         # returns a string (NOT a text node)
4509         parse_character_reference = (allowed_char = null, in_attr = false) ->
4510                 if cur >= txt.length
4511                         return '&'
4512                 switch c = txt.charAt(cur)
4513                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4514                                 # explicitly not a parse error
4515                                 return '&'
4516                         when ';'
4517                                 # there has to be "one or more" alnums between & and ; to be a parse error
4518                                 return '&'
4519                         when '#'
4520                                 if cur + 1 >= txt.length
4521                                         return '&'
4522                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4523                                         base = 16
4524                                         charset = hex_chars
4525                                         start = cur + 2
4526                                 else
4527                                         charset = digits
4528                                         start = cur + 1
4529                                         base = 10
4530                                 i = 0
4531                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4532                                         i += 1
4533                                 if i is 0
4534                                         return '&'
4535                                 cur = start + i
4536                                 if txt.charAt(start + i) is ';'
4537                                         cur += 1
4538                                 else
4539                                         parse_error()
4540                                 code_point = txt.substr(start, i)
4541                                 while code_point.charAt(0) is '0' and code_point.length > 1
4542                                         code_point = code_point.substr 1
4543                                 code_point = parseInt(code_point, base)
4544                                 if unicode_fixes[code_point]?
4545                                         parse_error()
4546                                         return unicode_fixes[code_point]
4547                                 else
4548                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4549                                                 parse_error()
4550                                                 return "\ufffd"
4551                                         else
4552                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4553                                                         parse_error()
4554                                                 return from_code_point code_point
4555                                 return
4556                         else
4557                                 for i in [0...31]
4558                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4559                                                 break
4560                                 if i is 0
4561                                         # exit early, because parse_error() below needs at least one alnum
4562                                         return '&'
4563                                 if txt.charAt(cur + i) is ';'
4564                                         decoded = decode_named_char_ref txt.substr(cur, i)
4565                                         i += 1 # scan past the ';' (after, so we dno't pass it to decode)
4566                                         if decoded?
4567                                                 cur += i
4568                                                 return decoded
4569                                         # else FALL THROUGH (check for match without last char(s) or ";")
4570                                 # no ';' terminator (only legacy char refs)
4571                                 max = i
4572                                 for i in [2..max] # no prefix matches, so ok to check shortest first
4573                                         c = legacy_char_refs[txt.substr(cur, i)]
4574                                         if c?
4575                                                 if in_attr
4576                                                         if txt.charAt(cur + i) is '='
4577                                                                 # "because some legacy user agents will
4578                                                                 # misinterpret the markup in those cases"
4579                                                                 parse_error()
4580                                                                 return '&'
4581                                                         if alnum.indexOf(txt.charAt(cur + i)) > -1
4582                                                                 # this makes attributes forgiving about url args
4583                                                                 return '&'
4584                                                 # ok, and besides the weird exceptions for attributes...
4585                                                 # return the matching char
4586                                                 cur += i # consume entity chars
4587                                                 parse_error() # because no terminating ";"
4588                                                 return c
4589                                 parse_error()
4590                                 return '&'
4591                 return # never reached
4592
4593         # Consume the next token only if it is a "newline" text token; otherwise rewind
4594         # cur AND tok_state (in case the lookahead switched tokenizer states).
4595         eat_next_token_if_newline = ->
4596                 saved_cur = cur
4597                 saved_tok_state = tok_state
4598                 loop # tok_state() can come back empty; step until it yields a token
4599                         t = tok_state()
4600                         break if t?
4601                 if t.type is TYPE_TEXT
4602                         # a literal char (cur moved by exactly 1) counts as CR or LF; a
4603                         # character reference only counts when it decodes to LF
4604                         was_literal = cur - saved_cur is 1
4605                         return if t.text is "\u000a" or (was_literal and t.text is "\u000d")
4606                 # not a "newline": undo the lookahead
4607                 cur = saved_cur
4608                 tok_state = saved_tok_state
4609                 return
4610
4611         # tree constructor initialization
4612         # (the comments on TYPE_TAG/etc describe the structure of this data)
4613         txt = args_html # text being parsed
4614         cur = 0 # read position within txt
4615         doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
4616         doc.flag('quirks mode', QUIRKS_NO) # TODO bugreport spec for not specifying this
4617         # lists, all empty to begin with
4618         open_els = []
4619         afe = [] # active formatting elements
4620         template_ins_modes = []
4621         pending_table_character_tokens = []
4622         # insertion modes
4623         ins_mode = original_ins_mode = ins_mode_initial # TODO check spec
4624         # flags
4625         flag_scripting = if args.scripting? then args.scripting else true # TODO might need an extra flag to get <noscript> to parse correctly
4626         flag_frameset_ok = flag_parsing = true
4627         flag_foster_parenting = flag_fragment_parsing = false
4628         # element pointers and scratch buffer, unset to begin with
4629         form_element_pointer = head_element_pointer = null
4630         temporary_buffer = null
4631         # fragment parsing only; the fragment parsing algorithm returns the children of fragment_root
4632         fragment_root = context_element = null
4633         prev_node_id = 0 # just for debugging
4634
4635         # tokenizer initialization
4636         tok_state = tok_state_data
4637
4638         # Set up one parse run: resolve the fragment context element (args.fragment
4639         # or args.context); when there is one, switch the parser over to fragment
4640         # parsing around it; and always normalise the newlines in txt.
4641         # Works purely by side effect on the enclosing parser state.
4642         parse_init = ->
4643                 # fragment parsing (text arg): args.fragment names the context element in
4644                 # the format used by the tests, described here:
4645                 # https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
4646                 if args.fragment?
4647                         frag_name = args.fragment
4648                         frag_ns = NS_HTML
4649                         # a "math " or "svg " prefix selects a foreign namespace
4650                         for [frag_prefix, frag_prefix_ns] in [['math ', NS_MATHML], ['svg ', NS_SVG]]
4651                                 if frag_name.substr(0, frag_prefix.length) is frag_prefix
4652                                         frag_name = frag_name.substr frag_prefix.length
4653                                         frag_ns = frag_prefix_ns
4654                                         break
4655                         context_element = token_to_element new_open_tag(frag_name), frag_ns
4656                         # give the synthetic context element a no-quirks document of its own
4657                         frag_doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
4658                         frag_doc.flag('quirks mode', QUIRKS_NO)
4659                         context_element.document = frag_doc
4660                 # fragment parsing (Node arg): an explicit context node wins over args.fragment
4661                 context_element = args.context if args.context?
4662
4663                 # fragment parsing algorithm
4664                 # http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4665                 if context_element?
4666                         flag_fragment_parsing = true
4667                         doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
4668                         # This file only puts a "document" property on the root element,
4669                         # so climb from the context element until a document turns up (or
4670                         # the parent chain ends) and inherit that document's quirks mode.
4671                         ancestor = context_element
4672                         until ancestor.document? or not ancestor.parent
4673                                 ancestor = ancestor.parent
4674                         if ancestor.document
4675                                 doc.flag 'quirks mode', ancestor.document.flag('quirks mode')
4676
4677                         # an HTML context element decides which state the tokenizer starts in
4678                         if context_element.namespace is NS_HTML
4679                                 ctx_name = context_element.name
4680                                 if ctx_name in ['title', 'textarea']
4681                                         tok_state = tok_state_rcdata
4682                                 else if ctx_name in ['style', 'xmp', 'iframe', 'noembed', 'noframes']
4683                                         tok_state = tok_state_rawtext
4684                                 else if ctx_name is 'script'
4685                                         tok_state = tok_state_script_data
4686                                 else if ctx_name is 'noscript'
4687                                         # tok_state is left alone when scripting is off
4688                                         tok_state = tok_state_rawtext if flag_scripting
4689                                 else if ctx_name is 'plaintext'
4690                                         tok_state = tok_state_plaintext
4691
4692                         # parsing happens inside a fresh <html> root element, which is the
4693                         # only entry in open_els to begin with
4694                         fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
4695                         fragment_root.document = doc
4696                         doc.children.push fragment_root
4697                         open_els = [fragment_root]
4698                         # an HTML <template> context puts ins_mode_in_template on template_ins_modes
4699                         if context_element.namespace is NS_HTML and context_element.name is 'template'
4700                                 template_ins_modes.unshift ins_mode_in_template
4701                         # fixfull create token for context (it should have its original one already)
4702                         reset_ins_mode()
4703
4704                         # set form_element pointer... in the foreign doc?!
4705                         # (the nearest HTML <form>, starting with the context element itself)
4706                         ancestor = context_element
4707                         while ancestor
4708                                 if ancestor.namespace is NS_HTML and ancestor.name is 'form'
4709                                         form_element_pointer = ancestor
4710                                         break
4711                                 ancestor = ancestor.parent
4712
4713                 # text pre-processing: every CRLF pair and every lone CR becomes LF
4714                 # FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4715                 txt = txt.replace(/\r\n?/g, "\n") # fixfull spec doesn't say this
4716
4717                 return
4718
4719         # Main loop: keep stepping the tokenizer while flag_parsing is set and pass
4720         # every token it produces to process_token.
4721         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4722         parse_main_loop = ->
4723                 while flag_parsing
4724                         emitted = tok_state()
4725                         process_token emitted if emitted? # fixfull parse error if has self-closing flag, but it wasn't acknowledged
4726                 return
4727         # run the parse
4728         parse_init()
4729         parse_main_loop()
4730
4731         # fragment parsing yields the children of fragment_root, a normal parse those of doc
4732         return (if flag_fragment_parsing then fragment_root else doc).children
4733
4734 # Public interface of this file: the parser entry point, the debug log
4735 # helpers, and the node-type, namespace and quirks-mode constants.
4736 exports.parse = parse_html
4737 # everything below is exported under its own name
4738 same_name_exports = {
4739         debug_log_reset
4740         debug_log_each
4741         TYPE_TAG, TYPE_TEXT, TYPE_COMMENT, TYPE_DOCTYPE
4742         NS_HTML, NS_MATHML, NS_SVG
4743         QUIRKS_NO, QUIRKS_LIMITED, QUIRKS_YES
4744 }
4745 for export_name, export_value of same_name_exports
4746         exports[export_name] = export_value