parse-html.coffee

   1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
   2 # Copyright 2015 Jason Woofenden
   3 #
   4 # This program is free software: you can redistribute it and/or modify it under
   5 # the terms of the GNU Affero General Public License as published by the Free
   6 # Software Foundation, either version 3 of the License, or (at your option) any
   7 # later version.
   8 #
   9 # This program is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
  12 # details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17
  18 # This file implements a parser for html snippets, meant to be used by a
  19 # WYSIWYG editor.
  20
  21 # The implementation is a pretty direct implementation of the parsing algorithm
  22 # described here:
  23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
  24 #
  25 # Deviations from that spec:
  26 #
  27 #   Purposeful: search this file for "WTAG"
  28 #
  29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
  30
  31
  32 # stacks/lists
  33 #
   34 # the spec uses many different words to indicate which ends of lists/stacks
   35 # they are talking about (and relative movement within the lists/stacks). This
   36 # section explains. I'm implementing "lists" (afe and open_els) the same way
  37 # (both as stacks)
  38 #
  39 # stacks grow downward (current element is index=0)
  40 #
  41 # example: open_els = [a, b, c, d, e, f, g]
  42 #
  43 # "grows downwards" means it's visualized like this: (index: el, names)
  44 #
  45 #   6: g "start of the list", "topmost", "first"
  46 #   5: f
  47 #   4: e "previous" (to d), "above", "before"
  48 #   3: d   (previous/next are relative to this element)
  49 #   2: c "next", "after", "lower", "below"
  50 #   1: b
  51 #   0: a "end of the list", "current node", "bottommost", "last"
  52
  53
  54 # browser
  55 # note: to get this to run outside a browser, you'll have to write a native
  56 # implementation of decode_named_char_ref()
  57 unless module?.exports?
  58         window.wheic = {}
  59         module = exports: window.wheic
  60
  61 from_code_point = (x) ->
  62         if String.fromCodePoint?
  63                 return String.fromCodePoint x
  64         else
  65                 if x <= 0xffff
  66                         return String.fromCharCode x
  67                 x -= 0x10000
  68                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
  69
   70 # Each node is an object of the Node class. Here are the Node types:
  71 TYPE_TAG = 0 # name, {attributes}, [children]
  72 TYPE_TEXT = 1 # "text"
  73 TYPE_COMMENT = 2
  74 TYPE_DOCTYPE = 3
   75 # the following types are emitted by the tokenizer, but shouldn't end up in the tree:
  76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
  77 TYPE_END_TAG = 5 # name
  78 TYPE_EOF = 6
  79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
  80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
  81
  82 # namespace constants
  83 NS_HTML = 1
  84 NS_MATHML = 2
  85 NS_SVG = 3
  86
  87 g_debug_log = []
  88 debug_log_reset = ->
  89         g_debug_log = []
  90 debug_log = (str) ->
  91         g_debug_log.push str
  92 debug_log_each = (cb) ->
  93         for str in g_debug_log
  94                 cb str
  95
  96 prev_node_id = 0
  97 class Node
  98         constructor: (type, args = {}) ->
  99                 @type = type # one of the TYPE_* constants above
 100                 @name = args.name ? '' # tag name
 101                 @text = args.text ? '' # contents for text/comment nodes
 102                 @attrs = args.attrs ? {}
 103                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
 104                 @children = args.children ? []
 105                 @namespace = args.namespace ? NS_HTML
 106                 @parent = args.parent ? null
 107                 @token = args.token ? null
 108                 @flags = args.flags ? {}
 109                 if args.id?
 110                         @id = "#{args.id}+"
 111                 else
 112                         @id = "#{++prev_node_id}"
 113         acknowledge_self_closing: ->
 114                 if @token?
 115                         @token.flag 'did_self_close'
 116                 else
 117                         @flag 'did_self_close', true
 118         flag: (key, value = null) ->
 119                 if value?
 120                         @flags[key] = value
 121                 else
 122                         return @flags[key]
 123         serialize: (shallow = false, show_ids = false) -> # for unit tests
 124                 ret = ''
 125                 switch @type
 126                         when TYPE_TAG
 127                                 ret += 'tag:'
 128                                 ret += JSON.stringify @name
 129                                 ret += ','
 130                                 if show_ids
 131                                         ret += "##{@id},"
 132                                 if shallow
 133                                         break
 134                                 attr_keys = []
 135                                 for k of @attrs
 136                                         attr_keys.push k
 137                                 attr_keys.sort()
 138                                 ret += '{'
 139                                 sep = ''
 140                                 for k in attr_keys
 141                                         ret += sep
 142                                         sep = ','
 143                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
 144                                 ret += '},['
 145                                 sep = ''
 146                                 for c in @children
 147                                         ret += sep
 148                                         sep = ','
 149                                         ret += c.serialize shallow, show_ids
 150                                 ret += ']'
 151                         when TYPE_TEXT
 152                                 ret += 'text:'
 153                                 ret += JSON.stringify @text
 154                         when TYPE_COMMENT
 155                                 ret += 'comment:'
 156                                 ret += JSON.stringify @text
 157                         when TYPE_DOCTYPE
 158                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
 159                         when TYPE_AFE_MARKER
 160                                 ret += 'marker'
 161                         when TYPE_AAA_BOOKMARK
 162                                 ret += 'aaa_bookmark'
 163                         else
 164                                 ret += 'unknown:'
 165                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
 166                 return ret
 167
 168 # helpers: (only take args that are normally known when parser creates nodes)
 169 new_open_tag = (name) ->
 170         return new Node TYPE_START_TAG, name: name
 171 new_end_tag = (name) ->
 172         return new Node TYPE_END_TAG, name: name
 173 new_element = (name) ->
 174         return new Node TYPE_TAG, name: name
 175 new_text_node = (txt) ->
 176         return new Node TYPE_TEXT, text: txt
 177 new_character_token = new_text_node
 178 new_comment_token = (txt) ->
 179         return new Node TYPE_COMMENT, text: txt
 180 new_doctype_token = (name) ->
 181         return new Node TYPE_DOCTYPE, name: name
 182 new_eof_token = ->
 183         return new Node TYPE_EOF
 184 new_afe_marker = ->
 185         return new Node TYPE_AFE_MARKER
 186 new_aaa_bookmark = ->
 187         return new Node TYPE_AAA_BOOKMARK
 188
 189 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
 190 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 191 digits = "0123456789"
 192 alnum = lc_alpha + uc_alpha + digits
 193 hex_chars = digits + "abcdefABCDEF"
 194
 195 is_uc_alpha = (str) ->
 196         return str.length is 1 and uc_alpha.indexOf(str) > -1
 197 is_lc_alpha = (str) ->
 198         return str.length is 1 and lc_alpha.indexOf(str) > -1
 199
 200 # some SVG elements have dashes in them
 201 tag_name_chars = alnum + "-"
 202
 203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
 204 space_chars = "\u0009\u000a\u000c\u000d\u0020"
 205 is_space = (txt) ->
 206         return txt.length is 1 and space_chars.indexOf(txt) > -1
 207 is_space_tok = (t) ->
 208         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
 209
 210 is_input_hidden_tok = (t) ->
 211         return false unless t.type is TYPE_START_TAG
 212         for a in t.attrs_a
 213                 if a[0] is 'type'
 214                         if a[1].toLowerCase() is 'hidden'
 215                                 return true
 216                         return false
 217         return false
 218
 219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
 220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
 221
 222 unicode_fixes = {}
 223 unicode_fixes[0x00] = "\uFFFD"
 224 unicode_fixes[0x80] = "\u20AC"
 225 unicode_fixes[0x82] = "\u201A"
 226 unicode_fixes[0x83] = "\u0192"
 227 unicode_fixes[0x84] = "\u201E"
 228 unicode_fixes[0x85] = "\u2026"
 229 unicode_fixes[0x86] = "\u2020"
 230 unicode_fixes[0x87] = "\u2021"
 231 unicode_fixes[0x88] = "\u02C6"
 232 unicode_fixes[0x89] = "\u2030"
 233 unicode_fixes[0x8A] = "\u0160"
 234 unicode_fixes[0x8B] = "\u2039"
 235 unicode_fixes[0x8C] = "\u0152"
 236 unicode_fixes[0x8E] = "\u017D"
 237 unicode_fixes[0x91] = "\u2018"
 238 unicode_fixes[0x92] = "\u2019"
 239 unicode_fixes[0x93] = "\u201C"
 240 unicode_fixes[0x94] = "\u201D"
 241 unicode_fixes[0x95] = "\u2022"
 242 unicode_fixes[0x96] = "\u2013"
 243 unicode_fixes[0x97] = "\u2014"
 244 unicode_fixes[0x98] = "\u02DC"
 245 unicode_fixes[0x99] = "\u2122"
 246 unicode_fixes[0x9A] = "\u0161"
 247 unicode_fixes[0x9B] = "\u203A"
 248 unicode_fixes[0x9C] = "\u0153"
 249 unicode_fixes[0x9E] = "\u017E"
 250 unicode_fixes[0x9F] = "\u0178"
 251
 252 # These are the character references that don't need a terminating semicolon
 253 # min length: 2, max: 6, none are a prefix of any other.
 254 legacy_char_refs = {
 255         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
 256         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
 257         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
 258         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
 259         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
 260         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
 261         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
 262         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
 263         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
 264         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
 265         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
 266         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
 267         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
 268         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
 269         shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
 270         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
 271         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
 272         yen: '¥', yuml: 'ÿ'
 273 }
 274
 275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
 276 raw_text_elements = ['script', 'style']
 277 escapable_raw_text_elements = ['textarea', 'title']
 278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
 279 svg_elements = [
 280         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
 281         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
 282         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
 283         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
 284         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
 285         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
 286         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
 287         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
 288         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
 289         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
 290         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
 291         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
 292         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
 293         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
 294         'view', 'vkern'
 295 ]
 296
 297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
 298 mathml_elements = [
 299         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
 300         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
 301         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
 302         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
 303         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
 304         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
 305         'determinant', 'diff', 'divergence', 'divide', 'domain',
 306         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
 307         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
 308         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
 309         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
 310         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
 311         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
 312         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
 313         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
 314         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
 315         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
 316         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
 317         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
 318         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
 319         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
 320         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
 321         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
 322         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
 323         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
 324         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
 325         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
 326         'vectorproduct', 'xor'
 327 ]
 328 # foreign_elements = [svg_elements..., mathml_elements...]
 329 #normal_elements = All other allowed HTML elements are normal elements.
 330
 331 special_elements = {
 332         # HTML:
 333         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
 334         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
 335         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
 336         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
 337         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
 338         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
 339         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
 340         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
 341         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
 342         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
 343         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML, meta:NS_HTML, nav:NS_HTML,
 344         noembed:NS_HTML, noframes:NS_HTML, noscript:NS_HTML, object:NS_HTML,
 345         ol:NS_HTML, p:NS_HTML, param:NS_HTML, plaintext:NS_HTML, pre:NS_HTML,
 346         script:NS_HTML, section:NS_HTML, select:NS_HTML, source:NS_HTML,
 347         style:NS_HTML, summary:NS_HTML, table:NS_HTML, tbody:NS_HTML, td:NS_HTML,
 348         template:NS_HTML, textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML,
 349         thead:NS_HTML, title:NS_HTML, tr:NS_HTML, track:NS_HTML, ul:NS_HTML,
 350         wbr:NS_HTML, xmp:NS_HTML,
 351
 352         # MathML:
 353         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
 354         'annotation-xml':NS_MATHML,
 355
 356         # SVG:
 357         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
 358 }
 359
 360 formatting_elements = {
 361          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
 362          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
 363          u: true
 364 }
 365
 366 mathml_text_integration = {
 367         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
 368 }
 369 is_mathml_text_integration_point = (el) ->
 370         return mathml_text_integration[el.name] is el.namespace
 371 is_html_integration = (el) -> # DON'T PASS A TOKEN
 372         if el.namespace is NS_MATHML
 373                 if el.name is 'annotation-xml'
 374                         if el.attrs.encoding?
 375                                 if el.attrs.encoding.toLowerCase() is 'text/html'
 376                                         return true
 377                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
 378                                         return true
 379                 return false
 380         if el.namespace is NS_SVG
 381                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
 382                         return true
 383         return false
 384
 385 h_tags = {
 386         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
 387 }
 388
 389 foster_parenting_targets = {
 390         table: NS_HTML
 391         tbody: NS_HTML
 392         tfoot: NS_HTML
 393         thead: NS_HTML
 394         tr: NS_HTML
 395 }
 396
 397 end_tag_implied = {
 398         dd: NS_HTML
 399         dt: NS_HTML
 400         li: NS_HTML
 401         option: NS_HTML
 402         optgroup: NS_HTML
 403         p: NS_HTML
 404         rb: NS_HTML
 405         rp: NS_HTML
 406         rt: NS_HTML
 407         rtc: NS_HTML
 408 }
 409
 410 el_is_special = (e) ->
 411         return special_elements[e.name] is e.namespace
 412
 413 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
 414 el_is_special_not_adp = (el) ->
 415         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
 416
 417 svg_name_fixes = {
 418         altglyph: 'altGlyph'
 419         altglyphdef: 'altGlyphDef'
 420         altglyphitem: 'altGlyphItem'
 421         animatecolor: 'animateColor'
 422         animatemotion: 'animateMotion'
 423         animatetransform: 'animateTransform'
 424         clippath: 'clipPath'
 425         feblend: 'feBlend'
 426         fecolormatrix: 'feColorMatrix'
 427         fecomponenttransfer: 'feComponentTransfer'
 428         fecomposite: 'feComposite'
 429         feconvolvematrix: 'feConvolveMatrix'
 430         fediffuselighting: 'feDiffuseLighting'
 431         fedisplacementmap: 'feDisplacementMap'
 432         fedistantlight: 'feDistantLight'
 433         fedropshadow: 'feDropShadow'
 434         feflood: 'feFlood'
 435         fefunca: 'feFuncA'
 436         fefuncb: 'feFuncB'
 437         fefuncg: 'feFuncG'
 438         fefuncr: 'feFuncR'
 439         fegaussianblur: 'feGaussianBlur'
 440         feimage: 'feImage'
 441         femerge: 'feMerge'
 442         femergenode: 'feMergeNode'
 443         femorphology: 'feMorphology'
 444         feoffset: 'feOffset'
 445         fepointlight: 'fePointLight'
 446         fespecularlighting: 'feSpecularLighting'
 447         fespotlight: 'feSpotLight'
 448         fetile: 'feTile'
 449         feturbulence: 'feTurbulence'
 450         foreignobject: 'foreignObject'
 451         glyphref: 'glyphRef'
 452         lineargradient: 'linearGradient'
 453         radialgradient: 'radialGradient'
 454         textpath: 'textPath'
 455 }
 456 svg_attribute_fixes = {
 457         attributename: 'attributeName'
 458         attributetype: 'attributeType'
 459         basefrequency: 'baseFrequency'
 460         baseprofile: 'baseProfile'
 461         calcmode: 'calcMode'
 462         clippathunits: 'clipPathUnits'
 463         contentscripttype: 'contentScriptType'
 464         contentstyletype: 'contentStyleType'
 465         diffuseconstant: 'diffuseConstant'
 466         edgemode: 'edgeMode'
 467         externalresourcesrequired: 'externalResourcesRequired'
 468         filterres: 'filterRes'
 469         filterunits: 'filterUnits'
 470         glyphref: 'glyphRef'
 471         gradienttransform: 'gradientTransform'
 472         gradientunits: 'gradientUnits'
 473         kernelmatrix: 'kernelMatrix'
 474         kernelunitlength: 'kernelUnitLength'
 475         keypoints: 'keyPoints'
 476         keysplines: 'keySplines'
 477         keytimes: 'keyTimes'
 478         lengthadjust: 'lengthAdjust'
 479         limitingconeangle: 'limitingConeAngle'
 480         markerheight: 'markerHeight'
 481         markerunits: 'markerUnits'
 482         markerwidth: 'markerWidth'
 483         maskcontentunits: 'maskContentUnits'
 484         maskunits: 'maskUnits'
 485         numoctaves: 'numOctaves'
 486         pathlength: 'pathLength'
 487         patterncontentunits: 'patternContentUnits'
 488         patterntransform: 'patternTransform'
 489         patternunits: 'patternUnits'
 490         pointsatx: 'pointsAtX'
 491         pointsaty: 'pointsAtY'
 492         pointsatz: 'pointsAtZ'
 493         preservealpha: 'preserveAlpha'
 494         preserveaspectratio: 'preserveAspectRatio'
 495         primitiveunits: 'primitiveUnits'
 496         refx: 'refX'
 497         refy: 'refY'
 498         repeatcount: 'repeatCount'
 499         repeatdur: 'repeatDur'
 500         requiredextensions: 'requiredExtensions'
 501         requiredfeatures: 'requiredFeatures'
 502         specularconstant: 'specularConstant'
 503         specularexponent: 'specularExponent'
 504         spreadmethod: 'spreadMethod'
 505         startoffset: 'startOffset'
 506         stddeviation: 'stdDeviation'
 507         stitchtiles: 'stitchTiles'
 508         surfacescale: 'surfaceScale'
 509         systemlanguage: 'systemLanguage'
 510         tablevalues: 'tableValues'
 511         targetx: 'targetX'
 512         targety: 'targetY'
 513         textlength: 'textLength'
 514         viewbox: 'viewBox'
 515         viewtarget: 'viewTarget'
 516         xchannelselector: 'xChannelSelector'
 517         ychannelselector: 'yChannelSelector'
 518         zoomandpan: 'zoomAndPan'
 519 }
 520 adjust_mathml_attributes = (t) ->
 521         for a in t.attrs_a
 522                 if a[0] is 'definitionurl'
 523                         a[0] = 'definitionURL'
 524         return
 525 adjust_svg_attributes = (t) ->
 526         for a in t.attrs_a
 527                 if svg_attribute_fixes[a[0]]?
 528                         a[0] = svg_attribute_fixes[a[0]]
 529         return
 530 adjust_foreign_attributes = (t) ->
 531         # fixfull
 532         return
 533
 534 # decode_named_char_ref()
 535 #
 536 # The list of named character references is _huge_ so ask the browser to decode
 537 # for us instead of wasting bandwidth/space on including the table here.
 538 #
 539 # Pass without the "&" but with the ";" examples:
 540 #    for "&amp" pass "amp;"
 541 #    for "&#x2032" pass "x2032;"
 542 g_dncr = {
 543         cache: {}
 544         textarea: document.createElement('textarea')
 545 }
 546 # TODO test this in IE8
 547 decode_named_char_ref = (txt) ->
 548         txt = "&#{txt}"
 549         decoded = g_dncr.cache[txt]
 550         return decoded if decoded?
 551         g_dncr.textarea.innerHTML = txt
 552         decoded = g_dncr.textarea.value
 553         return null if decoded is txt
 554         return g_dncr.cache[txt] = decoded
 555
 556 parse_html = (args) ->
 557         txt = null
 558         cur = null # index of next char in txt to be parsed
 559         # declare doc and tokenizer variables so they're in scope below
 560         doc = null
 561         open_els = null # stack of open elements
 562         afe = null # active formatting elements
 563         template_ins_modes = null
 564         ins_mode = null
 565         original_ins_mode = null
 566         tok_state = null
 567         tok_cur_tag = null # partially parsed tag
 568         flag_scripting = null
 569         flag_frameset_ok = null
 570         flag_parsing = null
 571         flag_foster_parenting = null
 572         form_element_pointer = null
 573         temporary_buffer = null
 574         pending_table_character_tokens = null
 575         head_element_pointer = null
 576         flag_fragment_parsing = null
 577         context_element = null
 578
 579         stop_parsing = ->
 580                 flag_parsing = false
 581
 582         parse_error = ->
 583                 if args.error_cb?
 584                         args.error_cb cur
 585                 else
 586                         console.log "Parse error at character #{cur} of #{txt.length}"
 587
 588         afe_push = (new_el) ->
 589                 matches = 0
 590                 for el, i in afe
 591                         if el.name is new_el.name and el.namespace is new_el.namespace
 592                                 for k, v of el.attrs
 593                                         continue unless new_el.attrs[k] is v
 594                                 for k, v of new_el.attrs
 595                                         continue unless el.attrs[k] is v
 596                                 matches += 1
 597                                 if matches is 3
 598                                         afe.splice i, 1
 599                                         break
 600                 afe.unshift new_el
 601         afe_push_marker = ->
 602                 afe.unshift new_afe_marker()
 603
  604         # the functions below implement the Tree Construction algorithm
 605         # http://www.w3.org/TR/html5/syntax.html#tree-construction
 606
 607         # But first... the helpers
 608         template_tag_is_open = ->
 609                 for t in open_els
 610                         if t.name is 'template' and t.namespace is NS_HTML
 611                                 return true
 612                 return false
 613         is_in_scope_x = (tag_name, scope, namespace) ->
 614                 for t in open_els
 615                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 616                                 return true
 617                         if scope[t.name] is t.namespace
 618                                 return false
 619                 return false
 620         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
 621                 for t in open_els
 622                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 623                                 return true
 624                         if scope[t.name] is t.namespace
 625                                 return false
 626                         if scope2[t.name] is t.namespace
 627                                 return false
 628                 return false
 629         standard_scopers = {
 630                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
 631                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
 632                 template: NS_HTML, mi: NS_MATHML,
 633
 634                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
 635                 'annotation-xml': NS_MATHML,
 636
 637                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
 638         }
 639         button_scopers = button: NS_HTML
 640         li_scopers = ol: NS_HTML, ul: NS_HTML
 641         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
 642         is_in_scope = (tag_name, namespace = null) ->
 643                 return is_in_scope_x tag_name, standard_scopers, namespace
 644         is_in_button_scope = (tag_name, namespace = null) ->
 645                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
 646         is_in_table_scope = (tag_name, namespace = null) ->
 647                 return is_in_scope_x tag_name, table_scopers, namespace
 648         # aka is_in_list_item_scope
 649         is_in_li_scope = (tag_name, namespace = null) ->
 650                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
 651         is_in_select_scope = (tag_name, namespace = null) ->
 652                 for t in open_els
 653                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 654                                 return true
 655                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
 656                                 return false
 657                 return false
 658         # this checks for a particular element, not by name
 659         # this requires a namespace match
 660         el_is_in_scope = (needle) ->
 661                 for el in open_els
 662                         if el is needle
 663                                 return true
 664                         if standard_scopers[el.name] is el.namespace
 665                                 return false
 666                 return false
 667
 668         clear_to_table_stopers = {
 669                 'table': true
 670                 'template': true
 671                 'html': true
 672         }
 673         clear_stack_to_table_context = ->
 674                 loop
 675                         if clear_to_table_stopers[open_els[0].name]?
 676                                 break
 677                         open_els.shift()
 678                 return
 679         clear_to_table_body_stopers = {
 680                 tbody: NS_HTML
 681                 tfoot: NS_HTML
 682                 thead: NS_HTML
 683                 template: NS_HTML
 684                 html: NS_HTML
 685         }
 686         clear_stack_to_table_body_context = ->
 687                 loop
 688                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
 689                                 break
 690                         open_els.shift()
 691                 return
 692         clear_to_table_row_stopers = {
 693                 'tr': true
 694                 'template': true
 695                 'html': true
 696         }
 697         clear_stack_to_table_row_context = ->
 698                 loop
 699                         if clear_to_table_row_stopers[open_els[0].name]?
 700                                 break
 701                         open_els.shift()
 702                 return
 703         clear_afe_to_marker = ->
 704                 loop
 705                         return unless afe.length > 0 # this happens in fragment case, ?spec error
 706                         el = afe.shift()
 707                         if el.type is TYPE_AFE_MARKER
 708                                 return
 709                 return
 710
 711         # 8.2.3.1 ...
 712         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
 713         reset_ins_mode = ->
 714                 # 1. Let last be false.
 715                 last = false
 716                 # 2. Let node be the last node in the stack of open elements.
 717                 node_i = 0
 718                 node = open_els[node_i]
 719                 # 3. Loop: If node is the first node in the stack of open elements,
 720                 # then set last to true, and, if the parser was originally created as
 721                 # part of the HTML fragment parsing algorithm (fragment case) set node
 722                 # to the context element.
 723                 loop
 724                         if node_i is open_els.length - 1
 725                                 last = true
 726                                 # fixfull (fragment case)
 727
 728                         # 4. If node is a select element, run these substeps:
 729                         if node.name is 'select' and node.namespace is NS_HTML
 730                                 # 1. If last is true, jump to the step below labeled done.
 731                                 unless last
 732                                         # 2. Let ancestor be node.
 733                                         ancestor_i = node_i
 734                                         ancestor = node
 735                                         # 3. Loop: If ancestor is the first node in the stack of
 736                                         # open elements, jump to the step below labeled done.
 737                                         loop
 738                                                 if ancestor_i is open_els.length - 1
 739                                                         break
 740                                                 # 4. Let ancestor be the node before ancestor in the stack
 741                                                 # of open elements.
 742                                                 ancestor_i += 1
 743                                                 ancestor = open_els[ancestor_i]
 744                                                 # 5. If ancestor is a template node, jump to the step below
 745                                                 # labeled done.
 746                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
 747                                                         break
 748                                                 # 6. If ancestor is a table node, switch the insertion mode
 749                                                 # to "in select in table" and abort these steps.
 750                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
 751                                                         ins_mode = ins_mode_in_select_in_table
 752                                                         return
 753                                                 # 7. Jump back to the step labeled loop.
 754                                 # 8. Done: Switch the insertion mode to "in select" and abort
 755                                 # these steps.
 756                                 ins_mode = ins_mode_in_select
 757                                 return
 758                         # 5. If node is a td or th element and last is false, then switch
 759                         # the insertion mode to "in cell" and abort these steps.
 760                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
 761                                 ins_mode = ins_mode_in_cell
 762                                 return
 763                         # 6. If node is a tr element, then switch the insertion mode to "in
 764                         # row" and abort these steps.
 765                         if node.name is 'tr' and node.namespace is NS_HTML
 766                                 ins_mode = ins_mode_in_row
 767                                 return
 768                         # 7. If node is a tbody, thead, or tfoot element, then switch the
 769                         # insertion mode to "in table body" and abort these steps.
 770                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
 771                                 ins_mode = ins_mode_in_table_body
 772                                 return
 773                         # 8. If node is a caption element, then switch the insertion mode
 774                         # to "in caption" and abort these steps.
 775                         if node.name is 'caption' and node.namespace is NS_HTML
 776                                 ins_mode = ins_mode_in_caption
 777                                 return
 778                         # 9. If node is a colgroup element, then switch the insertion mode
 779                         # to "in column group" and abort these steps.
 780                         if node.name is 'colgroup' and node.namespace is NS_HTML
 781                                 ins_mode = ins_mode_in_column_group
 782                                 return
 783                         # 10. If node is a table element, then switch the insertion mode to
 784                         # "in table" and abort these steps.
 785                         if node.name is 'table' and node.namespace is NS_HTML
 786                                 ins_mode = ins_mode_in_table
 787                                 return
 788                         # 11. If node is a template element, then switch the insertion mode
 789                         # to the current template insertion mode and abort these steps.
 790                         if node.name is 'template' and node.namespace is NS_HTML
 791                                 ins_mode = template_ins_modes[0]
 792                                 return
 793                         # 12. If node is a head element and last is true, then switch the
 794                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
 795                         # these steps. (fragment case)
 796                         if node.name is 'head' and node.namespace is NS_HTML and last
 797                                 ins_mode = ins_mode_in_body
 798                                 return
 799                         # 13. If node is a head element and last is false, then switch the
 800                         # insertion mode to "in head" and abort these steps.
 801                         if node.name is 'head' and node.namespace is NS_HTML and last is false
 802                                 ins_mode = ins_mode_in_head
 803                                 return
 804                         # 14. If node is a body element, then switch the insertion mode to
 805                         # "in body" and abort these steps.
 806                         if node.name is 'body' and node.namespace is NS_HTML
 807                                 ins_mode = ins_mode_in_body
 808                                 return
 809                         # 15. If node is a frameset element, then switch the insertion mode
 810                         # to "in frameset" and abort these steps. (fragment case)
 811                         if node.name is 'frameset' and node.namespace is NS_HTML
 812                                 ins_mode = ins_mode_in_frameset
 813                                 return
 814                         # 16. If node is an html element, run these substeps:
 815                         if node.name is 'html' and node.namespace is NS_HTML
 816                                 # 1. If the head element pointer is null, switch the insertion
 817                                 # mode to "before head" and abort these steps. (fragment case)
 818                                 if head_element_pointer is null
 819                                         ins_mode = ins_mode_before_head
 820                                 else
 821                                         # 2. Otherwise, the head element pointer is not null,
 822                                         # switch the insertion mode to "after head" and abort these
 823                                         # steps.
 824                                         ins_mode = ins_mode_after_head
 825                                 return
 826                         # 17. If last is true, then switch the insertion mode to "in body"
 827                         # and abort these steps. (fragment case)
 828                         if last
 829                                 ins_mode = ins_mode_in_body
 830                                 return
 831                         # 18. Let node now be the node before node in the stack of open
 832                         # elements.
 833                         node_i += 1
 834                         node = open_els[node_i]
 835                         # 19. Return to the step labeled loop.
 836
 837         # 8.2.3.2
 838
 839         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
 840         adjusted_current_node = ->
 841                 if open_els.length is 1 and flag_fragment_parsing
 842                         return context_element
 843                 return open_els[0]
 844
 845         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
 846         # this implementation is structured (mostly) as described at the link above.
 847         # capitalized comments are the "labels" described at the link above.
 848         reconstruct_afe = ->
 849                 return if afe.length is 0
 850                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
 851                         return
 852                 # Rewind
 853                 i = 0
 854                 loop
 855                         if i is afe.length - 1
 856                                 break
 857                         i += 1
 858                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
 859                                 i -= 1 # Advance
 860                                 break
 861                 # Create
 862                 loop
 863                         el = insert_html_element afe[i].token
 864                         afe[i] = el
 865                         break if i is 0
 866                         i -= 1 # Advance
 867
             # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
             # adoption agency algorithm
             # overview here:
             #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
             #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
             #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
             #
             # subject: tag name; it is compared against the .name of entries in
             # open_els and afe.
             #
             # Mutates open_els, afe and the children/parent links of the elements
             # involved. Every exit path is a bare return. The numbered comments
             # below are the step numbers from the spec section linked above.
             #
             # NOTE(review): the interpolated serialize_els calls in the debug_log
             # arguments are evaluated before debug_log is called.
             adoption_agency = (subject) ->
                     debug_log "adoption_agency()"
                     debug_log "tree: #{serialize_els doc.children, false, true}"
                     debug_log "open_els: #{serialize_els open_els, true, true}"
                     debug_log "afe: #{serialize_els afe, true, true}"
                     # Shortcut: the current node is already an HTML element named subject.
                     # Pop it, drop it from afe if it is there, and stop.
                     if open_els[0].name is subject and open_els[0].namespace is NS_HTML
                             el = open_els[0]
                             open_els.shift()
                             # remove it from the list of active formatting elements (if found)
                             for t, i in afe
                                     if t is el
                                             afe.splice i, 1
                                             break
                             debug_log "aaa: starting off with subject on top of stack, exiting"
                             return
                     # Outer loop: runs at most 8 times (the counter is checked before it
                     # is incremented).
                     outer = 0
                     loop
                             if outer >= 8
                                     return
                             outer += 1
                             # 5. Let formatting element be the last element in the list of
                             # active formatting elements that: is between the end of the list
                             # and the last scope marker in the list, if any, or the start of
                             # the list otherwise, and  has the tag name subject.
                             fe = null
                             # when this loop exits via the second break, fe_of_afe is the
                             # index of fe in afe (used by the splices below)
                             for t, fe_of_afe in afe
                                     if t.type is TYPE_AFE_MARKER
                                             break
                                     if t.name is subject
                                             fe = t
                                             break
                             # If there is no such element, then abort these steps and instead
                             # act as described in the "any other end tag" entry above.
                             if fe is null
                                     debug_log "aaa: fe not found in afe"
                                     in_body_any_other_end_tag subject
                                     return
                             # 6. If formatting element is not in the stack of open elements,
                             # then this is a parse error; remove the element from the list, and
                             # abort these steps.
                             in_open_els = false
                             # when found, fe_of_open_els is left holding the index of fe in
                             # open_els
                             for t, fe_of_open_els in open_els
                                     if t is fe
                                             in_open_els = true
                                             break
                             unless in_open_els
                                     debug_log "aaa: fe not found in open_els"
                                     parse_error()
                                     # "remove it from the list" must mean afe, since it's not in open_els
                                     afe.splice fe_of_afe, 1
                                     return
                             # 7. If formatting element is in the stack of open elements, but
                             # the element is not in scope, then this is a parse error; abort
                             # these steps.
                             unless el_is_in_scope fe
                                     debug_log "aaa: fe not in scope"
                                     parse_error()
                                     return
                             # 8. If formatting element is not the current node, this is a parse
                             # error. (But do not abort these steps.)
                             unless open_els[0] is fe
                                     parse_error()
                                     # continue
                             # 9. Let furthest block be the topmost node in the stack of open
                             # elements that is lower in the stack than formatting element, and
                             # is an element in the special category. There might not be one.
                             fb = null
                             fb_of_open_els = null
                             # scan from the current node (index 0) up to, but excluding, fe;
                             # each later match overwrites the earlier one, so the match
                             # closest to fe wins
                             for t, i in open_els
                                     if t is fe
                                             break
                                     if el_is_special t
                                             fb = t
                                             fb_of_open_els = i
                                             # and continue, to see if there's one that's more "topmost"
                             # 10. If there is no furthest block, then the UA must first pop all
                             # the nodes from the bottom of the stack of open elements, from the
                             # current node up to and including formatting element, then remove
                             # formatting element from the list of active formatting elements,
                             # and finally abort these steps.
                             if fb is null
                                     debug_log "aaa: no fb"
                                     loop
                                             t = open_els.shift()
                                             if t is fe
                                                     afe.splice fe_of_afe, 1
                                                     return
                             # 11. Let common ancestor be the element immediately above
                             # formatting element in the stack of open elements.
                             ca = open_els[fe_of_open_els + 1] # common ancestor

                             # fallback "element above node" for the inner loop, used once node
                             # has been removed from open_els
                             node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
                             # 12. Let a bookmark note the position of formatting element in the
                             # list of active formatting elements relative to the elements on
                             # either side of it in the list.
                             bookmark = new_aaa_bookmark()
                             # the bookmark is spliced in at fe's index, which shifts fe one
                             # index higher
                             for t, i in afe
                                     if t is fe
                                             afe.splice i, 0, bookmark
                                             break
                             # 13. Let node and last node be furthest block; then run the inner
                             # loop (sub-steps numbered 3. to 11. below).
                             node = last_node = fb
                             inner = 0
                             loop
                                     inner += 1
                                     # 3. Let node be the element immediately above node in the
                                     # stack of open elements, or if node is no longer in the stack
                                     # of open elements (e.g. because it got removed by this
                                     # algorithm), the element that was immediately above node in
                                     # the stack of open elements before node was removed.
                                     node_next = null
                                     for t, i in open_els
                                             if t is node
                                                     node_next = open_els[i + 1]
                                                     break
                                     # node_next stays null when node is no longer in open_els; in
                                     # that case fall back to node_above
                                     node = node_next ? node_above
                                     debug_log "inner loop #{inner}"
                                     debug_log "tree: #{serialize_els doc.children, false, true}"
                                     debug_log "open_els: #{serialize_els open_els, true, true}"
                                     debug_log "afe: #{serialize_els afe, true, true}"
                                     debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
                                     debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
                                     debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
                                     debug_log "node: #{node.serialize true, true}"
                                     # TODO make sure node_above gets re-set if/when node is removed from open_els
                                     # (both places below that remove/replace node in open_els
                                     # assign node_above first)

                                     # 4. If node is formatting element, then go to the next step in
                                     # the overall algorithm.
                                     if node is fe
                                             break
                                     debug_log "the meat"
                                     # 5. If inner loop counter is greater than three and node is in
                                     # the list of active formatting elements, then remove node from
                                     # the list of active formatting elements.
                                     node_in_afe = false
                                     for t, i in afe
                                             if t is node
                                                     if inner > 3
                                                             # removed from afe, so node_in_afe stays false
                                                             # and step 6 below also drops it from open_els
                                                             afe.splice i, 1
                                                             debug_log "max out inner"
                                                     else
                                                             node_in_afe = true
                                                             debug_log "in afe"
                                                     break
                                     # 6. If node is not in the list of active formatting elements,
                                     # then remove node from the stack of open elements and then go
                                     # back to the step labeled inner loop.
                                     unless node_in_afe
                                             debug_log "not in afe"
                                             for t, i in open_els
                                                     if t is node
                                                             # remember what was above node before removing it
                                                             node_above = open_els[i + 1]
                                                             open_els.splice i, 1
                                                             break
                                             continue
                                     debug_log "the bones"
                                     # 7. create an element for the token for which the element node
                                     # was created, in the HTML namespace, with common ancestor as
                                     # the intended parent; replace the entry for node in the list
                                     # of active formatting elements with an entry for the new
                                     # element, replace the entry for node in the stack of open
                                     # elements with an entry for the new element, and let node be
                                     # the new element.
                                     new_node = token_to_element node.token, NS_HTML, ca
                                     for t, i in afe
                                             if t is node
                                                     afe[i] = new_node
                                                     debug_log "replaced in afe"
                                                     break
                                     for t, i in open_els
                                             if t is node
                                                     node_above = open_els[i + 1]
                                                     open_els[i] = new_node
                                                     debug_log "replaced in open_els"
                                                     break
                                     node = new_node
                                     # 8. If last node is furthest block, then move the
                                     # aforementioned bookmark to be immediately after the new node
                                     # in the list of active formatting elements.
                                     if last_node is fb
                                             # take the bookmark out of its current position...
                                             for t, i in afe
                                                     if t is bookmark
                                                             afe.splice i, 1
                                                             debug_log "removed bookmark"
                                                             break
                                             # ...and re-insert it at node's index
                                             for t, i in afe
                                                     if t is node
                                                             # "after" means lower
                                                             afe.splice i, 0, bookmark # bookmark takes index i, node moves to i + 1
                                                             debug_log "placed bookmark after node"
                                                             debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
                                                             break
                                     # 9. Insert last node into node, first removing it from its
                                     # previous parent node if any.
                                     if last_node.parent?
                                             debug_log "last_node has parent"
                                             for c, i in last_node.parent.children
                                                     if c is last_node
                                                             debug_log "removing last_node from parent"
                                                             last_node.parent.children.splice i, 1
                                                             break
                                     node.children.push last_node
                                     last_node.parent = node
                                     # 10. Let last node be node.
                                     last_node = node
                                     debug_log "at last"
                                     # 11. Return to the step labeled inner loop.
                             # 14. Insert whatever last node ended up being in the previous step
                             # at the appropriate place for inserting a node, but using common
                             # ancestor as the override target.

                             # In the case where fe is immediately followed by fb:
                             #   * inner loop exits out early (node==fe)
                             #   * last_node is fb
                             #   * last_node is still in the tree (not a duplicate)
                             # so detach last_node from its current parent before inserting it
                             if last_node.parent?
                                     debug_log "FEFIRST? last_node has parent"
                                     for c, i in last_node.parent.children
                                             if c is last_node
                                                     debug_log "removing last_node from parent"
                                                     last_node.parent.children.splice i, 1
                                                     break

                             debug_log "after aaa inner loop"
                             debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
                             debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
                             debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
                             debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
                             debug_log "tree: #{serialize_els doc.children, false, true}"

                             debug_log "insert"


                             # can't use standard insert token thing, because it's already in
                             # open_els and must stay at its current position in open_els
                             # dest is used as [parent element, index into parent.children]
                             dest = adjusted_insertion_location ca
                             dest[0].children.splice dest[1], 0, last_node
                             last_node.parent = dest[0]


                             debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
                             debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
                             debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
                             debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
                             debug_log "tree: #{serialize_els doc.children, false, true}"

                             # 15. Create an element for the token for which formatting element
                             # was created, in the HTML namespace, with furthest block as the
                             # intended parent.
                             new_element = token_to_element fe.token, NS_HTML, fb
                             # 16. Take all of the child nodes of furthest block and append them
                             # to the element created in the last step.
                             while fb.children.length
                                     t = fb.children.shift()
                                     t.parent = new_element
                                     new_element.children.push t
                             # 17. Append that new element to furthest block.
                             new_element.parent = fb
                             fb.children.push new_element
                             # 18. Remove formatting element from the list of active formatting
                             # elements, and insert the new element into the list of active
                             # formatting elements at the position of the aforementioned
                             # bookmark.
                             for t, i in afe
                                     if t is fe
                                             afe.splice i, 1
                                             break
                             # the bookmark entry itself is overwritten by new_element
                             for t, i in afe
                                     if t is bookmark
                                             afe[i] = new_element
                                             break
                             # 19. Remove formatting element from the stack of open elements,
                             # and insert the new element into the stack of open elements
                             # immediately below the position of furthest block in that stack.
                             for t, i in open_els
                                     if t is fe
                                             open_els.splice i, 1
                                             break
                             # inserting at fb's index puts new_element one index lower than fb
                             for t, i in open_els
                                     if t is fb
                                             open_els.splice i, 0, new_element
                                             break
                             # 20. Jump back to the step labeled outer loop.
                             debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
                             debug_log "tree: #{serialize_els doc.children, false, true}"
                             debug_log "open_els: #{serialize_els open_els, true, true}"
                             debug_log "afe: #{serialize_els afe, true, true}"
                     # NOTE(review): not reached as written; the outer loop above has no
                     # break of its own and only exits via return.
                     debug_log "AAA DONE"
1159
1160         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1161         close_p_element = ->
                     # Close the open HTML 'p' element: generate implied end tags
                     # (leaving any 'p' in place), report a parse error if the current
                     # node is then not an HTML 'p', and pop open_els until a 'p' has
                     # been popped.
1162                 generate_implied_end_tags 'p' # arg is exception
1163                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1164                         parse_error()
                     # The length guard stops before popping the last remaining entry,
                     # so the loop terminates even if no 'p' is on the stack.
1165                 while open_els.length > 1 # just in case
1166                         el = open_els.shift()
1167                         if el.name is 'p' and el.namespace is NS_HTML
1168                                 return
1169         close_p_if_in_button_scope = ->
                     # Convenience wrapper: call close_p_element() only when an HTML 'p'
                     # is reported to be in button scope.
1170                 if is_in_button_scope 'p', NS_HTML
1171                         close_p_element()
1172
1173         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1174         # aka insert_a_character = (t) ->
1175         insert_character = (t) ->
                     # Insert text token t at the adjusted insertion location.
                     # dest is a [node, index] pair as returned by
                     # adjusted_insertion_location().
1176                 dest = adjusted_insertion_location()
1177                 # fixfull check for Document node
                     # If the child just before the insertion point is a text node,
                     # append to it instead of adding a new child.
1178                 if dest[1] > 0
1179                         prev = dest[0].children[dest[1] - 1]
1180                         if prev.type is TYPE_TEXT
1181                                 prev.text += t.text
1182                                 return
1183                 dest[0].children.splice dest[1], 0, t
1184
1185
1186         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1187         process_token = (t) ->
                     # Tree-construction dispatcher. Based on the adjusted current node,
                     # token t is handed either to the current insertion mode (ins_mode)
                     # or to the foreign-content rules (in_foreign_content). Each branch
                     # below that matches sends the token to ins_mode and returns; only
                     # a token that matches none of them reaches in_foreign_content.
1188                 acn = adjusted_current_node()
                     # No adjusted current node at all
1189                 unless acn?
1190                         ins_mode t
1191                         return
                     # Adjusted current node is in the HTML namespace
1192                 if acn.namespace is NS_HTML
1193                         ins_mode t
1194                         return
                     # MathML text integration point: start tags other than
                     # mglyph/malignmark fall through; text always goes to ins_mode
1195                 if is_mathml_text_integration_point(acn)
1196                         if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
1197                                 ins_mode t
1198                                 return
1199                         if t.type is TYPE_TEXT
1200                                 ins_mode t
1201                                 return
                     # <svg> start tag directly inside MathML annotation-xml
1202                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1203                         ins_mode t
1204                         return
                     # HTML integration point: start tags and text
1205                 if is_html_integration acn
1206                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1207                                 ins_mode t
1208                                 return
                     # End of file is always handled by the insertion mode
1209                 if t.type is TYPE_EOF
1210                         ins_mode t
1211                         return
1212                 in_foreign_content t
1213                 return
1214
1215         # 8.2.5.1
1216         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1217         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1218         adjusted_insertion_location = (override_target = null) ->
                     # Compute the "appropriate place for inserting a node".
                     #
                     # override_target: optional node to use instead of the current node
                     # (open_els[0]).
                     #
                     # Returns a two-element array [target, target_i]: the node to insert
                     # into, and the index within target.children at which to insert.
                     #
                     # When foster parenting is enabled and the target is one of
                     # foster_parenting_targets, the location is redirected according to
                     # the numbered substeps below; otherwise it is the end of target's
                     # children.
                     #
1219                 # 1. If there was an override target specified, then let target be the
1220                 # override target.
1221                 if override_target?
1222                         target = override_target
1223                 else # Otherwise, let target be the current node.
1224                         target = open_els[0]
1225                 # 2. Determine the adjusted insertion location using the first matching
1226                 # steps from the following list:
1227                 #
1228                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1229                 # thead, or tr element Foster parenting happens when content is
1230                 # misnested in tables.
1231                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1232                         loop # once. this is here so we can ``break`` to "abort these substeps"
1233                                 # 1. Let last template be the last template element in the
1234                                 # stack of open elements, if any.
                                     # (open_els is scanned from index 0, the current node, so
                                     # the first match is the most recently added one.)
1235                                 last_template = null
1236                                 last_template_i = null
1237                                 for el, i in open_els
1238                                         if el.name is 'template' and el.namespace is NS_HTML
1239                                                 last_template = el
1240                                                 last_template_i = i
1241                                                 break
1242                                 # 2. Let last table be the last table element in the stack of
1243                                 # open elements, if any.
1244                                 last_table = null
                                     # Explicitly initialised, like last_template_i above (this
                                     # line used to be a bare expression with no effect).
1245                                 last_table_i = null
1246                                 for el, i in open_els
1247                                         if el.name is 'table' and el.namespace is NS_HTML
1248                                                 last_table = el
1249                                                 last_table_i = i
1250                                                 break
1251                                 # 3. If there is a last template and either there is no last
1252                                 # table, or there is one, but last template is lower (more
1253                                 # recently added) than last table in the stack of open
1254                                 # elements, then: let adjusted insertion location be inside
1255                                 # last template's template contents, after its last child (if
1256                                 # any), and abort these substeps.
1257                                 if last_template and (last_table is null or last_template_i < last_table_i)
1258                                         target = last_template # fixfull should be its contents
1259                                         target_i = target.children.length
1260                                         break
1261                                 # 4. If there is no last table, then let adjusted insertion
1262                                 # location be inside the first element in the stack of open
1263                                 # elements (the html element), after its last child (if any),
1264                                 # and abort these substeps. (fragment case)
1265                                 if last_table is null
1266                                         # this is odd
1267                                         target = open_els[open_els.length - 1]
1268                                         target_i = target.children.length
1269                                         break
1270                                 # 5. If last table has a parent element, then let adjusted
1271                                 # insertion location be inside last table's parent element,
1272                                 # immediately before last table, and abort these substeps.
1273                                 if last_table.parent?
1274                                         for c, i in last_table.parent.children
1275                                                 if c is last_table
1276                                                         target = last_table.parent
1277                                                         target_i = i
1278                                                         break
1279                                         break
1280                                 # 6. Let previous element be the element immediately above last
1281                                 # table in the stack of open elements.
1282                                 #
1283                                 # huh? how could it not have a parent?
1284                                 previous_element = open_els[last_table_i + 1]
1285                                 # 7. Let adjusted insertion location be inside previous
1286                                 # element, after its last child (if any).
1287                                 target = previous_element
1288                                 target_i = target.children.length
1289                                 # Note: These steps are involved in part because it's possible
1290                                 # for elements, the table element in this case in particular,
1291                                 # to have been moved by a script around in the DOM, or indeed
1292                                 # removed from the DOM entirely, after the element was inserted
1293                                 # by the parser.
1294                                 break # don't really loop
1295                 else
1296                         # Otherwise Let adjusted insertion location be inside target, after
1297                         # its last child (if any).
1298                         target_i = target.children.length
1299
1300                 # 3. If the adjusted insertion location is inside a template element,
1301                 # let it instead be inside the template element's template contents,
1302                 # after its last child (if any).
1303                 # fixfull (template)
1304
1305                 # 4. Return the adjusted insertion location.
1306                 return [target, target_i]
1307
1308         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1309         # aka create_an_element_for_token
1310         token_to_element = (t, namespace, intended_parent) ->
                     # Build a TYPE_TAG Node from tag token t in the given namespace.
                     # The node keeps a reference to the token (token: t). The
                     # intended_parent argument is accepted but not used here.
                     # Returns the new Node; it is not inserted anywhere.
                     #
1311                 # convert attributes into a hash
                     # t.attrs_a is read as a list of [name, value] pairs; when a name
                     # occurs more than once, the entry later in attrs_a overwrites the
                     # earlier one.
1312                 attrs = {}
1313                 for a in t.attrs_a
1314                         attrs[a[0]] = a[1] # TODO check what to do with duplicate attrs
1315                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1316
1317                 # TODO 2. If the newly created element has an xmlns attribute in the
1318                 # XMLNS namespace whose value is not exactly the same as the element's
1319                 # namespace, that is a parse error. Similarly, if the newly created
1320                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1321                 # value is not the XLink Namespace, that is a parse error.
1322
1323                 # fixfull: the spec says stuff about form pointers and ownerDocument
1324
1325                 return el
1326
1327         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1328         insert_foreign_element = (token, namespace) ->
                     # Create an element for token in the given namespace, insert it at
                     # the adjusted insertion location, and make it the current node by
                     # unshifting it onto open_els. Returns the new element.
1329                 ail = adjusted_insertion_location()
1330                 ail_el = ail[0]
1331                 ail_i = ail[1]
1332                 el = token_to_element token, namespace, ail_el
1333                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
                     # Link both directions: child -> parent, and parent.children -> child
1334                 el.parent = ail_el
1335                 ail_el.children.splice ail_i, 0, el
1336                 open_els.unshift el
1337                 return el
1338         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1339         insert_html_element = (token) ->
                     # Shorthand for insert_foreign_element with the HTML namespace;
                     # returns whatever insert_foreign_element returns (the new element).
1340                 insert_foreign_element token, NS_HTML
1341
1342         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1343         # position should be [node, index_within_children]
1344         insert_comment = (t, position = null) ->
                     # Insert comment token t into the tree. When no position is given,
                     # the adjusted insertion location is used. The token object itself
                     # is spliced into position[0].children at index position[1].
1345                 position ?= adjusted_insertion_location()
1346                 position[0].children.splice position[1], 0, t
1347
1348         # 8.2.5.2
1349         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1350         parse_generic_raw_text = (t) ->
                     # Generic raw text element parsing: insert an HTML element for t,
                     # switch the tokenizer to the RAWTEXT state, remember the current
                     # insertion mode in original_ins_mode, and switch to ins_mode_text.
1351                 insert_html_element t
1352                 tok_state = tok_state_rawtext
1353                 original_ins_mode = ins_mode
1354                 ins_mode = ins_mode_text
1355         parse_generic_rcdata_text = (t) ->
                     # Generic RCDATA element parsing: same as parse_generic_raw_text
                     # except that the tokenizer is switched to the RCDATA state.
1356                 insert_html_element t
1357                 tok_state = tok_state_rcdata
1358                 original_ins_mode = ins_mode
1359                 ins_mode = ins_mode_text
1360
1361         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1362         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1363         generate_implied_end_tags = (except = null) ->
                     # Pop the current node (open_els[0]) for as long as
                     # end_tag_implied[name] equals its namespace and its name is not
                     # `except`.
                     # NOTE(review): reads open_els[0] without a length check, so this
                     # assumes the stack never becomes empty here -- confirm.
1364                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1365                         open_els.shift()
1366
1367         # 8.2.5.4 The rules for parsing tokens in HTML content
1368         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1369
1370         # 8.2.5.4.1 The "initial" insertion mode
1371         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1372         ins_mode_initial = (t) ->
                     # "initial" insertion mode. Whitespace tokens are ignored; comment
                     # and doctype tokens are appended directly to doc.children; any
                     # other token switches to "before html" and is reprocessed.
1373                 if is_space_tok t
1374                         return
1375                 if t.type is TYPE_COMMENT
1376                         # ?fixfull
1377                         doc.children.push t
1378                         return
1379                 if t.type is TYPE_DOCTYPE
1380                         # FIXME check identifiers, set quirks, etc
1381                         # fixfull
1382                         doc.children.push t
1383                         ins_mode = ins_mode_before_html
1384                         return
1385                 # Anything else
1386                 #fixfull (iframe, quirks)
1387                 ins_mode = ins_mode_before_html
1388                 process_token t
1389                 return
1390
1391         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1392         ins_mode_before_html = (t) ->
                     # "before html" insertion mode. Rules are tried in order and the
                     # first match wins. An explicit <html> start tag creates the root
                     # element from that token; anything that falls through to the
                     # bottom gets a synthesized <html> element and is then reprocessed
                     # in "before head".
1393                 if t.type is TYPE_DOCTYPE
1394                         parse_error()
1395                         return
1396                 if t.type is TYPE_COMMENT
1397                         doc.children.push t
1398                         return
1399                 if is_space_tok t
1400                         return
1401                 if t.type is TYPE_START_TAG and t.name is 'html'
                             # The root element is appended to doc.children directly (not
                             # via insert_html_element) and becomes the only open element.
1402                         el = token_to_element t, NS_HTML, doc
1403                         doc.children.push el
1404                         open_els.unshift(el)
1405                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1406                         ins_mode = ins_mode_before_head
1407                         return
                     # End tags other than head/body/html/br are a parse error and are
                     # dropped; those four fall through to "anything else".
1408                 if t.type is TYPE_END_TAG
1409                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1410                                 # fall through to "anything else"
1411                         else
1412                                 parse_error()
1413                                 return
1414                 # Anything else
1415                 html_tok = new_open_tag 'html'
1416                 el = token_to_element html_tok, NS_HTML, doc
1417                 doc.children.push el
1418                 open_els.unshift el
1419                 # ?fixfull browsing context
1420                 ins_mode = ins_mode_before_head
1421                 process_token t
1422                 return
1423
1424         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1425         ins_mode_before_head = (t) ->
                     # "before head" insertion mode. Rules are tried in order and the
                     # first match wins. A <head> start tag inserts the head element and
                     # records it in head_element_pointer; anything that falls through
                     # to the bottom gets a synthesized <head> and is then reprocessed
                     # in "in head".
1426                 if is_space_tok t
1427                         return
1428                 if t.type is TYPE_COMMENT
1429                         insert_comment t
1430                         return
1431                 if t.type is TYPE_DOCTYPE
1432                         parse_error()
1433                         return
                     # <html> start tags are handled by the "in body" rules
1434                 if t.type is TYPE_START_TAG and t.name is 'html'
1435                         ins_mode_in_body t
1436                         return
1437                 if t.type is TYPE_START_TAG and t.name is 'head'
1438                         el = insert_html_element t
1439                         head_element_pointer = el
1440                         ins_mode = ins_mode_in_head
1441                         return
                     # End tags other than head/body/html/br are a parse error and are
                     # dropped; those four fall through to "anything else".
1442                 if t.type is TYPE_END_TAG
1443                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1444                                 # fall through to Anything else below
1445                         else
1446                                 parse_error()
1447                                 return
1448                 # Anything else
1449                 head_tok = new_open_tag 'head'
1450                 el = insert_html_element head_tok
1451                 head_element_pointer = el
1452                 ins_mode = ins_mode_in_head
1453                 process_token t
1454
1455         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1456         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
                     # "Anything else" rule of the "in head" mode: pop the current node,
                     # switch to "after head", and reprocess t there.
1457                 open_els.shift() # spec says this will be a 'head' node
1458                 ins_mode = ins_mode_after_head
1459                 process_token t
1460         ins_mode_in_head = (t) ->
                     # "in head" insertion mode. Rules are tried in order and the first
                     # match wins; a token matching none of them is handed to
                     # ins_mode_in_head_else (pop head, reprocess in "after head").
                     #
                     # Whitespace text (tab, LF, FF, space)
1461                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1462                         insert_character t
1463                         return
1464                 if t.type is TYPE_COMMENT
1465                         insert_comment t
1466                         return
1467                 if t.type is TYPE_DOCTYPE
1468                         parse_error()
1469                         return
                     # <html> start tags are handled by the "in body" rules
1470                 if t.type is TYPE_START_TAG and t.name is 'html'
1471                         ins_mode_in_body t
1472                         return
                     # Void head elements: insert, then pop straight away
1473                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1474                         el = insert_html_element t
1475                         open_els.shift()
1476                         t.acknowledge_self_closing()
1477                         return
1478                 if t.type is TYPE_START_TAG and t.name is 'meta'
1479                         el = insert_html_element t
1480                         open_els.shift()
1481                         t.acknowledge_self_closing()
1482                         # fixfull encoding stuff
1483                         return
1484                 if t.type is TYPE_START_TAG and t.name is 'title'
1485                         parse_generic_rcdata_text t
1486                         return
1487                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1488                         parse_generic_raw_text t
1489                         return
1490                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1491                         insert_html_element t
1492                         ins_mode = ins_mode_in_head_noscript
1493                         return
1494                 if t.type is TYPE_START_TAG and t.name is 'script'
1495                         ail = adjusted_insertion_location()
                             # ail is a [node, index] pair; the intended parent is the node
1496                         el = token_to_element t, NS_HTML, ail[0]
1497                         el.flag 'parser-inserted', true
1498                         # fixfull fragment case
                             # Link the element to its parent, as insert_foreign_element
                             # does for every other inserted element.
                             el.parent = ail[0]
1499                         ail[0].children.splice ail[1], 0, el
1500                         open_els.unshift el
1501                         tok_state = tok_state_script_data
1502                         original_ins_mode = ins_mode # make sure orig... is defined
1503                         ins_mode = ins_mode_text
1504                         return
1505                 if t.type is TYPE_END_TAG and t.name is 'head'
1506                         open_els.shift() # will be a head element... spec says so
1507                         ins_mode = ins_mode_after_head
1508                         return
1509                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1510                         ins_mode_in_head_else t
1511                         return
1512                 if t.type is TYPE_START_TAG and t.name is 'template'
1513                         insert_html_element t
1514                         afe_push_marker()
1515                         flag_frameset_ok = false
1516                         ins_mode = ins_mode_in_template
1517                         template_ins_modes.unshift ins_mode_in_template
1518                         return
1519                 if t.type is TYPE_END_TAG and t.name is 'template'
1520                         if template_tag_is_open()
                                     # Must be an actual call: a bare reference to the
                                     # function (no parentheses, no arguments) does nothing.
1521                                 generate_implied_end_tags()
1522                                 if open_els[0].name isnt 'template'
1523                                         parse_error()
                                     # Pop up to and including the template element
1524                                 loop
1525                                         el = open_els.shift()
1526                                         if el.name is 'template' and el.namespace is NS_HTML
1527                                                 break
1528                                 clear_afe_to_marker()
1529                                 template_ins_modes.shift()
1530                                 reset_ins_mode()
1531                         else
1532                                 parse_error()
1533                         return
                     # A second <head>, or any end tag not handled above, is ignored
1534                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1535                         parse_error()
1536                         return
1537                 ins_mode_in_head_else t
1538
1539         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1540         ins_mode_in_head_noscript_else = (t) ->
                     # "Anything else" rule of the "in head noscript" mode: report a
                     # parse error, pop the current node, switch back to "in head", and
                     # reprocess t there.
1541                 parse_error()
1542                 open_els.shift()
1543                 ins_mode = ins_mode_in_head
1544                 process_token t
1545         ins_mode_in_head_noscript = (t) ->
                     # "in head noscript" insertion mode. Rules are tried in order and
                     # the first match wins; anything unmatched goes to
                     # ins_mode_in_head_noscript_else.
1546                 if t.type is TYPE_DOCTYPE
1547                         parse_error()
1548                         return
                     # <html> start tags are handled by the "in body" rules
1549                 if t.type is TYPE_START_TAG and t.name is 'html'
1550                         ins_mode_in_body t
1551                         return
                     # </noscript>: pop the current node and return to "in head"
1552                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1553                         open_els.shift()
1554                         ins_mode = ins_mode_in_head
1555                         return
                     # Whitespace, comments and these start tags use the "in head" rules
1556                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1557                         ins_mode_in_head t
1558                         return
                     # </br> must be checked before the generic end-tag rule below
1559                 if t.type is TYPE_END_TAG and t.name is 'br'
1560                         ins_mode_in_head_noscript_else t
1561                         return
                     # <head>, nested <noscript>, and every other end tag are ignored
1562                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1563                         parse_error()
1564                         return
1565                 # Anything else
1566                 ins_mode_in_head_noscript_else t
1567                 return
1568
1569
1570
1571         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1572         ins_mode_after_head_else = (t) ->
                     # "Anything else" rule of the "after head" mode: insert a
                     # synthesized <body> element, switch to "in body", and reprocess t.
1573                 body_tok = new_open_tag 'body'
1574                 insert_html_element body_tok
1575                 ins_mode = ins_mode_in_body
1576                 process_token t
1577                 return
1578         ins_mode_after_head = (t) ->
                     # "after head" insertion mode. Rules are tried in order and the
                     # first match wins; anything unmatched goes to
                     # ins_mode_after_head_else (synthesize <body>, reprocess in "in
                     # body").
1579                 if is_space_tok t
1580                         insert_character t
1581                         return
1582                 if t.type is TYPE_COMMENT
1583                         insert_comment t
1584                         return
1585                 if t.type is TYPE_DOCTYPE
1586                         parse_error()
1587                         return
                     # <html> start tags are handled by the "in body" rules
1588                 if t.type is TYPE_START_TAG and t.name is 'html'
1589                         ins_mode_in_body t
1590                         return
1591                 if t.type is TYPE_START_TAG and t.name is 'body'
1592                         insert_html_element t
1593                         flag_frameset_ok = false
1594                         ins_mode = ins_mode_in_body
1595                         return
1596                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1597                         insert_html_element t
1598                         ins_mode = ins_mode_in_frameset
1599                         return
                     # Head-only start tags seen after </head>: temporarily put the head
                     # element back on the stack, process the token with the "in head"
                     # rules, then remove the head element from the stack again.
1600                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1601                         parse_error()
1602                         open_els.unshift head_element_pointer
1603                         ins_mode_in_head t
                             # The head element may no longer be the current node (the
                             # "in head" rules can push elements), so search for it.
                             # Array iteration needs `in`; `of` would bind el to the
                             # index key and never match.
1604                         for el, i in open_els
1605                                 if el is head_element_pointer
1606                                         open_els.splice i, 1
1607                                         return
1608                         console.log "warning: 23904 couldn't find head element in open_els"
1609                         return
1610                 if t.type is TYPE_END_TAG and t.name is 'template'
1611                         ins_mode_in_head t
1612                         return
1613                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1614                         ins_mode_after_head_else t
1615                         return
                     # A second <head>, or any end tag not handled above, is ignored
1616                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1617                         parse_error()
1618                         return
1619                 # Anything else
1620                 ins_mode_after_head_else t
1621
1622         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1623         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
                     # "Any other end tag" rule of the "in body" mode.
                     #
                     # Walks open_els from the current node (index 0) upward. On the
                     # first HTML element called `name`: generate implied end tags
                     # (except for `name`), report a parse error if that element is not
                     # then the current node, and pop up to and including it. If an
                     # element listed in special_elements is met first, report a parse
                     # error and ignore the end tag.
1624                 for el, i in open_els
1625                         if el.name is name and el.namespace is NS_HTML
1626                                 generate_implied_end_tags name # arg is exception
                                     # generate_implied_end_tags may have popped entries, so
                                     # the index i is stale from here on; compare and pop by
                                     # element identity instead of by position.
1627                                 parse_error() unless open_els[0] is el
1628                                 while open_els.length > 0
1629                                         popped = open_els.shift()
1630                                         break if popped is el
1631                                 return
1632                         if special_elements[el.name] is el.namespace
1633                                 parse_error()
1634                                 return
1635                 return
1636         ins_mode_in_body = (t) ->
1637                 if t.type is TYPE_TEXT and t.text is "\u0000"
1638                         parse_error()
1639                         return
1640                 if is_space_tok t
1641                         reconstruct_afe()
1642                         insert_character t
1643                         return
1644                 if t.type is TYPE_TEXT
1645                         reconstruct_afe()
1646                         insert_character t
1647                         flag_frameset_ok = false
1648                         return
1649                 if t.type is TYPE_COMMENT
1650                         insert_comment t
1651                         return
1652                 if t.type is TYPE_DOCTYPE
1653                         parse_error()
1654                         return
1655                 if t.type is TYPE_START_TAG and t.name is 'html'
1656                         parse_error()
1657                         return if template_tag_is_open()
1658                         root_attrs = open_els[open_els.length - 1].attrs
1659                         for a of t.attrs_a
1660                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1661                         return
1662
1663                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1664                         ins_mode_in_head t
1665                         return
1666                 if t.type is TYPE_START_TAG and t.name is 'body'
1667                         parse_error()
1668                         return if open_els.length < 2
1669                         second = open_els[open_els.length - 2]
1670                         return unless second.namespace is NS_HTML
1671                         return unless second.name is 'body'
1672                         return if template_tag_is_open()
1673                         flag_frameset_ok = false
1674                         for a of t.attrs_a
1675                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1676                         return
1677                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1678                         parse_error()
1679                         return if open_els.length < 2
1680                         second_i = open_els.length - 2
1681                         second = open_els[second_i]
1682                         return unless second.namespace is NS_HTML
1683                         return unless second.name is 'body'
1684                         if flag_frameset_ok is false
1685                                 return
1686                         if second.parent?
1687                                 for el, i in second.parent.children
1688                                         if el is second
1689                                                 second.parent.children.splice i, 1
1690                                                 break
1691                         open_els.splice second_i, 1
1692                         # pop everything except the "root html element"
1693                         while open_els.length > 1
1694                                 open_els.shift()
1695                         insert_html_element t
1696                         ins_mode = ins_mode_in_frameset
1697                         return
1698                 if t.type is TYPE_EOF
1699                         ok_tags = {
1700                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1701                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1702                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1703                         }
1704                         for el in open_els
1705                                 unless ok_tags[t.name] is el.namespace
1706                                         parse_error()
1707                                         break
1708                         if template_ins_modes.length > 0
1709                                 ins_mode_in_template t
1710                         else
1711                                 stop_parsing()
1712                         return
1713                 if t.type is TYPE_END_TAG and t.name is 'body'
1714                         unless is_in_scope 'body', NS_HTML
1715                                 parse_error()
1716                                 return
1717                         ok_tags = {
1718                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1719                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1720                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1721                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1722                                 html:NS_HTML
1723                         }
1724                         for el in open_els
1725                                 unless ok_tags[t.name] is el.namespace
1726                                         parse_error()
1727                                         break
1728                         ins_mode = ins_mode_after_body
1729                         return
1730                 if t.type is TYPE_END_TAG and t.name is 'html'
1731                         unless is_in_scope 'body', NS_HTML
1732                                 parse_error()
1733                                 return
1734                         ok_tags = {
1735                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1736                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1737                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1738                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1739                                 html:NS_HTML
1740                         }
1741                         for el in open_els
1742                                 unless ok_tags[t.name] is el.namespace
1743                                         parse_error()
1744                                         break
1745                         ins_mode = ins_mode_after_body
1746                         process_token t
1747                         return
1748                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1749                         close_p_if_in_button_scope()
1750                         insert_html_element t
1751                         return
1752                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1753                         close_p_if_in_button_scope()
1754                         if h_tags[open_els[0].name] is open_els[0].namespace
1755                                 parse_error()
1756                                 open_els.shift()
1757                         insert_html_element t
1758                         return
1759                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1760                         close_p_if_in_button_scope()
1761                         insert_html_element t
1762                         # spec: If the next token is a "LF" (U+000A) character token, then
1763                         # ignore that token and move on to the next one. (Newlines at the
1764                         # start of pre blocks are ignored as an authoring convenience.)
1765                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1766                                 cur += 1
1767                         flag_frameset_ok = false
1768                         return
1769                 if t.type is TYPE_START_TAG and t.name is 'form'
1770                         unless form_element_pointer is null or template_tag_is_open()
1771                                 parse_error()
1772                                 return
1773                         close_p_if_in_button_scope()
1774                         el = insert_html_element t
1775                         unless template_tag_is_open()
1776                                 form_element_pointer = el
1777                         return
1778                 if t.type is TYPE_START_TAG and t.name is 'li'
1779                         flag_frameset_ok = false
1780                         for node in open_els
1781                                 if node.name is 'li' and node.namespace is NS_HTML
1782                                         generate_implied_end_tags 'li' # arg is exception
1783                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1784                                                 parse_error()
1785                                         loop
1786                                                 el = open_els.shift()
1787                                                 if el.name is 'li' and el.namespace is NS_HTML
1788                                                         break
1789                                         break
1790                                 if el_is_special_not_adp node
1791                                                 break
1792                         close_p_if_in_button_scope()
1793                         insert_html_element t
1794                         return
1795                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1796                         flag_frameset_ok = false
1797                         for node in open_els
1798                                 if node.name is 'dd' and node.namespace is NS_HTML
1799                                         generate_implied_end_tags 'dd' # arg is exception
1800                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1801                                                 parse_error()
1802                                         loop
1803                                                 el = open_els.shift()
1804                                                 if el.name is 'dd' and el.namespace is NS_HTML
1805                                                         break
1806                                         break
1807                                 if node.name is 'dt' and node.namespace is NS_HTML
1808                                         generate_implied_end_tags 'dt' # arg is exception
1809                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1810                                                 parse_error()
1811                                         loop
1812                                                 el = open_els.shift()
1813                                                 if el.name is 'dt' and el.namespace is NS_HTML
1814                                                         break
1815                                         break
1816                                 if el_is_special_not_adp node
1817                                         break
1818                         close_p_if_in_button_scope()
1819                         insert_html_element t
1820                         return
1821                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1822                         close_p_if_in_button_scope()
1823                         insert_html_element t
1824                         tok_state = tok_state_plaintext
1825                         return
1826                 if t.type is TYPE_START_TAG and t.name is 'button'
1827                         if is_in_scope 'button', NS_HTML
1828                                 parse_error()
1829                                 generate_implied_end_tags()
1830                                 loop
1831                                         el = open_els.shift()
1832                                         if el.name is 'button' and el.namespace is NS_HTML
1833                                                 break
1834                         reconstruct_afe()
1835                         insert_html_element t
1836                         flag_frameset_ok = false
1837                         return
1838                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1839                         unless is_in_scope t.name, NS_HTML
1840                                 parse_error()
1841                                 return
1842                         generate_implied_end_tags()
1843                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1844                                 parse_error()
1845                         loop
1846                                 el = open_els.shift()
1847                                 if el.name is t.name and el.namespace is NS_HTML
1848                                         return
1849                         return
1850                 if t.type is TYPE_END_TAG and t.name is 'form'
1851                         unless template_tag_is_open()
1852                                 node = form_element_pointer
1853                                 form_element_pointer = null
1854                                 if node is null or not el_is_in_scope node
1855                                         parse_error()
1856                                         return
1857                                 generate_implied_end_tags()
1858                                 if open_els[0] isnt node
1859                                         parse_error()
1860                                 for el, i in open_els
1861                                         if el is node
1862                                                 open_els.splice i, 1
1863                                                 break
1864                         else
1865                                 unless is_in_scope 'form', NS_HTML
1866                                         parse_error()
1867                                         return
1868                                 generate_implied_end_tags()
1869                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1870                                         parse_error()
1871                                 loop
1872                                         el = open_els.shift()
1873                                         if el.name is 'form' and el.namespace is NS_HTML
1874                                                 break
1875                         return
1876                 if t.type is TYPE_END_TAG and t.name is 'p'
1877                         unless is_in_button_scope 'p', NS_HTML
1878                                 parse_error()
1879                                 insert_html_element new_open_tag 'p'
1880                         close_p_element()
1881                         return
1882                 if t.type is TYPE_END_TAG and t.name is 'li'
1883                         unless is_in_li_scope 'li', NS_HTML
1884                                 parse_error()
1885                                 return
1886                         generate_implied_end_tags 'li' # arg is exception
1887                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1888                                 parse_error()
1889                         loop
1890                                 el = open_els.shift()
1891                                 if el.name is 'li' and el.namespace is NS_HTML
1892                                         break
1893                         return
1894                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1895                         unless is_in_scope t.name, NS_HTML
1896                                 parse_error()
1897                                 return
1898                         generate_implied_end_tags t.name # arg is exception
1899                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1900                                 parse_error()
1901                         loop
1902                                 el = open_els.shift()
1903                                 if el.name is t.name and el.namespace is NS_HTML
1904                                         break
1905                         return
1906                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1907                         h_in_scope = false
1908                         for el in open_els
1909                                 if h_tags[el.name] is el.namespace
1910                                         h_in_scope = true
1911                                         break
1912                                 if standard_scopers[el.name] is el.namespace
1913                                         break
1914                         unless h_in_scope
1915                                 parse_error()
1916                                 return
1917                         generate_implied_end_tags()
1918                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1919                                 parse_error()
1920                         loop
1921                                 el = open_els.shift()
1922                                 if h_tags[el.name] is el.namespace
1923                                         break
1924                         return
1925                 # deep breath!
1926                 if t.type is TYPE_START_TAG and t.name is 'a'
1927                         # If the list of active formatting elements contains an a element
1928                         # between the end of the list and the last marker on the list (or
1929                         # the start of the list if there is no marker on the list), then
1930                         # this is a parse error; run the adoption agency algorithm for the
1931                         # tag name "a", then remove that element from the list of active
1932                         # formatting elements and the stack of open elements if the
1933                         # adoption agency algorithm didn't already remove it (it might not
1934                         # have if the element is not in table scope).
1935                         found = false
1936                         for el in afe
1937                                 if el.type is TYPE_AFE_MARKER
1938                                         break
1939                                 if el.name is 'a' and el.namespace is NS_HTML
1940                                         found = el
1941                         if found?
1942                                 parse_error()
1943                                 adoption_agency 'a'
1944                                 for el, i in afe
1945                                         if el is found
1946                                                 afe.splice i, 1
1947                                 for el, i in open_els
1948                                         if el is found
1949                                                 open_els.splice i, 1
1950                         reconstruct_afe()
1951                         el = insert_html_element t
1952                         afe_push el
1953                         return
1954                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1955                         reconstruct_afe()
1956                         el = insert_html_element t
1957                         afe_push el
1958                         return
1959                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1960                         reconstruct_afe()
1961                         el = insert_html_element t
1962                         afe_push el
1963                         return
1964                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1965                         adoption_agency t.name
1966                         return
1967                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1968                         reconstruct_afe()
1969                         insert_html_element t
1970                         afe_push_marker()
1971                         flag_frameset_ok = false
1972                         return
1973                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1974                         unless is_in_scope t.name, NS_HTML
1975                                 parse_error()
1976                                 return
1977                         generate_implied_end_tags()
1978                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1979                                 parse_error()
1980                         loop
1981                                 el = open_els.shift()
1982                                 if el.name is t.name and el.namespace is NS_HTML
1983                                         break
1984                         clear_afe_to_marker()
1985                         return
1986                 if t.type is TYPE_START_TAG and t.name is 'table'
1987                         close_p_if_in_button_scope() # fixfull quirksmode thing
1988                         insert_html_element t
1989                         flag_frameset_ok = false
1990                         ins_mode = ins_mode_in_table
1991                         return
1992                 if t.type is TYPE_END_TAG and t.name is 'br'
1993                         parse_error()
1994                         t.type is TYPE_START_TAG
1995                         # fall through
1996                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
1997                         reconstruct_afe()
1998                         insert_html_element t
1999                         open_els.shift()
2000                         t.acknowledge_self_closing()
2001                         flag_frameset_ok = false
2002                         return
2003                 if t.type is TYPE_START_TAG and t.name is 'input'
2004                         reconstruct_afe()
2005                         insert_html_element t
2006                         open_els.shift()
2007                         t.acknowledge_self_closing()
2008                         unless is_input_hidden_tok t
2009                                 flag_frameset_ok = false
2010                         return
2011                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2012                         insert_html_element t
2013                         open_els.shift()
2014                         t.acknowledge_self_closing()
2015                         return
2016                 if t.type is TYPE_START_TAG and t.name is 'hr'
2017                         close_p_if_in_button_scope()
2018                         insert_html_element t
2019                         open_els.shift()
2020                         t.acknowledge_self_closing()
2021                         flag_frameset_ok = false
2022                         return
2023                 if t.type is TYPE_START_TAG and t.name is 'image'
2024                         parse_error()
2025                         t.name = 'img'
2026                         process_token t
2027                         return
2028                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2029                         parse_error()
2030                         if template_tag_is_open() is false and form_element_pointer isnt null
2031                                 return
2032                         t.acknowledge_self_closing()
2033                         flag_frameset_ok = false
2034                         close_p_if_in_button_scope()
2035                         el = insert_html_element new_open_tag 'form'
2036                         unless template_tag_is_open()
2037                                 form_element_pointer = el
2038                         for a in t.attrs_a
2039                                 if a[0] is 'action'
2040                                         el.attrs['action'] = a[1]
2041                                         break
2042                         insert_html_element new_open_tag 'hr'
2043                         open_els.shift()
2044                         reconstruct_afe()
2045                         insert_html_element new_open_tag 'label'
2046                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2047                         input_el = new_open_tag 'input'
2048                         prompt = null
2049                         for a in t.attrs_a
2050                                 if a[0] is 'prompt'
2051                                         prompt = a[1]
2052                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2053                                         input_el.attrs_a.push [a[0], a[1]]
2054                         input_el.attrs_a.push ['name', 'isindex']
2055                         # fixfull this next bit is in english... internationalize?
2056                         prompt ?= "This is a searchable index. Enter search keywords: "
2057                         insert_character new_character_token prompt # fixfull split
2058                         # TODO submit typo "balue" in spec
2059                         insert_html_element input_el
2060                         open_els.shift()
2061                         # insert_character '' # you can put chars here if promt attr missing
2062                         open_els.shift()
2063                         insert_html_element new_open_tag 'hr'
2064                         open_els.shift()
2065                         open_els.shift()
2066                         unless template_tag_is_open()
2067                                 form_element_pointer = null
2068                         return
2069                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2070                         insert_html_element t
2071                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2072                                 cur += 1
2073                         tok_state = tok_state_rcdata
2074                         original_ins_mode = ins_mode
2075                         flag_frameset_ok = false
2076                         ins_mode = ins_mode_text
2077                         return
2078                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2079                         close_p_if_in_button_scope()
2080                         reconstruct_afe()
2081                         flag_frameset_ok = false
2082                         parse_generic_raw_text t
2083                         return
2084                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2085                         flag_frameset_ok = false
2086                         parse_generic_raw_text t
2087                         return
2088                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2089                         parse_generic_raw_text t
2090                         return
2091                 if t.type is TYPE_START_TAG and t.name is 'select'
2092                         reconstruct_afe()
2093                         insert_html_element t
2094                         flag_frameset_ok = false
2095                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2096                                 ins_mode = ins_mode_in_select_in_table
2097                         else
2098                                 ins_mode = ins_mode_in_select
2099                         return
2100                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2101                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2102                                 open_els.shift()
2103                         reconstruct_afe()
2104                         insert_html_element t
2105                         return
2106 # this comment block implements the W3C spec
2107 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2108 #                       if is_in_scope 'ruby', NS_HTML
2109 #                               generate_implied_end_tags()
2110 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2111 #                                       parse_error()
2112 #                       insert_html_element t
2113 #                       return
2114 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2115 #                       if is_in_scope 'ruby', NS_HTML
2116 #                               generate_implied_end_tags 'rtc' # arg is exception
2117 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2118 #                                       parse_error()
2119 #                       insert_html_element t
2120 #                       return
# below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2122                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2123                         if is_in_scope 'ruby', NS_HTML
2124                                 generate_implied_end_tags()
2125                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2126                                         parse_error()
2127                         insert_html_element t
2128                         return
2129                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2130                         if is_in_scope 'ruby', NS_HTML
2131                                 generate_implied_end_tags 'rtc'
2132                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2133                                         parse_error()
2134                         insert_html_element t
2135                         return
# end WHATWG chunk
2137                 if t.type is TYPE_START_TAG and t.name is 'math'
2138                         reconstruct_afe()
2139                         adjust_mathml_attributes t
2140                         adjust_foreign_attributes t
2141                         insert_foreign_element t, NS_MATHML
2142                         if t.flag 'self-closing'
2143                                 open_els.shift()
2144                                 t.acknowledge_self_closing()
2145                         return
2146                 if t.type is TYPE_START_TAG and t.name is 'svg'
2147                         reconstruct_afe()
2148                         adjust_svg_attributes t
2149                         adjust_foreign_attributes t
2150                         insert_foreign_element t, NS_SVG
2151                         if t.flag 'self-closing'
2152                                 open_els.shift()
2153                                 t.acknowledge_self_closing()
2154                         return
2155                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2156                         parse_error()
2157                         return
2158                 if t.type is TYPE_START_TAG # any other start tag
2159                         reconstruct_afe()
2160                         insert_html_element t
2161                         return
2162                 if t.type is TYPE_END_TAG # any other end tag
2163                         in_body_any_other_end_tag t.name
2164                         return
2165                 return
2166
2167         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2168         ins_mode_text = (t) ->
2169                 if t.type is TYPE_TEXT
2170                         insert_character t
2171                         return
2172                 if t.type is TYPE_EOF
2173                         parse_error()
2174                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2175                                 open_els[0].flag 'already started', true
2176                         open_els.shift()
2177                         ins_mode = original_ins_mode
2178                         process_token t
2179                         return
2180                 if t.type is TYPE_END_TAG and t.name is 'script'
2181                         open_els.shift()
2182                         ins_mode = original_ins_mode
2183                         # fixfull the spec seems to assume that I'm going to run the script
2184                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2185                         return
2186                 if t.type is TYPE_END_TAG
2187                         open_els.shift()
2188                         ins_mode = original_ins_mode
2189                         return
2190                 console.log 'warning: end of ins_mode_text reached'
2191
        # the functions below implement the tokenizer states described here:
2193         # http://www.w3.org/TR/html5/syntax.html#tokenization
2194
2195         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2196         ins_mode_in_table_else = (t) ->
2197                 parse_error()
2198                 flag_foster_parenting = true
2199                 ins_mode_in_body t
2200                 flag_foster_parenting = false
2201                 return
2202         ins_mode_in_table = (t) ->
2203                 switch t.type
2204                         when TYPE_TEXT
2205                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2206                                         pending_table_character_tokens = []
2207                                         original_ins_mode = ins_mode
2208                                         ins_mode = ins_mode_in_table_text
2209                                         process_token t
2210                                 else
2211                                         ins_mode_in_table_else t
2212                         when TYPE_COMMENT
2213                                 insert_comment t
2214                         when TYPE_DOCTYPE
2215                                 parse_error()
2216                         when TYPE_START_TAG
2217                                 switch t.name
2218                                         when 'caption'
2219                                                 clear_stack_to_table_context()
2220                                                 afe_push_marker()
2221                                                 insert_html_element t
2222                                                 ins_mode = ins_mode_in_caption
2223                                         when 'colgroup'
2224                                                 clear_stack_to_table_context()
2225                                                 insert_html_element t
2226                                                 ins_mode = ins_mode_in_column_group
2227                                         when 'col'
2228                                                 clear_stack_to_table_context()
2229                                                 insert_html_element new_open_tag 'colgroup'
2230                                                 ins_mode = ins_mode_in_column_group
2231                                                 process_token t
2232                                         when 'tbody', 'tfoot', 'thead'
2233                                                 clear_stack_to_table_context()
2234                                                 insert_html_element t
2235                                                 ins_mode = ins_mode_in_table_body
2236                                         when 'td', 'th', 'tr'
2237                                                 clear_stack_to_table_context()
2238                                                 insert_html_element new_open_tag 'tbody'
2239                                                 ins_mode = ins_mode_in_table_body
2240                                                 process_token t
2241                                         when 'table'
2242                                                 parse_error()
2243                                                 if is_in_table_scope 'table', NS_HTML
2244                                                         loop
2245                                                                 el = open_els.shift()
2246                                                                 if el.name is 'table' and el.namespace is NS_HTML
2247                                                                         break
2248                                                         reset_ins_mode()
2249                                                         process_token t
2250                                         when 'style', 'script', 'template'
2251                                                 ins_mode_in_head t
2252                                         when 'input'
2253                                                 unless is_input_hidden_tok t
2254                                                         ins_mode_in_table_else t
2255                                                 else
2256                                                         parse_error()
2257                                                         el = insert_html_element t
2258                                                         open_els.shift()
2259                                                         t.acknowledge_self_closing()
2260                                         when 'form'
2261                                                 parse_error()
2262                                                 if form_element_pointer?
2263                                                         return
2264                                                 if template_tag_is_open()
2265                                                         return
2266                                                 form_element_pointer = insert_html_element t
2267                                                 open_els.shift()
2268                                         else
2269                                                 ins_mode_in_table_else t
2270                         when TYPE_END_TAG
2271                                 switch t.name
2272                                         when 'table'
2273                                                 if is_in_table_scope 'table', NS_HTML
2274                                                         loop
2275                                                                 el = open_els.shift()
2276                                                                 if el.name is 'table' and el.namespace is NS_HTML
2277                                                                         break
2278                                                         reset_ins_mode()
2279                                                 else
2280                                                         parse_error()
2281                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2282                                                 parse_error()
2283                                         when 'template'
2284                                                 ins_mode_in_head t
2285                                         else
2286                                                 ins_mode_in_table_else t
2287                         when TYPE_EOF
2288                                 ins_mode_in_body t
2289                         else
2290                                 ins_mode_in_table_else t
2291
2292
2293         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2294         ins_mode_in_table_text = (t) ->
2295                 if t.type is TYPE_TEXT and t.text is "\u0000"
2296                         # from javascript?
2297                         parse_error()
2298                         return
2299                 if t.type is TYPE_TEXT
2300                         pending_table_character_tokens.push t
2301                         return
2302                 # Anything else
2303                 all_space = true
2304                 for old in pending_table_character_tokens
2305                         unless is_space_tok old
2306                                 all_space = false
2307                                 break
2308                 if all_space
2309                         for old in pending_table_character_tokens
2310                                 insert_character old
2311                 else
2312                         for old in pending_table_character_tokens
2313                                 ins_mode_in_table_else old
2314                 pending_table_character_tokens = []
2315                 ins_mode = original_ins_mode
2316                 process_token t
2317
2318         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2319         ins_mode_in_caption = (t) ->
2320                 if t.type is TYPE_END_TAG and t.name is 'caption'
2321                         if is_in_table_scope 'caption', NS_HTML
2322                                 generate_implied_end_tags()
2323                                 if open_els[0].name isnt 'caption'
2324                                         parse_error()
2325                                 loop
2326                                         el = open_els.shift()
2327                                         if el.name is 'caption' and el.namespace is NS_HTML
2328                                                 break
2329                                 clear_afe_to_marker()
2330                                 ins_mode = ins_mode_in_table
2331                         else
2332                                 parse_error()
2333                                 # fragment case
2334                         return
2335                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2336                         parse_error()
2337                         if is_in_table_scope 'caption', NS_HTML
2338                                 loop
2339                                         el = open_els.shift()
2340                                         if el.name is 'caption' and el.namespace is NS_HTML
2341                                                 break
2342                                 clear_afe_to_marker()
2343                                 ins_mode = ins_mode_in_table
2344                                 process_token t
2345                         # else fragment case
2346                         return
2347                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2348                         parse_error()
2349                         return
2350                 # Anything else
2351                 ins_mode_in_body t
2352
2353         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2354         ins_mode_in_column_group = (t) ->
2355                 if is_space_tok t
2356                         insert_character t
2357                         return
2358                 if t.type is TYPE_COMMENT
2359                         insert_comment t
2360                         return
2361                 if t.type is TYPE_DOCTYPE
2362                         parse_error()
2363                         return
2364                 if t.type is TYPE_START_TAG and t.name is 'html'
2365                         ins_mode_in_body t
2366                         return
2367                 if t.type is TYPE_START_TAG and t.name is 'col'
2368                         el = insert_html_element t
2369                         open_els.shift()
2370                         t.acknowledge_self_closing()
2371                         return
2372                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2373                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2374                                 open_els.shift()
2375                                 ins_mode = ins_mode_in_table
2376                         else
2377                                 parse_error()
2378                         return
2379                 if t.type is TYPE_END_TAG and t.name is 'col'
2380                         parse_error()
2381                         return
2382                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2383                         ins_mode_in_head t
2384                         return
2385                 if t.type is TYPE_EOF
2386                         ins_mode_in_body t
2387                         return
2388                 # Anything else
2389                 if open_els[0].name isnt 'colgroup'
2390                         parse_error()
2391                         return
2392                 open_els.shift()
2393                 ins_mode = ins_mode_in_table
2394                 process_token t
2395                 return
2396
2397         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2398         ins_mode_in_table_body = (t) ->
2399                 if t.type is TYPE_START_TAG and t.name is 'tr'
2400                         clear_stack_to_table_body_context()
2401                         insert_html_element t
2402                         ins_mode = ins_mode_in_row
2403                         return
2404                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2405                         parse_error()
2406                         clear_stack_to_table_body_context()
2407                         insert_html_element new_open_tag 'tr'
2408                         ins_mode = ins_mode_in_row
2409                         process_token t
2410                         return
2411                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2412                         unless is_in_table_scope t.name, NS_HTML
2413                                 parse_error()
2414                                 return
2415                         clear_stack_to_table_body_context()
2416                         open_els.shift()
2417                         ins_mode = ins_mode_in_table
2418                         return
2419                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2420                         has = false
2421                         for el in open_els
2422                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2423                                         has = true
2424                                         break
2425                                 if table_scopers[el.name] is el.namespace
2426                                         break
2427                         if !has
2428                                 parse_error()
2429                                 return
2430                         clear_stack_to_table_body_context()
2431                         open_els.shift()
2432                         ins_mode = ins_mode_in_table
2433                         process_token t
2434                         return
2435                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2436                         parse_error()
2437                         return
2438                 # Anything else
2439                 ins_mode_in_table t
2440
2441         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2442         ins_mode_in_row = (t) ->
2443                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2444                         clear_stack_to_table_row_context()
2445                         insert_html_element t
2446                         ins_mode = ins_mode_in_cell
2447                         afe_push_marker()
2448                         return
2449                 if t.type is TYPE_END_TAG and t.name is 'tr'
2450                         if is_in_table_scope 'tr', NS_HTML
2451                                 clear_stack_to_table_row_context()
2452                                 open_els.shift()
2453                                 ins_mode = ins_mode_in_table_body
2454                         else
2455                                 parse_error()
2456                         return
2457                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2458                         if is_in_table_scope 'tr', NS_HTML
2459                                 clear_stack_to_table_row_context()
2460                                 open_els.shift()
2461                                 ins_mode = ins_mode_in_table_body
2462                                 process_token t
2463                         else
2464                                 parse_error()
2465                         return
2466                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2467                         if is_in_table_scope t.name, NS_HTML
2468                                 if is_in_table_scope 'tr', NS_HTML
2469                                         clear_stack_to_table_row_context()
2470                                         open_els.shift()
2471                                         ins_mode = ins_mode_in_table_body
2472                                         process_token t
2473                         else
2474                                 parse_error()
2475                         return
2476                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2477                         parse_error()
2478                         return
2479                 # Anything else
2480                 ins_mode_in_table t
2481
2482         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2483         close_the_cell = ->
2484                 generate_implied_end_tags()
2485                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2486                         parse_error()
2487                 loop
2488                         el = open_els.shift()
2489                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2490                                 break
2491                 clear_afe_to_marker()
2492                 ins_mode = ins_mode_in_row
2493
2494         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2495         ins_mode_in_cell = (t) ->
2496                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2497                         if is_in_table_scope t.name, NS_HTML
2498                                 generate_implied_end_tags()
2499                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2500                                         parse_error()
2501                                 loop
2502                                         el = open_els.shift()
2503                                         if el.name is t.name and el.namespace is NS_HTML
2504                                                 break
2505                                 clear_afe_to_marker()
2506                                 ins_mode = ins_mode_in_row
2507                         else
2508                                 parse_error()
2509                         return
2510                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2511                         has = false
2512                         for el in open_els
2513                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2514                                         has = true
2515                                         break
2516                                 if table_scopers[el.name] is el.namespace
2517                                         break
2518                         if !has
2519                                 parse_error()
2520                                 return
2521                         close_the_cell()
2522                         process_token t
2523                         return
2524                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2525                         parse_error()
2526                         return
2527                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2528                         if is_in_table_scope t.name, NS_HTML
2529                                 close_the_cell()
2530                                 process_token t
2531                         else
2532                                 parse_error()
2533                         return
2534                 # Anything Else
2535                 ins_mode_in_body t
2536
2537         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2538         ins_mode_in_select = (t) ->
2539                 if t.type is TYPE_TEXT and t.text is "\u0000"
2540                         parse_error()
2541                         return
2542                 if t.type is TYPE_TEXT
2543                         insert_character t
2544                         return
2545                 if t.type is TYPE_COMMENT
2546                         insert_comment t
2547                         return
2548                 if t.type is TYPE_DOCTYPE
2549                         parse_error()
2550                         return
2551                 if t.type is TYPE_START_TAG and t.name is 'html'
2552                         ins_mode_in_body t
2553                         return
2554                 if t.type is TYPE_START_TAG and t.name is 'option'
2555                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2556                                 open_els.shift()
2557                         insert_html_element t
2558                         return
2559                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2560                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2561                                 open_els.shift()
2562                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2563                                 open_els.shift()
2564                         insert_html_element t
2565                         return
2566                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2567                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2568                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2569                                         open_els.shift()
2570                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2571                                 open_els.shift()
2572                         else
2573                                 parse_error()
2574                         return
2575                 if t.type is TYPE_END_TAG and t.name is 'option'
2576                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2577                                 open_els.shift()
2578                         else
2579                                 parse_error()
2580                         return
2581                 if t.type is TYPE_END_TAG and t.name is 'select'
2582                         if is_in_select_scope 'select', NS_HTML
2583                                 loop
2584                                         el = open_els.shift()
2585                                         if el.name is 'select' and el.namespace is NS_HTML
2586                                                 break
2587                                 reset_ins_mode()
2588                         else
2589                                 parse_error()
2590                         return
2591                 if t.type is TYPE_START_TAG and t.name is 'select'
2592                         parse_error()
2593                         loop
2594                                 el = open_els.shift()
2595                                 if el.name is 'select' and el.namespace is NS_HTML
2596                                         break
2597                         reset_ins_mode()
2598                         # spec says that this is the same as </select> but it doesn't say
2599                         # to check scope first
2600                         return
2601                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2602                         parse_error()
2603                         if is_in_select_scope 'select', NS_HTML
2604                                 return
2605                         loop
2606                                 el = open_els.shift()
2607                                 if el.name is 'select' and el.namespace is NS_HTML
2608                                         break
2609                         reset_ins_mode()
2610                         process_token t
2611                         return
2612                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2613                         ins_mode_in_head t
2614                         return
2615                 if t.type is TYPE_EOF
2616                         ins_mode_in_body t
2617                         return
2618                 # Anything else
2619                 parse_error()
2620                 return
2621
2622         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2623         ins_mode_in_select_in_table = (t) ->
2624                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2625                         parse_error()
2626                         loop
2627                                 el = open_els.shift()
2628                                 if el.name is 'select' and el.namespace is NS_HTML
2629                                         break
2630                         reset_ins_mode()
2631                         process_token t
2632                         return
2633                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2634                         parse_error()
2635                         unless is_in_table_scope t.name, NS_HTML
2636                                 return
2637                         loop
2638                                 el = open_els.shift()
2639                                 if el.name is 'select' and el.namespace is NS_HTML
2640                                         break
2641                         reset_ins_mode()
2642                         process_token t
2643                         return
2644                 # Anything else
2645                 ins_mode_in_select t
2646                 return
2647
2648         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2649         ins_mode_in_template = (t) ->
2650                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2651                         ins_mode_in_body t
2652                         return
2653                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2654                         ins_mode_in_head t
2655                         return
2656                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2657                         template_ins_modes.shift()
2658                         template_ins_modes.unshift ins_mode_in_table
2659                         ins_mode = ins_mode_in_table
2660                         process_token t
2661                         return
2662                 if t.type is TYPE_START_TAG and t.name is 'col'
2663                         template_ins_modes.shift()
2664                         template_ins_modes.unshift ins_mode_in_column_group
2665                         ins_mode = ins_mode_in_column_group
2666                         process_token t
2667                         return
2668                 if t.type is TYPE_START_TAG and t.name is 'tr'
2669                         template_ins_modes.shift()
2670                         template_ins_modes.unshift ins_mode_in_table_body
2671                         ins_mode = ins_mode_in_table_body
2672                         process_token t
2673                         return
2674                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2675                         template_ins_modes.shift()
2676                         template_ins_modes.unshift ins_mode_in_row
2677                         ins_mode = ins_mode_in_row
2678                         process_token t
2679                         return
2680                 if t.type is TYPE_START_TAG
2681                         template_ins_modes.shift()
2682                         template_ins_modes.unshift ins_mode_in_body
2683                         ins_mode = ins_mode_in_body
2684                         process_token t
2685                         return
2686                 if t.type is TYPE_END_TAG
2687                         parse_error()
2688                         return
2689                 if t.type is TYPE_EOF
2690                         unless template_tag_is_open()
2691                                 stop_parsing()
2692                                 return
2693                         parse_error()
2694                         loop
2695                                 el = open_els.shift()
2696                                 if el.name is 'template' and el.namespace is NS_HTML
2697                                         break
2698                         clear_afe_to_marker()
2699                         template_ins_modes.shift()
2700                         reset_ins_mode()
2701                         process_token t
2702
2703         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2704         ins_mode_after_body = (t) ->
2705                 if is_space_tok t
2706                         ins_mode_in_body t
2707                         return
2708                 if t.type is TYPE_COMMENT
2709                         insert_comment t, [open_els[0], open_els[0].children.length]
2710                         return
2711                 if t.type is TYPE_DOCTYPE
2712                         parse_error()
2713                         return
2714                 if t.type is TYPE_START_TAG and t.name is 'html'
2715                         ins_mode_in_body t
2716                         return
2717                 if t.type is TYPE_END_TAG and t.name is 'html'
2718                         # fixfull fragment case
2719                         ins_mode = ins_mode_after_after_body
2720                         return
2721                 if t.type is TYPE_EOF
2722                         stop_parsing()
2723                         return
2724                 # Anything ELse
2725                 parse_error()
2726                 ins_mode = ins_mode_in_body
2727                 process_token t
2728
2729         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2730         ins_mode_in_frameset = (t) ->
2731                 if is_space_tok t
2732                         insert_character t
2733                         return
2734                 if t.type is TYPE_COMMENT
2735                         insert_comment t
2736                         return
2737                 if t.type is TYPE_DOCTYPE
2738                         parse_error()
2739                         return
2740                 if t.type is TYPE_START_TAG and t.name is 'html'
2741                         ins_mode_in_body t
2742                         return
2743                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2744                         insert_html_element t
2745                         return
2746                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2747                         if open_els.length is 1
2748                                 parse_error()
2749                                 return # fragment case
2750                         open_els.shift()
2751                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2752                                 ins_mode = ins_mode_after_frameset
2753                         return
2754                 if t.type is TYPE_START_TAG and t.name is 'frame'
2755                         insert_html_element t
2756                         open_els.shift()
2757                         t.acknowledge_self_closing()
2758                         return
2759                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2760                         ins_mode_in_head t
2761                         return
2762                 if t.type is TYPE_EOF
2763                         if open_els.length isnt 1
2764                                 parse_error()
2765                         stop_parsing()
2766                         return
2767                 # Anything else
2768                 parse_error()
2769                 return
2770
2771         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2772         ins_mode_after_frameset = (t) ->
2773                 if is_space_tok t
2774                         insert_character t
2775                         return
2776                 if t.type is TYPE_COMMENT
2777                         insert_comment t
2778                         return
2779                 if t.type is TYPE_DOCTYPE
2780                         parse_error()
2781                         return
2782                 if t.type is TYPE_START_TAG and t.name is 'html'
2783                         ins_mode_in_body t
2784                         return
2785                 if t.type is TYPE_END_TAG and t.name is 'html'
2786                         insert_mode = ins_mode_after_after_frameset
2787                         return
2788                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2789                         ins_mode_in_head t
2790                         return
2791                 if t.type is TYPE_EOF
2792                         stop_parsing()
2793                         return
2794                 # Anything else
2795                 parse_error()
2796                 return
2797
2798         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2799         ins_mode_after_after_body = (t) ->
2800                 if t.type is TYPE_COMMENT
2801                         insert_comment t, [doc, doc.children.length]
2802                         return
2803                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2804                         ins_mode_in_body t
2805                         return
2806                 if t.type is TYPE_EOF
2807                         stop_parsing()
2808                         return
2809                 # Anything else
2810                 parse_error()
2811                 ins_mode = ins_mode_in_body
2812                 return
2813
2814         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2815         ins_mode_after_after_frameset = (t) ->
2816                 if t.type is TYPE_COMMENT
2817                         insert_comment t, [doc, doc.children.length]
2818                         return
2819                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2820                         ins_mode_in_body t
2821                         return
2822                 if t.type is TYPE_EOF
2823                         stop_parsing()
2824                         return
2825                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2826                         ins_mode_in_head t
2827                         return
2828                 # Anything else
2829                 parse_error()
2830                 return
2831
2832         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2833         has_color_face_or_size = (t) ->
2834                 for a in t.attrs_a
2835                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2836                                 return true
2837                 return false
2838         in_foreign_content_end_script = ->
2839                 open_els.shift()
2840                 # fixfull
2841                 return
2842         in_foreign_content_other_start = (t) ->
2843                 acn = adjusted_current_node()
2844                 if acn.namespace is NS_MATHML
2845                         adjust_mathml_attributes t
2846                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2847                         t.name = svg_name_fixes[t.name]
2848                 if acn.namespace is NS_SVG
2849                         adjust_svg_attributes t
2850                 adjust_foreign_attributes t
2851                 insert_foreign_element t, acn.namespace
2852                 if t.flag 'self-closing'
2853                         if t.name is 'script'
2854                                 t.acknowledge_self_closing()
2855                                 in_foreign_content_end_script()
2856                         else
2857                                 open_els.shift()
2858                                 t.acknowledge_self_closing()
2859                 return
2860         in_foreign_content = (t) ->
2861                 if t.type is TYPE_TEXT and t.text is "\u0000"
2862                         parse_error()
2863                         insert_character new_character_token "\ufffd"
2864                         return
2865                 if is_space_tok t
2866                         insert_character t
2867                         return
2868                 if t.type is TYPE_TEXT
2869                         flag_frameset_ok = false
2870                         insert_character t
2871                         return
2872                 if t.type is TYPE_COMMENT
2873                         insert_comment t
2874                         return
2875                 if t.type is TYPE_DOCTYPE
2876                         parse_error()
2877                         return
2878                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2879                         parse_error()
2880                         if flag_fragment_parsing
2881                                 in_foreign_content_other_start t
2882                                 return
2883                         loop # is this safe?
2884                                 open_els.shift()
2885                                 cn = open_els[0]
2886                                 if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
2887                                         break
2888                         process_token t
2889                         return
2890                 if t.type is TYPE_START_TAG
2891                         in_foreign_content_other_start t
2892                         return
2893                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2894                         in_foreign_content_end_script()
2895                         return
2896                 if t.type is TYPE_END_TAG
2897                         if open_els[0].name.toLowerCase() isnt t.name
2898                                 parse_error()
2899                         for node in open_els
2900                                 if node is open_els[open_els.length - 1]
2901                                         return
2902                                 if node.name.toLowerCase() is t.name
2903                                         loop
2904                                                 el = open_els.shift()
2905                                                 if el is node
2906                                                         return
2907                                 if node.namespace is NS_HTML
2908                                         break
2909                         ins_mode t # explicitly call HTML insertion mode
2910
2911
2912         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2913         tok_state_data = ->
2914                 switch c = txt.charAt(cur++)
2915                         when '&'
2916                                 return new_text_node parse_character_reference()
2917                         when '<'
2918                                 tok_state = tok_state_tag_open
2919                         when "\u0000"
2920                                 parse_error()
2921                                 return new_text_node "\ufffd"
2922                         when '' # EOF
2923                                 return new_eof_token()
2924                         else
2925                                 return new_text_node c
2926                 return null
2927
2928         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2929         # not needed: tok_state_character_reference_in_data = ->
2930         # just call parse_character_reference()
2931
2932         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2933         tok_state_rcdata = ->
2934                 switch c = txt.charAt(cur++)
2935                         when '&'
2936                                 return new_text_node parse_character_reference()
2937                         when '<'
2938                                 tok_state = tok_state_rcdata_less_than_sign
2939                         when "\u0000"
2940                                 parse_error()
2941                                 return new_character_token "\ufffd"
2942                         when '' # EOF
2943                                 return new_eof_token()
2944                         else
2945                                 return new_character_token c
2946                 return null
2947
2948         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2949         # not needed: tok_state_character_reference_in_rcdata = ->
2950         # just call parse_character_reference()
2951
2952         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2953         tok_state_rawtext = ->
2954                 switch c = txt.charAt(cur++)
2955                         when '<'
2956                                 tok_state = tok_state_rawtext_less_than_sign
2957                         when "\u0000"
2958                                 parse_error()
2959                                 return new_character_token "\ufffd"
2960                         when '' # EOF
2961                                 return new_eof_token()
2962                         else
2963                                 return new_character_token c
2964                 return null
2965
2966         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2967         tok_state_script_data = ->
2968                 switch c = txt.charAt(cur++)
2969                         when '<'
2970                                 tok_state = tok_state_script_data_less_than_sign
2971                         when "\u0000"
2972                                 parse_error()
2973                                 return new_character_token "\ufffd"
2974                         when '' # EOF
2975                                 return new_eof_token()
2976                         else
2977                                 return new_character_token c
2978                 return null
2979
2980         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
2981         tok_state_plaintext = ->
2982                 switch c = txt.charAt(cur++)
2983                         when "\u0000"
2984                                 parse_error()
2985                                 return new_character_token "\ufffd"
2986                         when '' # EOF
2987                                 return new_eof_token()
2988                         else
2989                                 return new_character_token c
2990                 return null
2991
2992
2993         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
2994         tok_state_tag_open = ->
2995                 switch c = txt.charAt(cur++)
2996                         when '!'
2997                                 tok_state = tok_state_markup_declaration_open
2998                         when '/'
2999                                 tok_state = tok_state_end_tag_open
3000                         when '?'
3001                                 parse_error()
3002                                 tok_cur_tag = new_comment_token '?'
3003                                 tok_state = tok_state_bogus_comment
3004                         else
3005                                 if is_lc_alpha(c)
3006                                         tok_cur_tag = new_open_tag c
3007                                         tok_state = tok_state_tag_name
3008                                 else if is_uc_alpha(c)
3009                                         tok_cur_tag = new_open_tag c.toLowerCase()
3010                                         tok_state = tok_state_tag_name
3011                                 else
3012                                         parse_error()
3013                                         tok_state = tok_state_data
3014                                         cur -= 1 # we didn't parse/handle the char after <
3015                                         return new_text_node '<'
3016                 return null
3017
3018         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3019         tok_state_end_tag_open = ->
3020                 switch c = txt.charAt(cur++)
3021                         when '>'
3022                                 parse_error()
3023                                 tok_state = tok_state_data
3024                         when '' # EOF
3025                                 parse_error()
3026                                 tok_state = tok_state_data
3027                                 return new_text_node '</'
3028                         else
3029                                 if is_uc_alpha(c)
3030                                         tok_cur_tag = new_end_tag c.toLowerCase()
3031                                         tok_state = tok_state_tag_name
3032                                 else if is_lc_alpha(c)
3033                                         tok_cur_tag = new_end_tag c
3034                                         tok_state = tok_state_tag_name
3035                                 else
3036                                         parse_error()
3037                                         tok_cur_tag = new_comment_token '/'
3038                                         tok_state = tok_state_bogus_comment
3039                 return null
3040
3041         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3042         tok_state_tag_name = ->
3043                 switch c = txt.charAt(cur++)
3044                         when "\t", "\n", "\u000c", ' '
3045                                 tok_state = tok_state_before_attribute_name
3046                         when '/'
3047                                 tok_state = tok_state_self_closing_start_tag
3048                         when '>'
3049                                 tok_state = tok_state_data
3050                                 tmp = tok_cur_tag
3051                                 tok_cur_tag = null
3052                                 return tmp
3053                         when "\u0000"
3054                                 parse_error()
3055                                 tok_cur_tag.name += "\ufffd"
3056                         when '' # EOF
3057                                 parse_error()
3058                                 tok_state = tok_state_data
3059                         else
3060                                 if is_uc_alpha(c)
3061                                         tok_cur_tag.name += c.toLowerCase()
3062                                 else
3063                                         tok_cur_tag.name += c
3064                 return null
3065
3066         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3067         tok_state_rcdata_less_than_sign = ->
3068                 c = txt.charAt(cur++)
3069                 if c is '/'
3070                         temporary_buffer = ''
3071                         tok_state = tok_state_rcdata_end_tag_open
3072                         return null
3073                 # Anything else
3074                 tok_state = tok_state_rcdata
3075                 cur -= 1 # reconsume the input character
3076                 return new_character_token '<'
3077
3078         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3079         tok_state_rcdata_end_tag_open = ->
3080                 c = txt.charAt(cur++)
3081                 if is_uc_alpha(c)
3082                         tok_cur_tag = new_end_tag c.toLowerCase()
3083                         temporary_buffer += c
3084                         tok_state = tok_state_rcdata_end_tag_name
3085                         return null
3086                 if is_lc_alpha(c)
3087                         tok_cur_tag = new_end_tag c
3088                         temporary_buffer += c
3089                         tok_state = tok_state_rcdata_end_tag_name
3090                         return null
3091                 # Anything else
3092                 tok_state = tok_state_rcdata
3093                 cur -= 1 # reconsume the input character
3094                 return new_character_token "</" # fixfull separate these
3095
3096         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3097         is_appropriate_end_tag = (t) ->
3098                 # spec says to check against "the tag name of the last start tag to
3099                 # have been emitted from this tokenizer", but this is only called from
3100                 # the various "raw" states, so it's hopefully ok to assume that
3101                 # open_els[0].name will work instead TODO: verify this after the script
3102                 # data states are implemented
3103                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3104                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3105
3106         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3107         tok_state_rcdata_end_tag_name = ->
3108                 c = txt.charAt(cur++)
3109                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3110                         if is_appropriate_end_tag tok_cur_tag
3111                                 tok_state = tok_state_before_attribute_name
3112                                 return
3113                         # else fall through to "Anything else"
3114                 if c is '/'
3115                         if is_appropriate_end_tag tok_cur_tag
3116                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3117                                 return
3118                         # else fall through to "Anything else"
3119                 if c is '>'
3120                         if is_appropriate_end_tag tok_cur_tag
3121                                 tok_state = tok_state_data
3122                                 return tok_cur_tag
3123                         # else fall through to "Anything else"
3124                 if is_uc_alpha(c)
3125                         tok_cur_tag.name += c.toLowerCase()
3126                         temporary_buffer += c
3127                         return null
3128                 if is_lc_alpha(c)
3129                         tok_cur_tag.name += c
3130                         temporary_buffer += c
3131                         return null
3132                 # Anything else
3133                 tok_state = tok_state_rcdata
3134                 cur -= 1 # reconsume the input character
3135                 return new_character_token '</' + temporary_buffer # fixfull separate these
3136
3137         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3138         tok_state_rawtext_less_than_sign = ->
3139                 c = txt.charAt(cur++)
3140                 if c is '/'
3141                         temporary_buffer = ''
3142                         tok_state = tok_state_rawtext_end_tag_open
3143                         return null
3144                 # Anything else
3145                 tok_state = tok_state_rawtext
3146                 cur -= 1 # reconsume the input character
3147                 return new_character_token '<'
3148
3149         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3150         tok_state_rawtext_end_tag_open = ->
3151                 c = txt.charAt(cur++)
3152                 if is_uc_alpha(c)
3153                         tok_cur_tag = new_end_tag c.toLowerCase()
3154                         temporary_buffer += c
3155                         tok_state = tok_state_rawtext_end_tag_name
3156                         return null
3157                 if is_lc_alpha(c)
3158                         tok_cur_tag = new_end_tag c
3159                         temporary_buffer += c
3160                         tok_state = tok_state_rawtext_end_tag_name
3161                         return null
3162                 # Anything else
3163                 tok_state = tok_state_rawtext
3164                 cur -= 1 # reconsume the input character
3165                 return new_character_token "</" # fixfull separate these
3166
3167         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3168         tok_state_rawtext_end_tag_name = ->
3169                 c = txt.charAt(cur++)
3170                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3171                         if is_appropriate_end_tag tok_cur_tag
3172                                 tok_state = tok_state_before_attribute_name
3173                                 return
3174                         # else fall through to "Anything else"
3175                 if c is '/'
3176                         if is_appropriate_end_tag tok_cur_tag
3177                                 tok_state = tok_state_self_closing_start_tag
3178                                 return
3179                         # else fall through to "Anything else"
3180                 if c is '>'
3181                         if is_appropriate_end_tag tok_cur_tag
3182                                 tok_state = tok_state_data
3183                                 return tok_cur_tag
3184                         # else fall through to "Anything else"
3185                 if is_uc_alpha(c)
3186                         tok_cur_tag.name += c.toLowerCase()
3187                         temporary_buffer += c
3188                         return null
3189                 if is_lc_alpha(c)
3190                         tok_cur_tag.name += c
3191                         temporary_buffer += c
3192                         return null
3193                 # Anything else
3194                 tok_state = tok_state_rawtext
3195                 cur -= 1 # reconsume the input character
3196                 return new_character_token '</' + temporary_buffer # fixfull separate these
3197
3198         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3199         tok_state_script_data_less_than_sign = ->
3200                 c = txt.charAt(cur++)
3201                 if c is '/'
3202                         temporary_buffer = ''
3203                         tok_state = tok_state_script_data_end_tag_open
3204                         return
3205                 if c is '!'
3206                         tok_state = tok_state_script_data_escape_start
3207                         return new_character_token '<!' # fixfull split
3208                 # Anything else
3209                 tok_state = tok_state_script_data
3210                 cur -= 1 # Reconsume
3211                 return new_character_token '<'
3212
3213         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3214         tok_state_script_data_end_tag_open = ->
3215                 c = txt.charAt(cur++)
3216                 if is_uc_alpha(c)
3217                         tok_cur_tag = new_end_tag c.toLowerCase()
3218                         temporary_buffer += c
3219                         tok_state = tok_state_script_data_end_tag_name
3220                         return
3221                 if is_lc_alpha(c)
3222                         tok_cur_tag = new_end_tag c
3223                         temporary_buffer += c
3224                         tok_state = tok_state_script_data_end_tag_name
3225                         return
3226                 # Anything else
3227                 tok_state = tok_state_script_data
3228                 cur -= 1 # Reconsume
3229                 return new_character_token '</'
3230
        # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
3232         tok_state_script_data_end_tag_name = ->
3233                 c = txt.charAt(cur++)
3234                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3235                         if is_appropriate_end_tag tok_cur_tag
3236                                 tok_state = tok_state_before_attribute_name
3237                                 return
3238                         # fall through
3239                 if c is '/'
3240                         if is_appropriate_end_tag tok_cur_tag
3241                                 tok_state = tok_state_self_closing_start_tag
3242                                 return
3243                         # fall through
3244                 if c is '>'
3245                         if is_appropriate_end_tag tok_cur_tag
3246                                 tok_state = tok_state_data
3247                                 return tok_cur_tag
3248                         # fall through
3249                 if is_uc_alpha(c)
3250                         tok_cur_tag.name += c.toLowerCase()
3251                         temporary_buffer += c
3252                         return
3253                 if is_lc_alpha(c)
3254                         tok_cur_tag.name += c
3255                         temporary_buffer += c
3256                         return
3257                 # Anything else
3258                 tok_state = tok_state_script_data
3259                 cur -= 1 # Reconsume
3260                 return new_character_token "</#{temporary_buffer}" # fixfull split
3261
3262         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3263         tok_state_script_data_escape_start = ->
3264                 c = txt.charAt(cur++)
3265                 if c is '-'
3266                         tok_state = tok_state_script_data_escape_start_dash
3267                         return new_character_token '-'
3268                 # Anything else
3269                 tok_state = tok_state_script_data
3270                 cur -= 1 # Reconsume
3271                 return
3272
3273         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3274         tok_state_script_data_escape_start_dash = ->
3275                 c = txt.charAt(cur++)
3276                 if c is '-'
3277                         tok_state = tok_state_script_data_escaped_dash_dash
3278                         return new_character_token '-'
3279                 # Anything else
3280                 tok_state = tok_state_script_data
3281                 cur -= 1 # Reconsume
3282                 return
3283
3284         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3285         tok_state_script_data_escaped = ->
3286                 c = txt.charAt(cur++)
3287                 if c is '-'
3288                         tok_state = tok_state_script_data_escaped_dash
3289                         return new_character_token '-'
3290                 if c is '<'
3291                         tok_state = tok_state_script_data_escaped_less_than_sign
3292                         return
3293                 if c is "\u0000"
3294                         parse_error()
3295                         return new_character_token "\ufffd"
3296                 if c is '' # EOF
3297                         tok_state = tok_state_data
3298                         parse_error()
3299                         cur -= 1 # Reconsume
3300                         return
3301                 # Anything else
3302                 return new_character_token c
3303
3304         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3305         tok_state_script_data_escaped_dash = ->
3306                 c = txt.charAt(cur++)
3307                 if c is '-'
3308                         tok_state = tok_state_script_data_escaped_dash_dash
3309                         return new_character_token '-'
3310                 if c is '<'
3311                         tok_state = tok_state_script_data_escaped_less_than_sign
3312                         return
3313                 if c is "\u0000"
3314                         parse_error()
3315                         tok_state = tok_state_script_data_escaped
3316                         return new_character_token "\ufffd"
3317                 if c is '' # EOF
3318                         tok_state = tok_state_data
3319                         parse_error()
3320                         cur -= 1 # Reconsume
3321                         return
3322                 # Anything else
3323                 tok_state = tok_state_script_data_escaped
3324                 return new_character_token c
3325
3326         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3327         tok_state_script_data_escaped_dash_dash = ->
3328                 c = txt.charAt(cur++)
3329                 if c is '-'
3330                         return new_character_token '-'
3331                 if c is '<'
3332                         tok_state = tok_state_script_data_escaped_less_than_sign
3333                         return
3334                 if c is '>'
3335                         tok_state = tok_state_script_data
3336                         return new_character_token '>'
3337                 if c is "\u0000"
3338                         parse_error()
3339                         tok_state = tok_state_script_data_escaped
3340                         return new_character_token "\ufffd"
3341                 if c is '' # EOF
3342                         parse_error()
3343                         tok_state = tok_state_data
3344                         cur -= 1 # Reconsume
3345                         return
3346                 # Anything else
3347                 tok_state = tok_state_script_data_escaped
3348                 return new_character_token c
3349
3350         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3351         tok_state_script_data_escaped_less_than_sign = ->
3352                 c = txt.charAt(cur++)
3353                 if c is '/'
3354                         temporary_buffer = ''
3355                         tok_state = tok_state_script_data_escaped_end_tag_open
3356                         return
3357                 if is_uc_alpha(c)
3358                         temporary_buffer = c.toLowerCase() # yes, really
3359                         tok_state = tok_state_script_data_double_escape_start
3360                         return new_character_token "<#{c}" # fixfull split
3361                 if is_lc_alpha(c)
3362                         temporary_buffer = c
3363                         tok_state = tok_state_script_data_double_escape_start
3364                         return new_character_token "<#{c}" # fixfull split
3365                 # Anything else
3366                 tok_state = tok_state_script_data_escaped
3367                 cur -= 1 # Reconsume
3368                 return new_character_token c
3369
3370         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3371         tok_state_script_data_escaped_end_tag_open = ->
3372                 c = txt.charAt(cur++)
3373                 if is_uc_alpha(c)
3374                         tok_cur_tag = new_end_tag c.toLowerCase()
3375                         temporary_buffer += c
3376                         tok_state = tok_state_script_data_escaped_end_tag_name
3377                         return
3378                 if is_lc_alpha(c)
3379                         tok_cur_tag = new_end_tag c
3380                         temporary_buffer += c
3381                         tok_state = tok_state_script_data_escaped_end_tag_name
3382                         return
3383                 # Anything else
3384                 tok_state = tok_state_script_data_escaped
3385                 cur -= 1 # Reconsume
3386                 return new_character_token '</' # fixfull split
3387
3388         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3389         tok_state_script_data_escaped_end_tag_name = ->
3390                 c = txt.charAt(cur++)
3391                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3392                         if is_appropriate_end_tag tok_cur_tag
3393                                 tok_state = tok_state_before_attribute_name
3394                                 return
3395                         # fall through
3396                 if c is '/'
3397                         if is_appropriate_end_tag tok_cur_tag
3398                                 tok_state = tok_state_self_closing_start_tag
3399                                 return
3400                         # fall through
3401                 if c is '>'
3402                         if is_appropriate_end_tag tok_cur_tag
3403                                 tok_state = tok_state_data
3404                                 return tok_cur_tag
3405                         # fall through
3406                 if is_uc_alpha(c)
3407                         tok_cur_tag.name += c.toLowerCase()
3408                         temporary_buffer += c.toLowerCase()
3409                         return
3410                 if is_lc_alpha(c)
3411                         tok_cur_tag.name += c
3412                         temporary_buffer += c.toLowerCase()
3413                         return
3414                 # Anything else
3415                 tok_state = tok_state_script_data_escaped
3416                 cur -= 1 # Reconsume
3417                 return new_character_token "</#{temporary_buffer}" # fixfull split
3418
3419         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3420         tok_state_script_data_double_escape_start = ->
3421                 c = txt.charAt(cur++)
3422                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3423                         if temporary_buffer is 'script'
3424                                 tok_state = tok_state_script_data_double_escaped
3425                         else
3426                                 tok_state = tok_state_script_data_escaped
3427                         return new_character_token c
3428                 if is_uc_alpha(c)
3429                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3430                         return new_character_token c
3431                 if is_lc_alpha(c)
3432                         temporary_buffer += c
3433                         return new_character_token c
3434                 # Anything else
3435                 tok_state = tok_state_script_data_escaped
3436                 cur -= 1 # Reconsume
3437                 return
3438
3439         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3440         tok_state_script_data_double_escaped = ->
3441                 c = txt.charAt(cur++)
3442                 if c is '-'
3443                         tok_state = tok_state_script_data_double_escaped_dash
3444                         return new_character_token '-'
3445                 if c is '<'
3446                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3447                         return new_character_token '<'
3448                 if c is "\u0000"
3449                         parse_error()
3450                         return new_character_token "\ufffd"
3451                 if c is '' # EOF
3452                         parse_error()
3453                         tok_state = tok_state_data
3454                         cur -= 1 # Reconsume
3455                         return
3456                 # Anything else
3457                 return new_character_token c
3458
3459         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3460         tok_state_script_data_double_escaped_dash = ->
3461                 c = txt.charAt(cur++)
3462                 if c is '-'
3463                         tok_state = tok_state_script_data_double_escaped_dash_dash
3464                         return new_character_token '-'
3465                 if c is '<'
3466                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3467                         return new_character_token '<'
3468                 if c is "\u0000"
3469                         parse_error()
3470                         tok_state = tok_state_script_data_double_escaped
3471                         return new_character_token "\ufffd"
3472                 if c is '' # EOF
3473                         parse_error()
3474                         tok_state = tok_state_data
3475                         cur -= 1 # Reconsume
3476                         return
3477                 # Anything else
3478                 tok_state = tok_state_script_data_double_escaped
3479                 return new_character_token c
3480
3481         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3482         tok_state_script_data_double_escaped_dash_dash = ->
3483                 c = txt.charAt(cur++)
3484                 if c is '-'
3485                         return new_character_token '-'
3486                 if c is '<'
3487                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3488                         return new_character_token '<'
3489                 if c is '>'
3490                         tok_state = tok_state_script_data
3491                         return new_character_token '>'
3492                 if c is "\u0000"
3493                         parse_error()
3494                         tok_state = tok_state_script_data_double_escaped
3495                         return new_character_token "\ufffd"
3496                 if c is '' # EOF
3497                         parse_error()
3498                         tok_state = tok_state_data
3499                         cur -= 1 # Reconsume
3500                         return
3501                 # Anything else
3502                 tok_state = tok_state_script_data_double_escaped
3503                 return new_character_token c
3504
3505         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3506         tok_state_script_data_double_escaped_less_than_sign = ->
3507                 c = txt.charAt(cur++)
3508                 if c is '/'
3509                         temporary_buffer = ''
3510                         tok_state = tok_state_script_data_double_escape_end
3511                         return new_character_token '/'
3512                 # Anything else
3513                 tok_state = tok_state_script_data_double_escaped
3514                 cur -= 1 # Reconsume
3515                 return
3516
3517         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3518         tok_state_script_data_double_escape_end = ->
3519                 c = txt.charAt(cur++)
3520                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3521                         if temporary_buffer is 'script'
3522                                 tok_state = tok_state_script_data_escaped
3523                         else
3524                                 tok_state = tok_state_script_data_double_escaped
3525                         return new_character_token c
3526                 if is_uc_alpha(c)
3527                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3528                         return new_character_token c
3529                 if is_lc_alpha(c)
3530                         temporary_buffer += c
3531                         return new_character_token c
3532                 # Anything else
3533                 tok_state = tok_state_script_data_double_escaped
3534                 cur -= 1 # Reconsume
3535                 return
3536
3537         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3538         tok_state_before_attribute_name = ->
3539                 attr_name = null
3540                 switch c = txt.charAt(cur++)
3541                         when "\t", "\n", "\u000c", ' '
3542                                 return null
3543                         when '/'
3544                                 tok_state = tok_state_self_closing_start_tag
3545                                 return null
3546                         when '>'
3547                                 tok_state = tok_state_data
3548                                 tmp = tok_cur_tag
3549                                 tok_cur_tag = null
3550                                 return tmp
3551                         when "\u0000"
3552                                 parse_error()
3553                                 attr_name = "\ufffd"
3554                         when '"', "'", '<', '='
3555                                 parse_error()
3556                                 attr_name = c
3557                         when '' # EOF
3558                                 parse_error()
3559                                 tok_state = tok_state_data
3560                         else
3561                                 if is_uc_alpha(c)
3562                                         attr_name = c.toLowerCase()
3563                                 else
3564                                         attr_name = c
3565                 if attr_name?
3566                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3567                         tok_state = tok_state_attribute_name
3568                 return null
3569
3570         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3571         tok_state_attribute_name = ->
3572                 switch c = txt.charAt(cur++)
3573                         when "\t", "\n", "\u000c", ' '
3574                                 tok_state = tok_state_after_attribute_name
3575                         when '/'
3576                                 tok_state = tok_state_self_closing_start_tag
3577                         when '='
3578                                 tok_state = tok_state_before_attribute_value
3579                         when '>'
3580                                 tok_state = tok_state_data
3581                                 tmp = tok_cur_tag
3582                                 tok_cur_tag = null
3583                                 return tmp
3584                         when "\u0000"
3585                                 parse_error()
3586                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3587                         when '"', "'", '<'
3588                                 parse_error()
3589                                 tok_cur_tag.attrs_a[0][0] += c
3590                         when '' # EOF
3591                                 parse_error()
3592                                 tok_state = tok_state_data
3593                         else
3594                                 if is_uc_alpha(c)
3595                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3596                                 else
3597                                         tok_cur_tag.attrs_a[0][0] += c
3598                 return null
3599
3600         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3601         tok_state_after_attribute_name = ->
3602                 c = txt.charAt(cur++)
3603                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3604                         return
3605                 if c is '/'
3606                         tok_state = tok_state_self_closing_start_tag
3607                         return
3608                 if c is '='
3609                         tok_state = tok_state_before_attribute_value
3610                         return
3611                 if c is '>'
3612                         tok_state = tok_state_data
3613                         return
3614                 if is_uc_alpha(c)
3615                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3616                         tok_state = tok_state_attribute_name
3617                         return
3618                 if c is "\u0000"
3619                         parse_error()
3620                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3621                         tok_state = tok_state_attribute_name
3622                         return
3623                 if c is '' # EOF
3624                         parse_error()
3625                         tok_state = tok_state_data
3626                         cur -= 1 # reconsume
3627                         return
3628                 if c is '"' or c is "'" or c is '<'
3629                         parse_error()
3630                         # fall through to Anything else
3631                 # Anything else
3632                 tok_cur_tag.attrs_a.unshift [c, '']
3633                 tok_state = tok_state_attribute_name
3634
3635         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3636         tok_state_before_attribute_value = ->
3637                 switch c = txt.charAt(cur++)
3638                         when "\t", "\n", "\u000c", ' '
3639                                 return null
3640                         when '"'
3641                                 tok_state = tok_state_attribute_value_double_quoted
3642                         when '&'
3643                                 tok_state = tok_state_attribute_value_unquoted
3644                                 cur -= 1
3645                         when "'"
3646                                 tok_state = tok_state_attribute_value_single_quoted
3647                         when "\u0000"
3648                                 # Parse error
3649                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3650                                 tok_state = tok_state_attribute_value_unquoted
3651                         when '>'
3652                                 # Parse error
3653                                 tok_state = tok_state_data
3654                                 tmp = tok_cur_tag
3655                                 tok_cur_tag = null
3656                                 return tmp
3657                         when '' # EOF
3658                                 parse_error()
3659                                 tok_state = tok_state_data
3660                         else
3661                                 tok_cur_tag.attrs_a[0][1] += c
3662                                 tok_state = tok_state_attribute_value_unquoted
3663                 return null
3664
3665         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3666         tok_state_attribute_value_double_quoted = ->
3667                 switch c = txt.charAt(cur++)
3668                         when '"'
3669                                 tok_state = tok_state_after_attribute_value_quoted
3670                         when '&'
3671                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3672                         when "\u0000"
3673                                 # Parse error
3674                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3675                         when '' # EOF
3676                                 parse_error()
3677                                 tok_state = tok_state_data
3678                         else
3679                                 tok_cur_tag.attrs_a[0][1] += c
3680                 return null
3681
3682         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3683         tok_state_attribute_value_single_quoted = ->
3684                 switch c = txt.charAt(cur++)
3685                         when "'"
3686                                 tok_state = tok_state_after_attribute_value_quoted
3687                         when '&'
3688                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3689                         when "\u0000"
3690                                 # Parse error
3691                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3692                         when '' # EOF
3693                                 parse_error()
3694                                 tok_state = tok_state_data
3695                         else
3696                                 tok_cur_tag.attrs_a[0][1] += c
3697                 return null
3698
3699         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3700         tok_state_attribute_value_unquoted = ->
3701                 switch c = txt.charAt(cur++)
3702                         when "\t", "\n", "\u000c", ' '
3703                                 tok_state = tok_state_before_attribute_name
3704                         when '&'
3705                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3706                         when '>'
3707                                 tok_state = tok_state_data
3708                                 tmp = tok_cur_tag
3709                                 tok_cur_tag = null
3710                                 return tmp
3711                         when "\u0000"
3712                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3713                         when '' # EOF
3714                                 parse_error()
3715                                 tok_state = tok_state_data
3716                         else
3717                                 # Parse Error if ', <, = or ` (backtick)
3718                                 tok_cur_tag.attrs_a[0][1] += c
3719                 return null
3720
3721         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3722         tok_state_after_attribute_value_quoted = ->
3723                 switch c = txt.charAt(cur++)
3724                         when "\t", "\n", "\u000c", ' '
3725                                 tok_state = tok_state_before_attribute_name
3726                         when '/'
3727                                 tok_state = tok_state_self_closing_start_tag
3728                         when '>'
3729                                 tok_state = tok_state_data
3730                                 tmp = tok_cur_tag
3731                                 tok_cur_tag = null
3732                                 return tmp
3733                         when '' # EOF
3734                                 parse_error()
3735                                 tok_state = tok_state_data
3736                         else
3737                                 # Parse Error
3738                                 tok_state = tok_state_before_attribute_name
3739                                 cur -= 1 # we didn't handle that char
3740                 return null
3741
3742         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3743         tok_state_self_closing_start_tag = ->
3744                 c = txt.charAt(cur++)
3745                 if c is '>'
3746                         tok_cur_tag.flag 'self-closing'
3747                         tok_state = tok_state_data
3748                         return tok_cur_tag
3749                 if c is ''
3750                         parse_error()
3751                         tok_state = tok_state_data
3752                         cur -= 1 # Reconsume
3753                         return
3754                 # Anything else
3755                 parse_error()
3756                 tok_state = tok_state_before_attribute_name
3757                 cur -= 1 # Reconsume
3758                 return
3759
3760         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3761         # WARNING: put a comment token in tok_cur_tag before setting this state
3762         tok_state_bogus_comment = ->
3763                 next_gt = txt.indexOf '>', cur
3764                 if next_gt is -1
3765                         val = txt.substr cur
3766                         cur = txt.length
3767                 else
3768                         val = txt.substr cur, (next_gt - cur)
3769                         cur = next_gt + 1
3770                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3771                 tok_cur_tag.text += val
3772                 tok_state = tok_state_data
3773                 return tok_cur_tag
3774
3775         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3776         tok_state_markup_declaration_open = ->
3777                 if txt.substr(cur, 2) is '--'
3778                         cur += 2
3779                         tok_cur_tag = new_comment_token ''
3780                         tok_state = tok_state_comment_start
3781                         return
3782                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3783                         cur += 7
3784                         tok_state = tok_state_doctype
3785                         return
3786                 acn = adjusted_current_node()
3787                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3788                         cur += 7
3789                         tok_state = tok_state_cdata_section
3790                         return
3791                 # Otherwise
3792                 parse_error()
3793                 tok_cur_tag = new_comment_token ''
3794                 tok_state = tok_state_bogus_comment
3795                 return
3796
3797         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3798         tok_state_comment_start = ->
3799                 switch c = txt.charAt(cur++)
3800                         when '-'
3801                                 tok_state = tok_state_comment_start_dash
3802                         when "\u0000"
3803                                 parse_error()
3804                                 tok_state = tok_state_comment
3805                                 return new_character_token "\ufffd"
3806                         when '>'
3807                                 parse_error()
3808                                 tok_state = tok_state_data
3809                                 return tok_cur_tag
3810                         when '' # EOF
3811                                 parse_error()
3812                                 tok_state = tok_state_data
3813                                 cur -= 1 # Reconsume
3814                                 return tok_cur_tag
3815                         else
3816                                 tok_cur_tag.text += c
3817                                 tok_state = tok_state_comment
3818                 return null
3819
3820         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3821         tok_state_comment_start_dash = ->
3822                 switch c = txt.charAt(cur++)
3823                         when '-'
3824                                 tok_state = tok_state_comment_end
3825                         when "\u0000"
3826                                 parse_error()
3827                                 tok_cur_tag.text += "-\ufffd"
3828                                 tok_state = tok_state_comment
3829                         when '>'
3830                                 parse_error()
3831                                 tok_state = tok_state_data
3832                                 return tok_cur_tag
3833                         when '' # EOF
3834                                 parse_error()
3835                                 tok_state = tok_state_data
3836                                 cur -= 1 # Reconsume
3837                                 return tok_cur_tag
3838                         else
3839                                 tok_cur_tag.text += "-#{c}"
3840                                 tok_state = tok_state_comment
3841                 return null
3842
3843         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3844         tok_state_comment = ->
3845                 switch c = txt.charAt(cur++)
3846                         when '-'
3847                                 tok_state = tok_state_comment_end_dash
3848                         when "\u0000"
3849                                 parse_error()
3850                                 tok_cur_tag.text += "\ufffd"
3851                         when '' # EOF
3852                                 parse_error()
3853                                 tok_state = tok_state_data
3854                                 cur -= 1 # Reconsume
3855                                 return tok_cur_tag
3856                         else
3857                                 tok_cur_tag.text += c
3858                 return null
3859
3860         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3861         tok_state_comment_end_dash = ->
3862                 switch c = txt.charAt(cur++)
3863                         when '-'
3864                                 tok_state = tok_state_comment_end
3865                         when "\u0000"
3866                                 parse_error()
3867                                 tok_cur_tag.text += "-\ufffd"
3868                                 tok_state = tok_state_comment
3869                         when '' # EOF
3870                                 parse_error()
3871                                 tok_state = tok_state_data
3872                                 cur -= 1 # Reconsume
3873                                 return tok_cur_tag
3874                         else
3875                                 tok_cur_tag.text += "-#{c}"
3876                                 tok_state = tok_state_comment
3877                 return null
3878
3879         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3880         tok_state_comment_end = ->
3881                 switch c = txt.charAt(cur++)
3882                         when '>'
3883                                 tok_state = tok_state_data
3884                                 return tok_cur_tag
3885                         when "\u0000"
3886                                 parse_error()
3887                                 tok_cur_tag.text += "--\ufffd"
3888                                 tok_state = tok_state_comment
3889                         when '!'
3890                                 parse_error()
3891                                 tok_state = tok_state_comment_end_bang
3892                         when '-'
3893                                 parse_error()
3894                                 tok_cur_tag.text += '-'
3895                         when '' # EOF
3896                                 parse_error()
3897                                 tok_state = tok_state_data
3898                                 cur -= 1 # Reconsume
3899                                 return tok_cur_tag
3900                         else
3901                                 parse_error()
3902                                 tok_cur_tag.text += "--#{c}"
3903                                 tok_state = tok_state_comment
3904                 return null
3905
3906         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3907         tok_state_comment_end_bang = ->
3908                 switch c = txt.charAt(cur++)
3909                         when '-'
3910                                 tok_cur_tag.text += "--!#{c}"
3911                                 tok_state = tok_state_comment_end_dash
3912                         when '>'
3913                                 tok_state = tok_state_data
3914                                 return tok_cur_tag
3915                         when "\u0000"
3916                                 parse_error()
3917                                 tok_cur_tag.text += "--!\ufffd"
3918                                 tok_state = tok_state_comment
3919                         when '' # EOF
3920                                 parse_error()
3921                                 tok_state = tok_state_data
3922                                 cur -= 1 # Reconsume
3923                                 return tok_cur_tag
3924                         else
3925                                 tok_cur_tag.text += "--!#{c}"
3926                                 tok_state = tok_state_comment
3927                 return null
3928
3929         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3930         tok_state_doctype = ->
3931                 switch c = txt.charAt(cur++)
3932                         when "\t", "\u000a", "\u000c", ' '
3933                                 tok_state = tok_state_before_doctype_name
3934                         when '' # EOF
3935                                 parse_error()
3936                                 tok_state = tok_state_data
3937                                 el = new_doctype_token ''
3938                                 el.flag 'force-quirks', true
3939                                 cur -= 1 # Reconsume
3940                                 return el
3941                         else
3942                                 parse_error()
3943                                 tok_state = tok_state_before_doctype_name
3944                                 cur -= 1 # Reconsume
3945                 return null
3946
        # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
3948         tok_state_before_doctype_name = ->
3949                 c = txt.charAt(cur++)
3950                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3951                         return
3952                 if is_uc_alpha(c)
3953                         tok_cur_tag = new_doctype_token c.toLowerCase()
3954                         tok_state = tok_state_doctype_name
3955                         return
3956                 if c is "\u0000"
3957                         parse_error()
3958                         tok_cur_tag = new_doctype_token "\ufffd"
3959                         tok_state = tok_state_doctype_name
3960                         return
3961                 if c is '>'
3962                         parse_error()
3963                         el = new_doctype_token ''
3964                         el.flag 'force-quirks', true
3965                         tok_state = tok_state_data
3966                         return el
3967                 if c is '' # EOF
3968                         parse_error()
3969                         tok_state = tok_state_data
3970                         el = new_doctype_token ''
3971                         el.flag 'force-quirks', true
3972                         cur -= 1 # Reconsume
3973                         return el
3974                 # Anything else
3975                 tok_cur_tag = new_doctype_token c
3976                 tok_state = tok_state_doctype_name
3977                 return null
3978
3979         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
3980         tok_state_doctype_name = ->
3981                 c = txt.charAt(cur++)
3982                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3983                         tok_state = tok_state_after_doctype_name
3984                         return
3985                 if c is '>'
3986                         tok_state = tok_state_data
3987                         return tok_cur_tag
3988                 if is_uc_alpha(c)
3989                         tok_cur_tag.name += c.toLowerCase()
3990                         return
3991                 if c is "\u0000"
3992                         parse_error()
3993                         tok_cur_tag.name += "\ufffd"
3994                         return
3995                 if c is '' # EOF
3996                         parse_error()
3997                         tok_state = tok_state_data
3998                         tok_cur_tag.flag 'force-quirks', true
3999                         cur -= 1 # Reconsume
4000                         return tok_cur_tag
4001                 # Anything else
4002                 tok_cur_tag.name += c
4003                 return null
4004
4005         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4006         tok_state_after_doctype_name = ->
4007                 c = txt.charAt(cur++)
4008                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4009                         return
4010                 if c is '>'
4011                         tok_state = tok_state_data
4012                         return tok_cur_tag
4013                 if c is '' # EOF
4014                         parse_error()
4015                         tok_state = tok_state_data
4016                         tok_cur_tag.flag 'force-quirks', true
4017                         cur -= 1 # Reconsume
4018                         return tok_cur_tag
4019                 # Anything else
4020                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4021                         cur += 5
4022                         tok_state = tok_state_after_doctype_public_keyword
4023                         return
4024                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4025                         cur += 5
4026                         tok_state = tok_state_after_doctype_system_keyword
4027                         return
4028                 parse_error()
4029                 tok_cur_tag.flag 'force-quirks', true
4030                 tok_state = tok_state_bogus_doctype
4031                 return null
4032
4033         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4034         tok_state_after_doctype_public_keyword = ->
4035                 c = txt.charAt(cur++)
4036                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4037                         tok_state = tok_state_before_doctype_public_identifier
4038                         return
4039                 if c is '"'
4040                         parse_error()
4041                         tok_cur_tag.public_identifier = ''
4042                         tok_state = tok_state_doctype_public_identifier_double_quoted
4043                         return
4044                 if c is "'"
4045                         parse_error()
4046                         tok_cur_tag.public_identifier = ''
4047                         tok_state = tok_state_doctype_public_identifier_single_quoted
4048                         return
4049                 if c is '>'
4050                         parse_error()
4051                         tok_cur_tag.flag 'force-quirks', true
4052                         tok_state = tok_state_data
4053                         return tok_cur_tag
4054                 if c is '' # EOF
4055                         parse_error()
4056                         tok_state = tok_state_data
4057                         tok_cur_tag.flag 'force-quirks', true
4058                         cur -= 1 # Reconsume
4059                         return tok_cur_tag
4060                 # Anything else
4061                 parse_error()
4062                 tok_cur_tag.flag 'force-quirks', true
4063                 tok_state = tok_state_bogus_doctype
4064                 return null
4065
4066         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4067         tok_state_before_doctype_public_identifier = ->
4068                 c = txt.charAt(cur++)
4069                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4070                         return
4071                 if c is '"'
4072                         parse_error()
4073                         tok_cur_tag.public_identifier = ''
4074                         tok_state = tok_state_doctype_public_identifier_double_quoted
4075                         return
4076                 if c is "'"
4077                         parse_error()
4078                         tok_cur_tag.public_identifier = ''
4079                         tok_state = tok_state_doctype_public_identifier_single_quoted
4080                         return
4081                 if c is '>'
4082                         parse_error()
4083                         tok_cur_tag.flag 'force-quirks', true
4084                         tok_state = tok_state_data
4085                         return tok_cur_tag
4086                 if c is '' # EOF
4087                         parse_error()
4088                         tok_state = tok_state_data
4089                         tok_cur_tag.flag 'force-quirks', true
4090                         cur -= 1 # Reconsume
4091                         return tok_cur_tag
4092                 # Anything else
4093                 parse_error()
4094                 tok_cur_tag.flag 'force-quirks', true
4095                 tok_state = tok_state_bogus_doctype
4096                 return null
4097
4098
4099         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4100         tok_state_doctype_public_identifier_double_quoted = ->
4101                 c = txt.charAt(cur++)
4102                 if c is '"'
4103                         tok_state = tok_state_after_doctype_public_identifier
4104                         return
4105                 if c is "\u0000"
4106                         parse_error()
4107                         tok_cur_tag.public_identifier += "\ufffd"
4108                         return
4109                 if c is '>'
4110                         parse_error()
4111                         tok_cur_tag.flag 'force-quirks', true
4112                         tok_state = tok_state_data
4113                         return tok_cur_tag
4114                 if c is '' # EOF
4115                         parse_error()
4116                         tok_state = tok_state_data
4117                         tok_cur_tag.flag 'force-quirks', true
4118                         cur -= 1 # Reconsume
4119                         return tok_cur_tag
4120                 # Anything else
4121                 tok_cur_tag.public_identifier += c
4122                 return null
4123
4124         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4125         tok_state_doctype_public_identifier_single_quoted = ->
4126                 c = txt.charAt(cur++)
4127                 if c is "'"
4128                         tok_state = tok_state_after_doctype_public_identifier
4129                         return
4130                 if c is "\u0000"
4131                         parse_error()
4132                         tok_cur_tag.public_identifier += "\ufffd"
4133                         return
4134                 if c is '>'
4135                         parse_error()
4136                         tok_cur_tag.flag 'force-quirks', true
4137                         tok_state = tok_state_data
4138                         return tok_cur_tag
4139                 if c is '' # EOF
4140                         parse_error()
4141                         tok_state = tok_state_data
4142                         tok_cur_tag.flag 'force-quirks', true
4143                         cur -= 1 # Reconsume
4144                         return tok_cur_tag
4145                 # Anything else
4146                 tok_cur_tag.public_identifier += c
4147                 return null
4148
4149         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4150         tok_state_after_doctype_public_identifier = ->
4151                 c = txt.charAt(cur++)
4152                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4153                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4154                         return
4155                 if c is '>'
4156                         tok_state = tok_state_data
4157                         return tok_cur_tag
4158                 if c is '"'
4159                         parse_error()
4160                         tok_cur_tag.system_identifier = ''
4161                         tok_state = tok_state_doctype_system_identifier_double_quoted
4162                         return
4163                 if c is "'"
4164                         parse_error()
4165                         tok_cur_tag.system_identifier = ''
4166                         tok_state = tok_state_doctype_system_identifier_single_quoted
4167                         return
4168                 if c is '' # EOF
4169                         parse_error()
4170                         tok_state = tok_state_data
4171                         tok_cur_tag.flag 'force-quirks', true
4172                         cur -= 1 # Reconsume
4173                         return tok_cur_tag
4174                 # Anything else
4175                 parse_error()
4176                 tok_cur_tag.flag 'force-quirks', true
4177                 tok_state = tok_state_bogus_doctype
4178                 return null
4179
4180         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4181         tok_state_between_doctype_public_and_system_identifiers = ->
4182                 c = txt.charAt(cur++)
4183                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4184                         return
4185                 if c is '>'
4186                         tok_state = tok_state_data
4187                         return tok_cur_tag
4188                 if c is '"'
4189                         parse_error()
4190                         tok_cur_tag.system_identifier = ''
4191                         tok_state = tok_state_doctype_system_identifier_double_quoted
4192                         return
4193                 if c is "'"
4194                         parse_error()
4195                         tok_cur_tag.system_identifier = ''
4196                         tok_state = tok_state_doctype_system_identifier_single_quoted
4197                         return
4198                 if c is '' # EOF
4199                         parse_error()
4200                         tok_state = tok_state_data
4201                         tok_cur_tag.flag 'force-quirks', true
4202                         cur -= 1 # Reconsume
4203                         return tok_cur_tag
4204                 # Anything else
4205                 parse_error()
4206                 tok_cur_tag.flag 'force-quirks', true
4207                 tok_state = tok_state_bogus_doctype
4208                 return null
4209
4210         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4211         tok_state_after_doctype_system_keyword = ->
4212                 c = txt.charAt(cur++)
4213                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4214                         tok_state = tok_state_before_doctype_system_identifier
4215                         return
4216                 if c is '"'
4217                         parse_error()
4218                         tok_cur_tag.system_identifier = ''
4219                         tok_state = tok_state_doctype_system_identifier_double_quoted
4220                         return
4221                 if c is "'"
4222                         parse_error()
4223                         tok_cur_tag.system_identifier = ''
4224                         tok_state = tok_state_doctype_system_identifier_single_quoted
4225                         return
4226                 if c is '>'
4227                         parse_error()
4228                         tok_cur_tag.flag 'force-quirks', true
4229                         tok_state = tok_state_data
4230                         return tok_cur_tag
4231                 if c is '' # EOF
4232                         parse_error()
4233                         tok_state = tok_state_data
4234                         tok_cur_tag.flag 'force-quirks', true
4235                         cur -= 1 # Reconsume
4236                         return tok_cur_tag
4237                 # Anything else
4238                 parse_error()
4239                 tok_cur_tag.flag 'force-quirks', true
4240                 tok_state = tok_state_bogus_doctype
4241                 return null
4242
4243         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4244         tok_state_before_doctype_system_identifier = ->
4245                 c = txt.charAt(cur++)
4246                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4247                         return
4248                 if c is '"'
4249                         tok_cur_tag.system_identifier = ''
4250                         tok_state = tok_state_doctype_system_identifier_double_quoted
4251                         return
4252                 if c is "'"
4253                         tok_cur_tag.system_identifier = ''
4254                         tok_state = tok_state_doctype_system_identifier_single_quoted
4255                         return
4256                 if c is '>'
4257                         parse_error()
4258                         tok_cur_tag.flag 'force-quirks', true
4259                         tok_state = tok_state_data
4260                         return tok_cur_tag
4261                 if c is '' # EOF
4262                         parse_error()
4263                         tok_state = tok_state_data
4264                         tok_cur_tag.flag 'force-quirks', true
4265                         cur -= 1 # Reconsume
4266                         return tok_cur_tag
4267                 # Anything else
4268                 parse_error()
4269                 tok_cur_tag.flag 'force-quirks', true
4270                 tok_state = tok_state_bogus_doctype
4271                 return null
4272
4273         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4274         tok_state_doctype_system_identifier_double_quoted = ->
4275                 c = txt.charAt(cur++)
4276                 if c is '"'
4277                         tok_state = tok_state_after_doctype_system_identifier
4278                         return
4279                 if c is "\u0000"
4280                         parse_error()
4281                         tok_cur_tag.system_identifier += "\ufffd"
4282                         return
4283                 if c is '>'
4284                         parse_error()
4285                         tok_cur_tag.flag 'force-quirks', true
4286                         tok_state = tok_state_data
4287                         return tok_cur_tag
4288                 if c is '' # EOF
4289                         parse_error()
4290                         tok_state = tok_state_data
4291                         tok_cur_tag.flag 'force-quirks', true
4292                         cur -= 1 # Reconsume
4293                         return tok_cur_tag
4294                 # Anything else
4295                 tok_cur_tag.system_identifier += c
4296                 return null
4297
4298         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4299         tok_state_doctype_system_identifier_single_quoted = ->
4300                 c = txt.charAt(cur++)
4301                 if c is "'"
4302                         tok_state = tok_state_after_doctype_system_identifier
4303                         return
4304                 if c is "\u0000"
4305                         parse_error()
4306                         tok_cur_tag.system_identifier += "\ufffd"
4307                         return
4308                 if c is '>'
4309                         parse_error()
4310                         tok_cur_tag.flag 'force-quirks', true
4311                         tok_state = tok_state_data
4312                         return tok_cur_tag
4313                 if c is '' # EOF
4314                         parse_error()
4315                         tok_state = tok_state_data
4316                         tok_cur_tag.flag 'force-quirks', true
4317                         cur -= 1 # Reconsume
4318                         return tok_cur_tag
4319                 # Anything else
4320                 tok_cur_tag.system_identifier += c
4321                 return null
4322
4323         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4324         tok_state_after_doctype_system_identifier = ->
4325                 c = txt.charAt(cur++)
4326                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4327                         return
4328                 if c is '>'
4329                         tok_state = tok_state_data
4330                         return tok_cur_tag
4331                 if c is '' # EOF
4332                         parse_error()
4333                         tok_state = tok_state_data
4334                         tok_cur_tag.flag 'force-quirks', true
4335                         cur -= 1 # Reconsume
4336                         return tok_cur_tag
4337                 # Anything else
4338                 parse_error()
4339                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4340                 tok_state = tok_state_bogus_doctype
4341                 return null
4342
4343         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4344         tok_state_bogus_doctype = ->
4345                 c = txt.charAt(cur++)
4346                 if c is '>'
4347                         tok_state = tok_state_data
4348                         return tok_cur_tag
4349                 if c is '' # EOF
4350                         tok_state = tok_state_data
4351                         cur -= 1 # Reconsume
4352                         return tok_cur_tag
4353                 # Anything else
4354                 return null
4355
4356         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4357         tok_state_cdata_section = ->
4358                 tok_state = tok_state_data
4359                 next_gt = txt.indexOf ']]>', cur
4360                 if next_gt is -1
4361                         val = txt.substr cur
4362                         cur = txt.length
4363                 else
4364                         val = txt.substr cur, (next_gt - cur)
4365                         cur = next_gt + 3
4366                 return new_character_token val # fixfull split
4367
4368         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4369         # Don't set this as a state, just call it
4370         # returns a string (NOT a text node)
4371         parse_character_reference = (allowed_char = null, in_attr = false) ->
4372                 if cur >= txt.length
4373                         return '&'
4374                 switch c = txt.charAt(cur)
4375                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4376                                 # explicitly not a parse error
4377                                 return '&'
4378                         when ';'
4379                                 # there has to be "one or more" alnums between & and ; to be a parse error
4380                                 return '&'
4381                         when '#'
4382                                 if cur + 1 >= txt.length
4383                                         return '&'
4384                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4385                                         base = 16
4386                                         charset = hex_chars
4387                                         start = cur + 2
4388                                 else
4389                                         charset = digits
4390                                         start = cur + 1
4391                                         base = 10
4392                                 i = 0
4393                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4394                                         i += 1
4395                                 if i is 0
4396                                         return '&'
4397                                 cur = start + i
4398                                 if txt.charAt(start + i) is ';'
4399                                         cur += 1
4400                                 else
4401                                         parse_error()
4402                                 code_point = txt.substr(start, i)
4403                                 while code_point.charAt(0) is '0' and code_point.length > 1
4404                                         code_point = code_point.substr 1
4405                                 code_point = parseInt(code_point, base)
4406                                 if unicode_fixes[code_point]?
4407                                         parse_error()
4408                                         return unicode_fixes[code_point]
4409                                 else
4410                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4411                                                 parse_error()
4412                                                 return "\ufffd"
4413                                         else
4414                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4415                                                         parse_error()
4416                                                 return from_code_point code_point
4417                                 return
4418                         else
4419                                 for i in [0...31]
4420                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4421                                                 break
4422                                 if i is 0
4423                                         # exit early, because parse_error() below needs at least one alnum
4424                                         return '&'
4425                                 if txt.charAt(cur + i) is ';'
4426                                         i += 1 # include ';' terminator in value
4427                                         decoded = decode_named_char_ref txt.substr(cur, i)
4428                                         if decoded?
4429                                                 cur += i
4430                                                 return decoded
4431                                         parse_error()
4432                                         return '&'
4433                                 else
4434                                         # no ';' terminator (only legacy char refs)
4435                                         max = i
4436                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4437                                                 c = legacy_char_refs[txt.substr(cur, i)]
4438                                                 if c?
4439                                                         if in_attr
4440                                                                 if txt.charAt(cur + i) is '='
4441                                                                         # "because some legacy user agents will
4442                                                                         # misinterpret the markup in those cases"
4443                                                                         parse_error()
4444                                                                         return '&'
4445                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4446                                                                         # this makes attributes forgiving about url args
4447                                                                         return '&'
4448                                                         # ok, and besides the weird exceptions for attributes...
4449                                                         # return the matching char
4450                                                         cur += i # consume entity chars
4451                                                         parse_error() # because no terminating ";"
4452                                                         return c
4453                                         parse_error()
4454                                         return '&'
4455                 return # never reached
4456
4457         # tree constructor initialization
4458         # see comments on TYPE_TAG/etc for the structure of this data
4459         txt = args.html
4460         cur = 0
4461         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4462         open_els = []
4463         afe = [] # active formatting elements
4464         template_ins_modes = []
4465         ins_mode = ins_mode_initial
4466         original_ins_mode = ins_mode # TODO check spec
4467         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4468         flag_frameset_ok = true
4469         flag_parsing = true
4470         flag_foster_parenting = false
4471         form_element_pointer = null
4472         temporary_buffer = null
4473         pending_table_character_tokens = []
4474         head_element_pointer = null
4475         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4476         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4477
4478         # tokenizer initialization
4479         tok_state = tok_state_data
4480
4481         # text pre-processing
4482         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4483         txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4484         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4485         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4486
4487         if args.name is "plain-text-unsafe.dat #4"
4488                 console.log "hi"
4489         # proccess input
4490         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4491         while flag_parsing
4492                 t = tok_state()
4493                 if t?
4494                         process_token t
4495                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4496         return doc.children
4497
serialize_els = (els, shallow, show_ids) ->
        # Serializes every element of els (passing shallow and show_ids through
        # to each element's serialize method) and joins the results with commas.
        # '' + ... coerces each result exactly as string concatenation would.
        return ('' + t.serialize(shallow, show_ids) for t in els).join ','
4506
# Public interface of this module (CommonJS-style exports).
# parser and debug-log helpers
module.exports.parse_html = parse_html
module.exports.debug_log_reset = debug_log_reset
module.exports.debug_log_each = debug_log_each
# TYPE_* constants (TYPE_TAG is what the parser passes to `new Node` for elements)
module.exports.TYPE_TAG = TYPE_TAG
module.exports.TYPE_TEXT = TYPE_TEXT
module.exports.TYPE_COMMENT = TYPE_COMMENT
module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
# NS_* constants (NS_HTML is the `namespace` value given to the root node)
module.exports.NS_HTML = NS_HTML
module.exports.NS_MATHML = NS_MATHML
module.exports.NS_SVG = NS_SVG