parse-html.coffee

   1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
   2 # Copyright 2015 Jason Woofenden
   3 #
   4 # This program is free software: you can redistribute it and/or modify it under
   5 # the terms of the GNU Affero General Public License as published by the Free
   6 # Software Foundation, either version 3 of the License, or (at your option) any
   7 # later version.
   8 #
   9 # This program is distributed in the hope that it will be useful, but WITHOUT
  10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
  12 # details.
  13 #
  14 # You should have received a copy of the GNU Affero General Public License
  15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17
  18 # This file implements a parser for html snippets, meant to be used by a
  19 # WYSIWYG editor.
  20
  21 # The implementation is a pretty direct implementation of the parsing algorithm
  22 # described here:
  23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
  24 #
  25 # Deviations from that spec:
  26 #
  27 #   Purposeful: search this file for "WTAG"
  28 #
  29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
  30
  31
  32 # stacks/lists
  33 #
  34 # the spec uses a many different words do indicate which ends of lists/stacks
  35 # they are talking about (and relative movement within the lists/stacks). This
  36 # section splains. I'm implementing "lists" (afe and open_els) the same way
  37 # (both as stacks)
  38 #
  39 # stacks grow downward (current element is index=0)
  40 #
  41 # example: open_els = [a, b, c, d, e, f, g]
  42 #
  43 # "grows downwards" means it's visualized like this: (index: el, names)
  44 #
  45 #   6: g "start of the list", "topmost", "first"
  46 #   5: f
  47 #   4: e "previous" (to d), "above", "before"
  48 #   3: d   (previous/next are relative to this element)
  49 #   2: c "next", "after", "lower", "below"
  50 #   1: b
  51 #   0: a "end of the list", "current node", "bottommost", "last"
  52
  53
  54 # browser
  55 # note: to get this to run outside a browser, you'll have to write a native
  56 # implementation of decode_named_char_ref()
  57 unless module?.exports?
  58         window.wheic = {}
  59         module = exports: window.wheic
  60
  61 from_code_point = (x) ->
  62         if String.fromCodePoint?
  63                 return String.fromCodePoint x
  64         else
  65                 if x <= 0xffff
  66                         return String.fromCharCode x
  67                 x -= 0x10000
  68                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
  69
  70 # Each node is an obect of the Node class. Here are the Node types:
  71 TYPE_TAG = 0 # name, {attributes}, [children]
  72 TYPE_TEXT = 1 # "text"
  73 TYPE_COMMENT = 2
  74 TYPE_DOCTYPE = 3
  75 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
  76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
  77 TYPE_END_TAG = 5 # name
  78 TYPE_EOF = 6
  79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
  80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
  81
  82 # namespace constants
  83 NS_HTML = 1
  84 NS_MATHML = 2
  85 NS_SVG = 3
  86
  87 g_debug_log = []
  88 debug_log_reset = ->
  89         g_debug_log = []
  90 debug_log = (str) ->
  91         g_debug_log.push str
  92 debug_log_each = (cb) ->
  93         for str in g_debug_log
  94                 cb str
  95
  96 prev_node_id = 0
  97 class Node
  98         constructor: (type, args = {}) ->
  99                 @type = type # one of the TYPE_* constants above
 100                 @name = args.name ? '' # tag name
 101                 @text = args.text ? '' # contents for text/comment nodes
 102                 @attrs = args.attrs ? {}
 103                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
 104                 @children = args.children ? []
 105                 @namespace = args.namespace ? NS_HTML
 106                 @parent = args.parent ? null
 107                 @token = args.token ? null
 108                 @flags = args.flags ? {}
 109                 if args.id?
 110                         @id = "#{args.id}+"
 111                 else
 112                         @id = "#{++prev_node_id}"
 113         acknowledge_self_closing: ->
 114                 if @token?
 115                         @token.flag 'did_self_close'
 116                 else
 117                         @flag 'did_self_close', true
 118         flag: (key, value = null) ->
 119                 if value?
 120                         @flags[key] = value
 121                 else
 122                         return @flags[key]
 123         serialize: (shallow = false, show_ids = false) -> # for unit tests
 124                 ret = ''
 125                 switch @type
 126                         when TYPE_TAG
 127                                 ret += 'tag:'
 128                                 ret += JSON.stringify @name
 129                                 ret += ','
 130                                 if show_ids
 131                                         ret += "##{@id},"
 132                                 if shallow
 133                                         break
 134                                 attr_keys = []
 135                                 for k of @attrs
 136                                         attr_keys.push k
 137                                 attr_keys.sort()
 138                                 ret += '{'
 139                                 sep = ''
 140                                 for k in attr_keys
 141                                         ret += sep
 142                                         sep = ','
 143                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
 144                                 ret += '},['
 145                                 sep = ''
 146                                 for c in @children
 147                                         ret += sep
 148                                         sep = ','
 149                                         ret += c.serialize shallow, show_ids
 150                                 ret += ']'
 151                         when TYPE_TEXT
 152                                 ret += 'text:'
 153                                 ret += JSON.stringify @text
 154                         when TYPE_COMMENT
 155                                 ret += 'comment:'
 156                                 ret += JSON.stringify @text
 157                         when TYPE_DOCTYPE
 158                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
 159                         when TYPE_AFE_MARKER
 160                                 ret += 'marker'
 161                         when TYPE_AAA_BOOKMARK
 162                                 ret += 'aaa_bookmark'
 163                         else
 164                                 ret += 'unknown:'
 165                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
 166                 return ret
 167
 168 # helpers: (only take args that are normally known when parser creates nodes)
 169 new_open_tag = (name) ->
 170         return new Node TYPE_START_TAG, name: name
 171 new_end_tag = (name) ->
 172         return new Node TYPE_END_TAG, name: name
 173 new_element = (name) ->
 174         return new Node TYPE_TAG, name: name
 175 new_text_node = (txt) ->
 176         return new Node TYPE_TEXT, text: txt
 177 new_character_token = new_text_node
 178 new_comment_token = (txt) ->
 179         return new Node TYPE_COMMENT, text: txt
 180 new_doctype_token = (name) ->
 181         return new Node TYPE_DOCTYPE, name: name
 182 new_eof_token = ->
 183         return new Node TYPE_EOF
 184 new_afe_marker = ->
 185         return new Node TYPE_AFE_MARKER
 186 new_aaa_bookmark = ->
 187         return new Node TYPE_AAA_BOOKMARK
 188
 189 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
 190 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 191 digits = "0123456789"
 192 alnum = lc_alpha + uc_alpha + digits
 193 hex_chars = digits + "abcdefABCDEF"
 194
 195 is_uc_alpha = (str) ->
 196         return str.length is 1 and uc_alpha.indexOf(str) > -1
 197 is_lc_alpha = (str) ->
 198         return str.length is 1 and lc_alpha.indexOf(str) > -1
 199
 200 # some SVG elements have dashes in them
 201 tag_name_chars = alnum + "-"
 202
 203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
 204 space_chars = "\u0009\u000a\u000c\u000d\u0020"
 205 is_space = (txt) ->
 206         return txt.length is 1 and space_chars.indexOf(txt) > -1
 207 is_space_tok = (t) ->
 208         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
 209
 210 is_input_hidden_tok = (t) ->
 211         return false unless t.type is TYPE_START_TAG
 212         for a in t.attrs_a
 213                 if a[0] is 'type'
 214                         if a[1].toLowerCase() is 'hidden'
 215                                 return true
 216                         return false
 217         return false
 218
 219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
 220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
 221
 222 unicode_fixes = {}
 223 unicode_fixes[0x00] = "\uFFFD"
 224 unicode_fixes[0x80] = "\u20AC"
 225 unicode_fixes[0x82] = "\u201A"
 226 unicode_fixes[0x83] = "\u0192"
 227 unicode_fixes[0x84] = "\u201E"
 228 unicode_fixes[0x85] = "\u2026"
 229 unicode_fixes[0x86] = "\u2020"
 230 unicode_fixes[0x87] = "\u2021"
 231 unicode_fixes[0x88] = "\u02C6"
 232 unicode_fixes[0x89] = "\u2030"
 233 unicode_fixes[0x8A] = "\u0160"
 234 unicode_fixes[0x8B] = "\u2039"
 235 unicode_fixes[0x8C] = "\u0152"
 236 unicode_fixes[0x8E] = "\u017D"
 237 unicode_fixes[0x91] = "\u2018"
 238 unicode_fixes[0x92] = "\u2019"
 239 unicode_fixes[0x93] = "\u201C"
 240 unicode_fixes[0x94] = "\u201D"
 241 unicode_fixes[0x95] = "\u2022"
 242 unicode_fixes[0x96] = "\u2013"
 243 unicode_fixes[0x97] = "\u2014"
 244 unicode_fixes[0x98] = "\u02DC"
 245 unicode_fixes[0x99] = "\u2122"
 246 unicode_fixes[0x9A] = "\u0161"
 247 unicode_fixes[0x9B] = "\u203A"
 248 unicode_fixes[0x9C] = "\u0153"
 249 unicode_fixes[0x9E] = "\u017E"
 250 unicode_fixes[0x9F] = "\u0178"
 251
 252 # These are the character references that don't need a terminating semicolon
 253 # min length: 2, max: 6, none are a prefix of any other.
 254 legacy_char_refs = {
 255         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
 256         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
 257         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
 258         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
 259         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
 260         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
 261         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
 262         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
 263         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
 264         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
 265         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
 266         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
 267         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
 268         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
 269         shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
 270         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
 271         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
 272         yen: '¥', yuml: 'ÿ'
 273 }
 274
 275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
 276 raw_text_elements = ['script', 'style']
 277 escapable_raw_text_elements = ['textarea', 'title']
 278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
 279 svg_elements = [
 280         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
 281         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
 282         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
 283         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
 284         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
 285         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
 286         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
 287         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
 288         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
 289         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
 290         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
 291         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
 292         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
 293         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
 294         'view', 'vkern'
 295 ]
 296
 297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
 298 mathml_elements = [
 299         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
 300         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
 301         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
 302         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
 303         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
 304         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
 305         'determinant', 'diff', 'divergence', 'divide', 'domain',
 306         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
 307         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
 308         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
 309         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
 310         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
 311         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
 312         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
 313         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
 314         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
 315         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
 316         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
 317         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
 318         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
 319         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
 320         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
 321         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
 322         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
 323         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
 324         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
 325         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
 326         'vectorproduct', 'xor'
 327 ]
 328 # foreign_elements = [svg_elements..., mathml_elements...]
 329 #normal_elements = All other allowed HTML elements are normal elements.
 330
 331 special_elements = {
 332         # HTML:
 333         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
 334         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
 335         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
 336         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
 337         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
 338         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
 339         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
 340         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
 341         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
 342         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
 343         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML, meta:NS_HTML, nav:NS_HTML,
 344         noembed:NS_HTML, noframes:NS_HTML, noscript:NS_HTML, object:NS_HTML,
 345         ol:NS_HTML, p:NS_HTML, param:NS_HTML, plaintext:NS_HTML, pre:NS_HTML,
 346         script:NS_HTML, section:NS_HTML, select:NS_HTML, source:NS_HTML,
 347         style:NS_HTML, summary:NS_HTML, table:NS_HTML, tbody:NS_HTML, td:NS_HTML,
 348         template:NS_HTML, textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML,
 349         thead:NS_HTML, title:NS_HTML, tr:NS_HTML, track:NS_HTML, ul:NS_HTML,
 350         wbr:NS_HTML, xmp:NS_HTML,
 351
 352         # MathML:
 353         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
 354         'annotation-xml':NS_MATHML,
 355
 356         # SVG:
 357         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
 358 }
 359
 360 formatting_elements = {
 361          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
 362          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
 363          u: true
 364 }
 365
 366 mathml_text_integration = {
 367         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
 368 }
 369 is_mathml_text_integration_point = (el) ->
 370         return mathml_text_integration[el.name] is el.namespace
 371 is_html_integration = (el) -> # DON'T PASS A TOKEN
 372         if el.namespace is NS_MATHML
 373                 if el.name is 'annotation-xml'
 374                         if el.attrs.encoding?
 375                                 if el.attrs.encoding.toLowerCase() is 'text/html'
 376                                         return true
 377                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
 378                                         return true
 379                 return false
 380         if el.namespace is NS_SVG
 381                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
 382                         return true
 383         return false
 384
 385 h_tags = {
 386         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
 387 }
 388
 389 foster_parenting_targets = {
 390         table: NS_HTML
 391         tbody: NS_HTML
 392         tfoot: NS_HTML
 393         thead: NS_HTML
 394         tr: NS_HTML
 395 }
 396
 397 end_tag_implied = {
 398         dd: NS_HTML
 399         dt: NS_HTML
 400         li: NS_HTML
 401         option: NS_HTML
 402         optgroup: NS_HTML
 403         p: NS_HTML
 404         rb: NS_HTML
 405         rp: NS_HTML
 406         rt: NS_HTML
 407         rtc: NS_HTML
 408 }
 409
 410 el_is_special = (e) ->
 411         return special_elements[e.name] is e.namespace
 412
 413 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
 414 el_is_special_not_adp = (el) ->
 415         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
 416
 417 svg_name_fixes = {
 418         altglyph: 'altGlyph'
 419         altglyphdef: 'altGlyphDef'
 420         altglyphitem: 'altGlyphItem'
 421         animatecolor: 'animateColor'
 422         animatemotion: 'animateMotion'
 423         animatetransform: 'animateTransform'
 424         clippath: 'clipPath'
 425         feblend: 'feBlend'
 426         fecolormatrix: 'feColorMatrix'
 427         fecomponenttransfer: 'feComponentTransfer'
 428         fecomposite: 'feComposite'
 429         feconvolvematrix: 'feConvolveMatrix'
 430         fediffuselighting: 'feDiffuseLighting'
 431         fedisplacementmap: 'feDisplacementMap'
 432         fedistantlight: 'feDistantLight'
 433         fedropshadow: 'feDropShadow'
 434         feflood: 'feFlood'
 435         fefunca: 'feFuncA'
 436         fefuncb: 'feFuncB'
 437         fefuncg: 'feFuncG'
 438         fefuncr: 'feFuncR'
 439         fegaussianblur: 'feGaussianBlur'
 440         feimage: 'feImage'
 441         femerge: 'feMerge'
 442         femergenode: 'feMergeNode'
 443         femorphology: 'feMorphology'
 444         feoffset: 'feOffset'
 445         fepointlight: 'fePointLight'
 446         fespecularlighting: 'feSpecularLighting'
 447         fespotlight: 'feSpotLight'
 448         fetile: 'feTile'
 449         feturbulence: 'feTurbulence'
 450         foreignobject: 'foreignObject'
 451         glyphref: 'glyphRef'
 452         lineargradient: 'linearGradient'
 453         radialgradient: 'radialGradient'
 454         textpath: 'textPath'
 455 }
 456 svg_attribute_fixes = {
 457         attributename: 'attributeName'
 458         attributetype: 'attributeType'
 459         basefrequency: 'baseFrequency'
 460         baseprofile: 'baseProfile'
 461         calcmode: 'calcMode'
 462         clippathunits: 'clipPathUnits'
 463         contentscripttype: 'contentScriptType'
 464         contentstyletype: 'contentStyleType'
 465         diffuseconstant: 'diffuseConstant'
 466         edgemode: 'edgeMode'
 467         externalresourcesrequired: 'externalResourcesRequired'
 468         filterres: 'filterRes'
 469         filterunits: 'filterUnits'
 470         glyphref: 'glyphRef'
 471         gradienttransform: 'gradientTransform'
 472         gradientunits: 'gradientUnits'
 473         kernelmatrix: 'kernelMatrix'
 474         kernelunitlength: 'kernelUnitLength'
 475         keypoints: 'keyPoints'
 476         keysplines: 'keySplines'
 477         keytimes: 'keyTimes'
 478         lengthadjust: 'lengthAdjust'
 479         limitingconeangle: 'limitingConeAngle'
 480         markerheight: 'markerHeight'
 481         markerunits: 'markerUnits'
 482         markerwidth: 'markerWidth'
 483         maskcontentunits: 'maskContentUnits'
 484         maskunits: 'maskUnits'
 485         numoctaves: 'numOctaves'
 486         pathlength: 'pathLength'
 487         patterncontentunits: 'patternContentUnits'
 488         patterntransform: 'patternTransform'
 489         patternunits: 'patternUnits'
 490         pointsatx: 'pointsAtX'
 491         pointsaty: 'pointsAtY'
 492         pointsatz: 'pointsAtZ'
 493         preservealpha: 'preserveAlpha'
 494         preserveaspectratio: 'preserveAspectRatio'
 495         primitiveunits: 'primitiveUnits'
 496         refx: 'refX'
 497         refy: 'refY'
 498         repeatcount: 'repeatCount'
 499         repeatdur: 'repeatDur'
 500         requiredextensions: 'requiredExtensions'
 501         requiredfeatures: 'requiredFeatures'
 502         specularconstant: 'specularConstant'
 503         specularexponent: 'specularExponent'
 504         spreadmethod: 'spreadMethod'
 505         startoffset: 'startOffset'
 506         stddeviation: 'stdDeviation'
 507         stitchtiles: 'stitchTiles'
 508         surfacescale: 'surfaceScale'
 509         systemlanguage: 'systemLanguage'
 510         tablevalues: 'tableValues'
 511         targetx: 'targetX'
 512         targety: 'targetY'
 513         textlength: 'textLength'
 514         viewbox: 'viewBox'
 515         viewtarget: 'viewTarget'
 516         xchannelselector: 'xChannelSelector'
 517         ychannelselector: 'yChannelSelector'
 518         zoomandpan: 'zoomAndPan'
 519 }
 520 adjust_mathml_attributes = (t) ->
 521         for a in t.attrs_a
 522                 if a[0] is 'definitionurl'
 523                         a[0] = 'definitionURL'
 524         return
 525 adjust_svg_attributes = (t) ->
 526         for a in t.attrs_a
 527                 if svg_attribute_fixes[a[0]]?
 528                         a[0] = svg_attribute_fixes[a[0]]
 529         return
 530 adjust_foreign_attributes = (t) ->
 531         # fixfull
 532         return
 533
 534 # decode_named_char_ref()
 535 #
 536 # The list of named character references is _huge_ so ask the browser to decode
 537 # for us instead of wasting bandwidth/space on including the table here.
 538 #
 539 # Pass without the "&" but with the ";" examples:
 540 #    for "&amp" pass "amp;"
 541 #    for "&#x2032" pass "x2032;"
 542 g_dncr = {
 543         cache: {}
 544         textarea: document.createElement('textarea')
 545 }
 546 # TODO test this in IE8
 547 decode_named_char_ref = (txt) ->
 548         txt = "&#{txt}"
 549         decoded = g_dncr.cache[txt]
 550         return decoded if decoded?
 551         g_dncr.textarea.innerHTML = txt
 552         decoded = g_dncr.textarea.value
 553         return null if decoded is txt
 554         return g_dncr.cache[txt] = decoded
 555
 556 parse_html = (args) ->
 557         txt = null
 558         cur = null # index of next char in txt to be parsed
 559         # declare doc and tokenizer variables so they're in scope below
 560         doc = null
 561         open_els = null # stack of open elements
 562         afe = null # active formatting elements
 563         template_ins_modes = null
 564         ins_mode = null
 565         original_ins_mode = null
 566         tok_state = null
 567         tok_cur_tag = null # partially parsed tag
 568         flag_scripting = null
 569         flag_frameset_ok = null
 570         flag_parsing = null
 571         flag_foster_parenting = null
 572         form_element_pointer = null
 573         temporary_buffer = null
 574         pending_table_character_tokens = null
 575         head_element_pointer = null
 576         flag_fragment_parsing = null
 577         context_element = null
 578
 579         stop_parsing = ->
 580                 flag_parsing = false
 581
 582         parse_error = ->
 583                 if args.error_cb?
 584                         args.error_cb cur
 585                 else
 586                         console.log "Parse error at character #{cur} of #{txt.length}"
 587
 588         afe_push = (new_el) ->
 589                 matches = 0
 590                 for el, i in afe
 591                         if el.name is new_el.name and el.namespace is new_el.namespace
 592                                 for k, v of el.attrs
 593                                         continue unless new_el.attrs[k] is v
 594                                 for k, v of new_el.attrs
 595                                         continue unless el.attrs[k] is v
 596                                 matches += 1
 597                                 if matches is 3
 598                                         afe.splice i, 1
 599                                         break
 600                 afe.unshift new_el
 601         afe_push_marker = ->
 602                 afe.unshift new_afe_marker()
 603
 604         # the functions below impliment the Tree Contstruction algorithm
 605         # http://www.w3.org/TR/html5/syntax.html#tree-construction
 606
 607         # But first... the helpers
 608         template_tag_is_open = ->
 609                 for t in open_els
 610                         if t.name is 'template' and t.namespace is NS_HTML
 611                                 return true
 612                 return false
 613         is_in_scope_x = (tag_name, scope, namespace) ->
 614                 for t in open_els
 615                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 616                                 return true
 617                         if scope[t.name] is t.namespace
 618                                 return false
 619                 return false
 620         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
 621                 for t in open_els
 622                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 623                                 return true
 624                         if scope[t.name] is t.namespace
 625                                 return false
 626                         if scope2[t.name] is t.namespace
 627                                 return false
 628                 return false
 629         standard_scopers = {
 630                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
 631                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
 632                 template: NS_HTML, mi: NS_MATHML,
 633
 634                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
 635                 'annotation-xml': NS_MATHML,
 636
 637                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
 638         }
 639         button_scopers = button: NS_HTML
 640         li_scopers = ol: NS_HTML, ul: NS_HTML
 641         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
 642         is_in_scope = (tag_name, namespace = null) ->
 643                 return is_in_scope_x tag_name, standard_scopers, namespace
 644         is_in_button_scope = (tag_name, namespace = null) ->
 645                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
 646         is_in_table_scope = (tag_name, namespace = null) ->
 647                 return is_in_scope_x tag_name, table_scopers, namespace
 648         # aka is_in_list_item_scope
 649         is_in_li_scope = (tag_name, namespace = null) ->
 650                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
 651         is_in_select_scope = (tag_name, namespace = null) ->
 652                 for t in open_els
 653                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
 654                                 return true
 655                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
 656                                 return false
 657                 return false
 658         # this checks for a particular element, not by name
 659         # this requires a namespace match
 660         el_is_in_scope = (needle) ->
 661                 for el in open_els
 662                         if el is needle
 663                                 return true
 664                         if standard_scopers[el.name] is el.namespace
 665                                 return false
 666                 return false
 667
 668         clear_to_table_stopers = {
 669                 'table': true
 670                 'template': true
 671                 'html': true
 672         }
 673         clear_stack_to_table_context = ->
 674                 loop
 675                         if clear_to_table_stopers[open_els[0].name]?
 676                                 break
 677                         open_els.shift()
 678                 return
 679         clear_to_table_body_stopers = {
 680                 tbody: NS_HTML
 681                 tfoot: NS_HTML
 682                 thead: NS_HTML
 683                 template: NS_HTML
 684                 html: NS_HTML
 685         }
 686         clear_stack_to_table_body_context = ->
 687                 loop
 688                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
 689                                 break
 690                         open_els.shift()
 691                 return
 692         clear_to_table_row_stopers = {
 693                 'tr': true
 694                 'template': true
 695                 'html': true
 696         }
 697         clear_stack_to_table_row_context = ->
 698                 loop
 699                         if clear_to_table_row_stopers[open_els[0].name]?
 700                                 break
 701                         open_els.shift()
 702                 return
 703         clear_afe_to_marker = ->
 704                 loop
 705                         return unless afe.length > 0 # this happens in fragment case, ?spec error
 706                         el = afe.shift()
 707                         if el.type is TYPE_AFE_MARKER
 708                                 return
 709                 return
 710
 711         # 8.2.3.1 ...
 712         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
 713         reset_ins_mode = ->
 714                 # 1. Let last be false.
 715                 last = false
 716                 # 2. Let node be the last node in the stack of open elements.
 717                 node_i = 0
 718                 node = open_els[node_i]
 719                 # 3. Loop: If node is the first node in the stack of open elements,
 720                 # then set last to true, and, if the parser was originally created as
 721                 # part of the HTML fragment parsing algorithm (fragment case) set node
 722                 # to the context element.
 723                 loop
 724                         if node_i is open_els.length - 1
 725                                 last = true
 726                                 # fixfull (fragment case)
 727
 728                         # 4. If node is a select element, run these substeps:
 729                         if node.name is 'select' and node.namespace is NS_HTML
 730                                 # 1. If last is true, jump to the step below labeled done.
 731                                 unless last
 732                                         # 2. Let ancestor be node.
 733                                         ancestor_i = node_i
 734                                         ancestor = node
 735                                         # 3. Loop: If ancestor is the first node in the stack of
 736                                         # open elements, jump to the step below labeled done.
 737                                         loop
 738                                                 if ancestor_i is open_els.length - 1
 739                                                         break
 740                                                 # 4. Let ancestor be the node before ancestor in the stack
 741                                                 # of open elements.
 742                                                 ancestor_i += 1
 743                                                 ancestor = open_els[ancestor_i]
 744                                                 # 5. If ancestor is a template node, jump to the step below
 745                                                 # labeled done.
 746                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
 747                                                         break
 748                                                 # 6. If ancestor is a table node, switch the insertion mode
 749                                                 # to "in select in table" and abort these steps.
 750                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
 751                                                         ins_mode = ins_mode_in_select_in_table
 752                                                         return
 753                                                 # 7. Jump back to the step labeled loop.
 754                                 # 8. Done: Switch the insertion mode to "in select" and abort
 755                                 # these steps.
 756                                 ins_mode = ins_mode_in_select
 757                                 return
 758                         # 5. If node is a td or th element and last is false, then switch
 759                         # the insertion mode to "in cell" and abort these steps.
 760                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
 761                                 ins_mode = ins_mode_in_cell
 762                                 return
 763                         # 6. If node is a tr element, then switch the insertion mode to "in
 764                         # row" and abort these steps.
 765                         if node.name is 'tr' and node.namespace is NS_HTML
 766                                 ins_mode = ins_mode_in_row
 767                                 return
 768                         # 7. If node is a tbody, thead, or tfoot element, then switch the
 769                         # insertion mode to "in table body" and abort these steps.
 770                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
 771                                 ins_mode = ins_mode_in_table_body
 772                                 return
 773                         # 8. If node is a caption element, then switch the insertion mode
 774                         # to "in caption" and abort these steps.
 775                         if node.name is 'caption' and node.namespace is NS_HTML
 776                                 ins_mode = ins_mode_in_caption
 777                                 return
 778                         # 9. If node is a colgroup element, then switch the insertion mode
 779                         # to "in column group" and abort these steps.
 780                         if node.name is 'colgroup' and node.namespace is NS_HTML
 781                                 ins_mode = ins_mode_in_column_group
 782                                 return
 783                         # 10. If node is a table element, then switch the insertion mode to
 784                         # "in table" and abort these steps.
 785                         if node.name is 'table' and node.namespace is NS_HTML
 786                                 ins_mode = ins_mode_in_table
 787                                 return
 788                         # 11. If node is a template element, then switch the insertion mode
 789                         # to the current template insertion mode and abort these steps.
 790                         if node.name is 'template' and node.namespace is NS_HTML
 791                                 ins_mode = template_ins_modes[0]
 792                                 return
 793                         # 12. If node is a head element and last is true, then switch the
 794                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
 795                         # these steps. (fragment case)
 796                         if node.name is 'head' and node.namespace is NS_HTML and last
 797                                 ins_mode = ins_mode_in_body
 798                                 return
 799                         # 13. If node is a head element and last is false, then switch the
 800                         # insertion mode to "in head" and abort these steps.
 801                         if node.name is 'head' and node.namespace is NS_HTML and last is false
 802                                 ins_mode = ins_mode_in_head
 803                                 return
 804                         # 14. If node is a body element, then switch the insertion mode to
 805                         # "in body" and abort these steps.
 806                         if node.name is 'body' and node.namespace is NS_HTML
 807                                 ins_mode = ins_mode_in_body
 808                                 return
 809                         # 15. If node is a frameset element, then switch the insertion mode
 810                         # to "in frameset" and abort these steps. (fragment case)
 811                         if node.name is 'frameset' and node.namespace is NS_HTML
 812                                 ins_mode = ins_mode_in_frameset
 813                                 return
 814                         # 16. If node is an html element, run these substeps:
 815                         if node.name is 'html' and node.namespace is NS_HTML
 816                                 # 1. If the head element pointer is null, switch the insertion
 817                                 # mode to "before head" and abort these steps. (fragment case)
 818                                 if head_element_pointer is null
 819                                         ins_mode = ins_mode_before_head
 820                                 else
 821                                         # 2. Otherwise, the head element pointer is not null,
 822                                         # switch the insertion mode to "after head" and abort these
 823                                         # steps.
 824                                         ins_mode = ins_mode_after_head
 825                                 return
 826                         # 17. If last is true, then switch the insertion mode to "in body"
 827                         # and abort these steps. (fragment case)
 828                         if last
 829                                 ins_mode = ins_mode_in_body
 830                                 return
 831                         # 18. Let node now be the node before node in the stack of open
 832                         # elements.
 833                         node_i += 1
 834                         node = open_els[node_i]
 835                         # 19. Return to the step labeled loop.
 836
 837         # 8.2.3.2
 838
 839         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
 840         adjusted_current_node = ->
 841                 if open_els.length is 1 and flag_fragment_parsing
 842                         return context_element
 843                 return open_els[0]
 844
 845         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
 846         # this implementation is structured (mostly) as described at the link above.
 847         # capitalized comments are the "labels" described at the link above.
 848         reconstruct_afe = ->
 849                 return if afe.length is 0
 850                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
 851                         return
 852                 # Rewind
 853                 i = 0
 854                 loop
 855                         if i is afe.length - 1
 856                                 break
 857                         i += 1
 858                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
 859                                 i -= 1 # Advance
 860                                 break
 861                 # Create
 862                 loop
 863                         el = insert_html_element afe[i].token
 864                         afe[i] = el
 865                         break if i is 0
 866                         i -= 1 # Advance
 867
 868         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
 869         # adoption agency algorithm
 870         # overview here:
 871         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
 872         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
 873         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
 874         adoption_agency = (subject) ->
 875                 debug_log "adoption_agency()"
 876                 debug_log "tree: #{serialize_els doc.children, false, true}"
 877                 debug_log "open_els: #{serialize_els open_els, true, true}"
 878                 debug_log "afe: #{serialize_els afe, true, true}"
 879                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
 880                         el = open_els[0]
 881                         open_els.shift()
 882                         # remove it from the list of active formatting elements (if found)
 883                         for t, i in afe
 884                                 if t is el
 885                                         afe.splice i, 1
 886                                         break
 887                         debug_log "aaa: starting off with subject on top of stack, exiting"
 888                         return
 889                 outer = 0
 890                 loop
 891                         if outer >= 8
 892                                 return
 893                         outer += 1
 894                         # 5. Let formatting element be the last element in the list of
 895                         # active formatting elements that: is between the end of the list
 896                         # and the last scope marker in the list, if any, or the start of
 897                         # the list otherwise, and  has the tag name subject.
 898                         fe = null
 899                         for t, fe_of_afe in afe
 900                                 if t.type is TYPE_AFE_MARKER
 901                                         break
 902                                 if t.name is subject
 903                                         fe = t
 904                                         break
 905                         # If there is no such element, then abort these steps and instead
 906                         # act as described in the "any other end tag" entry above.
 907                         if fe is null
 908                                 debug_log "aaa: fe not found in afe"
 909                                 in_body_any_other_end_tag subject
 910                                 return
 911                         # 6. If formatting element is not in the stack of open elements,
 912                         # then this is a parse error; remove the element from the list, and
 913                         # abort these steps.
 914                         in_open_els = false
 915                         for t, fe_of_open_els in open_els
 916                                 if t is fe
 917                                         in_open_els = true
 918                                         break
 919                         unless in_open_els
 920                                 debug_log "aaa: fe not found in open_els"
 921                                 parse_error()
 922                                 # "remove it from the list" must mean afe, since it's not in open_els
 923                                 afe.splice fe_of_afe, 1
 924                                 return
 925                         # 7. If formatting element is in the stack of open elements, but
 926                         # the element is not in scope, then this is a parse error; abort
 927                         # these steps.
 928                         unless el_is_in_scope fe
 929                                 debug_log "aaa: fe not in scope"
 930                                 parse_error()
 931                                 return
 932                         # 8. If formatting element is not the current node, this is a parse
 933                         # error. (But do not abort these steps.)
 934                         unless open_els[0] is fe
 935                                 parse_error()
 936                                 # continue
 937                         # 9. Let furthest block be the topmost node in the stack of open
 938                         # elements that is lower in the stack than formatting element, and
 939                         # is an element in the special category. There might not be one.
 940                         fb = null
 941                         fb_of_open_els = null
 942                         for t, i in open_els
 943                                 if t is fe
 944                                         break
 945                                 if el_is_special t
 946                                         fb = t
 947                                         fb_of_open_els = i
 948                                         # and continue, to see if there's one that's more "topmost"
 949                         # 10. If there is no furthest block, then the UA must first pop all
 950                         # the nodes from the bottom of the stack of open elements, from the
 951                         # current node up to and including formatting element, then remove
 952                         # formatting element from the list of active formatting elements,
 953                         # and finally abort these steps.
 954                         if fb is null
 955                                 debug_log "aaa: no fb"
 956                                 loop
 957                                         t = open_els.shift()
 958                                         if t is fe
 959                                                 afe.splice fe_of_afe, 1
 960                                                 return
 961                         # 11. Let common ancestor be the element immediately above
 962                         # formatting element in the stack of open elements.
 963                         ca = open_els[fe_of_open_els + 1] # common ancestor
 964
 965                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
 966                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
 967                         bookmark = new_aaa_bookmark()
 968                         for t, i in afe
 969                                 if t is fe
 970                                         afe.splice i, 0, bookmark
 971                                         break
 972                         node = last_node = fb
 973                         inner = 0
 974                         loop
 975                                 inner += 1
 976                                 # 3. Let node be the element immediately above node in the
 977                                 # stack of open elements, or if node is no longer in the stack
 978                                 # of open elements (e.g. because it got removed by this
 979                                 # algorithm), the element that was immediately above node in
 980                                 # the stack of open elements before node was removed.
 981                                 node_next = null
 982                                 for t, i in open_els
 983                                         if t is node
 984                                                 node_next = open_els[i + 1]
 985                                                 break
 986                                 node = node_next ? node_above
 987                                 debug_log "inner loop #{inner}"
 988                                 debug_log "tree: #{serialize_els doc.children, false, true}"
 989                                 debug_log "open_els: #{serialize_els open_els, true, true}"
 990                                 debug_log "afe: #{serialize_els afe, true, true}"
 991                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
 992                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
 993                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
 994                                 debug_log "node: #{node.serialize true, true}"
 995                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
 996
 997                                 # 4. If node is formatting element, then go to the next step in
 998                                 # the overall algorithm.
 999                                 if node is fe
1000                                         break
1001                                 debug_log "the meat"
1002                                 # 5. If inner loop counter is greater than three and node is in
1003                                 # the list of active formatting elements, then remove node from
1004                                 # the list of active formatting elements.
1005                                 node_in_afe = false
1006                                 for t, i in afe
1007                                         if t is node
1008                                                 if inner > 3
1009                                                         afe.splice i, 1
1010                                                         debug_log "max out inner"
1011                                                 else
1012                                                         node_in_afe = true
1013                                                         debug_log "in afe"
1014                                                 break
1015                                 # 6. If node is not in the list of active formatting elements,
1016                                 # then remove node from the stack of open elements and then go
1017                                 # back to the step labeled inner loop.
1018                                 unless node_in_afe
1019                                         debug_log "not in afe"
1020                                         for t, i in open_els
1021                                                 if t is node
1022                                                         node_above = open_els[i + 1]
1023                                                         open_els.splice i, 1
1024                                                         break
1025                                         continue
1026                                 debug_log "the bones"
1027                                 # 7. create an element for the token for which the element node
1028                                 # was created, in the HTML namespace, with common ancestor as
1029                                 # the intended parent; replace the entry for node in the list
1030                                 # of active formatting elements with an entry for the new
1031                                 # element, replace the entry for node in the stack of open
1032                                 # elements with an entry for the new element, and let node be
1033                                 # the new element.
1034                                 new_node = token_to_element node.token, NS_HTML, ca
1035                                 for t, i in afe
1036                                         if t is node
1037                                                 afe[i] = new_node
1038                                                 debug_log "replaced in afe"
1039                                                 break
1040                                 for t, i in open_els
1041                                         if t is node
1042                                                 node_above = open_els[i + 1]
1043                                                 open_els[i] = new_node
1044                                                 debug_log "replaced in open_els"
1045                                                 break
1046                                 node = new_node
1047                                 # 8. If last node is furthest block, then move the
1048                                 # aforementioned bookmark to be immediately after the new node
1049                                 # in the list of active formatting elements.
1050                                 if last_node is fb
1051                                         for t, i in afe
1052                                                 if t is bookmark
1053                                                         afe.splice i, 1
1054                                                         debug_log "removed bookmark"
1055                                                         break
1056                                         for t, i in afe
1057                                                 if t is node
1058                                                         # "after" means lower
1059                                                         afe.splice i, 0, bookmark # "after as <-
1060                                                         debug_log "placed bookmark after node"
1061                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1062                                                         break
1063                                 # 9. Insert last node into node, first removing it from its
1064                                 # previous parent node if any.
1065                                 if last_node.parent?
1066                                         debug_log "last_node has parent"
1067                                         for c, i in last_node.parent.children
1068                                                 if c is last_node
1069                                                         debug_log "removing last_node from parent"
1070                                                         last_node.parent.children.splice i, 1
1071                                                         break
1072                                 node.children.push last_node
1073                                 last_node.parent = node
1074                                 # 10. Let last node be node.
1075                                 last_node = node
1076                                 debug_log "at last"
1077                                 # 11. Return to the step labeled inner loop.
1078                         # 14. Insert whatever last node ended up being in the previous step
1079                         # at the appropriate place for inserting a node, but using common
1080                         # ancestor as the override target.
1081
1082                         # In the case where fe is immediately followed by fb:
1083                         #   * inner loop exits out early (node==fe)
1084                         #   * last_node is fb
1085                         #   * last_node is still in the tree (not a duplicate)
1086                         if last_node.parent?
1087                                 debug_log "FEFIRST? last_node has parent"
1088                                 for c, i in last_node.parent.children
1089                                         if c is last_node
1090                                                 debug_log "removing last_node from parent"
1091                                                 last_node.parent.children.splice i, 1
1092                                                 break
1093
1094                         debug_log "after aaa inner loop"
1095                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1096                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1097                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1098                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1099                         debug_log "tree: #{serialize_els doc.children, false, true}"
1100
1101                         debug_log "insert"
1102
1103
1104                         # can't use standard insert token thing, because it's already in
1105                         # open_els and must stay at it's current position in open_els
1106                         dest = adjusted_insertion_location ca
1107                         dest[0].children.splice dest[1], 0, last_node
1108                         last_node.parent = dest[0]
1109
1110
1111                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1112                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1113                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1114                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1115                         debug_log "tree: #{serialize_els doc.children, false, true}"
1116
1117                         # 15. Create an element for the token for which formatting element
1118                         # was created, in the HTML namespace, with furthest block as the
1119                         # intended parent.
1120                         new_element = token_to_element fe.token, NS_HTML, fb
1121                         # 16. Take all of the child nodes of furthest block and append them
1122                         # to the element created in the last step.
1123                         while fb.children.length
1124                                 t = fb.children.shift()
1125                                 t.parent = new_element
1126                                 new_element.children.push t
1127                         # 17. Append that new element to furthest block.
1128                         new_element.parent = fb
1129                         fb.children.push new_element
1130                         # 18. Remove formatting element from the list of active formatting
1131                         # elements, and insert the new element into the list of active
1132                         # formatting elements at the position of the aforementioned
1133                         # bookmark.
1134                         for t, i in afe
1135                                 if t is fe
1136                                         afe.splice i, 1
1137                                         break
1138                         for t, i in afe
1139                                 if t is bookmark
1140                                         afe[i] = new_element
1141                                         break
1142                         # 19. Remove formatting element from the stack of open elements,
1143                         # and insert the new element into the stack of open elements
1144                         # immediately below the position of furthest block in that stack.
1145                         for t, i in open_els
1146                                 if t is fe
1147                                         open_els.splice i, 1
1148                                         break
1149                         for t, i in open_els
1150                                 if t is fb
1151                                         open_els.splice i, 0, new_element
1152                                         break
1153                         # 20. Jump back to the step labeled outer loop.
1154                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1155                         debug_log "tree: #{serialize_els doc.children, false, true}"
1156                         debug_log "open_els: #{serialize_els open_els, true, true}"
1157                         debug_log "afe: #{serialize_els afe, true, true}"
1158                 debug_log "AAA DONE"
1159
1160         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1161         close_p_element = ->
1162                 generate_implied_end_tags 'p' # arg is exception
1163                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1164                         parse_error()
1165                 while open_els.length > 1 # just in case
1166                         el = open_els.shift()
1167                         if el.name is 'p' and el.namespace is NS_HTML
1168                                 return
1169         close_p_if_in_button_scope = ->
1170                 if is_in_button_scope 'p', NS_HTML
1171                         close_p_element()
1172
1173         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1174         # aka insert_a_character = (t) ->
1175         insert_character = (t) ->
1176                 dest = adjusted_insertion_location()
1177                 # fixfull check for Document node
1178                 if dest[1] > 0
1179                         prev = dest[0].children[dest[1] - 1]
1180                         if prev.type is TYPE_TEXT
1181                                 prev.text += t.text
1182                                 return
1183                 dest[0].children.splice dest[1], 0, t
1184
1185
1186         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1187         process_token = (t) ->
1188                 acn = adjusted_current_node()
1189                 unless acn?
1190                         ins_mode t
1191                         return
1192                 if acn.namespace is NS_HTML
1193                         ins_mode t
1194                         return
1195                 if is_mathml_text_integration_point(acn)
1196                         if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
1197                                 ins_mode t
1198                                 return
1199                         if t.type is TYPE_TEXT
1200                                 ins_mode t
1201                                 return
1202                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1203                         ins_mode t
1204                         return
1205                 if is_html_integration acn
1206                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1207                                 ins_mode t
1208                                 return
1209                 if t.type is TYPE_EOF
1210                         ins_mode t
1211                         return
1212                 in_foreign_content t
1213                 return
1214
1215         # 8.2.5.1
1216         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1217         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1218         adjusted_insertion_location = (override_target = null) ->
1219                 # 1. If there was an override target specified, then let target be the
1220                 # override target.
1221                 if override_target?
1222                         target = override_target
1223                 else # Otherwise, let target be the current node.
1224                         target = open_els[0]
1225                 # 2. Determine the adjusted insertion location using the first matching
1226                 # steps from the following list:
1227                 #
1228                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1229                 # thead, or tr element Foster parenting happens when content is
1230                 # misnested in tables.
1231                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1232                         loop # once. this is here so we can ``break`` to "abort these substeps"
1233                                 # 1. Let last template be the last template element in the
1234                                 # stack of open elements, if any.
1235                                 last_template = null
1236                                 last_template_i = null
1237                                 for el, i in open_els
1238                                         if el.name is 'template' and el.namespace is NS_HTML
1239                                                 last_template = el
1240                                                 last_template_i = i
1241                                                 break
1242                                 # 2. Let last table be the last table element in the stack of
1243                                 # open elements, if any.
1244                                 last_table = null
1245                                 last_table_i
1246                                 for el, i in open_els
1247                                         if el.name is 'table' and el.namespace is NS_HTML
1248                                                 last_table = el
1249                                                 last_table_i = i
1250                                                 break
1251                                 # 3. If there is a last template and either there is no last
1252                                 # table, or there is one, but last template is lower (more
1253                                 # recently added) than last table in the stack of open
1254                                 # elements, then: let adjusted insertion location be inside
1255                                 # last template's template contents, after its last child (if
1256                                 # any), and abort these substeps.
1257                                 if last_template and (last_table is null or last_template_i < last_table_i)
1258                                         target = last_template # fixfull should be it's contents
1259                                         target_i = target.children.length
1260                                         break
1261                                 # 4. If there is no last table, then let adjusted insertion
1262                                 # location be inside the first element in the stack of open
1263                                 # elements (the html element), after its last child (if any),
1264                                 # and abort these substeps. (fragment case)
1265                                 if last_table is null
1266                                         # this is odd
1267                                         target = open_els[open_els.length - 1]
1268                                         target_i = target.children.length
1269                                         break
1270                                 # 5. If last table has a parent element, then let adjusted
1271                                 # insertion location be inside last table's parent element,
1272                                 # immediately before last table, and abort these substeps.
1273                                 if last_table.parent?
1274                                         for c, i in last_table.parent.children
1275                                                 if c is last_table
1276                                                         target = last_table.parent
1277                                                         target_i = i
1278                                                         break
1279                                         break
1280                                 # 6. Let previous element be the element immediately above last
1281                                 # table in the stack of open elements.
1282                                 #
1283                                 # huh? how could it not have a parent?
1284                                 previous_element = open_els[last_table_i + 1]
1285                                 # 7. Let adjusted insertion location be inside previous
1286                                 # element, after its last child (if any).
1287                                 target = previous_element
1288                                 target_i = target.children.length
1289                                 # Note: These steps are involved in part because it's possible
1290                                 # for elements, the table element in this case in particular,
1291                                 # to have been moved by a script around in the DOM, or indeed
1292                                 # removed from the DOM entirely, after the element was inserted
1293                                 # by the parser.
1294                                 break # don't really loop
1295                 else
1296                         # Otherwise Let adjusted insertion location be inside target, after
1297                         # its last child (if any).
1298                         target_i = target.children.length
1299
1300                 # 3. If the adjusted insertion location is inside a template element,
1301                 # let it instead be inside the template element's template contents,
1302                 # after its last child (if any).
1303                 # fixfull (template)
1304
1305                 # 4. Return the adjusted insertion location.
1306                 return [target, target_i]
1307
1308         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1309         # aka create_an_element_for_token
1310         token_to_element = (t, namespace, intended_parent) ->
1311                 # convert attributes into a hash
1312                 attrs = {}
1313                 for a in t.attrs_a
1314                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1315                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1316
1317                 # TODO 2. If the newly created element has an xmlns attribute in the
1318                 # XMLNS namespace whose value is not exactly the same as the element's
1319                 # namespace, that is a parse error. Similarly, if the newly created
1320                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1321                 # value is not the XLink Namespace, that is a parse error.
1322
1323                 # fixfull: the spec says stuff about form pointers and ownerDocument
1324
1325                 return el
1326
1327         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1328         insert_foreign_element = (token, namespace) ->
1329                 ail = adjusted_insertion_location()
1330                 ail_el = ail[0]
1331                 ail_i = ail[1]
1332                 el = token_to_element token, namespace, ail_el
1333                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1334                 el.parent = ail_el
1335                 ail_el.children.splice ail_i, 0, el
1336                 open_els.unshift el
1337                 return el
1338         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1339         insert_html_element = (token) ->
1340                 insert_foreign_element token, NS_HTML
1341
1342         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1343         # position should be [node, index_within_children]
1344         insert_comment = (t, position = null) ->
1345                 position ?= adjusted_insertion_location()
1346                 position[0].children.splice position[1], 0, t
1347
1348         # 8.2.5.2
1349         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1350         parse_generic_raw_text = (t) ->
1351                 insert_html_element t
1352                 tok_state = tok_state_rawtext
1353                 original_ins_mode = ins_mode
1354                 ins_mode = ins_mode_text
1355         parse_generic_rcdata_text = (t) ->
1356                 insert_html_element t
1357                 tok_state = tok_state_rcdata
1358                 original_ins_mode = ins_mode
1359                 ins_mode = ins_mode_text
1360
1361         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1362         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1363         generate_implied_end_tags = (except = null) ->
1364                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1365                         open_els.shift()
1366
1367         # 8.2.5.4 The rules for parsing tokens in HTML content
1368         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1369
1370         # 8.2.5.4.1 The "initial" insertion mode
1371         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1372         ins_mode_initial = (t) ->
1373                 if is_space_tok t
1374                         return
1375                 if t.type is TYPE_COMMENT
1376                         # ?fixfull
1377                         doc.children.push t
1378                         return
1379                 if t.type is TYPE_DOCTYPE
1380                         # FIXME check identifiers, set quirks, etc
1381                         # fixfull
1382                         doc.children.push t
1383                         ins_mode = ins_mode_before_html
1384                         return
1385                 # Anything else
1386                 #fixfull (iframe, quirks)
1387                 ins_mode = ins_mode_before_html
1388                 process_token t
1389                 return
1390
1391         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1392         ins_mode_before_html = (t) ->
1393                 if t.type is TYPE_DOCTYPE
1394                         parse_error()
1395                         return
1396                 if t.type is TYPE_COMMENT
1397                         doc.children.push t
1398                         return
1399                 if is_space_tok t
1400                         return
1401                 if t.type is TYPE_START_TAG and t.name is 'html'
1402                         el = token_to_element t, NS_HTML, doc
1403                         doc.children.push el
1404                         open_els.unshift(el)
1405                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1406                         ins_mode = ins_mode_before_head
1407                         return
1408                 if t.type is TYPE_END_TAG
1409                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1410                                 # fall through to "anything else"
1411                         else
1412                                 parse_error()
1413                                 return
1414                 # Anything else
1415                 html_tok = new_open_tag 'html'
1416                 el = token_to_element html_tok, NS_HTML, doc
1417                 doc.children.push el
1418                 open_els.unshift el
1419                 # ?fixfull browsing context
1420                 ins_mode = ins_mode_before_head
1421                 process_token t
1422                 return
1423
1424         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1425         ins_mode_before_head = (t) ->
1426                 if is_space_tok t
1427                         return
1428                 if t.type is TYPE_COMMENT
1429                         insert_comment t
1430                         return
1431                 if t.type is TYPE_DOCTYPE
1432                         parse_error()
1433                         return
1434                 if t.type is TYPE_START_TAG and t.name is 'html'
1435                         ins_mode_in_body t
1436                         return
1437                 if t.type is TYPE_START_TAG and t.name is 'head'
1438                         el = insert_html_element t
1439                         head_element_pointer = el
1440                         ins_mode = ins_mode_in_head
1441                         return
1442                 if t.type is TYPE_END_TAG
1443                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1444                                 # fall through to Anything else below
1445                         else
1446                                 parse_error()
1447                                 return
1448                 # Anything else
1449                 head_tok = new_open_tag 'head'
1450                 el = insert_html_element head_tok
1451                 head_element_pointer = el
1452                 ins_mode = ins_mode_in_head
1453                 process_token t
1454
1455         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1456         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1457                 open_els.shift() # spec says this will be a 'head' node
1458                 ins_mode = ins_mode_after_head
1459                 process_token t
1460         ins_mode_in_head = (t) ->
1461                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1462                         insert_character t
1463                         return
1464                 if t.type is TYPE_COMMENT
1465                         insert_comment t
1466                         return
1467                 if t.type is TYPE_DOCTYPE
1468                         parse_error()
1469                         return
1470                 if t.type is TYPE_START_TAG and t.name is 'html'
1471                         ins_mode_in_body t
1472                         return
1473                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1474                         el = insert_html_element t
1475                         open_els.shift()
1476                         t.acknowledge_self_closing()
1477                         return
1478                 if t.type is TYPE_START_TAG and t.name is 'meta'
1479                         el = insert_html_element t
1480                         open_els.shift()
1481                         t.acknowledge_self_closing()
1482                         # fixfull encoding stuff
1483                         return
1484                 if t.type is TYPE_START_TAG and t.name is 'title'
1485                         parse_generic_rcdata_text t
1486                         return
1487                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1488                         parse_generic_raw_text t
1489                         return
1490                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1491                         insert_html_element t
1492                         ins_mode = ins_mode_in_head_noscript
1493                         return
1494                 if t.type is TYPE_START_TAG and t.name is 'script'
1495                         ail = adjusted_insertion_location()
1496                         el = token_to_element t, NS_HTML, ail
1497                         el.flag 'parser-inserted', true
1498                         # fixfull frament case
1499                         ail[0].children.splice ail[1], 0, el
1500                         open_els.unshift el
1501                         tok_state = tok_state_script_data
1502                         original_ins_mode = ins_mode # make sure orig... is defined
1503                         ins_mode = ins_mode_text
1504                         return
1505                 if t.type is TYPE_END_TAG and t.name is 'head'
1506                         open_els.shift() # will be a head element... spec says so
1507                         ins_mode = ins_mode_after_head
1508                         return
1509                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1510                         ins_mode_in_head_else t
1511                         return
1512                 if t.type is TYPE_START_TAG and t.name is 'template'
1513                         insert_html_element t
1514                         afe_push_marker()
1515                         flag_frameset_ok = false
1516                         ins_mode = ins_mode_in_template
1517                         template_ins_modes.unshift ins_mode_in_template
1518                         return
1519                 if t.type is TYPE_END_TAG and t.name is 'template'
1520                         if template_tag_is_open()
1521                                 generate_implied_end_tags
1522                                 if open_els[0].name isnt 'template'
1523                                         parse_error()
1524                                 loop
1525                                         el = open_els.shift()
1526                                         if el.name is 'template' and el.namespace is NS_HTML
1527                                                 break
1528                                 clear_afe_to_marker()
1529                                 template_ins_modes.shift()
1530                                 reset_ins_mode()
1531                         else
1532                                 parse_error()
1533                         return
1534                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1535                         parse_error()
1536                         return
1537                 ins_mode_in_head_else t
1538
1539         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1540         ins_mode_in_head_noscript_else = (t) ->
1541                 parse_error()
1542                 open_els.shift()
1543                 ins_mode = ins_mode_in_head
1544                 process_token t
1545         ins_mode_in_head_noscript = (t) ->
1546                 if t.type is TYPE_DOCTYPE
1547                         parse_error()
1548                         return
1549                 if t.type is TYPE_START_TAG and t.name is 'html'
1550                         ins_mode_in_body t
1551                         return
1552                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1553                         open_els.shift()
1554                         ins_mode = ins_mode_in_head
1555                         return
1556                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1557                         ins_mode_in_head t
1558                         return
1559                 if t.type is TYPE_END_TAG and t.name is 'br'
1560                         ins_mode_in_head_noscript_else t
1561                         return
1562                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1563                         parse_error()
1564                         return
1565                 # Anything else
1566                 ins_mode_in_head_noscript_else t
1567                 return
1568
1569
1570
1571         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1572         ins_mode_after_head_else = (t) ->
1573                 body_tok = new_open_tag 'body'
1574                 insert_html_element body_tok
1575                 ins_mode = ins_mode_in_body
1576                 process_token t
1577                 return
1578         ins_mode_after_head = (t) ->
1579                 if is_space_tok t
1580                         insert_character t
1581                         return
1582                 if t.type is TYPE_COMMENT
1583                         insert_comment t
1584                         return
1585                 if t.type is TYPE_DOCTYPE
1586                         parse_error()
1587                         return
1588                 if t.type is TYPE_START_TAG and t.name is 'html'
1589                         ins_mode_in_body t
1590                         return
1591                 if t.type is TYPE_START_TAG and t.name is 'body'
1592                         insert_html_element t
1593                         flag_frameset_ok = false
1594                         ins_mode = ins_mode_in_body
1595                         return
1596                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1597                         insert_html_element t
1598                         ins_mode = ins_mode_in_frameset
1599                         return
1600                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1601                         parse_error()
1602                         open_els.unshift head_element_pointer
1603                         ins_mode_in_head t
1604                         for el, i of open_els
1605                                 if el is head_element_pointer
1606                                         open_els.splice i, 1
1607                                         return
1608                         console.log "warning: 23904 couldn't find head element in open_els"
1609                         return
1610                 if t.type is TYPE_END_TAG and t.name is 'template'
1611                         ins_mode_in_head t
1612                         return
1613                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1614                         ins_mode_after_head_else t
1615                         return
1616                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1617                         parse_error()
1618                         return
1619                 # Anything else
1620                 ins_mode_after_head_else t
1621
1622         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1623         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1624                 for el, i in open_els
1625                         if el.name is name and el.namespace is NS_HTML
1626                                 generate_implied_end_tags name # arg is exception
1627                                 parse_error() unless i is 0
1628                                 while i >= 0
1629                                         open_els.shift()
1630                                         i -= 1
1631                                 return
1632                         if special_elements[el.name] is el.namespace
1633                                 parse_error()
1634                                 return
1635                 return
1636         ins_mode_in_body = (t) ->
1637                 if t.type is TYPE_TEXT and t.text is "\u0000"
1638                         parse_error()
1639                         return
1640                 if is_space_tok t
1641                         reconstruct_afe()
1642                         insert_character t
1643                         return
1644                 if t.type is TYPE_TEXT
1645                         reconstruct_afe()
1646                         insert_character t
1647                         flag_frameset_ok = false
1648                         return
1649                 if t.type is TYPE_COMMENT
1650                         insert_comment t
1651                         return
1652                 if t.type is TYPE_DOCTYPE
1653                         parse_error()
1654                         return
1655                 if t.type is TYPE_START_TAG and t.name is 'html'
1656                         parse_error()
1657                         return if template_tag_is_open()
1658                         root_attrs = open_els[open_els.length - 1].attrs
1659                         for a of t.attrs_a
1660                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1661                         return
1662
1663                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1664                         ins_mode_in_head t
1665                         return
1666                 if t.type is TYPE_START_TAG and t.name is 'body'
1667                         parse_error()
1668                         return if open_els.length < 2
1669                         second = open_els[open_els.length - 2]
1670                         return unless second.namespace is NS_HTML
1671                         return unless second.name is 'body'
1672                         return if template_tag_is_open()
1673                         flag_frameset_ok = false
1674                         for a of t.attrs_a
1675                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1676                         return
1677                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1678                         parse_error()
1679                         return if open_els.length < 2
1680                         second_i = open_els.length - 2
1681                         second = open_els[second_i]
1682                         return unless second.namespace is NS_HTML
1683                         return unless second.name is 'body'
1684                         if flag_frameset_ok is false
1685                                 return
1686                         if second.parent?
1687                                 for el, i in second.parent.children
1688                                         if el is second
1689                                                 second.parent.children.splice i, 1
1690                                                 break
1691                         open_els.splice second_i, 1
1692                         # pop everything except the "root html element"
1693                         while open_els.length > 1
1694                                 open_els.shift()
1695                         insert_html_element t
1696                         ins_mode = ins_mode_in_frameset
1697                         return
1698                 if t.type is TYPE_EOF
1699                         ok_tags = {
1700                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1701                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1702                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1703                         }
1704                         for el in open_els
1705                                 unless ok_tags[t.name] is el.namespace
1706                                         parse_error()
1707                                         break
1708                         if template_ins_modes.length > 0
1709                                 ins_mode_in_template t
1710                         else
1711                                 stop_parsing()
1712                         return
1713                 if t.type is TYPE_END_TAG and t.name is 'body'
1714                         unless is_in_scope 'body', NS_HTML
1715                                 parse_error()
1716                                 return
1717                         ok_tags = {
1718                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1719                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1720                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1721                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1722                                 html:NS_HTML
1723                         }
1724                         for el in open_els
1725                                 unless ok_tags[t.name] is el.namespace
1726                                         parse_error()
1727                                         break
1728                         ins_mode = ins_mode_after_body
1729                         return
1730                 if t.type is TYPE_END_TAG and t.name is 'html'
1731                         unless is_in_scope 'body', NS_HTML
1732                                 parse_error()
1733                                 return
1734                         ok_tags = {
1735                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1736                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1737                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1738                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1739                                 html:NS_HTML
1740                         }
1741                         for el in open_els
1742                                 unless ok_tags[t.name] is el.namespace
1743                                         parse_error()
1744                                         break
1745                         ins_mode = ins_mode_after_body
1746                         process_token t
1747                         return
1748                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1749                         close_p_if_in_button_scope()
1750                         insert_html_element t
1751                         return
1752                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1753                         close_p_if_in_button_scope()
1754                         if h_tags[open_els[0].name] is open_els[0].namespace
1755                                 parse_error()
1756                                 open_els.shift()
1757                         insert_html_element t
1758                         return
1759                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1760                         close_p_if_in_button_scope()
1761                         insert_html_element t
1762                         # spec: If the next token is a "LF" (U+000A) character token, then
1763                         # ignore that token and move on to the next one. (Newlines at the
1764                         # start of pre blocks are ignored as an authoring convenience.)
1765                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1766                                 cur += 1
1767                         flag_frameset_ok = false
1768                         return
1769                 if t.type is TYPE_START_TAG and t.name is 'form'
1770                         unless form_element_pointer is null or template_tag_is_open()
1771                                 parse_error()
1772                                 return
1773                         close_p_if_in_button_scope()
1774                         el = insert_html_element t
1775                         unless template_tag_is_open()
1776                                 form_element_pointer = el
1777                         return
1778                 if t.type is TYPE_START_TAG and t.name is 'li'
1779                         flag_frameset_ok = false
1780                         for node in open_els
1781                                 if node.name is 'li' and node.namespace is NS_HTML
1782                                         generate_implied_end_tags 'li' # arg is exception
1783                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1784                                                 parse_error()
1785                                         loop
1786                                                 el = open_els.shift()
1787                                                 if el.name is 'li' and el.namespace is NS_HTML
1788                                                         break
1789                                         break
1790                                 if el_is_special_not_adp node
1791                                                 break
1792                         close_p_if_in_button_scope()
1793                         insert_html_element t
1794                         return
1795                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1796                         flag_frameset_ok = false
1797                         for node in open_els
1798                                 if node.name is 'dd' and node.namespace is NS_HTML
1799                                         generate_implied_end_tags 'dd' # arg is exception
1800                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1801                                                 parse_error()
1802                                         loop
1803                                                 el = open_els.shift()
1804                                                 if el.name is 'dd' and el.namespace is NS_HTML
1805                                                         break
1806                                         break
1807                                 if node.name is 'dt' and node.namespace is NS_HTML
1808                                         generate_implied_end_tags 'dt' # arg is exception
1809                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1810                                                 parse_error()
1811                                         loop
1812                                                 el = open_els.shift()
1813                                                 if el.name is 'dt' and el.namespace is NS_HTML
1814                                                         break
1815                                         break
1816                                 if el_is_special_not_adp node
1817                                         break
1818                         close_p_if_in_button_scope()
1819                         insert_html_element t
1820                         return
1821                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1822                         close_p_if_in_button_scope()
1823                         insert_html_element t
1824                         tok_state = tok_state_plaintext
1825                         return
1826                 if t.type is TYPE_START_TAG and t.name is 'button'
1827                         if is_in_scope 'button', NS_HTML
1828                                 parse_error()
1829                                 generate_implied_end_tags()
1830                                 loop
1831                                         el = open_els.shift()
1832                                         if el.name is 'button' and el.namespace is NS_HTML
1833                                                 break
1834                         reconstruct_afe()
1835                         insert_html_element t
1836                         flag_frameset_ok = false
1837                         return
1838                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1839                         unless is_in_scope t.name, NS_HTML
1840                                 parse_error()
1841                                 return
1842                         generate_implied_end_tags()
1843                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1844                                 parse_error()
1845                         loop
1846                                 el = open_els.shift()
1847                                 if el.name is t.name and el.namespace is NS_HTML
1848                                         return
1849                         return
1850                 if t.type is TYPE_END_TAG and t.name is 'form'
1851                         unless template_tag_is_open()
1852                                 node = form_element_pointer
1853                                 form_element_pointer = null
1854                                 if node is null or not el_is_in_scope node
1855                                         parse_error()
1856                                         return
1857                                 generate_implied_end_tags()
1858                                 if open_els[0] isnt node
1859                                         parse_error()
1860                                 for el, i in open_els
1861                                         if el is node
1862                                                 open_els.splice i, 1
1863                                                 break
1864                         else
1865                                 unless is_in_scope 'form', NS_HTML
1866                                         parse_error()
1867                                         return
1868                                 generate_implied_end_tags()
1869                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1870                                         parse_error()
1871                                 loop
1872                                         el = open_els.shift()
1873                                         if el.name is 'form' and el.namespace is NS_HTML
1874                                                 break
1875                         return
1876                 if t.type is TYPE_END_TAG and t.name is 'p'
1877                         unless is_in_button_scope 'p', NS_HTML
1878                                 parse_error()
1879                                 insert_html_element new_open_tag 'p'
1880                         close_p_element()
1881                         return
1882                 if t.type is TYPE_END_TAG and t.name is 'li'
1883                         unless is_in_li_scope 'li', NS_HTML
1884                                 parse_error()
1885                                 return
1886                         generate_implied_end_tags 'li' # arg is exception
1887                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1888                                 parse_error()
1889                         loop
1890                                 el = open_els.shift()
1891                                 if el.name is 'li' and el.namespace is NS_HTML
1892                                         break
1893                         return
1894                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1895                         unless is_in_scope t.name, NS_HTML
1896                                 parse_error()
1897                                 return
1898                         generate_implied_end_tags t.name # arg is exception
1899                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1900                                 parse_error()
1901                         loop
1902                                 el = open_els.shift()
1903                                 if el.name is t.name and el.namespace is NS_HTML
1904                                         break
1905                         return
1906                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1907                         h_in_scope = false
1908                         for el in open_els
1909                                 if h_tags[el.name] is el.namespace
1910                                         h_in_scope = true
1911                                         break
1912                                 if standard_scopers[el.name] is el.namespace
1913                                         break
1914                         unless h_in_scope
1915                                 parse_error()
1916                                 return
1917                         generate_implied_end_tags()
1918                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1919                                 parse_error()
1920                         loop
1921                                 el = open_els.shift()
1922                                 if h_tags[el.name] is el.namespace
1923                                         break
1924                         return
1925                 # deep breath!
1926                 if t.type is TYPE_START_TAG and t.name is 'a'
1927                         # If the list of active formatting elements contains an a element
1928                         # between the end of the list and the last marker on the list (or
1929                         # the start of the list if there is no marker on the list), then
1930                         # this is a parse error; run the adoption agency algorithm for the
1931                         # tag name "a", then remove that element from the list of active
1932                         # formatting elements and the stack of open elements if the
1933                         # adoption agency algorithm didn't already remove it (it might not
1934                         # have if the element is not in table scope).
1935                         found = false
1936                         for el in afe
1937                                 if el.type is TYPE_AFE_MARKER
1938                                         break
1939                                 if el.name is 'a' and el.namespace is NS_HTML
1940                                         found = el
1941                         if found?
1942                                 parse_error()
1943                                 adoption_agency 'a'
1944                                 for el, i in afe
1945                                         if el is found
1946                                                 afe.splice i, 1
1947                                 for el, i in open_els
1948                                         if el is found
1949                                                 open_els.splice i, 1
1950                         reconstruct_afe()
1951                         el = insert_html_element t
1952                         afe_push el
1953                         return
1954                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1955                         reconstruct_afe()
1956                         el = insert_html_element t
1957                         afe_push el
1958                         return
1959                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1960                         reconstruct_afe()
1961                         el = insert_html_element t
1962                         afe_push el
1963                         return
1964                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1965                         adoption_agency t.name
1966                         return
1967                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1968                         reconstruct_afe()
1969                         insert_html_element t
1970                         afe_push_marker()
1971                         flag_frameset_ok = false
1972                         return
1973                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1974                         unless is_in_scope t.name, NS_HTML
1975                                 parse_error()
1976                                 return
1977                         generate_implied_end_tags()
1978                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1979                                 parse_error()
1980                         loop
1981                                 el = open_els.shift()
1982                                 if el.name is t.name and el.namespace is NS_HTML
1983                                         break
1984                         clear_afe_to_marker()
1985                         return
1986                 if t.type is TYPE_START_TAG and t.name is 'table'
1987                         close_p_if_in_button_scope() # fixfull quirksmode thing
1988                         insert_html_element t
1989                         flag_frameset_ok = false
1990                         ins_mode = ins_mode_in_table
1991                         return
1992                 if t.type is TYPE_END_TAG and t.name is 'br'
1993                         parse_error()
1994                         t.type is TYPE_START_TAG
1995                         # fall through
1996                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
1997                         reconstruct_afe()
1998                         insert_html_element t
1999                         open_els.shift()
2000                         t.acknowledge_self_closing()
2001                         flag_frameset_ok = false
2002                         return
2003                 if t.type is TYPE_START_TAG and t.name is 'input'
2004                         reconstruct_afe()
2005                         insert_html_element t
2006                         open_els.shift()
2007                         t.acknowledge_self_closing()
2008                         unless is_input_hidden_tok t
2009                                 flag_frameset_ok = false
2010                         return
2011                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2012                         insert_html_element t
2013                         open_els.shift()
2014                         t.acknowledge_self_closing()
2015                         return
2016                 if t.type is TYPE_START_TAG and t.name is 'hr'
2017                         close_p_if_in_button_scope()
2018                         insert_html_element t
2019                         open_els.shift()
2020                         t.acknowledge_self_closing()
2021                         flag_frameset_ok = false
2022                         return
2023                 if t.type is TYPE_START_TAG and t.name is 'image'
2024                         parse_error()
2025                         t.name = 'img'
2026                         process_token t
2027                         return
2028                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2029                         parse_error()
2030                         if template_tag_is_open() is false and form_element_pointer isnt null
2031                                 return
2032                         t.acknowledge_self_closing()
2033                         flag_frameset_ok = false
2034                         close_p_if_in_button_scope()
2035                         el = insert_html_element new_open_tag 'form'
2036                         unless template_tag_is_open()
2037                                 form_element_pointer = el
2038                         for a in t.attrs_a
2039                                 if a[0] is 'action'
2040                                         el.attrs['action'] = a[1]
2041                                         break
2042                         insert_html_element new_open_tag 'hr'
2043                         open_els.shift()
2044                         reconstruct_afe()
2045                         insert_html_element new_open_tag 'label'
2046                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2047                         input_el = new_open_tag 'input'
2048                         prompt = null
2049                         for a in t.attrs_a
2050                                 if a[0] is 'prompt'
2051                                         prompt = a[1]
2052                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2053                                         input_el.attrs_a.push [a[0], a[1]]
2054                         input_el.attrs_a.push ['name', 'isindex']
2055                         # fixfull this next bit is in english... internationalize?
2056                         prompt ?= "This is a searchable index. Enter search keywords: "
2057                         insert_character new_character_token prompt # fixfull split
2058                         # TODO submit typo "balue" in spec
2059                         insert_html_element input_el
2060                         open_els.shift()
2061                         # insert_character '' # you can put chars here if promt attr missing
2062                         open_els.shift()
2063                         insert_html_element new_open_tag 'hr'
2064                         open_els.shift()
2065                         open_els.shift()
2066                         unless template_tag_is_open()
2067                                 form_element_pointer = null
2068                         return
2069                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2070                         insert_html_element t
2071                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2072                                 cur += 1
2073                         tok_state = tok_state_rcdata
2074                         original_ins_mode = ins_mode
2075                         flag_frameset_ok = false
2076                         ins_mode = ins_mode_text
2077                         return
2078                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2079                         close_p_if_in_button_scope()
2080                         reconstruct_afe()
2081                         flag_frameset_ok = false
2082                         parse_generic_raw_text t
2083                         return
2084                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2085                         flag_frameset_ok = false
2086                         parse_generic_raw_text t
2087                         return
2088                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2089                         parse_generic_raw_text t
2090                         return
2091                 if t.type is TYPE_START_TAG and t.name is 'select'
2092                         reconstruct_afe()
2093                         insert_html_element t
2094                         flag_frameset_ok = false
2095                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2096                                 ins_mode = ins_mode_in_select_in_table
2097                         else
2098                                 ins_mode = ins_mode_in_select
2099                         return
2100                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2101                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2102                                 open_els.shift()
2103                         reconstruct_afe()
2104                         insert_html_element t
2105                         return
2106 # this comment block implements the W3C spec
2107 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2108 #                       if is_in_scope 'ruby', NS_HTML
2109 #                               generate_implied_end_tags()
2110 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2111 #                                       parse_error()
2112 #                       insert_html_element t
2113 #                       return
2114 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2115 #                       if is_in_scope 'ruby', NS_HTML
2116 #                               generate_implied_end_tags 'rtc' # arg is exception
2117 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2118 #                                       parse_error()
2119 #                       insert_html_element t
2120 #                       return
2121 # below implements the WATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2122                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2123                         if is_in_scope 'ruby', NS_HTML
2124                                 generate_implied_end_tags()
2125                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2126                                         parse_error()
2127                         insert_html_element t
2128                         return
2129                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2130                         if is_in_scope 'ruby', NS_HTML
2131                                 generate_implied_end_tags 'rtc'
2132                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2133                                         parse_error()
2134                         insert_html_element t
2135                         return
2136 # end WATWG chunk
2137                 if t.type is TYPE_START_TAG and t.name is 'math'
2138                         reconstruct_afe()
2139                         adjust_mathml_attributes t
2140                         adjust_foreign_attributes t
2141                         insert_foreign_element t, NS_MATHML
2142                         if t.flag 'self-closing'
2143                                 open_els.shift()
2144                                 t.acknowledge_self_closing()
2145                         return
2146                 if t.type is TYPE_START_TAG and t.name is 'svg'
2147                         reconstruct_afe()
2148                         adjust_svg_attributes t
2149                         adjust_foreign_attributes t
2150                         insert_foreign_element t, NS_SVG
2151                         if t.flag 'self-closing'
2152                                 open_els.shift()
2153                                 t.acknowledge_self_closing()
2154                         return
2155                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2156                         parse_error()
2157                         return
2158                 if t.type is TYPE_START_TAG # any other start tag
2159                         reconstruct_afe()
2160                         insert_html_element t
2161                         return
2162                 if t.type is TYPE_END_TAG # any other end tag
2163                         in_body_any_other_end_tag t.name
2164                         return
2165                 return
2166
2167         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2168         ins_mode_text = (t) ->
2169                 if t.type is TYPE_TEXT
2170                         insert_character t
2171                         return
2172                 if t.type is TYPE_EOF
2173                         parse_error()
2174                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2175                                 open_els[0].flag 'already started', true
2176                         open_els.shift()
2177                         ins_mode = original_ins_mode
2178                         process_token t
2179                         return
2180                 if t.type is TYPE_END_TAG and t.name is 'script'
2181                         open_els.shift()
2182                         ins_mode = original_ins_mode
2183                         # fixfull the spec seems to assume that I'm going to run the script
2184                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2185                         return
2186                 if t.type is TYPE_END_TAG
2187                         open_els.shift()
2188                         ins_mode = original_ins_mode
2189                         return
2190                 console.log 'warning: end of ins_mode_text reached'
2191
2192         # the functions below implement the tokenizer stats described here:
2193         # http://www.w3.org/TR/html5/syntax.html#tokenization
2194
2195         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2196         ins_mode_in_table_else = (t) ->
2197                 parse_error()
2198                 flag_foster_parenting = true
2199                 ins_mode_in_body t
2200                 flag_foster_parenting = false
2201                 return
2202         ins_mode_in_table = (t) ->
2203                 switch t.type
2204                         when TYPE_TEXT
2205                                 if t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr'
2206                                         original_ins_mode = ins_mode
2207                                         ins_mode = ins_mode_in_table_text
2208                                         process_token t
2209                                 else
2210                                         ins_mode_in_table_else t
2211                         when TYPE_COMMENT
2212                                 insert_comment t
2213                         when TYPE_DOCTYPE
2214                                 parse_error()
2215                         when TYPE_START_TAG
2216                                 switch t.name
2217                                         when 'caption'
2218                                                 clear_stack_to_table_context()
2219                                                 afe_push_marker()
2220                                                 insert_html_element t
2221                                                 ins_mode = ins_mode_in_caption
2222                                         when 'colgroup'
2223                                                 clear_stack_to_table_context()
2224                                                 insert_html_element t
2225                                                 ins_mode = ins_mode_in_column_group
2226                                         when 'col'
2227                                                 clear_stack_to_table_context()
2228                                                 insert_html_element new_open_tag 'colgroup'
2229                                                 ins_mode = ins_mode_in_column_group
2230                                                 process_token t
2231                                         when 'tbody', 'tfoot', 'thead'
2232                                                 clear_stack_to_table_context()
2233                                                 insert_html_element t
2234                                                 ins_mode = ins_mode_in_table_body
2235                                         when 'td', 'th', 'tr'
2236                                                 clear_stack_to_table_context()
2237                                                 insert_html_element new_open_tag 'tbody'
2238                                                 ins_mode = ins_mode_in_table_body
2239                                                 process_token t
2240                                         when 'table'
2241                                                 parse_error()
2242                                                 if is_in_table_scope 'table', NS_HTML
2243                                                         loop
2244                                                                 el = open_els.shift()
2245                                                                 if el.name is 'table' and el.namespace is NS_HTML
2246                                                                         break
2247                                                         reset_ins_mode()
2248                                                         process_token t
2249                                         when 'style', 'script', 'template'
2250                                                 ins_mode_in_head t
2251                                         when 'input'
2252                                                 unless is_input_hidden_tok t
2253                                                         ins_mode_in_table_else t
2254                                                 else
2255                                                         parse_error()
2256                                                         el = insert_html_element t
2257                                                         open_els.shift()
2258                                                         t.acknowledge_self_closing()
2259                                         when 'form'
2260                                                 parse_error()
2261                                                 if form_element_pointer?
2262                                                         return
2263                                                 if template_tag_is_open()
2264                                                         return
2265                                                 form_element_pointer = insert_html_element t
2266                                                 open_els.shift()
2267                                         else
2268                                                 ins_mode_in_table_else t
2269                         when TYPE_END_TAG
2270                                 switch t.name
2271                                         when 'table'
2272                                                 if is_in_table_scope 'table', NS_HTML
2273                                                         loop
2274                                                                 el = open_els.shift()
2275                                                                 if el.name is 'table' and el.namespace is NS_HTML
2276                                                                         break
2277                                                         reset_ins_mode()
2278                                                 else
2279                                                         parse_error()
2280                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2281                                                 parse_error()
2282                                         when 'template'
2283                                                 ins_mode_in_head t
2284                                         else
2285                                                 ins_mode_in_table_else t
2286                         when TYPE_EOF
2287                                 ins_mode_in_body t
2288                         else
2289                                 ins_mode_in_table_else t
2290
2291
2292         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2293         ins_mode_in_table_text = (t) ->
2294                 if t.type is TYPE_TEXT and t.text is "\u0000"
2295                         # huh? I thought the tokenizer didn't emit these
2296                         parse_error()
2297                         return
2298                 if t.type is TYPE_TEXT
2299                         pending_table_character_tokens.push t
2300                         return
2301                 # Anything else
2302                 all_space = true
2303                 for old in pending_table_character_tokens
2304                         unless is_space_tok old
2305                                 all_space = false
2306                                 break
2307                 if all_space
2308                         for old in pending_table_character_tokens
2309                                 insert_character old
2310                 else
2311                         for old in pending_table_character_tokens
2312                                 ins_mode_table_else old
2313                 pending_table_character_tokens = [] # FIXME test (spec doesn't say this)
2314                 ins_mode = original_ins_mode
2315                 process_token t
2316
2317         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2318         ins_mode_in_caption = (t) ->
2319                 if t.type is TYPE_END_TAG and t.name is 'caption'
2320                         if is_in_table_scope 'caption', NS_HTML
2321                                 generate_implied_end_tags()
2322                                 if open_els[0].name isnt 'caption'
2323                                         parse_error()
2324                                 loop
2325                                         el = open_els.shift()
2326                                         if el.name is 'caption' and el.namespace is NS_HTML
2327                                                 break
2328                                 clear_afe_to_marker()
2329                                 ins_mode = ins_mode_in_table
2330                         else
2331                                 parse_error()
2332                                 # fragment case
2333                         return
2334                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2335                         parse_error()
2336                         if is_in_table_scope 'caption', NS_HTML
2337                                 loop
2338                                         el = open_els.shift()
2339                                         if el.name is 'caption' and el.namespace is NS_HTML
2340                                                 break
2341                                 clear_afe_to_marker()
2342                                 ins_mode = ins_mode_in_table
2343                                 process_token t
2344                         # else fragment case
2345                         return
2346                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2347                         parse_error()
2348                         return
2349                 # Anything else
2350                 ins_mode_in_body t
2351
2352         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2353         ins_mode_in_column_group = (t) ->
2354                 if is_space_tok t
2355                         insert_character t
2356                         return
2357                 if t.type is TYPE_COMMENT
2358                         insert_comment t
2359                         return
2360                 if t.type is TYPE_DOCTYPE
2361                         parse_error()
2362                         return
2363                 if t.type is TYPE_START_TAG and t.name is 'html'
2364                         ins_mode_in_body t
2365                         return
2366                 if t.type is TYPE_START_TAG and t.name is 'col'
2367                         el = insert_html_element t
2368                         open_els.shift()
2369                         t.acknowledge_self_closing()
2370                         return
2371                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2372                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2373                                 open_els.shift()
2374                                 ins_mode = ins_mode_in_table
2375                         else
2376                                 parse_error()
2377                         return
2378                 if t.type is TYPE_END_TAG and t.name is 'col'
2379                         parse_error()
2380                         return
2381                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2382                         ins_mode_in_head t
2383                         return
2384                 if t.type is TYPE_EOF
2385                         ins_mode_in_body t
2386                         return
2387                 # Anything else
2388                 if open_els[0].name isnt 'colgroup'
2389                         parse_error()
2390                         return
2391                 open_els.shift()
2392                 ins_mode = ins_mode_in_table
2393                 process_token t
2394                 return
2395
2396         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2397         ins_mode_in_table_body = (t) ->
2398                 if t.type is TYPE_START_TAG and t.name is 'tr'
2399                         clear_stack_to_table_body_context()
2400                         insert_html_element t
2401                         ins_mode = ins_mode_in_row
2402                         return
2403                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2404                         parse_error()
2405                         clear_stack_to_table_body_context()
2406                         insert_html_element new_open_tag 'tr'
2407                         ins_mode = ins_mode_in_row
2408                         process_token t
2409                         return
2410                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2411                         unless is_in_table_scope t.name, NS_HTML
2412                                 parse_error()
2413                                 return
2414                         clear_stack_to_table_body_context()
2415                         open_els.shift()
2416                         ins_mode = ins_mode_in_table
2417                         return
2418                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2419                         has = false
2420                         for el in open_els
2421                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2422                                         has = true
2423                                         break
2424                                 if table_scopers[el.name] is el.namespace
2425                                         break
2426                         if !has
2427                                 parse_error()
2428                                 return
2429                         clear_stack_to_table_body_context()
2430                         open_els.shift()
2431                         ins_mode = ins_mode_in_table
2432                         process_token t
2433                         return
2434                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2435                         parse_error()
2436                         return
2437                 # Anything else
2438                 ins_mode_in_table t
2439
2440         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2441         ins_mode_in_row = (t) ->
2442                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2443                         clear_stack_to_table_row_context()
2444                         insert_html_element t
2445                         ins_mode = ins_mode_in_cell
2446                         afe_push_marker()
2447                         return
2448                 if t.type is TYPE_END_TAG and t.name is 'tr'
2449                         if is_in_table_scope 'tr', NS_HTML
2450                                 clear_stack_to_table_row_context()
2451                                 open_els.shift()
2452                                 ins_mode = ins_mode_in_table_body
2453                         else
2454                                 parse_error()
2455                         return
2456                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2457                         if is_in_table_scope 'tr', NS_HTML
2458                                 clear_stack_to_table_row_context()
2459                                 open_els.shift()
2460                                 ins_mode = ins_mode_in_table_body
2461                                 process_token t
2462                         else
2463                                 parse_error()
2464                         return
2465                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2466                         if is_in_table_scope t.name, NS_HTML
2467                                 if is_in_table_scope 'tr', NS_HTML
2468                                         clear_stack_to_table_row_context()
2469                                         open_els.shift()
2470                                         ins_mode = ins_mode_in_table_body
2471                                         process_token t
2472                         else
2473                                 parse_error()
2474                         return
2475                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2476                         parse_error()
2477                         return
2478                 # Anything else
2479                 ins_mode_in_table t
2480
2481         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2482         close_the_cell = ->
2483                 generate_implied_end_tags()
2484                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2485                         parse_error()
2486                 loop
2487                         el = open_els.shift()
2488                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2489                                 break
2490                 clear_afe_to_marker()
2491                 ins_mode = ins_mode_in_row
2492
2493         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2494         ins_mode_in_cell = (t) ->
2495                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2496                         if is_in_table_scope t.name, NS_HTML
2497                                 generate_implied_end_tags()
2498                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2499                                         parse_error()
2500                                 loop
2501                                         el = open_els.shift()
2502                                         if el.name is t.name and el.namespace is NS_HTML
2503                                                 break
2504                                 clear_afe_to_marker()
2505                                 ins_mode = ins_mode_in_row
2506                         else
2507                                 parse_error()
2508                         return
2509                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2510                         has = false
2511                         for el in open_els
2512                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2513                                         has = true
2514                                         break
2515                                 if table_scopers[el.name] is el.namespace
2516                                         break
2517                         if !has
2518                                 parse_error()
2519                                 return
2520                         close_the_cell()
2521                         process_token t
2522                         return
2523                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2524                         parse_error()
2525                         return
2526                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2527                         if is_in_table_scope t.name, NS_HTML
2528                                 close_the_cell()
2529                                 process_token t
2530                         else
2531                                 parse_error()
2532                         return
2533                 # Anything Else
2534                 ins_mode_in_body t
2535
2536         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2537         ins_mode_in_select = (t) ->
2538                 if t.type is TYPE_TEXT and t.text is "\u0000"
2539                         parse_error()
2540                         return
2541                 if t.type is TYPE_TEXT
2542                         insert_character t
2543                         return
2544                 if t.type is TYPE_COMMENT
2545                         insert_comment t
2546                         return
2547                 if t.type is TYPE_DOCTYPE
2548                         parse_error()
2549                         return
2550                 if t.type is TYPE_START_TAG and t.name is 'html'
2551                         ins_mode_in_body t
2552                         return
2553                 if t.type is TYPE_START_TAG and t.name is 'option'
2554                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2555                                 open_els.shift()
2556                         insert_html_element t
2557                         return
2558                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2559                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2560                                 open_els.shift()
2561                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2562                                 open_els.shift()
2563                         insert_html_element t
2564                         return
2565                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2566                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2567                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2568                                         open_els.shift()
2569                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2570                                 open_els.shift()
2571                         else
2572                                 parse_error()
2573                         return
2574                 if t.type is TYPE_END_TAG and t.name is 'option'
2575                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2576                                 open_els.shift()
2577                         else
2578                                 parse_error()
2579                         return
2580                 if t.type is TYPE_END_TAG and t.name is 'select'
2581                         if is_in_select_scope 'select', NS_HTML
2582                                 loop
2583                                         el = open_els.shift()
2584                                         if el.name is 'select' and el.namespace is NS_HTML
2585                                                 break
2586                                 reset_ins_mode()
2587                         else
2588                                 parse_error()
2589                         return
2590                 if t.type is TYPE_START_TAG and t.name is 'select'
2591                         parse_error()
2592                         loop
2593                                 el = open_els.shift()
2594                                 if el.name is 'select' and el.namespace is NS_HTML
2595                                         break
2596                         reset_ins_mode()
2597                         # spec says that this is the same as </select> but it doesn't say
2598                         # to check scope first
2599                         return
2600                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2601                         parse_error()
2602                         if is_in_select_scope 'select', NS_HTML
2603                                 return
2604                         loop
2605                                 el = open_els.shift()
2606                                 if el.name is 'select' and el.namespace is NS_HTML
2607                                         break
2608                         reset_ins_mode()
2609                         process_token t
2610                         return
2611                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2612                         ins_mode_in_head t
2613                         return
2614                 if t.type is TYPE_EOF
2615                         ins_mode_in_body t
2616                         return
2617                 # Anything else
2618                 parse_error()
2619                 return
2620
2621         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2622         ins_mode_in_select_in_table = (t) ->
2623                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2624                         parse_error()
2625                         loop
2626                                 el = open_els.shift()
2627                                 if el.name is 'select' and el.namespace is NS_HTML
2628                                         break
2629                         reset_ins_mode()
2630                         process_token t
2631                         return
2632                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2633                         parse_error()
2634                         unless is_in_table_scope t.name, NS_HTML
2635                                 return
2636                         loop
2637                                 el = open_els.shift()
2638                                 if el.name is 'select' and el.namespace is NS_HTML
2639                                         break
2640                         reset_ins_mode()
2641                         process_token t
2642                         return
2643                 # Anything else
2644                 ins_mode_in_select t
2645                 return
2646
2647         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2648         ins_mode_in_template = (t) ->
2649                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2650                         ins_mode_in_body t
2651                         return
2652                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2653                         ins_mode_in_head t
2654                         return
2655                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2656                         template_ins_modes.shift()
2657                         template_ins_modes.unshift ins_mode_in_table
2658                         ins_mode = ins_mode_in_table
2659                         process_token t
2660                         return
2661                 if t.type is TYPE_START_TAG and t.name is 'col'
2662                         template_ins_modes.shift()
2663                         template_ins_modes.unshift ins_mode_in_column_group
2664                         ins_mode = ins_mode_in_column_group
2665                         process_token t
2666                         return
2667                 if t.type is TYPE_START_TAG and t.name is 'tr'
2668                         template_ins_modes.shift()
2669                         template_ins_modes.unshift ins_mode_in_table_body
2670                         ins_mode = ins_mode_in_table_body
2671                         process_token t
2672                         return
2673                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2674                         template_ins_modes.shift()
2675                         template_ins_modes.unshift ins_mode_in_row
2676                         ins_mode = ins_mode_in_row
2677                         process_token t
2678                         return
2679                 if t.type is TYPE_START_TAG
2680                         template_ins_modes.shift()
2681                         template_ins_modes.unshift ins_mode_in_body
2682                         ins_mode = ins_mode_in_body
2683                         process_token t
2684                         return
2685                 if t.type is TYPE_END_TAG
2686                         parse_error()
2687                         return
2688                 if t.type is TYPE_EOF
2689                         unless template_tag_is_open()
2690                                 stop_parsing()
2691                                 return
2692                         parse_error()
2693                         loop
2694                                 el = open_els.shift()
2695                                 if el.name is 'template' and el.namespace is NS_HTML
2696                                         break
2697                         clear_afe_to_marker()
2698                         template_ins_modes.shift()
2699                         reset_ins_mode()
2700                         process_token t
2701
2702         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2703         ins_mode_after_body = (t) ->
2704                 if is_space_tok t
2705                         ins_mode_in_body t
2706                         return
2707                 if t.type is TYPE_COMMENT
2708                         insert_comment t, [open_els[0], open_els[0].children.length]
2709                         return
2710                 if t.type is TYPE_DOCTYPE
2711                         parse_error()
2712                         return
2713                 if t.type is TYPE_START_TAG and t.name is 'html'
2714                         ins_mode_in_body t
2715                         return
2716                 if t.type is TYPE_END_TAG and t.name is 'html'
2717                         # fixfull fragment case
2718                         ins_mode = ins_mode_after_after_body
2719                         return
2720                 if t.type is TYPE_EOF
2721                         stop_parsing()
2722                         return
2723                 # Anything ELse
2724                 parse_error()
2725                 ins_mode = ins_mode_in_body
2726                 process_token t
2727
2728         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2729         ins_mode_in_frameset = (t) ->
2730                 if is_space_tok t
2731                         insert_character t
2732                         return
2733                 if t.type is TYPE_COMMENT
2734                         insert_comment t
2735                         return
2736                 if t.type is TYPE_DOCTYPE
2737                         parse_error()
2738                         return
2739                 if t.type is TYPE_START_TAG and t.name is 'html'
2740                         ins_mode_in_body t
2741                         return
2742                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2743                         insert_html_element t
2744                         return
2745                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2746                         if open_els.length is 1
2747                                 parse_error()
2748                                 return # fragment case
2749                         open_els.shift()
2750                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2751                                 ins_mode = ins_mode_after_frameset
2752                         return
2753                 if t.type is TYPE_START_TAG and t.name is 'frame'
2754                         insert_html_element t
2755                         open_els.shift()
2756                         t.acknowledge_self_closing()
2757                         return
2758                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2759                         ins_mode_in_head t
2760                         return
2761                 if t.type is TYPE_EOF
2762                         if open_els.length isnt 1
2763                                 parse_error()
2764                         stop_parsing()
2765                         return
2766                 # Anything else
2767                 parse_error()
2768                 return
2769
2770         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2771         ins_mode_after_frameset = (t) ->
2772                 if is_space_tok t
2773                         insert_character t
2774                         return
2775                 if t.type is TYPE_COMMENT
2776                         insert_comment t
2777                         return
2778                 if t.type is TYPE_DOCTYPE
2779                         parse_error()
2780                         return
2781                 if t.type is TYPE_START_TAG and t.name is 'html'
2782                         ins_mode_in_body t
2783                         return
2784                 if t.type is TYPE_END_TAG and t.name is 'html'
2785                         insert_mode = ins_mode_after_after_frameset
2786                         return
2787                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2788                         ins_mode_in_head t
2789                         return
2790                 if t.type is TYPE_EOF
2791                         stop_parsing()
2792                         return
2793                 # Anything else
2794                 parse_error()
2795                 return
2796
2797         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2798         ins_mode_after_after_body = (t) ->
2799                 if t.type is TYPE_COMMENT
2800                         insert_comment t, [doc, doc.children.length]
2801                         return
2802                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2803                         ins_mode_in_body t
2804                         return
2805                 if t.type is TYPE_EOF
2806                         stop_parsing()
2807                         return
2808                 # Anything else
2809                 parse_error()
2810                 ins_mode = ins_mode_in_body
2811                 return
2812
2813         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2814         ins_mode_after_after_frameset = (t) ->
2815                 if t.type is TYPE_COMMENT
2816                         insert_comment t, [doc, doc.children.length]
2817                         return
2818                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2819                         ins_mode_in_body t
2820                         return
2821                 if t.type is TYPE_EOF
2822                         stop_parsing()
2823                         return
2824                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2825                         ins_mode_in_head t
2826                         return
2827                 # Anything else
2828                 parse_error()
2829                 return
2830
2831         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2832         has_color_face_or_size = (t) ->
2833                 for a in t.attrs_a
2834                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2835                                 return true
2836                 return false
2837         in_foreign_content_end_script = ->
2838                 open_els.shift()
2839                 # fixfull
2840                 return
2841         in_foreign_content_other_start = (t) ->
2842                 acn = adjusted_current_node()
2843                 if acn.namespace is NS_MATHML
2844                         adjust_mathml_attributes t
2845                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2846                         t.name = svg_name_fixes[t.name]
2847                 if acn.namespace is NS_SVG
2848                         adjust_svg_attributes t
2849                 adjust_foreign_attributes t
2850                 insert_foreign_element t, acn.namespace
2851                 if t.flag 'self-closing'
2852                         if t.name is 'script'
2853                                 t.acknowledge_self_closing()
2854                                 in_foreign_content_end_script()
2855                         else
2856                                 open_els.shift()
2857                                 t.acknowledge_self_closing()
2858                 return
2859         in_foreign_content = (t) ->
2860                 if t.type is TYPE_TEXT and t.text is "\u0000"
2861                         parse_error()
2862                         insert_character new_character_token "\ufffd"
2863                         return
2864                 if is_space_tok t
2865                         insert_character t
2866                         return
2867                 if t.type is TYPE_TEXT
2868                         flag_frameset_ok = false
2869                         insert_character t
2870                         return
2871                 if t.type is TYPE_COMMENT
2872                         insert_comment t
2873                         return
2874                 if t.type is TYPE_DOCTYPE
2875                         parse_error()
2876                         return
2877                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2878                         parse_error()
2879                         if flag_fragment_parsing
2880                                 in_foreign_content_other_start t
2881                                 return
2882                         loop # is this safe?
2883                                 open_els.shift()
2884                                 cn = open_els[0]
2885                                 if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
2886                                         break
2887                         process_token t
2888                         return
2889                 if t.type is TYPE_START_TAG
2890                         in_foreign_content_other_start t
2891                         return
2892                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2893                         in_foreign_content_end_script()
2894                         return
2895                 if t.type is TYPE_END_TAG
2896                         if open_els[0].name.toLowerCase() isnt t.name
2897                                 parse_error()
2898                         for node in open_els
2899                                 if node is open_els[open_els.length - 1]
2900                                         return
2901                                 if node.name.toLowerCase() is t.name
2902                                         loop
2903                                                 el = open_els.shift()
2904                                                 if el is node
2905                                                         return
2906                                 if node.namespace is NS_HTML
2907                                         break
2908                         ins_mode t # explicitly call HTML insertion mode
2909
2910
2911         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2912         tok_state_data = ->
2913                 switch c = txt.charAt(cur++)
2914                         when '&'
2915                                 return new_text_node parse_character_reference()
2916                         when '<'
2917                                 tok_state = tok_state_tag_open
2918                         when "\u0000"
2919                                 parse_error()
2920                                 return new_text_node "\ufffd"
2921                         when '' # EOF
2922                                 return new_eof_token()
2923                         else
2924                                 return new_text_node c
2925                 return null
2926
2927         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2928         # not needed: tok_state_character_reference_in_data = ->
2929         # just call parse_character_reference()
2930
2931         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2932         tok_state_rcdata = ->
2933                 switch c = txt.charAt(cur++)
2934                         when '&'
2935                                 return new_text_node parse_character_reference()
2936                         when '<'
2937                                 tok_state = tok_state_rcdata_less_than_sign
2938                         when "\u0000"
2939                                 parse_error()
2940                                 return new_character_token "\ufffd"
2941                         when '' # EOF
2942                                 return new_eof_token()
2943                         else
2944                                 return new_character_token c
2945                 return null
2946
2947         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2948         # not needed: tok_state_character_reference_in_rcdata = ->
2949         # just call parse_character_reference()
2950
2951         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2952         tok_state_rawtext = ->
2953                 switch c = txt.charAt(cur++)
2954                         when '<'
2955                                 tok_state = tok_state_rawtext_less_than_sign
2956                         when "\u0000"
2957                                 parse_error()
2958                                 return new_character_token "\ufffd"
2959                         when '' # EOF
2960                                 return new_eof_token()
2961                         else
2962                                 return new_character_token c
2963                 return null
2964
2965         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2966         tok_state_script_data = ->
2967                 switch c = txt.charAt(cur++)
2968                         when '<'
2969                                 tok_state = tok_state_script_data_less_than_sign
2970                         when "\u0000"
2971                                 parse_error()
2972                                 return new_character_token "\ufffd"
2973                         when '' # EOF
2974                                 return new_eof_token()
2975                         else
2976                                 return new_character_token c
2977                 return null
2978
2979         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
2980         tok_state_plaintext = ->
2981                 switch c = txt.charAt(cur++)
2982                         when "\u0000"
2983                                 parse_error()
2984                                 return new_character_token "\ufffd"
2985                         when '' # EOF
2986                                 return new_eof_token()
2987                         else
2988                                 return new_character_token c
2989                 return null
2990
2991
2992         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
2993         tok_state_tag_open = ->
2994                 switch c = txt.charAt(cur++)
2995                         when '!'
2996                                 tok_state = tok_state_markup_declaration_open
2997                         when '/'
2998                                 tok_state = tok_state_end_tag_open
2999                         when '?'
3000                                 parse_error()
3001                                 tok_cur_tag = new_comment_token '?'
3002                                 tok_state = tok_state_bogus_comment
3003                         else
3004                                 if is_lc_alpha(c)
3005                                         tok_cur_tag = new_open_tag c
3006                                         tok_state = tok_state_tag_name
3007                                 else if is_uc_alpha(c)
3008                                         tok_cur_tag = new_open_tag c.toLowerCase()
3009                                         tok_state = tok_state_tag_name
3010                                 else
3011                                         parse_error()
3012                                         tok_state = tok_state_data
3013                                         cur -= 1 # we didn't parse/handle the char after <
3014                                         return new_text_node '<'
3015                 return null
3016
3017         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3018         tok_state_end_tag_open = ->
3019                 switch c = txt.charAt(cur++)
3020                         when '>'
3021                                 parse_error()
3022                                 tok_state = tok_state_data
3023                         when '' # EOF
3024                                 parse_error()
3025                                 tok_state = tok_state_data
3026                                 return new_text_node '</'
3027                         else
3028                                 if is_uc_alpha(c)
3029                                         tok_cur_tag = new_end_tag c.toLowerCase()
3030                                         tok_state = tok_state_tag_name
3031                                 else if is_lc_alpha(c)
3032                                         tok_cur_tag = new_end_tag c
3033                                         tok_state = tok_state_tag_name
3034                                 else
3035                                         parse_error()
3036                                         tok_cur_tag = new_comment_token '/'
3037                                         tok_state = tok_state_bogus_comment
3038                 return null
3039
3040         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3041         tok_state_tag_name = ->
3042                 switch c = txt.charAt(cur++)
3043                         when "\t", "\n", "\u000c", ' '
3044                                 tok_state = tok_state_before_attribute_name
3045                         when '/'
3046                                 tok_state = tok_state_self_closing_start_tag
3047                         when '>'
3048                                 tok_state = tok_state_data
3049                                 tmp = tok_cur_tag
3050                                 tok_cur_tag = null
3051                                 return tmp
3052                         when "\u0000"
3053                                 parse_error()
3054                                 tok_cur_tag.name += "\ufffd"
3055                         when '' # EOF
3056                                 parse_error()
3057                                 tok_state = tok_state_data
3058                         else
3059                                 if is_uc_alpha(c)
3060                                         tok_cur_tag.name += c.toLowerCase()
3061                                 else
3062                                         tok_cur_tag.name += c
3063                 return null
3064
3065         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3066         tok_state_rcdata_less_than_sign = ->
3067                 c = txt.charAt(cur++)
3068                 if c is '/'
3069                         temporary_buffer = ''
3070                         tok_state = tok_state_rcdata_end_tag_open
3071                         return null
3072                 # Anything else
3073                 tok_state = tok_state_rcdata
3074                 cur -= 1 # reconsume the input character
3075                 return new_character_token '<'
3076
3077         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3078         tok_state_rcdata_end_tag_open = ->
3079                 c = txt.charAt(cur++)
3080                 if is_uc_alpha(c)
3081                         tok_cur_tag = new_end_tag c.toLowerCase()
3082                         temporary_buffer += c
3083                         tok_state = tok_state_rcdata_end_tag_name
3084                         return null
3085                 if is_lc_alpha(c)
3086                         tok_cur_tag = new_end_tag c
3087                         temporary_buffer += c
3088                         tok_state = tok_state_rcdata_end_tag_name
3089                         return null
3090                 # Anything else
3091                 tok_state = tok_state_rcdata
3092                 cur -= 1 # reconsume the input character
3093                 return new_character_token "</" # fixfull separate these
3094
3095         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3096         is_appropriate_end_tag = (t) ->
3097                 # spec says to check against "the tag name of the last start tag to
3098                 # have been emitted from this tokenizer", but this is only called from
3099                 # the various "raw" states, so it's hopefully ok to assume that
3100                 # open_els[0].name will work instead TODO: verify this after the script
3101                 # data states are implemented
3102                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3103                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3104
3105         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3106         tok_state_rcdata_end_tag_name = ->
3107                 c = txt.charAt(cur++)
3108                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3109                         if is_appropriate_end_tag tok_cur_tag
3110                                 tok_state = tok_state_before_attribute_name
3111                                 return
3112                         # else fall through to "Anything else"
3113                 if c is '/'
3114                         if is_appropriate_end_tag tok_cur_tag
3115                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3116                                 return
3117                         # else fall through to "Anything else"
3118                 if c is '>'
3119                         if is_appropriate_end_tag tok_cur_tag
3120                                 tok_state = tok_state_data
3121                                 return tok_cur_tag
3122                         # else fall through to "Anything else"
3123                 if is_uc_alpha(c)
3124                         tok_cur_tag.name += c.toLowerCase()
3125                         temporary_buffer += c
3126                         return null
3127                 if is_lc_alpha(c)
3128                         tok_cur_tag.name += c
3129                         temporary_buffer += c
3130                         return null
3131                 # Anything else
3132                 tok_state = tok_state_rcdata
3133                 cur -= 1 # reconsume the input character
3134                 return new_character_token '</' + temporary_buffer # fixfull separate these
3135
3136         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3137         tok_state_rawtext_less_than_sign = ->
3138                 c = txt.charAt(cur++)
3139                 if c is '/'
3140                         temporary_buffer = ''
3141                         tok_state = tok_state_rawtext_end_tag_open
3142                         return null
3143                 # Anything else
3144                 tok_state = tok_state_rawtext
3145                 cur -= 1 # reconsume the input character
3146                 return new_character_token '<'
3147
3148         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3149         tok_state_rawtext_end_tag_open = ->
3150                 c = txt.charAt(cur++)
3151                 if is_uc_alpha(c)
3152                         tok_cur_tag = new_end_tag c.toLowerCase()
3153                         temporary_buffer += c
3154                         tok_state = tok_state_rawtext_end_tag_name
3155                         return null
3156                 if is_lc_alpha(c)
3157                         tok_cur_tag = new_end_tag c
3158                         temporary_buffer += c
3159                         tok_state = tok_state_rawtext_end_tag_name
3160                         return null
3161                 # Anything else
3162                 tok_state = tok_state_rawtext
3163                 cur -= 1 # reconsume the input character
3164                 return new_character_token "</" # fixfull separate these
3165
3166         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3167         tok_state_rawtext_end_tag_name = ->
3168                 c = txt.charAt(cur++)
3169                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3170                         if is_appropriate_end_tag tok_cur_tag
3171                                 tok_state = tok_state_before_attribute_name
3172                                 return
3173                         # else fall through to "Anything else"
3174                 if c is '/'
3175                         if is_appropriate_end_tag tok_cur_tag
3176                                 tok_state = tok_state_self_closing_start_tag
3177                                 return
3178                         # else fall through to "Anything else"
3179                 if c is '>'
3180                         if is_appropriate_end_tag tok_cur_tag
3181                                 tok_state = tok_state_data
3182                                 return tok_cur_tag
3183                         # else fall through to "Anything else"
3184                 if is_uc_alpha(c)
3185                         tok_cur_tag.name += c.toLowerCase()
3186                         temporary_buffer += c
3187                         return null
3188                 if is_lc_alpha(c)
3189                         tok_cur_tag.name += c
3190                         temporary_buffer += c
3191                         return null
3192                 # Anything else
3193                 tok_state = tok_state_rawtext
3194                 cur -= 1 # reconsume the input character
3195                 return new_character_token '</' + temporary_buffer # fixfull separate these
3196
3197         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3198         tok_state_script_data_less_than_sign = ->
3199                 c = txt.charAt(cur++)
3200                 if c is '/'
3201                         temporary_buffer = ''
3202                         tok_state = tok_state_script_data_end_tag_open
3203                         return
3204                 if c is '!'
3205                         tok_state = tok_state_script_data_escape_start
3206                         return new_character_token '<!' # fixfull split
3207                 # Anything else
3208                 tok_state = tok_state_script_data
3209                 cur -= 1 # Reconsume
3210                 return new_character_token '<'
3211
3212         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3213         tok_state_script_data_end_tag_open = ->
3214                 c = txt.charAt(cur++)
3215                 if is_uc_alpha(c)
3216                         tok_cur_tag = new_end_tag c.toLowerCase()
3217                         temporary_buffer += c
3218                         tok_state = tok_state_script_data_end_tag_name
3219                         return
3220                 if is_lc_alpha(c)
3221                         tok_cur_tag = new_end_tag c
3222                         temporary_buffer += c
3223                         tok_state = tok_state_script_data_end_tag_name
3224                         return
3225                 # Anything else
3226                 tok_state = tok_state_script_data
3227                 cur -= 1 # Reconsume
3228                 return new_character_token '</'
3229
3230         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3231         tok_state_script_data_end_tag_name = ->
3232                 c = txt.charAt(cur++)
3233                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3234                         if is_appropriate_end_tag tok_cur_tag
3235                                 tok_state = tok_state_before_attribute_name
3236                                 return
3237                         # fall through
3238                 if c is '/'
3239                         if is_appropriate_end_tag tok_cur_tag
3240                                 tok_state = tok_state_self_closing_start_tag
3241                                 return
3242                         # fall through
3243                 if c is '>'
3244                         if is_appropriate_end_tag tok_cur_tag
3245                                 tok_state = tok_state_data
3246                                 return tok_cur_tag
3247                         # fall through
3248                 if is_uc_alpha(c)
3249                         tok_cur_tag.name += c.toLowerCase()
3250                         temporary_buffer += c
3251                         return
3252                 if is_lc_alpha(c)
3253                         tok_cur_tag.name += c
3254                         temporary_buffer += c
3255                         return
3256                 # Anything else
3257                 tok_state = tok_state_script_data
3258                 cur -= 1 # Reconsume
3259                 return new_character_token "</#{temporary_buffer}" # fixfull split
3260
3261         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3262         tok_state_script_data_escape_start = ->
3263                 c = txt.charAt(cur++)
3264                 if c is '-'
3265                         tok_state = tok_state_script_data_escape_start_dash
3266                         return new_character_token '-'
3267                 # Anything else
3268                 tok_state = tok_state_script_data
3269                 cur -= 1 # Reconsume
3270                 return
3271
3272         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3273         tok_state_script_data_escape_start_dash = ->
3274                 c = txt.charAt(cur++)
3275                 if c is '-'
3276                         tok_state = tok_state_script_data_escaped_dash_dash
3277                         return new_character_token '-'
3278                 # Anything else
3279                 tok_state = tok_state_script_data
3280                 cur -= 1 # Reconsume
3281                 return
3282
3283         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3284         tok_state_script_data_escaped = ->
3285                 c = txt.charAt(cur++)
3286                 if c is '-'
3287                         tok_state = tok_state_script_data_escaped_dash
3288                         return new_character_token '-'
3289                 if c is '<'
3290                         tok_state = tok_state_script_data_escaped_less_than_sign
3291                         return
3292                 if c is "\u0000"
3293                         parse_error()
3294                         return new_character_token "\ufffd"
3295                 if c is '' # EOF
3296                         tok_state = tok_state_data
3297                         parse_error()
3298                         cur -= 1 # Reconsume
3299                         return
3300                 # Anything else
3301                 return new_character_token c
3302
3303         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3304         tok_state_script_data_escaped_dash = ->
3305                 c = txt.charAt(cur++)
3306                 if c is '-'
3307                         tok_state = tok_state_script_data_escaped_dash_dash
3308                         return new_character_token '-'
3309                 if c is '<'
3310                         tok_state = tok_state_script_data_escaped_less_than_sign
3311                         return
3312                 if c is "\u0000"
3313                         parse_error()
3314                         tok_state = tok_state_script_data_escaped
3315                         return new_character_token "\ufffd"
3316                 if c is '' # EOF
3317                         tok_state = tok_state_data
3318                         parse_error()
3319                         cur -= 1 # Reconsume
3320                         return
3321                 # Anything else
3322                 tok_state = tok_state_script_data_escaped
3323                 return new_character_token c
3324
3325         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3326         tok_state_script_data_escaped_dash_dash = ->
3327                 c = txt.charAt(cur++)
3328                 if c is '-'
3329                         return new_character_token '-'
3330                 if c is '<'
3331                         tok_state = tok_state_script_data_escaped_less_than_sign
3332                         return
3333                 if c is '>'
3334                         tok_state = tok_state_script_data
3335                         return new_character_token '>'
3336                 if c is "\u0000"
3337                         parse_error()
3338                         tok_state = tok_state_script_data_escaped
3339                         return new_character_token "\ufffd"
3340                 if c is '' # EOF
3341                         parse_error()
3342                         tok_state = tok_state_data
3343                         cur -= 1 # Reconsume
3344                         return
3345                 # Anything else
3346                 tok_state = tok_state_script_data_escaped
3347                 return new_character_token c
3348
3349         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3350         tok_state_script_data_escaped_less_than_sign = ->
3351                 c = txt.charAt(cur++)
3352                 if c is '/'
3353                         temporary_buffer = ''
3354                         tok_state = tok_state_script_data_escaped_end_tag_open
3355                         return
3356                 if is_uc_alpha(c)
3357                         temporary_buffer = c.toLowerCase() # yes, really
3358                         tok_state = tok_state_script_data_double_escape_start
3359                         return new_character_token "<#{c}" # fixfull split
3360                 if is_lc_alpha(c)
3361                         temporary_buffer = c
3362                         tok_state = tok_state_script_data_double_escape_start
3363                         return new_character_token "<#{c}" # fixfull split
3364                 # Anything else
3365                 tok_state = tok_state_script_data_escaped
3366                 cur -= 1 # Reconsume
3367                 return new_character_token c
3368
3369         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3370         tok_state_script_data_escaped_end_tag_open = ->
3371                 c = txt.charAt(cur++)
3372                 if is_uc_alpha(c)
3373                         tok_cur_tag = new_end_tag c.toLowerCase()
3374                         temporary_buffer += c
3375                         tok_state = tok_state_script_data_escaped_end_tag_name
3376                         return
3377                 if is_lc_alpha(c)
3378                         tok_cur_tag = new_end_tag c
3379                         temporary_buffer += c
3380                         tok_state = tok_state_script_data_escaped_end_tag_name
3381                         return
3382                 # Anything else
3383                 tok_state = tok_state_script_data_escaped
3384                 cur -= 1 # Reconsume
3385                 return new_character_token '</' # fixfull split
3386
3387         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3388         tok_state_script_data_escaped_end_tag_name = ->
3389                 c = txt.charAt(cur++)
3390                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3391                         if is_appropriate_end_tag tok_cur_tag
3392                                 tok_state = tok_state_before_attribute_name
3393                                 return
3394                         # fall through
3395                 if c is '/'
3396                         if is_appropriate_end_tag tok_cur_tag
3397                                 tok_state = tok_state_self_closing_start_tag
3398                                 return
3399                         # fall through
3400                 if c is '>'
3401                         if is_appropriate_end_tag tok_cur_tag
3402                                 tok_state = tok_state_data
3403                                 return tok_cur_tag
3404                         # fall through
3405                 if is_uc_alpha(c)
3406                         tok_cur_tag.name += c.toLowerCase()
3407                         temporary_buffer += c.toLowerCase()
3408                         return
3409                 if is_lc_alpha(c)
3410                         tok_cur_tag.name += c
3411                         temporary_buffer += c.toLowerCase()
3412                         return
3413                 # Anything else
3414                 tok_state = tok_state_script_data_escaped
3415                 cur -= 1 # Reconsume
3416                 return new_character_token "</#{temporary_buffer}" # fixfull split
3417
3418         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3419         tok_state_script_data_double_escape_start = ->
3420                 c = txt.charAt(cur++)
3421                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3422                         if temporary_buffer is 'script'
3423                                 tok_state = tok_state_script_data_double_escaped
3424                         else
3425                                 tok_state = tok_state_script_data_escaped
3426                         return new_character_token c
3427                 if is_uc_alpha(c)
3428                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3429                         return new_character_token c
3430                 if is_lc_alpha(c)
3431                         temporary_buffer += c
3432                         return new_character_token c
3433                 # Anything else
3434                 tok_state = tok_state_script_data_escaped
3435                 cur -= 1 # Reconsume
3436                 return
3437
3438         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3439         tok_state_script_data_double_escaped = ->
3440                 c = txt.charAt(cur++)
3441                 if c is '-'
3442                         tok_state = tok_state_script_data_double_escaped_dash
3443                         return new_character_token '-'
3444                 if c is '<'
3445                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3446                         return new_character_token '<'
3447                 if c is "\u0000"
3448                         parse_error()
3449                         return new_character_token "\ufffd"
3450                 if c is '' # EOF
3451                         parse_error()
3452                         tok_state = tok_state_data
3453                         cur -= 1 # Reconsume
3454                         return
3455                 # Anything else
3456                 return new_character_token c
3457
3458         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3459         tok_state_script_data_double_escaped_dash = ->
3460                 c = txt.charAt(cur++)
3461                 if c is '-'
3462                         tok_state = tok_state_script_data_double_escaped_dash_dash
3463                         return new_character_token '-'
3464                 if c is '<'
3465                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3466                         return new_character_token '<'
3467                 if c is "\u0000"
3468                         parse_error()
3469                         tok_state = tok_state_script_data_double_escaped
3470                         return new_character_token "\ufffd"
3471                 if c is '' # EOF
3472                         parse_error()
3473                         tok_state = tok_state_data
3474                         cur -= 1 # Reconsume
3475                         return
3476                 # Anything else
3477                 tok_state = tok_state_script_data_double_escaped
3478                 return new_character_token c
3479
3480         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3481         tok_state_script_data_double_escaped_dash_dash = ->
3482                 c = txt.charAt(cur++)
3483                 if c is '-'
3484                         return new_character_token '-'
3485                 if c is '<'
3486                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3487                         return new_character_token '<'
3488                 if c is '>'
3489                         tok_state = tok_state_script_data
3490                         return new_character_token '>'
3491                 if c is "\u0000"
3492                         parse_error()
3493                         tok_state = tok_state_script_data_double_escaped
3494                         return new_character_token "\ufffd"
3495                 if c is '' # EOF
3496                         parse_error()
3497                         tok_state = tok_state_data
3498                         cur -= 1 # Reconsume
3499                         return
3500                 # Anything else
3501                 tok_state = tok_state_script_data_double_escaped
3502                 return new_character_token c
3503
3504         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3505         tok_state_script_data_double_escaped_less_than_sign = ->
3506                 c = txt.charAt(cur++)
3507                 if c is '/'
3508                         temporary_buffer = ''
3509                         tok_state = tok_state_script_data_double_escape_end
3510                         return new_character_token '/'
3511                 # Anything else
3512                 tok_state = tok_state_script_data_double_escaped
3513                 cur -= 1 # Reconsume
3514                 return
3515
3516         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3517         tok_state_script_data_double_escape_end = ->
3518                 c = txt.charAt(cur++)
3519                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3520                         if temporary_buffer is 'script'
3521                                 tok_state = tok_state_script_data_escaped
3522                         else
3523                                 tok_state = tok_state_script_data_double_escaped
3524                         return new_character_token c
3525                 if is_uc_alpha(c)
3526                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3527                         return new_character_token c
3528                 if is_lc_alpha(c)
3529                         temporary_buffer += c
3530                         return new_character_token c
3531                 # Anything else
3532                 tok_state = tok_state_script_data_double_escaped
3533                 cur -= 1 # Reconsume
3534                 return
3535
3536         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3537         tok_state_before_attribute_name = ->
3538                 attr_name = null
3539                 switch c = txt.charAt(cur++)
3540                         when "\t", "\n", "\u000c", ' '
3541                                 return null
3542                         when '/'
3543                                 tok_state = tok_state_self_closing_start_tag
3544                                 return null
3545                         when '>'
3546                                 tok_state = tok_state_data
3547                                 tmp = tok_cur_tag
3548                                 tok_cur_tag = null
3549                                 return tmp
3550                         when "\u0000"
3551                                 parse_error()
3552                                 attr_name = "\ufffd"
3553                         when '"', "'", '<', '='
3554                                 parse_error()
3555                                 attr_name = c
3556                         when '' # EOF
3557                                 parse_error()
3558                                 tok_state = tok_state_data
3559                         else
3560                                 if is_uc_alpha(c)
3561                                         attr_name = c.toLowerCase()
3562                                 else
3563                                         attr_name = c
3564                 if attr_name?
3565                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3566                         tok_state = tok_state_attribute_name
3567                 return null
3568
3569         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3570         tok_state_attribute_name = ->
3571                 switch c = txt.charAt(cur++)
3572                         when "\t", "\n", "\u000c", ' '
3573                                 tok_state = tok_state_after_attribute_name
3574                         when '/'
3575                                 tok_state = tok_state_self_closing_start_tag
3576                         when '='
3577                                 tok_state = tok_state_before_attribute_value
3578                         when '>'
3579                                 tok_state = tok_state_data
3580                                 tmp = tok_cur_tag
3581                                 tok_cur_tag = null
3582                                 return tmp
3583                         when "\u0000"
3584                                 parse_error()
3585                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3586                         when '"', "'", '<'
3587                                 parse_error()
3588                                 tok_cur_tag.attrs_a[0][0] += c
3589                         when '' # EOF
3590                                 parse_error()
3591                                 tok_state = tok_state_data
3592                         else
3593                                 if is_uc_alpha(c)
3594                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3595                                 else
3596                                         tok_cur_tag.attrs_a[0][0] += c
3597                 return null
3598
3599         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3600         tok_state_after_attribute_name = ->
3601                 c = txt.charAt(cur++)
3602                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3603                         return
3604                 if c is '/'
3605                         tok_state = tok_state_self_closing_start_tag
3606                         return
3607                 if c is '='
3608                         tok_state = tok_state_before_attribute_value
3609                         return
3610                 if c is '>'
3611                         tok_state = tok_state_data
3612                         return
3613                 if is_uc_alpha(c)
3614                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3615                         tok_state = tok_state_attribute_name
3616                         return
3617                 if c is "\u0000"
3618                         parse_error()
3619                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3620                         tok_state = tok_state_attribute_name
3621                         return
3622                 if c is '' # EOF
3623                         parse_error()
3624                         tok_state = tok_state_data
3625                         cur -= 1 # reconsume
3626                         return
3627                 if c is '"' or c is "'" or c is '<'
3628                         parse_error()
3629                         # fall through to Anything else
3630                 # Anything else
3631                 tok_cur_tag.attrs_a.unshift [c, '']
3632                 tok_state = tok_state_attribute_name
3633
3634         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3635         tok_state_before_attribute_value = ->
3636                 switch c = txt.charAt(cur++)
3637                         when "\t", "\n", "\u000c", ' '
3638                                 return null
3639                         when '"'
3640                                 tok_state = tok_state_attribute_value_double_quoted
3641                         when '&'
3642                                 tok_state = tok_state_attribute_value_unquoted
3643                                 cur -= 1
3644                         when "'"
3645                                 tok_state = tok_state_attribute_value_single_quoted
3646                         when "\u0000"
3647                                 # Parse error
3648                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3649                                 tok_state = tok_state_attribute_value_unquoted
3650                         when '>'
3651                                 # Parse error
3652                                 tok_state = tok_state_data
3653                                 tmp = tok_cur_tag
3654                                 tok_cur_tag = null
3655                                 return tmp
3656                         when '' # EOF
3657                                 parse_error()
3658                                 tok_state = tok_state_data
3659                         else
3660                                 tok_cur_tag.attrs_a[0][1] += c
3661                                 tok_state = tok_state_attribute_value_unquoted
3662                 return null
3663
3664         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3665         tok_state_attribute_value_double_quoted = ->
3666                 switch c = txt.charAt(cur++)
3667                         when '"'
3668                                 tok_state = tok_state_after_attribute_value_quoted
3669                         when '&'
3670                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3671                         when "\u0000"
3672                                 # Parse error
3673                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3674                         when '' # EOF
3675                                 parse_error()
3676                                 tok_state = tok_state_data
3677                         else
3678                                 tok_cur_tag.attrs_a[0][1] += c
3679                 return null
3680
3681         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3682         tok_state_attribute_value_single_quoted = ->
3683                 switch c = txt.charAt(cur++)
3684                         when "'"
3685                                 tok_state = tok_state_after_attribute_value_quoted
3686                         when '&'
3687                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3688                         when "\u0000"
3689                                 # Parse error
3690                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3691                         when '' # EOF
3692                                 parse_error()
3693                                 tok_state = tok_state_data
3694                         else
3695                                 tok_cur_tag.attrs_a[0][1] += c
3696                 return null
3697
3698         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3699         tok_state_attribute_value_unquoted = ->
3700                 switch c = txt.charAt(cur++)
3701                         when "\t", "\n", "\u000c", ' '
3702                                 tok_state = tok_state_before_attribute_name
3703                         when '&'
3704                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3705                         when '>'
3706                                 tok_state = tok_state_data
3707                                 tmp = tok_cur_tag
3708                                 tok_cur_tag = null
3709                                 return tmp
3710                         when "\u0000"
3711                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3712                         when '' # EOF
3713                                 parse_error()
3714                                 tok_state = tok_state_data
3715                         else
3716                                 # Parse Error if ', <, = or ` (backtick)
3717                                 tok_cur_tag.attrs_a[0][1] += c
3718                 return null
3719
3720         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3721         tok_state_after_attribute_value_quoted = ->
3722                 switch c = txt.charAt(cur++)
3723                         when "\t", "\n", "\u000c", ' '
3724                                 tok_state = tok_state_before_attribute_name
3725                         when '/'
3726                                 tok_state = tok_state_self_closing_start_tag
3727                         when '>'
3728                                 tok_state = tok_state_data
3729                                 tmp = tok_cur_tag
3730                                 tok_cur_tag = null
3731                                 return tmp
3732                         when '' # EOF
3733                                 parse_error()
3734                                 tok_state = tok_state_data
3735                         else
3736                                 # Parse Error
3737                                 tok_state = tok_state_before_attribute_name
3738                                 cur -= 1 # we didn't handle that char
3739                 return null
3740
3741         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3742         tok_state_self_closing_start_tag = ->
3743                 c = txt.charAt(cur++)
3744                 if c is '>'
3745                         tok_cur_tag.flag 'self-closing'
3746                         tok_state = tok_state_data
3747                         return tok_cur_tag
3748                 if c is ''
3749                         parse_error()
3750                         tok_state = tok_state_data
3751                         cur -= 1 # Reconsume
3752                         return
3753                 # Anything else
3754                 parse_error()
3755                 tok_state = tok_state_before_attribute_name
3756                 cur -= 1 # Reconsume
3757                 return
3758
3759         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3760         # WARNING: put a comment token in tok_cur_tag before setting this state
3761         tok_state_bogus_comment = ->
3762                 next_gt = txt.indexOf '>', cur
3763                 if next_gt is -1
3764                         val = txt.substr cur
3765                         cur = txt.length
3766                 else
3767                         val = txt.substr cur, (next_gt - cur)
3768                         cur = next_gt + 1
3769                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3770                 tok_cur_tag.text += val
3771                 tok_state = tok_state_data
3772                 return tok_cur_tag
3773
3774         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3775         tok_state_markup_declaration_open = ->
3776                 if txt.substr(cur, 2) is '--'
3777                         cur += 2
3778                         tok_cur_tag = new_comment_token ''
3779                         tok_state = tok_state_comment_start
3780                         return
3781                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3782                         cur += 7
3783                         tok_state = tok_state_doctype
3784                         return
3785                 acn = adjusted_current_node()
3786                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3787                         cur += 7
3788                         tok_state = tok_state_cdata_section
3789                         return
3790                 # Otherwise
3791                 parse_error()
3792                 tok_cur_tag = new_comment_token ''
3793                 tok_state = tok_state_bogus_comment
3794                 return
3795
3796         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3797         tok_state_comment_start = ->
3798                 switch c = txt.charAt(cur++)
3799                         when '-'
3800                                 tok_state = tok_state_comment_start_dash
3801                         when "\u0000"
3802                                 parse_error()
3803                                 tok_state = tok_state_comment
3804                                 return new_character_token "\ufffd"
3805                         when '>'
3806                                 parse_error()
3807                                 tok_state = tok_state_data
3808                                 return tok_cur_tag
3809                         when '' # EOF
3810                                 parse_error()
3811                                 tok_state = tok_state_data
3812                                 cur -= 1 # Reconsume
3813                                 return tok_cur_tag
3814                         else
3815                                 tok_cur_tag.text += c
3816                                 tok_state = tok_state_comment
3817                 return null
3818
3819         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3820         tok_state_comment_start_dash = ->
3821                 switch c = txt.charAt(cur++)
3822                         when '-'
3823                                 tok_state = tok_state_comment_end
3824                         when "\u0000"
3825                                 parse_error()
3826                                 tok_cur_tag.text += "-\ufffd"
3827                                 tok_state = tok_state_comment
3828                         when '>'
3829                                 parse_error()
3830                                 tok_state = tok_state_data
3831                                 return tok_cur_tag
3832                         when '' # EOF
3833                                 parse_error()
3834                                 tok_state = tok_state_data
3835                                 cur -= 1 # Reconsume
3836                                 return tok_cur_tag
3837                         else
3838                                 tok_cur_tag.text += "-#{c}"
3839                                 tok_state = tok_state_comment
3840                 return null
3841
3842         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3843         tok_state_comment = ->
3844                 switch c = txt.charAt(cur++)
3845                         when '-'
3846                                 tok_state = tok_state_comment_end_dash
3847                         when "\u0000"
3848                                 parse_error()
3849                                 tok_cur_tag.text += "\ufffd"
3850                         when '' # EOF
3851                                 parse_error()
3852                                 tok_state = tok_state_data
3853                                 cur -= 1 # Reconsume
3854                                 return tok_cur_tag
3855                         else
3856                                 tok_cur_tag.text += c
3857                 return null
3858
3859         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3860         tok_state_comment_end_dash = ->
3861                 switch c = txt.charAt(cur++)
3862                         when '-'
3863                                 tok_state = tok_state_comment_end
3864                         when "\u0000"
3865                                 parse_error()
3866                                 tok_cur_tag.text += "-\ufffd"
3867                                 tok_state = tok_state_comment
3868                         when '' # EOF
3869                                 parse_error()
3870                                 tok_state = tok_state_data
3871                                 cur -= 1 # Reconsume
3872                                 return tok_cur_tag
3873                         else
3874                                 tok_cur_tag.text += "-#{c}"
3875                                 tok_state = tok_state_comment
3876                 return null
3877
3878         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3879         tok_state_comment_end = ->
3880                 switch c = txt.charAt(cur++)
3881                         when '>'
3882                                 tok_state = tok_state_data
3883                                 return tok_cur_tag
3884                         when "\u0000"
3885                                 parse_error()
3886                                 tok_cur_tag.text += "--\ufffd"
3887                                 tok_state = tok_state_comment
3888                         when '!'
3889                                 parse_error()
3890                                 tok_state = tok_state_comment_end_bang
3891                         when '-'
3892                                 parse_error()
3893                                 tok_cur_tag.text += '-'
3894                         when '' # EOF
3895                                 parse_error()
3896                                 tok_state = tok_state_data
3897                                 cur -= 1 # Reconsume
3898                                 return tok_cur_tag
3899                         else
3900                                 parse_error()
3901                                 tok_cur_tag.text += "--#{c}"
3902                                 tok_state = tok_state_comment
3903                 return null
3904
3905         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3906         tok_state_comment_end_bang = ->
3907                 switch c = txt.charAt(cur++)
3908                         when '-'
3909                                 tok_cur_tag.text += "--!#{c}"
3910                                 tok_state = tok_state_comment_end_dash
3911                         when '>'
3912                                 tok_state = tok_state_data
3913                                 return tok_cur_tag
3914                         when "\u0000"
3915                                 parse_error()
3916                                 tok_cur_tag.text += "--!\ufffd"
3917                                 tok_state = tok_state_comment
3918                         when '' # EOF
3919                                 parse_error()
3920                                 tok_state = tok_state_data
3921                                 cur -= 1 # Reconsume
3922                                 return tok_cur_tag
3923                         else
3924                                 tok_cur_tag.text += "--!#{c}"
3925                                 tok_state = tok_state_comment
3926                 return null
3927
3928         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3929         tok_state_doctype = ->
3930                 switch c = txt.charAt(cur++)
3931                         when "\t", "\u000a", "\u000c", ' '
3932                                 tok_state = tok_state_before_doctype_name
3933                         when '' # EOF
3934                                 parse_error()
3935                                 tok_state = tok_state_data
3936                                 el = new_doctype_token ''
3937                                 el.flag 'force-quirks', true
3938                                 cur -= 1 # Reconsume
3939                                 return el
3940                         else
3941                                 parse_error()
3942                                 tok_state = tok_state_before_doctype_name
3943                                 cur -= 1 # Reconsume
3944                 return null
3945
3946         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3947         tok_state_before_doctype_name = ->
3948                 c = txt.charAt(cur++)
3949                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3950                         return
3951                 if is_uc_alpha(c)
3952                         tok_cur_tag = new_doctype_token c.toLowerCase()
3953                         tok_state = tok_state_doctype_name
3954                         return
3955                 if c is "\u0000"
3956                         parse_error()
3957                         tok_cur_tag = new_doctype_token "\ufffd"
3958                         tok_state = tok_state_doctype_name
3959                         return
3960                 if c is '>'
3961                         parse_error()
3962                         el = new_doctype_token ''
3963                         el.flag 'force-quirks', true
3964                         tok_state = tok_state_data
3965                         return el
3966                 if c is '' # EOF
3967                         parse_error()
3968                         tok_state = tok_state_data
3969                         el = new_doctype_token ''
3970                         el.flag 'force-quirks', true
3971                         cur -= 1 # Reconsume
3972                         return el
3973                 # Anything else
3974                 tok_cur_tag = new_doctype_token c
3975                 tok_state = tok_state_doctype_name
3976                 return null
3977
3978         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
3979         tok_state_doctype_name = ->
3980                 c = txt.charAt(cur++)
3981                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3982                         tok_state = tok_state_after_doctype_name
3983                         return
3984                 if c is '>'
3985                         tok_state = tok_state_data
3986                         return tok_cur_tag
3987                 if is_uc_alpha(c)
3988                         tok_cur_tag.name += c.toLowerCase()
3989                         return
3990                 if c is "\u0000"
3991                         parse_error()
3992                         tok_cur_tag.name += "\ufffd"
3993                         return
3994                 if c is '' # EOF
3995                         parse_error()
3996                         tok_state = tok_state_data
3997                         tok_cur_tag.flag 'force-quirks', true
3998                         cur -= 1 # Reconsume
3999                         return tok_cur_tag
4000                 # Anything else
4001                 tok_cur_tag.name += c
4002                 return null
4003
4004         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4005         tok_state_after_doctype_name = ->
4006                 c = txt.charAt(cur++)
4007                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4008                         return
4009                 if c is '>'
4010                         tok_state = tok_state_data
4011                         return tok_cur_tag
4012                 if c is '' # EOF
4013                         parse_error()
4014                         tok_state = tok_state_data
4015                         tok_cur_tag.flag 'force-quirks', true
4016                         cur -= 1 # Reconsume
4017                         return tok_cur_tag
4018                 # Anything else
4019                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4020                         cur += 5
4021                         tok_state = tok_state_after_doctype_public_keyword
4022                         return
4023                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4024                         cur += 5
4025                         tok_state = tok_state_after_doctype_system_keyword
4026                         return
4027                 parse_error()
4028                 tok_cur_tag.flag 'force-quirks', true
4029                 tok_state = tok_state_bogus_doctype
4030                 return null
4031
4032         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4033         tok_state_after_doctype_public_keyword = ->
4034                 c = txt.charAt(cur++)
4035                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4036                         tok_state = tok_state_before_doctype_public_identifier
4037                         return
4038                 if c is '"'
4039                         parse_error()
4040                         tok_cur_tag.public_identifier = ''
4041                         tok_state = tok_state_doctype_public_identifier_double_quoted
4042                         return
4043                 if c is "'"
4044                         parse_error()
4045                         tok_cur_tag.public_identifier = ''
4046                         tok_state = tok_state_doctype_public_identifier_single_quoted
4047                         return
4048                 if c is '>'
4049                         parse_error()
4050                         tok_cur_tag.flag 'force-quirks', true
4051                         tok_state = tok_state_data
4052                         return tok_cur_tag
4053                 if c is '' # EOF
4054                         parse_error()
4055                         tok_state = tok_state_data
4056                         tok_cur_tag.flag 'force-quirks', true
4057                         cur -= 1 # Reconsume
4058                         return tok_cur_tag
4059                 # Anything else
4060                 parse_error()
4061                 tok_cur_tag.flag 'force-quirks', true
4062                 tok_state = tok_state_bogus_doctype
4063                 return null
4064
4065         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4066         tok_state_before_doctype_public_identifier = ->
4067                 c = txt.charAt(cur++)
4068                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4069                         return
4070                 if c is '"'
4071                         parse_error()
4072                         tok_cur_tag.public_identifier = ''
4073                         tok_state = tok_state_doctype_public_identifier_double_quoted
4074                         return
4075                 if c is "'"
4076                         parse_error()
4077                         tok_cur_tag.public_identifier = ''
4078                         tok_state = tok_state_doctype_public_identifier_single_quoted
4079                         return
4080                 if c is '>'
4081                         parse_error()
4082                         tok_cur_tag.flag 'force-quirks', true
4083                         tok_state = tok_state_data
4084                         return tok_cur_tag
4085                 if c is '' # EOF
4086                         parse_error()
4087                         tok_state = tok_state_data
4088                         tok_cur_tag.flag 'force-quirks', true
4089                         cur -= 1 # Reconsume
4090                         return tok_cur_tag
4091                 # Anything else
4092                 parse_error()
4093                 tok_cur_tag.flag 'force-quirks', true
4094                 tok_state = tok_state_bogus_doctype
4095                 return null
4096
4097
4098         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4099         tok_state_doctype_public_identifier_double_quoted = ->
4100                 c = txt.charAt(cur++)
4101                 if c is '"'
4102                         tok_state = tok_state_after_doctype_public_identifier
4103                         return
4104                 if c is "\u0000"
4105                         parse_error()
4106                         tok_cur_tag.public_identifier += "\ufffd"
4107                         return
4108                 if c is '>'
4109                         parse_error()
4110                         tok_cur_tag.flag 'force-quirks', true
4111                         tok_state = tok_state_data
4112                         return tok_cur_tag
4113                 if c is '' # EOF
4114                         parse_error()
4115                         tok_state = tok_state_data
4116                         tok_cur_tag.flag 'force-quirks', true
4117                         cur -= 1 # Reconsume
4118                         return tok_cur_tag
4119                 # Anything else
4120                 tok_cur_tag.public_identifier += c
4121                 return null
4122
4123         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4124         tok_state_doctype_public_identifier_single_quoted = ->
4125                 c = txt.charAt(cur++)
4126                 if c is "'"
4127                         tok_state = tok_state_after_doctype_public_identifier
4128                         return
4129                 if c is "\u0000"
4130                         parse_error()
4131                         tok_cur_tag.public_identifier += "\ufffd"
4132                         return
4133                 if c is '>'
4134                         parse_error()
4135                         tok_cur_tag.flag 'force-quirks', true
4136                         tok_state = tok_state_data
4137                         return tok_cur_tag
4138                 if c is '' # EOF
4139                         parse_error()
4140                         tok_state = tok_state_data
4141                         tok_cur_tag.flag 'force-quirks', true
4142                         cur -= 1 # Reconsume
4143                         return tok_cur_tag
4144                 # Anything else
4145                 tok_cur_tag.public_identifier += c
4146                 return null
4147
4148         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4149         tok_state_after_doctype_public_identifier = ->
4150                 c = txt.charAt(cur++)
4151                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4152                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4153                         return
4154                 if c is '>'
4155                         tok_state = tok_state_data
4156                         return tok_cur_tag
4157                 if c is '"'
4158                         parse_error()
4159                         tok_cur_tag.system_identifier = ''
4160                         tok_state = tok_state_doctype_system_identifier_double_quoted
4161                         return
4162                 if c is "'"
4163                         parse_error()
4164                         tok_cur_tag.system_identifier = ''
4165                         tok_state = tok_state_doctype_system_identifier_single_quoted
4166                         return
4167                 if c is '' # EOF
4168                         parse_error()
4169                         tok_state = tok_state_data
4170                         tok_cur_tag.flag 'force-quirks', true
4171                         cur -= 1 # Reconsume
4172                         return tok_cur_tag
4173                 # Anything else
4174                 parse_error()
4175                 tok_cur_tag.flag 'force-quirks', true
4176                 tok_state = tok_state_bogus_doctype
4177                 return null
4178
4179         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4180         tok_state_between_doctype_public_and_system_identifiers = ->
4181                 c = txt.charAt(cur++)
4182                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4183                         return
4184                 if c is '>'
4185                         tok_state = tok_state_data
4186                         return tok_cur_tag
4187                 if c is '"'
4188                         parse_error()
4189                         tok_cur_tag.system_identifier = ''
4190                         tok_state = tok_state_doctype_system_identifier_double_quoted
4191                         return
4192                 if c is "'"
4193                         parse_error()
4194                         tok_cur_tag.system_identifier = ''
4195                         tok_state = tok_state_doctype_system_identifier_single_quoted
4196                         return
4197                 if c is '' # EOF
4198                         parse_error()
4199                         tok_state = tok_state_data
4200                         tok_cur_tag.flag 'force-quirks', true
4201                         cur -= 1 # Reconsume
4202                         return tok_cur_tag
4203                 # Anything else
4204                 parse_error()
4205                 tok_cur_tag.flag 'force-quirks', true
4206                 tok_state = tok_state_bogus_doctype
4207                 return null
4208
4209         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4210         tok_state_after_doctype_system_keyword = ->
4211                 c = txt.charAt(cur++)
4212                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4213                         tok_state = tok_state_before_doctype_system_identifier
4214                         return
4215                 if c is '"'
4216                         parse_error()
4217                         tok_cur_tag.system_identifier = ''
4218                         tok_state = tok_state_doctype_system_identifier_double_quoted
4219                         return
4220                 if c is "'"
4221                         parse_error()
4222                         tok_cur_tag.system_identifier = ''
4223                         tok_state = tok_state_doctype_system_identifier_single_quoted
4224                         return
4225                 if c is '>'
4226                         parse_error()
4227                         tok_cur_tag.flag 'force-quirks', true
4228                         tok_state = tok_state_data
4229                         return tok_cur_tag
4230                 if c is '' # EOF
4231                         parse_error()
4232                         tok_state = tok_state_data
4233                         tok_cur_tag.flag 'force-quirks', true
4234                         cur -= 1 # Reconsume
4235                         return tok_cur_tag
4236                 # Anything else
4237                 parse_error()
4238                 tok_cur_tag.flag 'force-quirks', true
4239                 tok_state = tok_state_bogus_doctype
4240                 return null
4241
4242         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4243         tok_state_before_doctype_system_identifier = ->
4244                 c = txt.charAt(cur++)
4245                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4246                         return
4247                 if c is '"'
4248                         tok_cur_tag.system_identifier = ''
4249                         tok_state = tok_state_doctype_system_identifier_double_quoted
4250                         return
4251                 if c is "'"
4252                         tok_cur_tag.system_identifier = ''
4253                         tok_state = tok_state_doctype_system_identifier_single_quoted
4254                         return
4255                 if c is '>'
4256                         parse_error()
4257                         tok_cur_tag.flag 'force-quirks', true
4258                         tok_state = tok_state_data
4259                         return tok_cur_tag
4260                 if c is '' # EOF
4261                         parse_error()
4262                         tok_state = tok_state_data
4263                         tok_cur_tag.flag 'force-quirks', true
4264                         cur -= 1 # Reconsume
4265                         return tok_cur_tag
4266                 # Anything else
4267                 parse_error()
4268                 tok_cur_tag.flag 'force-quirks', true
4269                 tok_state = tok_state_bogus_doctype
4270                 return null
4271
4272         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4273         tok_state_doctype_system_identifier_double_quoted = ->
4274                 c = txt.charAt(cur++)
4275                 if c is '"'
4276                         tok_state = tok_state_after_doctype_system_identifier
4277                         return
4278                 if c is "\u0000"
4279                         parse_error()
4280                         tok_cur_tag.system_identifier += "\ufffd"
4281                         return
4282                 if c is '>'
4283                         parse_error()
4284                         tok_cur_tag.flag 'force-quirks', true
4285                         tok_state = tok_state_data
4286                         return tok_cur_tag
4287                 if c is '' # EOF
4288                         parse_error()
4289                         tok_state = tok_state_data
4290                         tok_cur_tag.flag 'force-quirks', true
4291                         cur -= 1 # Reconsume
4292                         return tok_cur_tag
4293                 # Anything else
4294                 tok_cur_tag.system_identifier += c
4295                 return null
4296
4297         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4298         tok_state_doctype_system_identifier_single_quoted = ->
4299                 c = txt.charAt(cur++)
4300                 if c is "'"
4301                         tok_state = tok_state_after_doctype_system_identifier
4302                         return
4303                 if c is "\u0000"
4304                         parse_error()
4305                         tok_cur_tag.system_identifier += "\ufffd"
4306                         return
4307                 if c is '>'
4308                         parse_error()
4309                         tok_cur_tag.flag 'force-quirks', true
4310                         tok_state = tok_state_data
4311                         return tok_cur_tag
4312                 if c is '' # EOF
4313                         parse_error()
4314                         tok_state = tok_state_data
4315                         tok_cur_tag.flag 'force-quirks', true
4316                         cur -= 1 # Reconsume
4317                         return tok_cur_tag
4318                 # Anything else
4319                 tok_cur_tag.system_identifier += c
4320                 return null
4321
4322         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4323         tok_state_after_doctype_system_identifier = ->
4324                 c = txt.charAt(cur++)
4325                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4326                         return
4327                 if c is '>'
4328                         tok_state = tok_state_data
4329                         return tok_cur_tag
4330                 if c is '' # EOF
4331                         parse_error()
4332                         tok_state = tok_state_data
4333                         tok_cur_tag.flag 'force-quirks', true
4334                         cur -= 1 # Reconsume
4335                         return tok_cur_tag
4336                 # Anything else
4337                 parse_error()
4338                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4339                 tok_state = tok_state_bogus_doctype
4340                 return null
4341
4342         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4343         tok_state_bogus_doctype = ->
4344                 c = txt.charAt(cur++)
4345                 if c is '>'
4346                         tok_state = tok_state_data
4347                         return tok_cur_tag
4348                 if c is '' # EOF
4349                         tok_state = tok_state_data
4350                         cur -= 1 # Reconsume
4351                         return tok_cur_tag
4352                 # Anything else
4353                 return null
4354
4355         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4356         tok_state_cdata_section = ->
4357                 tok_state = tok_state_data
4358                 next_gt = txt.indexOf ']]>', cur
4359                 if next_gt is -1
4360                         val = txt.substr cur
4361                         cur = txt.length
4362                 else
4363                         val = txt.substr cur, (next_gt - cur)
4364                         cur = next_gt + 3
4365                 return new_character_token val # fixfull split
4366
4367         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4368         # Don't set this as a state, just call it
4369         # returns a string (NOT a text node)
4370         parse_character_reference = (allowed_char = null, in_attr = false) ->
4371                 if cur >= txt.length
4372                         return '&'
4373                 switch c = txt.charAt(cur)
4374                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4375                                 # explicitly not a parse error
4376                                 return '&'
4377                         when ';'
4378                                 # there has to be "one or more" alnums between & and ; to be a parse error
4379                                 return '&'
4380                         when '#'
4381                                 if cur + 1 >= txt.length
4382                                         return '&'
4383                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4384                                         base = 16
4385                                         charset = hex_chars
4386                                         start = cur + 2
4387                                 else
4388                                         charset = digits
4389                                         start = cur + 1
4390                                         base = 10
4391                                 i = 0
4392                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4393                                         i += 1
4394                                 if i is 0
4395                                         return '&'
4396                                 cur = start + i
4397                                 if txt.charAt(start + i) is ';'
4398                                         cur += 1
4399                                 else
4400                                         parse_error()
4401                                 code_point = txt.substr(start, i)
4402                                 while code_point.charAt(0) is '0' and code_point.length > 1
4403                                         code_point = code_point.substr 1
4404                                 code_point = parseInt(code_point, base)
4405                                 if unicode_fixes[code_point]?
4406                                         parse_error()
4407                                         return unicode_fixes[code_point]
4408                                 else
4409                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4410                                                 parse_error()
4411                                                 return "\ufffd"
4412                                         else
4413                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4414                                                         parse_error()
4415                                                 return from_code_point code_point
4416                                 return
4417                         else
4418                                 for i in [0...31]
4419                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4420                                                 break
4421                                 if i is 0
4422                                         # exit early, because parse_error() below needs at least one alnum
4423                                         return '&'
4424                                 if txt.charAt(cur + i) is ';'
4425                                         i += 1 # include ';' terminator in value
4426                                         decoded = decode_named_char_ref txt.substr(cur, i)
4427                                         if decoded?
4428                                                 cur += i
4429                                                 return decoded
4430                                         parse_error()
4431                                         return '&'
4432                                 else
4433                                         # no ';' terminator (only legacy char refs)
4434                                         max = i
4435                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4436                                                 c = legacy_char_refs[txt.substr(cur, i)]
4437                                                 if c?
4438                                                         if in_attr
4439                                                                 if txt.charAt(cur + i) is '='
4440                                                                         # "because some legacy user agents will
4441                                                                         # misinterpret the markup in those cases"
4442                                                                         parse_error()
4443                                                                         return '&'
4444                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4445                                                                         # this makes attributes forgiving about url args
4446                                                                         return '&'
4447                                                         # ok, and besides the weird exceptions for attributes...
4448                                                         # return the matching char
4449                                                         cur += i # consume entity chars
4450                                                         parse_error() # because no terminating ";"
4451                                                         return c
4452                                         parse_error()
4453                                         return '&'
4454                 return # never reached
4455
4456         # tree constructor initialization
4457         # see comments on TYPE_TAG/etc for the structure of this data
4458         txt = args.html
4459         cur = 0
4460         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4461         open_els = []
4462         afe = [] # active formatting elements
4463         template_ins_modes = []
4464         ins_mode = ins_mode_initial
4465         original_ins_mode = ins_mode # TODO check spec
4466         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4467         flag_frameset_ok = true
4468         flag_parsing = true
4469         flag_foster_parenting = false
4470         form_element_pointer = null
4471         temporary_buffer = null
4472         pending_table_character_tokens = []
4473         head_element_pointer = null
4474         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4475         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4476
4477         # tokenizer initialization
4478         tok_state = tok_state_data
4479
4480         # text pre-processing
4481         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4482         txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4483         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4484         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4485
4486         if args.name is "plain-text-unsafe.dat #4"
4487                 console.log "hi"
4488         # proccess input
4489         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4490         while flag_parsing
4491                 t = tok_state()
4492                 if t?
4493                         process_token t
4494                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4495         return doc.children
4496
4497 serialize_els = (els, shallow, show_ids) ->
4498         serialized = ''
4499         sep = ''
4500         for t in els
4501                 serialized += sep
4502                 sep = ','
4503                 serialized += t.serialize shallow, show_ids
4504         return serialized
4505
4506 module.exports.parse_html = parse_html
4507 module.exports.debug_log_reset = debug_log_reset
4508 module.exports.debug_log_each = debug_log_each
4509 module.exports.TYPE_TAG = TYPE_TAG
4510 module.exports.TYPE_TEXT = TYPE_TEXT
4511 module.exports.TYPE_COMMENT = TYPE_COMMENT
4512 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4513 module.exports.NS_HTML = NS_HTML
4514 module.exports.NS_MATHML = NS_MATHML
4515 module.exports.NS_SVG = NS_SVG