JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
convert parser to javascript
[peach-html5-editor.git] / parser.js
1 // todo remove refs and lens, js, ls
2 // run test suite!
3
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
6 //
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
10 // later version.
11 //
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
15 // details.
16 //
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20
21 // This file implements a thorough parser for html5, meant to be used by a
22 // WYSIWYG editor.
23
24 // The implementation is a pretty direct implementation of the parsing algorithm
25 // described here:
26 //
27 //     http://www.w3.org/TR/html5/syntax.html
28 //
29 // except for some places marked "WHATWG" that are implemented as described here:
30 //
31 //     https://html.spec.whatwg.org/multipage/syntax.html
32 //
33 // This code passes all of the tests in the .dat files at:
34 //
35 //     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
36
37
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
41 //
42 // See README.md for how to run this file in the browser or in node.js.
43 //
44 // This file exports a single useful function: parse_html, and some constants
45 // (see the bottom of this file for those.)
46 //
47 // Call it like this:
48 //
49 //     peach_parser.parse("<p><b>hi</p>")
50 //
51 // Or, if you don't want <html><head><body>/etc, do this:
52 //
53 //     peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
54 //
55 // return value is an array of Nodes, see "class Node" below.
56
57 // This code is a work in progress; e.g. try searching this file for "fixfull",
58 // "TODO" and "FIXME"
59
60
61 // Notes:  stacks/lists
62 //
63 // Jason was frequently confused by the terminology used to refer to different
64 // parts of the stacks and lists in the spec, so he made this chart to help keep
65 // his head straight:
66 //
67 // stacks grow downward (current element is index=0)
68 //
69 // example: open_els = [a, b, c, d, e, f, g]
70 //
71 // "grows downwards" means it's visualized like this: (index: el "names")
72 //
73 //   6: g "start of the list", "topmost", "first"
74 //   5: f
75 //   4: e "previous" (to d), "above", "before"
76 //   3: d   (previous/next are relative to this element)
77 //   2: c "next", "after", "lower", "below"
78 //   1: b
79 //   0: a "end of the list", "current node", "bottommost", "last"
80
// Detect the runtime and pick the object that receives this file's public
// API: `module.exports` when a CommonJS `module` is available (node.js),
// otherwise a fresh `window.peach_parser` object (browser).
// `context` is read again further down to choose how named character
// references are decoded.
// Both names are declared here so the assignments below never create
// implicit globals (which throw in strict mode).
var context, exports
if ((typeof module) !== 'undefined' && (module.exports != null)) {
        context = 'module'
        exports = module.exports
} else {
        context = 'browser'
        window.peach_parser = {}
        exports = window.peach_parser
}
89
// Convert a Unicode code point (number) to a string.
//
// Uses the native String.fromCodePoint when the runtime has it. Otherwise
// code points above 0xffff are encoded by hand as a UTF-16 surrogate pair.
var from_code_point = function (x) {
        var offset
        if (String.fromCodePoint != null) {
                return String.fromCodePoint(x)
        }
        if (x <= 0xffff) {
                return String.fromCharCode(x)
        }
        // astral plane: high surrogate carries the top 10 bits, low surrogate
        // the bottom 10 bits of (code point - 0x10000)
        offset = x - 0x10000
        return String.fromCharCode(0xd800 + (offset >> 10), 0xdc00 + (offset % 0x400))
}
101
// Each node is an object of the Node class. Here are the Node types:
var TYPE_TAG = 'tag' // name, {attributes}, [children]
var TYPE_TEXT = 'text' // "text"
var TYPE_COMMENT = 'comment'
var TYPE_DOCTYPE = 'doctype'
// the following types are emitted by the tokenizer, but shouldn't end up in the tree:
var TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
var TYPE_END_TAG = 5 // name
var TYPE_EOF = 6
var TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
var TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
113
// namespace constants (stored in Node#namespace)
var NS_HTML = 'html'
var NS_MATHML = 'mathml'
var NS_SVG = 'svg'
118
// quirks mode constants
var QUIRKS_NO = 'no'
var QUIRKS_LIMITED = 'limited'
var QUIRKS_YES = 'yes'
123
// queue up debug logs, so eg they can be shown only for tests that fail
var g_debug_log = []

// Discard every queued message (the queue is replaced by a new empty array).
var debug_log_reset = function () {
        g_debug_log = []
}

// Append one message to the queue.
var debug_log = function (str) {
        g_debug_log.push(str)
}

// Call cb(message) once for each queued message, oldest first.
var debug_log_each = function (cb) {
        var idx
        for (idx = 0; idx < g_debug_log.length; idx += 1) {
                cb(g_debug_log[idx])
        }
}
138
// Counter behind the auto-generated Node ids (incremented before each use).
var prev_node_id = 0

// Node: used both for tokens coming out of the tokenizer and for nodes in the
// resulting tree.
//
// type: one of the TYPE_* constants
// args: optional object; every key is optional and defaults as shown below:
//     name       tag name                                   ''
//     text       contents for text/comment nodes            ''
//     attrs      attributes object                          {}
//     attrs_a    attrs in progress, TYPE_START_TAG only      []
//                (the older key `attr_k` is still accepted)
//     children   child nodes                                []
//     namespace  one of the NS_* constants                  NS_HTML
//     parent     parent node                                null
//     token      token associated with this node            null
//     flags      key/value flags, see Node#flag             {}
//     id         id to derive this node's id from ("+" is appended);
//                when absent a new unique id is generated
function Node (type, args) {
        if (args == null) {
                args = {}
        }
        this.type = type // one of the TYPE_* constants above
        this.name = args.name != null ? args.name : '' // tag name
        this.text = args.text != null ? args.text : '' // contents for text/comment nodes
        this.attrs = args.attrs != null ? args.attrs : {}
        // attrs in progress, TYPE_START_TAG only. The constructor used to read
        // only `attr_k`, so a caller passing `attrs_a` (the property's real
        // name) silently got an empty list. Accept both spellings.
        if (args.attrs_a != null) {
                this.attrs_a = args.attrs_a
        } else if (args.attr_k != null) {
                this.attrs_a = args.attr_k
        } else {
                this.attrs_a = []
        }
        this.children = args.children != null ? args.children : []
        this.namespace = args.namespace != null ? args.namespace : NS_HTML
        this.parent = args.parent != null ? args.parent : null
        this.token = args.token != null ? args.token : null
        this.flags = args.flags != null ? args.flags : {}
        if (args.id != null) {
                this.id = args.id + "+"
        } else {
                this.id = "" + (++prev_node_id)
        }
}

// Record the 'did_self_close' flag: on this node's token when it has one,
// otherwise on the node itself.
Node.prototype.acknowledge_self_closing = function () {
        var target = this.token != null ? this.token : this
        target.flag('did_self_close', true)
}

// Combined getter/setter for flags:
//     node.flag(key)         returns the stored value (undefined if never set)
//     node.flag(key, value)  stores value (null/undefined are treated as "get")
Node.prototype.flag = function (key, value) {
        if (value != null) {
                this.flags[key] = value
        } else {
                return this.flags[key]
        }
}
176
// helpers: (only take args that are normally known when parser creates nodes)
// Each one returns a new Node of the matching TYPE_* with just that field set.
var new_open_tag = function (name) {
        return new Node(TYPE_START_TAG, {name: name})
}
var new_end_tag = function (name) {
        return new Node(TYPE_END_TAG, {name: name})
}
var new_element = function (name) {
        return new Node(TYPE_TAG, {name: name})
}
var new_text_node = function (txt) {
        return new Node(TYPE_TEXT, {text: txt})
}
// character tokens and text nodes share one representation
var new_character_token = new_text_node
var new_comment_token = function (txt) {
        return new Node(TYPE_COMMENT, {text: txt})
}
var new_doctype_token = function (name) {
        return new Node(TYPE_DOCTYPE, {name: name})
}
var new_eof_token = function () {
        return new Node(TYPE_EOF)
}
var new_afe_marker = function () {
        return new Node(TYPE_AFE_MARKER)
}
var new_aaa_bookmark = function () {
        return new Node(TYPE_AAA_BOOKMARK)
}
206
// character classes, as strings that are searched with indexOf()
var lc_alpha = "abcdefghijklmnopqrstuvwxyz"
var uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
var digits = "0123456789"
var alnum = lc_alpha + uc_alpha + digits
var hex_chars = digits + "abcdefABCDEF"

// true only for a single-character string that is an ASCII uppercase letter
var is_uc_alpha = function (str) {
        return str.length === 1 && uc_alpha.indexOf(str) > -1
}
// true only for a single-character string that is an ASCII lowercase letter
var is_lc_alpha = function (str) {
        return str.length === 1 && lc_alpha.indexOf(str) > -1
}

// some SVG elements have dashes in them
var tag_name_chars = alnum + "-"
222
// http://www.w3.org/TR/html5/infrastructure.html#space-character
var space_chars = "\u0009\u000a\u000c\u000d\u0020"

// true only for a single-character string that is one of space_chars
var is_space = function (txt) {
        return txt.length === 1 && space_chars.indexOf(txt) > -1
}
// true only for a TYPE_TEXT token whose text is exactly one space character
var is_space_tok = function (t) {
        if (t.type !== TYPE_TEXT) {
                return false
        }
        return t.text.length === 1 && space_chars.indexOf(t.text) > -1
}
231
// Return true when t is a start tag token whose `type` attribute equals
// "hidden" (compared case-insensitively).
//
// t.attrs_a is scanned as [name, value] pairs; only the first pair named
// "type" is consulted.
var is_input_hidden_tok = function (t) {
        var idx, attr
        if (t.type !== TYPE_START_TAG) {
                return false
        }
        for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                attr = t.attrs_a[idx]
                if (attr[0] === 'type') {
                        return attr[1].toLowerCase() === 'hidden'
                }
        }
        return false
}
248
// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
// (a wider set than space_chars, which only holds the HTML "space characters")
var whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
251
// Replacement characters, keyed by code point number: 0x00 maps to U+FFFD
// and selected code points in the 0x80-0x9F range map to other characters.
var unicode_fixes = {
        0x00: "\uFFFD",
        0x80: "\u20AC",
        0x82: "\u201A",
        0x83: "\u0192",
        0x84: "\u201E",
        0x85: "\u2026",
        0x86: "\u2020",
        0x87: "\u2021",
        0x88: "\u02C6",
        0x89: "\u2030",
        0x8A: "\u0160",
        0x8B: "\u2039",
        0x8C: "\u0152",
        0x8E: "\u017D",
        0x91: "\u2018",
        0x92: "\u2019",
        0x93: "\u201C",
        0x94: "\u201D",
        0x95: "\u2022",
        0x96: "\u2013",
        0x97: "\u2014",
        0x98: "\u02DC",
        0x99: "\u2122",
        0x9A: "\u0161",
        0x9B: "\u203A",
        0x9C: "\u0153",
        0x9E: "\u017E",
        0x9F: "\u0178"
}
281
// Lowercased public identifier prefixes. Judging by the name, a doctype
// whose public identifier starts with one of these selects QUIRKS_YES
// (the code that consults this list is outside this section of the file).
var quirks_yes_pi_prefixes = [
        "+//silmaril//dtd html pro v0r11 19970101//",
        "-//as//dtd html 3.0 aswedit + extensions//",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
        "-//ietf//dtd html 2.0 level 1//",
        "-//ietf//dtd html 2.0 level 2//",
        "-//ietf//dtd html 2.0 strict level 1//",
        "-//ietf//dtd html 2.0 strict level 2//",
        "-//ietf//dtd html 2.0 strict//",
        "-//ietf//dtd html 2.0//",
        "-//ietf//dtd html 2.1e//",
        "-//ietf//dtd html 3.0//",
        "-//ietf//dtd html 3.2 final//",
        "-//ietf//dtd html 3.2//",
        "-//ietf//dtd html 3//",
        "-//ietf//dtd html level 0//",
        "-//ietf//dtd html level 1//",
        "-//ietf//dtd html level 2//",
        "-//ietf//dtd html level 3//",
        "-//ietf//dtd html strict level 0//",
        "-//ietf//dtd html strict level 1//",
        "-//ietf//dtd html strict level 2//",
        "-//ietf//dtd html strict level 3//",
        "-//ietf//dtd html strict//",
        "-//ietf//dtd html//",
        "-//metrius//dtd metrius presentational//",
        "-//microsoft//dtd internet explorer 2.0 html strict//",
        "-//microsoft//dtd internet explorer 2.0 html//",
        "-//microsoft//dtd internet explorer 2.0 tables//",
        "-//microsoft//dtd internet explorer 3.0 html strict//",
        "-//microsoft//dtd internet explorer 3.0 html//",
        "-//microsoft//dtd internet explorer 3.0 tables//",
        "-//netscape comm. corp.//dtd html//",
        "-//netscape comm. corp.//dtd strict html//",
        "-//o'reilly and associates//dtd html 2.0//",
        "-//o'reilly and associates//dtd html extended 1.0//",
        "-//o'reilly and associates//dtd html extended relaxed 1.0//",
        "-//sq//dtd html 2.0 hotmetal + extensions//",
        "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
        "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
        "-//spyglass//dtd html 2.0 extended//",
        "-//sun microsystems corp.//dtd hotjava html//",
        "-//sun microsystems corp.//dtd hotjava strict html//",
        "-//w3c//dtd html 3 1995-03-24//",
        "-//w3c//dtd html 3.2 draft//",
        "-//w3c//dtd html 3.2 final//",
        "-//w3c//dtd html 3.2//",
        "-//w3c//dtd html 3.2s draft//",
        "-//w3c//dtd html 4.0 frameset//",
        "-//w3c//dtd html 4.0 transitional//",
        "-//w3c//dtd html experimental 19960712//",
        "-//w3c//dtd html experimental 970421//",
        "-//w3c//dtd w3 html//",
        "-//w3o//dtd w3 html 3.0//",
        "-//webtechs//dtd mozilla html 2.0//",
        "-//webtechs//dtd mozilla html//",
]
339
// These are the character references that don't need a terminating semicolon
// min length: 2, max: 6, none are a prefix of any other.
// Keys are the reference names (without "&"), values the decoded characters.
var legacy_char_refs = {
        Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
        aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
        aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
        Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
        curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
        ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
        euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
        Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
        igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
        lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
        Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
        Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
        Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
        pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
        // soft hyphen, written as an escape because the character is invisible
        shy: "\u00ad",
        sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
        times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
        ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
        yen: '¥', yuml: 'ÿ'
}
362
363 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
364 //raw_text_elements = ['script', 'style']
365 //escapable_raw_text_elements = ['textarea', 'title']
// SVG element names, in their mixed-case spelling.
// http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
var svg_elements = [
        'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
        'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
        'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
        'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
        'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
        'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
        'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
        'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
        'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
        'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
        'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
        'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
        'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
        'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
        'view', 'vkern'
]
384
// MathML element names (each listed once).
// http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
var mathml_elements = [
        'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
        'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
        'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
        'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
        'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
        'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
        'determinant', 'diff', 'divergence', 'divide', 'domain',
        'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
        'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
        'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
        'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
        'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
        'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
        'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
        'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
        'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
        'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
        'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
        'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
        'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
        'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
        'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
        'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
        'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
        'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
        'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
        'vectorproduct', 'xor'
]
416 // foreign_elements = [svg_elements..., mathml_elements...]
417 //normal_elements = All other allowed HTML elements are normal elements.
418
// Elements in the "special" category, as a map of tag name -> namespace.
// Lookups compare special_elements[name] with the element's namespace.
//
// NOTE: `title` is listed twice below (HTML and SVG). An object holds one
// value per key and the last one wins, so this table itself only records
// `title` as NS_SVG.
var special_elements = {
        // HTML:
        address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
        aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
        blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
        caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
        details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
        embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
        footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
        h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
        header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
        img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
        listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,

        menu: NS_HTML, menuitem: NS_HTML, // WHATWG adds these

        meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
        noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
        plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
        select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
        table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
        textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
        tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,

        // MathML:
        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
        'annotation-xml': NS_MATHML,

        // SVG:
        foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
}
450
// Tag names of the formatting elements, stored as a set (name -> true).
var formatting_elements = {
        a: true, b: true, big: true, code: true, em: true, font: true, i: true,
        nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
        u: true
}
456
// MathML text integration point elements, as a map of tag name -> namespace.
var mathml_text_integration = {
        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
}
// true when el's name is listed in mathml_text_integration under el's namespace
var is_mathml_text_integration_point = function (el) {
        var listed_namespace = mathml_text_integration[el.name]
        return listed_namespace === el.namespace
}
// Return true when el is an HTML integration point:
//   * MathML annotation-xml whose `encoding` attribute is "text/html" or
//     "application/xhtml+xml" (compared case-insensitively)
//   * SVG foreignObject, desc or title
//
// Reads el.attrs (the attributes object), so:
var is_html_integration = function (el) { // DON'T PASS A TOKEN
        var encoding
        if (el.namespace === NS_MATHML) {
                if (el.name !== 'annotation-xml' || el.attrs.encoding == null) {
                        return false
                }
                encoding = el.attrs.encoding.toLowerCase()
                return encoding === 'text/html' || encoding === 'application/xhtml+xml'
        }
        if (el.namespace === NS_SVG) {
                return el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title'
        }
        return false
}
484
// The HTML heading elements, as a map of tag name -> namespace.
var h_tags = {
        h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
}
488
// Table-structure elements (tag name -> namespace); judging by the name,
// these are the current nodes for which foster parenting applies.
var foster_parenting_targets = {
        table: NS_HTML,
        tbody: NS_HTML,
        tfoot: NS_HTML,
        thead: NS_HTML,
        tr: NS_HTML
}
496
// Elements whose end tag can be implied (tag name -> namespace).
var end_tag_implied = {
        dd: NS_HTML,
        dt: NS_HTML,
        li: NS_HTML,
        option: NS_HTML,
        optgroup: NS_HTML,
        p: NS_HTML,
        rb: NS_HTML,
        rp: NS_HTML,
        rt: NS_HTML,
        rtc: NS_HTML
}
509
// Return true when element e is in the "special" category.
//
// special_elements maps tag name -> namespace and lists `title` for both HTML
// and SVG; an object keeps only the last value per key, so the table alone
// reports only the SVG one. HTML <title> is therefore checked explicitly.
var el_is_special = function (e) {
        if (e.name === 'title' && e.namespace === NS_HTML) {
                return true
        }
        return special_elements[e.name] === e.namespace
}
513
// address, div and p: the elements el_is_special_not_adp excludes
var adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
// Return true when el is in the "special" category but is not one of the
// elements listed in adp_els (address, div, p).
//
// special_elements lists `title` for both HTML and SVG, but an object keeps
// only the last value per key (SVG), so HTML <title> is checked explicitly.
var el_is_special_not_adp = function (el) {
        if (adp_els[el.name] === el.namespace) {
                return false
        }
        if (el.name === 'title' && el.namespace === NS_HTML) {
                return true
        }
        return special_elements[el.name] === el.namespace
}
518
// SVG tag names whose correct spelling is mixed-case:
// lowercased name -> correctly cased name.
var svg_name_fixes = {
        altglyph: 'altGlyph',
        altglyphdef: 'altGlyphDef',
        altglyphitem: 'altGlyphItem',
        animatecolor: 'animateColor',
        animatemotion: 'animateMotion',
        animatetransform: 'animateTransform',
        clippath: 'clipPath',
        feblend: 'feBlend',
        fecolormatrix: 'feColorMatrix',
        fecomponenttransfer: 'feComponentTransfer',
        fecomposite: 'feComposite',
        feconvolvematrix: 'feConvolveMatrix',
        fediffuselighting: 'feDiffuseLighting',
        fedisplacementmap: 'feDisplacementMap',
        fedistantlight: 'feDistantLight',
        fedropshadow: 'feDropShadow',
        feflood: 'feFlood',
        fefunca: 'feFuncA',
        fefuncb: 'feFuncB',
        fefuncg: 'feFuncG',
        fefuncr: 'feFuncR',
        fegaussianblur: 'feGaussianBlur',
        feimage: 'feImage',
        femerge: 'feMerge',
        femergenode: 'feMergeNode',
        femorphology: 'feMorphology',
        feoffset: 'feOffset',
        fepointlight: 'fePointLight',
        fespecularlighting: 'feSpecularLighting',
        fespotlight: 'feSpotLight',
        fetile: 'feTile',
        feturbulence: 'feTurbulence',
        foreignobject: 'foreignObject',
        glyphref: 'glyphRef',
        lineargradient: 'linearGradient',
        radialgradient: 'radialGradient',
        textpath: 'textPath'
}
// SVG attribute names whose correct spelling is mixed-case:
// lowercased name -> correctly cased name (applied by adjust_svg_attributes).
var svg_attribute_fixes = {
        attributename: 'attributeName',
        attributetype: 'attributeType',
        basefrequency: 'baseFrequency',
        baseprofile: 'baseProfile',
        calcmode: 'calcMode',
        clippathunits: 'clipPathUnits',
        contentscripttype: 'contentScriptType',
        contentstyletype: 'contentStyleType',
        diffuseconstant: 'diffuseConstant',
        edgemode: 'edgeMode',
        externalresourcesrequired: 'externalResourcesRequired',
        // WHATWG removes this: filterres: 'filterRes',
        filterunits: 'filterUnits',
        glyphref: 'glyphRef',
        gradienttransform: 'gradientTransform',
        gradientunits: 'gradientUnits',
        kernelmatrix: 'kernelMatrix',
        kernelunitlength: 'kernelUnitLength',
        keypoints: 'keyPoints',
        keysplines: 'keySplines',
        keytimes: 'keyTimes',
        lengthadjust: 'lengthAdjust',
        limitingconeangle: 'limitingConeAngle',
        markerheight: 'markerHeight',
        markerunits: 'markerUnits',
        markerwidth: 'markerWidth',
        maskcontentunits: 'maskContentUnits',
        maskunits: 'maskUnits',
        numoctaves: 'numOctaves',
        pathlength: 'pathLength',
        patterncontentunits: 'patternContentUnits',
        patterntransform: 'patternTransform',
        patternunits: 'patternUnits',
        pointsatx: 'pointsAtX',
        pointsaty: 'pointsAtY',
        pointsatz: 'pointsAtZ',
        preservealpha: 'preserveAlpha',
        preserveaspectratio: 'preserveAspectRatio',
        primitiveunits: 'primitiveUnits',
        refx: 'refX',
        refy: 'refY',
        repeatcount: 'repeatCount',
        repeatdur: 'repeatDur',
        requiredextensions: 'requiredExtensions',
        requiredfeatures: 'requiredFeatures',
        specularconstant: 'specularConstant',
        specularexponent: 'specularExponent',
        spreadmethod: 'spreadMethod',
        startoffset: 'startOffset',
        stddeviation: 'stdDeviation',
        stitchtiles: 'stitchTiles',
        surfacescale: 'surfaceScale',
        systemlanguage: 'systemLanguage',
        tablevalues: 'tableValues',
        targetx: 'targetX',
        targety: 'targetY',
        textlength: 'textLength',
        viewbox: 'viewBox',
        viewtarget: 'viewTarget',
        xchannelselector: 'xChannelSelector',
        ychannelselector: 'yChannelSelector',
        zoomandpan: 'zoomAndPan'
}
// Namespaced attribute names: name as written in the markup -> adjusted name,
// with the prefix and local name separated by a space instead of a colon
// (applied by adjust_foreign_attributes).
var foreign_attr_fixes = {
        'xlink:actuate': 'xlink actuate',
        'xlink:arcrole': 'xlink arcrole',
        'xlink:href': 'xlink href',
        'xlink:role': 'xlink role',
        'xlink:show': 'xlink show',
        'xlink:title': 'xlink title',
        'xlink:type': 'xlink type',
        'xml:base': 'xml base',
        'xml:lang': 'xml lang',
        'xml:space': 'xml space',
        'xmlns': 'xmlns',
        'xmlns:xlink': 'xmlns xlink'
}
// Fix the case of MathML attribute names on token t, in place:
// every `definitionurl` entry in t.attrs_a is renamed to `definitionURL`.
var adjust_mathml_attributes = function (t) {
        var idx, attr
        for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                attr = t.attrs_a[idx]
                if (attr[0] === 'definitionurl') {
                        attr[0] = 'definitionURL'
                }
        }
}
// Fix the case of SVG attribute names on token t, in place: every name in
// t.attrs_a that is a key of svg_attribute_fixes is replaced by its value.
//
// Only the table's own keys count. A plain `table[name] != null` test also
// matches inherited properties, so an attribute called `constructor` or
// `__proto__` would have its name replaced by a non-string.
var adjust_svg_attributes = function (t) {
        var has_own = Object.prototype.hasOwnProperty
        var idx, attr
        for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                attr = t.attrs_a[idx]
                if (has_own.call(svg_attribute_fixes, attr[0])) {
                        attr[0] = svg_attribute_fixes[attr[0]]
                }
        }
}
// Adjust namespaced attribute names on token t, in place: every name in
// t.attrs_a that is a key of foreign_attr_fixes is replaced by its value.
//
// Only the table's own keys count. A plain `table[name] != null` test also
// matches inherited properties, so an attribute called `constructor` or
// `__proto__` would have its name replaced by a non-string.
var adjust_foreign_attributes = function (t) {
        // fixfull
        var has_own = Object.prototype.hasOwnProperty
        var idx, attr
        for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                attr = t.attrs_a[idx]
                if (has_own.call(foreign_attr_fixes, attr[0])) {
                        attr[0] = foreign_attr_fixes[attr[0]]
                }
        }
}
664
// _decode_named_char_ref()
//
// The list of named character references is _huge_ so if we're running in a
// browser, we get the browser to decode them, rather than increasing the code
// size to include the table.
//
// Outside the browser the decoder is whatever parser_no_browser_helper.js
// exports.
var _decode_named_char_ref, decode_named_char_ref_el
if (context === 'module') {
        _decode_named_char_ref = require('./parser_no_browser_helper.js')
} else {
        // scratch element: markup assigned to innerHTML is read back through
        // .value as decoded text
        decode_named_char_ref_el = document.createElement('textarea')
        // txt: reference name without "&" or ";"
        // returns the decoded text, or null when the text comes back unchanged
        // (ie the browser did not decode it)
        _decode_named_char_ref = function (txt) {
                var decoded
                txt = "&" + txt + ";"
                decode_named_char_ref_el.innerHTML = txt
                decoded = decode_named_char_ref_el.value
                if (decoded === txt) {
                        return null
                }
                return decoded
        }
}
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Returns whatever _decode_named_char_ref() returns for that name. Every
// result is cached, failed lookups included, so each name is decoded once.
//
// The cache has no prototype, so names like "constructor" or "__proto__"
// cannot hit inherited properties and be mistaken for cached results.
var decode_named_char_ref_cache = Object.create(null)
var decode_named_char_ref = function (txt) {
        var decoded
        if (txt in decode_named_char_ref_cache) {
                return decode_named_char_ref_cache[txt]
        }
        decoded = _decode_named_char_ref(txt)
        decode_named_char_ref_cache[txt] = decoded
        return decoded
}
698
699 parse_html = function (args_html, args) {
700         var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, ref, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
// Fall back to an empty options object when the caller supplies none
// (parse_error below reads args.error_cb).
args = args == null ? {} : args

// input text and read position
txt = null
cur = null // index of next char in txt to be parsed

// declare doc and tokenizer variables so they're in scope below
doc = null
tok_state = null
tok_cur_tag = null // partially parsed tag
temporary_buffer = null
pending_table_character_tokens = null

// element lists
open_els = null // stack of open elements
afe = null // active formatting elements

// insertion modes
ins_mode = null
original_ins_mode = null
template_ins_modes = null

// element pointers
head_element_pointer = null
form_element_pointer = null
context_element = null

// flags
flag_scripting = null
flag_frameset_ok = null
flag_parsing = null
flag_foster_parenting = null
flag_fragment_parsing = null
725
// Lower flag_parsing. This only flips the flag; it takes no arguments and
// returns nothing.
stop_parsing = function () {
    flag_parsing = false
}
729
// Report a parse error: when the caller supplied args.error_cb, call it with
// cur (index of the next char in txt to be parsed). Without a callback the
// error is silently dropped.
parse_error = function () {
    if (args.error_cb == null) {
        return
    }
    args.error_cb(cur)
}
735
736         // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
737         // "Noah's Ark clause" but with three
// Add new_el to the front of afe (index 0 is the most recently added entry).
//
// Before adding, walk from the newest entry towards the oldest, stopping at
// the first marker, and count entries with the same name, namespace and
// attributes as new_el. The third such entry is removed, so there are never
// more than three identical entries after the last marker.
afe_push = function (new_el) {
    var same_attrs, seen, idx, candidate

    // true when every attribute of `from` has an identical value in `to`
    same_attrs = function (from, to) {
        var key
        for (key in from) {
            if (to[key] !== from[key]) {
                return false
            }
        }
        return true
    }

    seen = 0
    for (idx = 0; idx < afe.length; idx += 1) {
        candidate = afe[idx]
        if (candidate.type === TYPE_AFE_MARKER) {
            break
        }
        if (candidate.name !== new_el.name || candidate.namespace !== new_el.namespace) {
            continue
        }
        // compare in both directions so neither side has an extra attribute
        if (!same_attrs(candidate.attrs, new_el.attrs) || !same_attrs(new_el.attrs, candidate.attrs)) {
            continue
        }
        seen += 1
        if (seen === 3) {
            afe.splice(idx, 1)
            break
        }
    }
    afe.unshift(new_el)
}
775
// Put a fresh marker (built by new_afe_marker) at the front of afe.
afe_push_marker = function () {
    var marker = new_afe_marker()
    afe.unshift(marker)
}
779
780         // the functions below implement the Tree Construction algorithm
781         // http://www.w3.org/TR/html5/syntax.html#tree-construction
782
783         // But first... the helpers
// Returns true when any entry of open_els is a template element in the HTML
// namespace, false otherwise.
template_tag_is_open = function () {
    var idx, candidate
    idx = 0
    while (idx < open_els.length) {
        candidate = open_els[idx]
        if (candidate.namespace === NS_HTML && candidate.name === 'template') {
            return true
        }
        idx += 1
    }
    return false
}
// Search open_els from index 0 onward for an element named tag_name.
//
// tag_name:  element name to look for
// scope:     table mapping tag name -> namespace; an element whose entry
//            equals its own namespace ends the search
// namespace: required namespace of the match, or null to accept any
//
// Returns true if the element is found before a scope boundary is hit,
// false otherwise. The name check runs first, so a boundary element can
// itself be the match.
is_in_scope_x = function (tag_name, scope, namespace) {
    var any_namespace, depth, node
    any_namespace = namespace === null
    for (depth = 0; depth < open_els.length; depth += 1) {
        node = open_els[depth]
        if (node.name === tag_name && (any_namespace || namespace === node.namespace)) {
            return true
        }
        if (scope[node.name] === node.namespace) {
            return false
        }
    }
    return false
}
// Same search as is_in_scope_x, but with two boundary tables: an element
// listed (with its own namespace) in either scope or scope2 ends the search.
//
// Returns true if an element named tag_name (in namespace, or any namespace
// when namespace is null) is found before a boundary, false otherwise.
is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
    var any_namespace, depth, node
    any_namespace = namespace === null
    for (depth = 0; depth < open_els.length; depth += 1) {
        node = open_els[depth]
        if (node.name === tag_name && (any_namespace || namespace === node.namespace)) {
            return true
        }
        if (scope[node.name] === node.namespace || scope2[node.name] === node.namespace) {
            return false
        }
    }
    return false
}
// Scope boundary tables. Each maps a tag name to the namespace an element
// must be in to act as a boundary (lookups compare table[el.name] with
// el.namespace).
standard_scopers = {
    // HTML
    'applet': NS_HTML,
    'caption': NS_HTML,
    'html': NS_HTML,
    'table': NS_HTML,
    'td': NS_HTML,
    'th': NS_HTML,
    'marquee': NS_HTML,
    'object': NS_HTML,
    'template': NS_HTML,
    // MathML
    'mi': NS_MATHML,
    'mo': NS_MATHML,
    'mn': NS_MATHML,
    'ms': NS_MATHML,
    'mtext': NS_MATHML,
    'annotation-xml': NS_MATHML,
    // SVG
    'foreignObject': NS_SVG,
    'desc': NS_SVG,
    'title': NS_SVG
}
// extra boundary used (with standard_scopers) by is_in_button_scope
button_scopers = {
    'button': NS_HTML
}
// extra boundaries used (with standard_scopers) by is_in_li_scope
li_scopers = {
    'ol': NS_HTML,
    'ul': NS_HTML
}
// boundaries used by is_in_table_scope
table_scopers = {
    'html': NS_HTML,
    'table': NS_HTML,
    'template': NS_HTML
}
// Is an element named tag_name in scope, using standard_scopers as the
// boundary table? namespace is optional; omitted/undefined/null means "any
// namespace". Delegates to is_in_scope_x and returns its result.
is_in_scope = function (tag_name, namespace) {
    var wanted_namespace = namespace == null ? null : namespace
    return is_in_scope_x(tag_name, standard_scopers, wanted_namespace)
}
// Is an element named tag_name in button scope? Boundaries are
// standard_scopers plus button_scopers. namespace is optional;
// omitted/undefined/null means "any namespace". Delegates to is_in_scope_x_y
// and returns its result.
is_in_button_scope = function (tag_name, namespace) {
    var wanted_namespace = namespace == null ? null : namespace
    return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, wanted_namespace)
}
// Is an element named tag_name in table scope? Boundaries come from
// table_scopers only. namespace is optional; omitted/undefined/null means
// "any namespace". Delegates to is_in_scope_x and returns its result.
is_in_table_scope = function (tag_name, namespace) {
    var wanted_namespace = namespace == null ? null : namespace
    return is_in_scope_x(tag_name, table_scopers, wanted_namespace)
}
854         // aka is_in_list_item_scope
// Is an element named tag_name in list item scope? Boundaries are
// standard_scopers plus li_scopers. namespace is optional;
// omitted/undefined/null means "any namespace". Delegates to is_in_scope_x_y
// and returns its result.
is_in_li_scope = function (tag_name, namespace) {
    var wanted_namespace = namespace == null ? null : namespace
    return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, wanted_namespace)
}
// Is an element named tag_name in select scope?
//
// tag_name:  element name to look for
// namespace: optional; omitted/undefined/null means "any namespace"
//
// Walks open_els from index 0 onward. Returns true when the element is
// found; returns false as soon as a scope boundary is passed or the stack is
// exhausted.
//
// Select scope is "all element types except optgroup and option in the HTML
// namespace", so every other element is a boundary. The previous condition
// joined the namespace test with &&, which meant no HTML element ever ended
// the search.
is_in_select_scope = function (tag_name, namespace) {
    var i, t, is_html_option_like
    if (namespace == null) {
        namespace = null
    }
    for (i = 0; i < open_els.length; ++i) {
        t = open_els[i]
        if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
            return true
        }
        is_html_option_like = t.namespace === NS_HTML && (t.name === 'optgroup' || t.name === 'option')
        if (!is_html_option_like) {
            return false
        }
    }
    return false
}
877         // this checks for a particular element, not by name
878         // this requires a namespace match
// Is this exact element object (identity comparison, not name) in scope?
//
// needle: the element to look for in open_els
//
// Walks open_els from index 0 onward. Returns true when needle itself is
// reached; returns false when an element listed in standard_scopers (with a
// matching namespace) is passed first, or when needle is not on the stack.
el_is_in_scope = function (needle) {
    // el is declared locally; it used to be assigned without a declaration,
    // which leaked a global (and throws in strict mode)
    var i, el
    for (i = 0; i < open_els.length; ++i) {
        el = open_els[i]
        if (el === needle) {
            return true
        }
        if (standard_scopers[el.name] === el.namespace) {
            return false
        }
    }
    return false
}
892
// Tag names that end "clear the stack back to a table context".
clear_to_table_stopers = {
    'table': true,
    'template': true,
    'html': true
}
// Pop elements off the front of open_els until the current node
// (open_els[0]) is an HTML table, template or html element.
//
// Takes no arguments and returns nothing; open_els is modified in place.
// Assumes one of the stopper elements is on the stack (an exhausted stack
// throws, as before).
clear_stack_to_table_context = function () {
    var node
    while (true) {
        node = open_els[0]
        // Own-property lookup: a plain `table[name] != null` test is also
        // satisfied by inherited members, so an element named "constructor"
        // used to stop the clearing. The namespace check keeps this in line
        // with clear_stack_to_table_body_context.
        if (node.namespace === NS_HTML && Object.prototype.hasOwnProperty.call(clear_to_table_stopers, node.name)) {
            break
        }
        open_els.shift()
    }
}
// Elements that end "clear the stack back to a table body context",
// as tag name -> required namespace.
clear_to_table_body_stopers = {
    'tbody': NS_HTML,
    'tfoot': NS_HTML,
    'thead': NS_HTML,
    'template': NS_HTML,
    'html': NS_HTML
}
// Pop elements off the front of open_els until the current node
// (open_els[0]) is listed in clear_to_table_body_stopers with a matching
// namespace. Takes no arguments and returns nothing; open_els is modified
// in place.
clear_stack_to_table_body_context = function () {
    while (clear_to_table_body_stopers[open_els[0].name] !== open_els[0].namespace) {
        open_els.shift()
    }
}
// Tag names that end "clear the stack back to a table row context".
clear_to_table_row_stopers = {
    'tr': true,
    'template': true,
    'html': true
}
// Pop elements off the front of open_els until the current node
// (open_els[0]) is an HTML tr, template or html element.
//
// Takes no arguments and returns nothing; open_els is modified in place.
// Assumes one of the stopper elements is on the stack (an exhausted stack
// throws, as before).
clear_stack_to_table_row_context = function () {
    var node
    while (true) {
        node = open_els[0]
        // Own-property lookup: a plain `table[name] != null` test is also
        // satisfied by inherited members, so an element named "constructor"
        // used to stop the clearing. The namespace check keeps this in line
        // with clear_stack_to_table_body_context.
        if (node.namespace === NS_HTML && Object.prototype.hasOwnProperty.call(clear_to_table_row_stopers, node.name)) {
            break
        }
        open_els.shift()
    }
}
// Remove entries from the front of afe up to and including the first
// marker. If afe holds no marker it is simply emptied
// (this happens in fragment case, ?spec error).
clear_afe_to_marker = function () {
    while (afe.length > 0) {
        if (afe.shift().type === TYPE_AFE_MARKER) {
            return
        }
    }
}
946
947         // 8.2.3.1 ...
948         // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
// "Reset the insertion mode appropriately": pick ins_mode from the contents
// of open_els. Step numbers in the comments refer to the algorithm at
// http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
//
// Takes no arguments and returns nothing; the result is stored in ins_mode.
// open_els[0] is the current node ("last node in the stack") and the final
// entry of open_els is the "first node in the stack".
reset_ins_mode = function () {
    var idx, node, last, above, ancestor, html_name
    // 1. last starts out false
    last = false
    // 2. start at the current node
    idx = 0
    node = open_els[idx]
    for (;;) {
        // 3. Loop: at the first node of the stack set last, and in the
        // fragment case look at the context element instead
        if (idx === open_els.length - 1) {
            last = true
            if (flag_fragment_parsing) {
                node = context_element
            }
        }
        // steps 4-16 only ever match elements in the HTML namespace
        html_name = node.namespace === NS_HTML ? node.name : null
        switch (html_name) {
            case 'select':
                // 4. select: "in select in table" if a table is found above
                // it before any template, otherwise "in select"
                if (!last) {
                    for (above = idx + 1; above < open_els.length; above += 1) {
                        ancestor = open_els[above]
                        if (ancestor.namespace !== NS_HTML) {
                            continue
                        }
                        if (ancestor.name === 'template') {
                            break
                        }
                        if (ancestor.name === 'table') {
                            ins_mode = ins_mode_in_select_in_table
                            return
                        }
                    }
                }
                ins_mode = ins_mode_in_select
                return
            case 'td':
            case 'th':
                // 5. only when last is false; otherwise fall to step 17
                if (!last) {
                    ins_mode = ins_mode_in_cell
                    return
                }
                break
            case 'tr':
                // 6.
                ins_mode = ins_mode_in_row
                return
            case 'tbody':
            case 'thead':
            case 'tfoot':
                // 7.
                ins_mode = ins_mode_in_table_body
                return
            case 'caption':
                // 8.
                ins_mode = ins_mode_in_caption
                return
            case 'colgroup':
                // 9.
                ins_mode = ins_mode_in_column_group
                return
            case 'table':
                // 10.
                ins_mode = ins_mode_in_table
                return
            case 'template':
                // 11. current template insertion mode
                ins_mode = template_ins_modes[0]
                return
            case 'head':
                // 12. last is true: "in body" ("in body"! not "in head"!)
                // (fragment case)
                // 13. last is false: "in head"
                ins_mode = last ? ins_mode_in_body : ins_mode_in_head
                return
            case 'body':
                // 14.
                ins_mode = ins_mode_in_body
                return
            case 'frameset':
                // 15. (fragment case)
                ins_mode = ins_mode_in_frameset
                return
            case 'html':
                // 16. "before head" while the head element pointer is null
                // (fragment case), "after head" otherwise
                ins_mode = head_element_pointer === null ? ins_mode_before_head : ins_mode_after_head
                return
        }
        // 17. (fragment case)
        if (last) {
            ins_mode = ins_mode_in_body
            return
        }
        // 18. move to the node before node in the stack of open elements
        idx += 1
        node = open_els[idx]
        // 19. Return to the step labeled loop.
    }
}
1097
1098         // 8.2.3.2
1099
1100         // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
// Returns the adjusted current node: context_element when fragment parsing
// and open_els holds exactly one element, otherwise the current node
// (open_els[0]).
adjusted_current_node = function () {
    var only_root_in_fragment = open_els.length === 1 && flag_fragment_parsing
    return only_root_in_fragment ? context_element : open_els[0]
}
1107
1108         // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1109         // this implementation is structured (mostly) as described at the link above.
1110         // capitalized comments are the "labels" described at the link above.
// Reconstruct the active formatting elements.
//
// afe[0] is the most recently added entry. Starting there, find the run of
// entries that are neither markers nor present in open_els, then re-create
// them oldest first: each is replaced in afe by the element returned from
// insert_html_element(entry.token).
//
// Takes no arguments and returns nothing. Does nothing when afe is empty or
// its newest entry is a marker or already in open_els.
reconstruct_afe = function () {
    var pos, is_settled

    // an entry needs no work when it is a marker or is already open
    is_settled = function (entry) {
        return entry.type === TYPE_AFE_MARKER || open_els.indexOf(entry) >= 0
    }

    if (afe.length === 0 || is_settled(afe[0])) {
        return
    }
    // Rewind: move towards older entries while the next one still needs work
    pos = 0
    while (pos < afe.length - 1 && !is_settled(afe[pos + 1])) {
        pos += 1
    }
    // Create / Advance: rebuild from the oldest such entry up to the newest
    for (; pos >= 0; pos -= 1) {
        afe[pos] = insert_html_element(afe[pos].token)
    }
}
1141
1142         // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1143         // adoption agency algorithm
1144         // overview here:
1145         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1146         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1147         //   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1148         adoption_agency = function (subject) {
1149                 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, ref, ref1, s, t, u, w, y, z
1150 // this block implements the W3C spec
1151 //              # 1. If the current node is an HTML element whose tag name is subject,
1152 //              # then run these substeps:
1153 //              #
1154 //              # 1. Let element be the current node.
1155 //              #
1156 //              # 2. Pop element off the stack of open elements.
1157 //              #
1158 //              # 3. If element is also in the list of active formatting elements,
1159 //              # remove the element from the list.
1160 //              #
1161 //              # 4. Abort the adoption agency algorithm.
1162 //              if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1163 //                      el = open_els.shift()
1164 //                      # remove it from the list of active formatting elements (if found)
1165 //                      for t, i in afe
1166 //                              if t is el
1167 //                                      afe.splice i, 1
1168 //                                      break
1169 //                      return
1170 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1171                 // If the current node is an HTML element whose tag name is subject, and
1172                 // the current node is not in the list of active formatting elements,
1173                 // then pop the current node off the stack of open elements, and abort
1174                 // these steps.
1175                 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1176                         // remove it from the list of active formatting elements (if found)
1177                         in_afe = false
1178                         for (i = 0; i < afe.length; ++i) {
1179                                 el = afe[i]
1180                                 if (el === open_els[0]) {
1181                                         in_afe = true
1182                                         break
1183                                 }
1184                         }
1185                         if (!in_afe) {
1186                                 open_els.shift()
1187                                 return
1188                         }
1189                         // fall through
1190                 }
1191 // END WHATWG
1192                 outer = 0
1193                 while (true) {
1194                         if (outer >= 8) {
1195                                 return
1196                         }
1197                         outer += 1
1198                         // 5. Let formatting element be the last element in the list of
1199                         // active formatting elements that: is between the end of the list
1200                         // and the last scope marker in the list, if any, or the start of
1201                         // the list otherwise, and  has the tag name subject.
1202                         fe = null
1203                         for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1204                                 t = afe[fe_of_afe]
1205                                 if (t.type === TYPE_AFE_MARKER) {
1206                                         break
1207                                 }
1208                                 if (t.name === subject) {
1209                                         fe = t
1210                                         break
1211                                 }
1212                         }
1213                         // If there is no such element, then abort these steps and instead
1214                         // act as described in the "any other end tag" entry above.
1215                         if (fe === null) {
1216                                 in_body_any_other_end_tag(subject)
1217                                 return
1218                         }
1219                         // 6. If formatting element is not in the stack of open elements,
1220                         // then this is a parse error; remove the element from the list, and
1221                         // abort these steps.
1222                         in_open_els = false
1223                         for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1224                                 t = open_els[fe_of_open_els]
1225                                 if (t === fe) {
1226                                         in_open_els = true
1227                                         break
1228                                 }
1229                         }
1230                         if (!in_open_els) {
1231                                 parse_error()
1232                                 // "remove it from the list" must mean afe, since it's not in open_els
1233                                 afe.splice(fe_of_afe, 1)
1234                                 return
1235                         }
1236                         // 7. If formatting element is in the stack of open elements, but
1237                         // the element is not in scope, then this is a parse error; abort
1238                         // these steps.
1239                         if (!el_is_in_scope(fe)) {
1240                                 parse_error()
1241                                 return
1242                         }
1243                         // 8. If formatting element is not the current node, this is a parse
1244                         // error. (But do not abort these steps.)
1245                         if (open_els[0] !== fe) {
1246                                 parse_error()
1247                                 // continue
1248                         }
1249                         // 9. Let furthest block be the topmost node in the stack of open
1250                         // elements that is lower in the stack than formatting element, and
1251                         // is an element in the special category. There might not be one.
1252                         fb = null
1253                         fb_of_open_els = null
1254                         for (i = 0; i < open_els.length; ++i) {
1255                                 t = open_els[i]
1256                                 if (t === fe) {
1257                                         break
1258                                 }
1259                                 if (el_is_special(t)) {
1260                                         fb = t
1261                                         fb_of_open_els = i
1262                                         // and continue, to see if there's one that's more "topmost"
1263                                 }
1264                         }
1265                         // 10. If there is no furthest block, then the UA must first pop all
1266                         // the nodes from the bottom of the stack of open elements, from the
1267                         // current node up to and including formatting element, then remove
1268                         // formatting element from the list of active formatting elements,
1269                         // and finally abort these steps.
1270                         if (fb === null) {
1271                                 while (true) {
1272                                         t = open_els.shift()
1273                                         if (t === fe) {
1274                                                 afe.splice(fe_of_afe, 1)
1275                                                 return
1276                                         }
1277                                 }
1278                         }
1279                         // 11. Let common ancestor be the element immediately above
1280                         // formatting element in the stack of open elements.
1281                         ca = open_els[fe_of_open_els + 1] // common ancestor
1282
1283                         node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1284                         // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1285                         bookmark = new_aaa_bookmark()
1286                         for (i = 0; i < afe.length; ++i) {
1287                                 t = afe[i]
1288                                 if (t === fe) {
1289                                         afe.splice(i, 0, bookmark)
1290                                         break
1291                                 }
1292                         }
1293                         node = last_node = fb
1294                         inner = 0
1295                         while (true) {
1296                                 inner += 1
1297                                 // 3. Let node be the element immediately above node in the
1298                                 // stack of open elements, or if node is no longer in the stack
1299                                 // of open elements (e.g. because it got removed by this
1300                                 // algorithm), the element that was immediately above node in
1301                                 // the stack of open elements before node was removed.
1302                                 node_next = null
1303                                 for (i = 0; i < open_els.length; ++i) {
1304                                         t = open_els[i]
1305                                         if (t === node) {
1306                                                 node_next = open_els[i + 1]
1307                                                 break
1308                                         }
1309                                 }
1310                                 node = node_next != null ? node_next : node_above
1311                                 // TODO make sure node_above gets re-set if/when node is removed from open_els
1312
1313                                 // 4. If node is formatting element, then go to the next step in
1314                                 // the overall algorithm.
1315                                 if (node === fe) {
1316                                         break
1317                                 }
1318                                 // 5. If inner loop counter is greater than three and node is in
1319                                 // the list of active formatting elements, then remove node from
1320                                 // the list of active formatting elements.
1321                                 node_in_afe = false
1322                                 for (i = 0; i < afe.length; ++i) {
1323                                         t = afe[i]
1324                                         if (t === node) {
1325                                                 if (inner > 3) {
1326                                                         afe.splice(i, 1)
1327                                                 } else {
1328                                                         node_in_afe = true
1329                                                 }
1330                                                 break
1331                                         }
1332                                 }
1333                                 // 6. If node is not in the list of active formatting elements,
1334                                 // then remove node from the stack of open elements and then go
1335                                 // back to the step labeled inner loop.
1336                                 if (!node_in_afe) {
1337                                         for (i = 0; i < open_els.length; ++i) {
1338                                                 t = open_els[i]
1339                                                 if (t === node) {
1340                                                         node_above = open_els[i + 1]
1341                                                         open_els.splice(i, 1)
1342                                                         break
1343                                                 }
1344                                         }
1345                                         continue
1346                                 }
1347                                 // 7. create an element for the token for which the element node
1348                                 // was created, in the HTML namespace, with common ancestor as
1349                                 // the intended parent; replace the entry for node in the list
1350                                 // of active formatting elements with an entry for the new
1351                                 // element, replace the entry for node in the stack of open
1352                                 // elements with an entry for the new element, and let node be
1353                                 // the new element.
1354                                 new_node = token_to_element(node.token, NS_HTML, ca)
1355                                 for (i = 0; i < afe.length; ++i) {
1356                                         t = afe[i]
1357                                         if (t === node) {
1358                                                 afe[i] = new_node
1359                                                 break
1360                                         }
1361                                 }
1362                                 for (i = 0; i < open_els.length; ++i) {
1363                                         t = open_els[i]
1364                                         if (t === node) {
1365                                                 node_above = open_els[i + 1]
1366                                                 open_els[i] = new_node
1367                                                 break
1368                                         }
1369                                 }
1370                                 node = new_node
1371                                 // 8. If last node is furthest block, then move the
1372                                 // aforementioned bookmark to be immediately after the new node
1373                                 // in the list of active formatting elements.
1374                                 if (last_node === fb) {
1375                                         for (i = 0; i < afe.length; ++i) {
1376                                                 t = afe[i]
1377                                                 if (t === bookmark) {
1378                                                         afe.splice(i, 1)
1379                                                         break
1380                                                 }
1381                                         }
1382                                         for (i = 0; i < afe.length; ++i) {
1383                                                 t = afe[i]
1384                                                 if (t === node) {
1385                                                         // "after" means lower
1386                                                         afe.splice(i, 0, bookmark) // "after as <-
1387                                                         break
1388                                                 }
1389                                         }
1390                                 }
1391                                 // 9. Insert last node into node, first removing it from its
1392                                 // previous parent node if any.
1393                                 if (last_node.parent != null) {
1394                                         for (i = 0; i < last_node.parent.children.length; ++i) {
1395                                                 c = last_node.parent.children[i]
1396                                                 if (c === last_node) {
1397                                                         last_node.parent.children.splice(i, 1)
1398                                                         break
1399                                                 }
1400                                         }
1401                                 }
1402                                 node.children.push(last_node)
1403                                 last_node.parent = node
1404                                 // 10. Let last node be node.
1405                                 last_node = node
1406                                 // 11. Return to the step labeled inner loop.
1407                         }
1408                         // 14. Insert whatever last node ended up being in the previous step
1409                         // at the appropriate place for inserting a node, but using common
1410                         // ancestor as the override target.
1411
1412                         // In the case where fe is immediately followed by fb:
1413                         //   * inner loop exits out early (node==fe)
1414                         //   * last_node is fb
1415                         //   * last_node is still in the tree (not a duplicate)
1416                         if (last_node.parent != null) {
1417                                 for (i = 0; i < last_node.parent.children.length; ++i) {
1418                                         c = last_node.parent.children[i]
1419                                         if (c === last_node) {
1420                                                 last_node.parent.children.splice(i, 1)
1421                                                 break
1422                                         }
1423                                 }
1424                         }
1425                         // can't use standard insert token thing, because it's already in
1426                         // open_els and must stay at it's current position in open_els
1427                         dest = adjusted_insertion_location(ca)
1428                         dest[0].children.splice(dest[1], 0, last_node)
1429                         last_node.parent = dest[0]
1430                         // 15. Create an element for the token for which formatting element
1431                         // was created, in the HTML namespace, with furthest block as the
1432                         // intended parent.
1433                         new_element = token_to_element(fe.token, NS_HTML, fb)
1434                         // 16. Take all of the child nodes of furthest block and append them
1435                         // to the element created in the last step.
1436                         while (fb.children.length) {
1437                                 t = fb.children.shift()
1438                                 t.parent = new_element
1439                                 new_element.children.push(t)
1440                         }
1441                         // 17. Append that new element to furthest block.
1442                         new_element.parent = fb
1443                         fb.children.push(new_element)
1444                         // 18. Remove formatting element from the list of active formatting
1445                         // elements, and insert the new element into the list of active
1446                         // formatting elements at the position of the aforementioned
1447                         // bookmark.
1448                         for (i = 0; i < afe.length; ++i) {
1449                                 t = afe[i]
1450                                 if (t === fe) {
1451                                         afe.splice(i, 1)
1452                                         break
1453                                 }
1454                         }
1455                         for (i = 0; i < afe.length; ++i) {
1456                                 t = afe[i]
1457                                 if (t === bookmark) {
1458                                         afe[i] = new_element
1459                                         break
1460                                 }
1461                         }
1462                         // 19. Remove formatting element from the stack of open elements,
1463                         // and insert the new element into the stack of open elements
1464                         // immediately below the position of furthest block in that stack.
1465                         for (i = 0; i < open_els.length; ++i) {
1466                                 t = open_els[i]
1467                                 if (t === fe) {
1468                                         open_els.splice(i, 1)
1469                                         break
1470                                 }
1471                         }
1472                         for (i = 0; i < open_els.length; ++i) {
1473                                 t = open_els[i]
1474                                 if (t === fb) {
1475                                         open_els.splice(i, 0, new_element)
1476                                         break
1477                                 }
1478                         }
1479                         // 20. Jump back to the step labeled outer loop.
1480                 }
1481         }
1482
1483         // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
// Close a p element: generate implied end tags (except for p), report a parse
// error if the current node (open_els[0]) is then not an HTML p element, and
// pop the stack of open elements up to and including the first HTML p found.
// The last entry of open_els is never popped.
close_p_element = function () {
        // Deliberately local: the popped element must not be written to an
        // outer (or implicit global) variable.
        var popped
        generate_implied_end_tags('p') // arg is exception
        if (open_els[0].name !== 'p' || open_els[0].namespace !== NS_HTML) {
                parse_error()
        }
        while (open_els.length > 1) { // just in case there is no p to find
                popped = open_els.shift()
                if (popped.name === 'p' && popped.namespace === NS_HTML) {
                        return
                }
        }
}
// Close a p element, but only when an HTML p is in button scope.
close_p_if_in_button_scope = function () {
        if (!is_in_button_scope('p', NS_HTML)) {
                return
        }
        close_p_element()
}
1501
1502         // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1503         // aka insert_a_character = function (t) {
// Insert the character token ``t`` at the adjusted insertion location. When
// the node just before that location is a text node, the token's text is
// appended to that node instead of inserting ``t`` as a new child.
insert_character = function (t) {
        var where, parent, index, before
        where = adjusted_insertion_location()
        parent = where[0]
        index = where[1]
        // fixfull check for Document node
        if (index > 0) {
                before = parent.children[index - 1]
                if (before.type === TYPE_TEXT) {
                        // merge into the preceding text node
                        before.text += t.text
                        return
                }
        }
        parent.children.splice(index, 0, t)
        t.parent = parent
}
1518
1519         // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
// Tree construction dispatcher: hand token ``t`` either to the current
// insertion mode (``ins_mode``) or to the foreign content rules
// (``in_foreign_content``), depending on the adjusted current node.
process_token = function (t) {
        var node, use_ins_mode
        node = adjusted_current_node()
        // The clauses are tried in order and evaluation stops at the first
        // one that holds.
        use_ins_mode =
                // no adjusted current node
                node == null ||
                // adjusted current node is in the HTML namespace
                node.namespace === NS_HTML ||
                // MathML text integration point: start tags other than
                // mglyph/malignmark, and text
                (is_mathml_text_integration_point(node) && (
                        (t.type === TYPE_START_TAG && t.name !== 'mglyph' && t.name !== 'malignmark') ||
                        t.type === TYPE_TEXT
                )) ||
                // <svg> start tag directly inside MathML annotation-xml
                (node.namespace === NS_MATHML && node.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') ||
                // HTML integration point: start tags and text
                (is_html_integration(node) && (t.type === TYPE_START_TAG || t.type === TYPE_TEXT)) ||
                // end of file
                t.type === TYPE_EOF
        if (use_ins_mode) {
                ins_mode(t)
        } else {
                in_foreign_content(t)
        }
}
1557
1558         // 8.2.5.1
1559         // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1560         // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
// Find the "appropriate place for inserting a node".
//
// override_target: optional node to use as the target instead of the current
// node (open_els[0]).
//
// Returns a two element array: [parent_node, index_within_parent.children]
adjusted_insertion_location = function (override_target) {
        var target, ail, index_of_open_html_el, foster_parent_location

        // Index in open_els of the first (ie most recently added) HTML element
        // called ``name``, or -1 when there is none.
        index_of_open_html_el = function (name) {
                var i
                for (i = 0; i < open_els.length; ++i) {
                        if (open_els[i].name === name && open_els[i].namespace === NS_HTML) {
                                return i
                        }
                }
                return -1
        }

        // Foster parenting substeps. Each ``return`` below is an "abort these
        // substeps" in the spec text.
        foster_parent_location = function () {
                var template_i, table_i, table, table_in_parent, previous_element
                // 1. Let last template be the last template element in the
                // stack of open elements, if any.
                template_i = index_of_open_html_el('template')
                // 2. Let last table be the last table element in the stack of
                // open elements, if any.
                table_i = index_of_open_html_el('table')
                // 3. If there is a last template and either there is no last
                // table, or there is one, but last template is lower (more
                // recently added) than last table in the stack of open
                // elements, then: let adjusted insertion location be inside
                // last template's template contents, after its last child (if
                // any), and abort these substeps.
                if (template_i !== -1 && (table_i === -1 || template_i < table_i)) {
                        // fixfull should be its contents
                        return [open_els[template_i], open_els[template_i].children.length]
                }
                // 4. If there is no last table, then let adjusted insertion
                // location be inside the first element in the stack of open
                // elements (the html element), after its last child (if any),
                // and abort these substeps. (fragment case)
                if (table_i === -1) {
                        // the "first" element is the last entry of open_els
                        return [open_els[open_els.length - 1], open_els[open_els.length - 1].children.length]
                }
                table = open_els[table_i]
                // 5. If last table has a parent element, then let adjusted
                // insertion location be inside last table's parent element,
                // immediately before last table, and abort these substeps.
                if (table.parent != null) {
                        table_in_parent = table.parent.children.indexOf(table)
                        if (table_in_parent !== -1) {
                                return [table.parent, table_in_parent]
                        }
                        // The parent pointer is stale (table is not among its
                        // parent's children). Treat it like a table without a
                        // parent rather than returning an undefined index.
                }
                // 6. Let previous element be the element immediately above last
                // table in the stack of open elements.
                previous_element = open_els[table_i + 1]
                // 7. Let adjusted insertion location be inside previous
                // element, after its last child (if any).
                //
                // Note: These steps are involved in part because it's possible
                // for elements, the table element in this case in particular,
                // to have been moved by a script around in the DOM, or indeed
                // removed from the DOM entirely, after the element was inserted
                // by the parser.
                return [previous_element, previous_element.children.length]
        }

        // 1. If there was an override target specified, then let target be the
        // override target. Otherwise, let target be the current node.
        target = override_target != null ? override_target : open_els[0]

        // 2. Determine the adjusted insertion location using the first matching
        // steps from the following list:
        if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
                // If foster parenting is enabled and target is a table, tbody,
                // tfoot, thead, or tr element. Foster parenting happens when
                // content is misnested in tables.
                ail = foster_parent_location()
        } else {
                // Otherwise let adjusted insertion location be inside target,
                // after its last child (if any).
                ail = [target, target.children.length]
        }

        // 3. If the adjusted insertion location is inside a template element,
        // let it instead be inside the template element's template contents,
        // after its last child (if any).
        // fixfull (template)

        // 4. Return the adjusted insertion location.
        return ail
}
1667
1668         // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1669         // aka create_an_element_for_token
// Create (but do not insert) an element node for tag token ``t``.
//
// t: tag token; its ``attrs_a`` list of [name, value] pairs becomes the
//    element's ``attrs`` hash, and the token itself is kept as ``token``
// namespace: namespace for the new element
// intended_parent: currently not used by this implementation
//
// Returns the new Node.
token_to_element = function (t, namespace, intended_parent) {
        // convert attributes into a hash
        var attrs = {}
        t.attrs_a.forEach(function (pair) {
                // a later pair with the same name overwrites an earlier one
                attrs[pair[0]] = pair[1] // TODO check what to do with duplicate attrs
        })

        // TODO 2. If the newly created element has an xmlns attribute in the
        // XMLNS namespace whose value is not exactly the same as the element's
        // namespace, that is a parse error. Similarly, if the newly created
        // element has an xmlns:xlink attribute in the XMLNS namespace whose
        // value is not the XLink Namespace, that is a parse error.

        // fixfull: the spec says stuff about form pointers and ownerDocument

        return new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
}
1690
1691         // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
// Create an element for ``token`` in ``namespace``, insert it at the adjusted
// insertion location, and put it at the front of open_els.
//
// Returns the new element.
insert_foreign_element = function (token, namespace) {
        var where, parent, el
        where = adjusted_insertion_location()
        parent = where[0]
        el = token_to_element(token, namespace, parent)
        // TODO skip this next step if it's broken (eg parent is document with child already)
        el.parent = parent
        parent.children.splice(where[1], 0, el)
        open_els.unshift(el)
        return el
}
1704         // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
// Same as insert_foreign_element, with the namespace fixed to NS_HTML.
// Returns the new element.
insert_html_element = function (token) {
        var el = insert_foreign_element(token, NS_HTML)
        return el
}
1708
1709         // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1710         // position should be [node, index_within_children]
// Insert comment token ``t`` into the tree.
//
// t: the comment node to insert
// position: optional [node, index_within_children]; when omitted (null or
//           undefined) the adjusted insertion location is used
//
// The node's ``parent`` is set to the node it was inserted into, the same way
// insert_character and insert_foreign_element do for the nodes they insert.
insert_comment = function (t, position) {
        var where = position != null ? position : adjusted_insertion_location()
        where[0].children.splice(where[1], 0, t)
        t.parent = where[0]
}
1718
1719         // 8.2.5.2
1720         // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Generic raw text element parsing: insert an HTML element for ``t``, save
// the current insertion mode in original_ins_mode, make ins_mode_text the
// insertion mode and set tok_state to tok_state_rawtext.
parse_generic_raw_text = function (t) {
        insert_html_element(t)
        // save first: the next line overwrites ins_mode
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rawtext
}
// Generic RCDATA element parsing: insert an HTML element for ``t``, save the
// current insertion mode in original_ins_mode, make ins_mode_text the
// insertion mode and set tok_state to tok_state_rcdata.
parse_generic_rcdata_text = function (t) {
        insert_html_element(t)
        // save first: the next line overwrites ins_mode
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rcdata
}
1733
1734         // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1735         // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
// Pop elements off the front of open_els for as long as the current node
// (open_els[0]) is listed in end_tag_implied with a matching namespace.
//
// except: optional element name; popping stops when the current node has
//         this name
generate_implied_end_tags = function (except) {
        var current
        // a missing exception is treated as null
        var exception = except == null ? null : except
        for (;;) {
                current = open_els[0]
                if (end_tag_implied[current.name] !== current.namespace) {
                        return
                }
                if (current.name === exception) {
                        return
                }
                open_els.shift()
        }
}
1744
1745         // 8.2.5.4 The rules for parsing tokens in HTML content
1746         // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1747
1748         // 8.2.5.4.1 The "initial" insertion mode
1749         // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
// Decide whether DOCTYPE token ``t`` puts the document into quirks mode.
//
// t: doctype token; this reads t.flag('force-quirks'), t.name,
//    t.public_identifier and t.system_identifier (identifiers may be null or
//    undefined when missing, and are compared case-insensitively)
//
// Returns true for quirks mode, false otherwise.
is_quirks_yes_doctype = function (t) {
        var i, pi
        // prefix test without hand-counted prefix lengths
        var starts_with = function (str, prefix) {
                return str.lastIndexOf(prefix, 0) === 0
        }
        if (t.flag('force-quirks')) {
                return true
        }
        if (t.name !== 'html') {
                return true
        }
        pi = t.public_identifier != null ? t.public_identifier.toLowerCase() : null
        if (pi !== null) {
                for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
                        if (starts_with(pi, quirks_yes_pi_prefixes[i])) {
                                return true
                        }
                }
                if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
                        return true
                }
        }
        if (t.system_identifier != null) {
                return t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
        }
        // The system identifier is missing: these two public identifier
        // prefixes mean quirks mode only in that case.
        if (pi !== null) {
                return starts_with(pi, '-//w3c//dtd html 4.01 frameset//') || starts_with(pi, '-//w3c//dtd html 4.01 transitional//')
        }
        return false
}
1782         is_quirks_limited_doctype = function (t) {
1783                 var pi
1784                 if (t.public_identifier != null) {
1785                         pi = t.public_identifier.toLowerCase()
1786                         if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
1787                                 return true
1788                         }
1789                         if (t.system_identifier != null) {
1790                                 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
1791                                         return true
1792                                 }
1793                         }
1794                 }
1795                 return false
1796         }
1797         ins_mode_initial = function (t) {
1798                 if (is_space_tok(t)) {
1799                         return
1800                 }
1801                 if (t.type === TYPE_COMMENT) {
1802                         // ?fixfull
1803                         doc.children.push(t)
1804                         return
1805                 }
1806                 if (t.type === TYPE_DOCTYPE) {
1807                         // fixfull syntax error from first paragraph and following bullets
1808                         // fixfull set doc.doctype
1809                         // fixfull is the "not an iframe srcdoc" thing relevant?
1810                         if (is_quirks_yes_doctype(t)) {
1811                                 doc.flag('quirks mode', QUIRKS_YES)
1812                         } else if (is_quirks_limited_doctype(t)) {
1813                                 doc.flag('quirks mode', QUIRKS_LIMITED)
1814                         }
1815                         doc.children.push(t)
1816                         ins_mode = ins_mode_before_html
1817                         return
1818                 }
1819                 // Anything else
1820                 // fixfull not iframe srcdoc?
1821                 parse_error()
1822                 doc.flag('quirks mode', QUIRKS_YES)
1823                 ins_mode = ins_mode_before_html
1824                 process_token(t)
1825         }
1826
1827         // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1828         ins_mode_before_html = function (t) {
1829                 if (t.type === TYPE_DOCTYPE) {
1830                         parse_error()
1831                         return
1832                 }
1833                 if (t.type === TYPE_COMMENT) {
1834                         doc.children.push(t)
1835                         return
1836                 }
1837                 if (is_space_tok(t)) {
1838                         return
1839                 }
1840                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1841                         el = token_to_element(t, NS_HTML, doc)
1842                         doc.children.push(el)
1843                         el.document = doc
1844                         open_els.unshift(el)
1845                         // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1846                         ins_mode = ins_mode_before_head
1847                         return
1848                 }
1849                 if (t.type === TYPE_END_TAG) {
1850                         if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1851                                 // fall through to "anything else"
1852                         } else {
1853                                 parse_error()
1854                                 return
1855                         }
1856                 }
1857                 // Anything else
1858                 el = token_to_element(new_open_tag('html'), NS_HTML, doc)
1859                 doc.children.push(el)
1860                 el.document = doc
1861                 open_els.unshift(el)
1862                 // ?fixfull browsing context
1863                 ins_mode = ins_mode_before_head
1864                 process_token(t)
1865         }
1866
1867         // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1868         ins_mode_before_head = function (t) {
1869                 var el
1870                 if (is_space_tok(t)) {
1871                         return
1872                 }
1873                 if (t.type === TYPE_COMMENT) {
1874                         insert_comment(t)
1875                         return
1876                 }
1877                 if (t.type === TYPE_DOCTYPE) {
1878                         parse_error()
1879                         return
1880                 }
1881                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1882                         ins_mode_in_body(t)
1883                         return
1884                 }
1885                 if (t.type === TYPE_START_TAG && t.name === 'head') {
1886                         el = insert_html_element(t)
1887                         head_element_pointer = el
1888                         ins_mode = ins_mode_in_head
1889                         return
1890                 }
1891                 if (t.type === TYPE_END_TAG) {
1892                         if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1893                                 // fall through to Anything else below
1894                         } else {
1895                                 parse_error()
1896                                 return
1897                         }
1898                 }
1899                 // Anything else
1900                 el = insert_html_element(new_open_tag('head'))
1901                 head_element_pointer = el
1902                 ins_mode = ins_mode_in_head
1903                 process_token(t)
1904         }
1905
1906         // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1907         ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
1908                 open_els.shift() // spec says this will be a 'head' node
1909                 ins_mode = ins_mode_after_head
1910                 process_token(t)
1911         }
1912         ins_mode_in_head = function (t) {
1913                 var ail, el
1914                 if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
1915                         insert_character(t)
1916                         return
1917                 }
1918                 if (t.type === TYPE_COMMENT) {
1919                         insert_comment(t)
1920                         return
1921                 }
1922                 if (t.type === TYPE_DOCTYPE) {
1923                         parse_error()
1924                         return
1925                 }
1926                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1927                         ins_mode_in_body(t)
1928                         return
1929                 }
1930                 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
1931                         el = insert_html_element(t)
1932                         open_els.shift()
1933                         t.acknowledge_self_closing()
1934                         return
1935                 }
1936                 if (t.type === TYPE_START_TAG && t.name === 'meta') {
1937                         el = insert_html_element(t)
1938                         open_els.shift()
1939                         t.acknowledge_self_closing()
1940                         // fixfull encoding stuff
1941                         return
1942                 }
1943                 if (t.type === TYPE_START_TAG && t.name === 'title') {
1944                         parse_generic_rcdata_text(t)
1945                         return
1946                 }
1947                 if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
1948                         parse_generic_raw_text(t)
1949                         return
1950                 }
1951                 if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
1952                         insert_html_element(t)
1953                         ins_mode = ins_mode_in_head_noscript
1954                         return
1955                 }
1956                 if (t.type === TYPE_START_TAG && t.name === 'script') {
1957                         ail = adjusted_insertion_location()
1958                         el = token_to_element(t, NS_HTML, ail)
1959                         el.flag('parser-inserted', true)
1960                         // fixfull frament case
1961                         ail[0].children.splice(ail[1], 0, el)
1962                         open_els.unshift(el)
1963                         tok_state = tok_state_script_data
1964                         original_ins_mode = ins_mode // make sure orig... is defined
1965                         ins_mode = ins_mode_text
1966                         return
1967                 }
1968                 if (t.type === TYPE_END_TAG && t.name === 'head') {
1969                         open_els.shift() // will be a head element... spec says so
1970                         ins_mode = ins_mode_after_head
1971                         return
1972                 }
1973                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
1974                         ins_mode_in_head_else(t)
1975                         return
1976                 }
1977                 if (t.type === TYPE_START_TAG && t.name === 'template') {
1978                         insert_html_element(t)
1979                         afe_push_marker()
1980                         flag_frameset_ok = false
1981                         ins_mode = ins_mode_in_template
1982                         template_ins_modes.unshift(ins_mode_in_template)
1983                         return
1984                 }
1985                 if (t.type === TYPE_END_TAG && t.name === 'template') {
1986                         if (template_tag_is_open()) {
1987                                 generate_implied_end_tags
1988                                 if (open_els[0].name !== 'template') {
1989                                         parse_error()
1990                                 }
1991                                 while (true) {
1992                                         el = open_els.shift()
1993                                         if (el.name === 'template' && el.namespace === NS_HTML) {
1994                                                 break
1995                                         }
1996                                 }
1997                                 clear_afe_to_marker()
1998                                 template_ins_modes.shift()
1999                                 reset_ins_mode()
2000                         } else {
2001                                 parse_error()
2002                         }
2003                         return
2004                 }
2005                 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2006                         parse_error()
2007                         return
2008                 }
2009                 ins_mode_in_head_else(t)
2010         }
2011
2012         // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
2013         ins_mode_in_head_noscript_else = function (t) {
2014                 parse_error()
2015                 open_els.shift()
2016                 ins_mode = ins_mode_in_head
2017                 process_token(t)
2018         }
2019         ins_mode_in_head_noscript = function (t) {
2020                 if (t.type === TYPE_DOCTYPE) {
2021                         parse_error()
2022                         return
2023                 }
2024                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2025                         ins_mode_in_body(t)
2026                         return
2027                 }
2028                 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
2029                         open_els.shift()
2030                         ins_mode = ins_mode_in_head
2031                         return
2032                 }
2033                 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
2034                         ins_mode_in_head(t)
2035                         return
2036                 }
2037                 if (t.type === TYPE_END_TAG && t.name === 'br') {
2038                         ins_mode_in_head_noscript_else(t)
2039                         return
2040                 }
2041                 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
2042                         parse_error()
2043                         return
2044                 }
2045                 // Anything else
2046                 ins_mode_in_head_noscript_else(t)
2047         }
2048
2049         // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
2050         ins_mode_after_head_else = function (t) {
2051                 var body_tok
2052                 body_tok = new_open_tag('body')
2053                 insert_html_element(body_tok)
2054                 ins_mode = ins_mode_in_body
2055                 process_token(t)
2056         }
2057         ins_mode_after_head = function (t) {
2058                 var el, i, j, len
2059                 if (is_space_tok(t)) {
2060                         insert_character(t)
2061                         return
2062                 }
2063                 if (t.type === TYPE_COMMENT) {
2064                         insert_comment(t)
2065                         return
2066                 }
2067                 if (t.type === TYPE_DOCTYPE) {
2068                         parse_error()
2069                         return
2070                 }
2071                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2072                         ins_mode_in_body(t)
2073                         return
2074                 }
2075                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2076                         insert_html_element(t)
2077                         flag_frameset_ok = false
2078                         ins_mode = ins_mode_in_body
2079                         return
2080                 }
2081                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2082                         insert_html_element(t)
2083                         ins_mode = ins_mode_in_frameset
2084                         return
2085                 }
2086                 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
2087                         parse_error()
2088                         open_els.unshift(head_element_pointer)
2089                         ins_mode_in_head(t)
2090                         for (i = 0; i < open_els.length; ++i) {
2091                                 el = open_els[i]
2092                                 if (el === head_element_pointer) {
2093                                         open_els.splice(i, 1)
2094                                         return
2095                                 }
2096                         }
2097                         return
2098                 }
2099                 if (t.type === TYPE_END_TAG && t.name === 'template') {
2100                         ins_mode_in_head(t)
2101                         return
2102                 }
2103                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2104                         ins_mode_after_head_else(t)
2105                         return
2106                 }
2107                 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2108                         parse_error()
2109                         return
2110                 }
2111                 // Anything else
2112                 ins_mode_after_head_else(t)
2113         }
2114
2115         // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
2116         in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
2117                 var el, i, node
2118                 node = open_els[0]
2119                 while (true) {
2120                         if (node.name === name && node.namespace === NS_HTML) {
2121                                 generate_implied_end_tags(name) // arg is exception
2122                                 if (node !== open_els[0]) {
2123                                         parse_error()
2124                                 }
2125                                 while (true) {
2126                                         el = open_els.shift()
2127                                         if (el === node) {
2128                                                 return
2129                                         }
2130                                 }
2131                         }
2132                         if (special_elements[node.name] === node.namespace) {
2133                                 parse_error()
2134                                 return
2135                         }
2136                         for (i = 0; i < open_els.length; ++i) {
2137                                 el = open_els[i]
2138                                 if (node === el) {
2139                                         node = open_els[i + 1]
2140                                         break
2141                                 }
2142                         }
2143                 }
2144         }
2145         ins_mode_in_body = function (t) {
2146                 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, ref, ref1, ref2, ref3, ref4, root_attrs, s, second, second_i, u, w, y, z
2147                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2148                         parse_error()
2149                         return
2150                 }
2151                 if (is_space_tok(t)) {
2152                         reconstruct_afe()
2153                         insert_character(t)
2154                         return
2155                 }
2156                 if (t.type === TYPE_TEXT) {
2157                         reconstruct_afe()
2158                         insert_character(t)
2159                         flag_frameset_ok = false
2160                         return
2161                 }
2162                 if (t.type === TYPE_COMMENT) {
2163                         insert_comment(t)
2164                         return
2165                 }
2166                 if (t.type === TYPE_DOCTYPE) {
2167                         parse_error()
2168                         return
2169                 }
2170                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2171                         parse_error()
2172                         if (template_tag_is_open()) {
2173                                 return
2174                         }
2175                         root_attrs = open_els[open_els.length - 1].attrs
2176                         for (i = 0; i < t.attrs_a.length; ++i) {
2177                                 a = t.attrs_a[i]
2178                                 if (root_attrs[a[0]] == null) {
2179                                         root_attrs[a[0]] = a[1]
2180                                 }
2181                         }
2182                         return
2183                 }
2184
2185                 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2186                         ins_mode_in_head(t)
2187                         return
2188                 }
2189                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2190                         parse_error()
2191                         if (open_els.length < 2) {
2192                                 return
2193                         }
2194                         second = open_els[open_els.length - 2]
2195                         if (second.namespace !== NS_HTML) {
2196                                 return
2197                         }
2198                         if (second.name !== 'body') {
2199                                 return
2200                         }
2201                         if (template_tag_is_open()) {
2202                                 return
2203                         }
2204                         flag_frameset_ok = false
2205                         for (i = 0; i < t.attrs_a.length; ++i) {
2206                                 a = t.attrs_a[i]
2207                                 if (second.attrs[a[0]] == null) {
2208                                         second.attrs[a[0]] = a[1]
2209                                 }
2210                         }
2211                         return
2212                 }
2213                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2214                         parse_error()
2215                         if (open_els.length < 2) {
2216                                 return
2217                         }
2218                         second_i = open_els.length - 2
2219                         second = open_els[second_i]
2220                         if (second.namespace !== NS_HTML) {
2221                                 return
2222                         }
2223                         if (second.name !== 'body') {
2224                                 return
2225                         }
2226                         if (flag_frameset_ok === false) {
2227                                 return
2228                         }
2229                         if (second.parent != null) {
2230                                 for (i = 0; i < second.parent.children.length; ++i) {
2231                                         el = second.parent.children[i]
2232                                         if (el === second) {
2233                                                 second.parent.children.splice(i, 1)
2234                                                 break
2235                                         }
2236                                 }
2237                         }
2238                         open_els.splice(second_i, 1)
2239                         // pop everything except the "root html element"
2240                         while (open_els.length > 1) {
2241                                 open_els.shift()
2242                         }
2243                         insert_html_element(t)
2244                         ins_mode = ins_mode_in_frameset
2245                         return
2246                 }
2247                 if (t.type === TYPE_EOF) {
2248                         ok_tags = {
2249                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2250                                 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2251                                 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2252                         }
2253                         for (i = 0; i < open_els.length; ++i) {
2254                                 el = open_els[i]
2255                                 if (ok_tags[t.name] !== el.namespace) {
2256                                         parse_error()
2257                                         break
2258                                 }
2259                         }
2260                         if (template_ins_modes.length > 0) {
2261                                 ins_mode_in_template(t)
2262                         } else {
2263                                 stop_parsing()
2264                         }
2265                         return
2266                 }
2267                 if (t.type === TYPE_END_TAG && t.name === 'body') {
2268                         if (!is_in_scope('body', NS_HTML)) {
2269                                 parse_error()
2270                                 return
2271                         }
2272                         ok_tags = {
2273                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2274                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2275                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2276                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2277                                 html: NS_HTML
2278                         }
2279                         for (i = 0; i < open_els.length; ++i) {
2280                                 el = open_els[i]
2281                                 if (ok_tags[t.name] !== el.namespace) {
2282                                         parse_error()
2283                                         break
2284                                 }
2285                         }
2286                         ins_mode = ins_mode_after_body
2287                         return
2288                 }
2289                 if (t.type === TYPE_END_TAG && t.name === 'html') {
2290                         if (!is_in_scope('body', NS_HTML)) {
2291                                 parse_error()
2292                                 return
2293                         }
2294                         ok_tags = {
2295                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2296                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2297                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2298                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2299                                 html: NS_HTML
2300                         }
2301                         for (i = 0; i < open_els.length; ++i) {
2302                                 el = open_els[i]
2303                                 if (ok_tags[t.name] !== el.namespace) {
2304                                         parse_error()
2305                                         break
2306                                 }
2307                         }
2308                         ins_mode = ins_mode_after_body
2309                         process_token(t)
2310                         return
2311                 }
2312                 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2313                         close_p_if_in_button_scope()
2314                         insert_html_element(t)
2315                         return
2316                 }
2317                 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2318                         close_p_if_in_button_scope()
2319                         if (h_tags[open_els[0].name] === open_els[0].namespace) {
2320                                 parse_error()
2321                                 open_els.shift()
2322                         }
2323                         insert_html_element(t)
2324                         return
2325                 }
2326                 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2327                         close_p_if_in_button_scope()
2328                         insert_html_element(t)
2329                         eat_next_token_if_newline()
2330                         flag_frameset_ok = false
2331                         return
2332                 }
2333                 if (t.type === TYPE_START_TAG && t.name === 'form') {
2334                         if (!(form_element_pointer === null || template_tag_is_open())) {
2335                                 parse_error()
2336                                 return
2337                         }
2338                         close_p_if_in_button_scope()
2339                         el = insert_html_element(t)
2340                         if (!template_tag_is_open()) {
2341                                 form_element_pointer = el
2342                         }
2343                         return
2344                 }
2345                 if (t.type === TYPE_START_TAG && t.name === 'li') {
2346                         flag_frameset_ok = false
2347                         for (i = 0; i < open_els.length; ++i) {
2348                                 node = open_els[i]
2349                                 if (node.name === 'li' && node.namespace === NS_HTML) {
2350                                         generate_implied_end_tags('li') // arg is exception
2351                                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2352                                                 parse_error()
2353                                         }
2354                                         while (true) {
2355                                                 el = open_els.shift()
2356                                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2357                                                         break
2358                                                 }
2359                                         }
2360                                         break
2361                                 }
2362                                 if (el_is_special_not_adp(node)) {
2363                                         break
2364                                 }
2365                         }
2366                         close_p_if_in_button_scope()
2367                         insert_html_element(t)
2368                         return
2369                 }
2370                 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2371                         flag_frameset_ok = false
2372                         for (i = 0; i < open_els.length; ++i) {
2373                                 node = open_els[i]
2374                                 if (node.name === 'dd' && node.namespace === NS_HTML) {
2375                                         generate_implied_end_tags('dd') // arg is exception
2376                                         if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2377                                                 parse_error()
2378                                         }
2379                                         while (true) {
2380                                                 el = open_els.shift()
2381                                                 if (el.name === 'dd' && el.namespace === NS_HTML) {
2382                                                         break
2383                                                 }
2384                                         }
2385                                         break
2386                                 }
2387                                 if (node.name === 'dt' && node.namespace === NS_HTML) {
2388                                         generate_implied_end_tags('dt') // arg is exception
2389                                         if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2390                                                 parse_error()
2391                                         }
2392                                         while (true) {
2393                                                 el = open_els.shift()
2394                                                 if (el.name === 'dt' && el.namespace === NS_HTML) {
2395                                                         break
2396                                                 }
2397                                         }
2398                                         break
2399                                 }
2400                                 if (el_is_special_not_adp(node)) {
2401                                         break
2402                                 }
2403                         }
2404                         close_p_if_in_button_scope()
2405                         insert_html_element(t)
2406                         return
2407                 }
2408                 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2409                         close_p_if_in_button_scope()
2410                         insert_html_element(t)
2411                         tok_state = tok_state_plaintext
2412                         return
2413                 }
2414                 if (t.type === TYPE_START_TAG && t.name === 'button') {
2415                         if (is_in_scope('button', NS_HTML)) {
2416                                 parse_error()
2417                                 generate_implied_end_tags()
2418                                 while (true) {
2419                                         el = open_els.shift()
2420                                         if (el.name === 'button' && el.namespace === NS_HTML) {
2421                                                 break
2422                                         }
2423                                 }
2424                         }
2425                         reconstruct_afe()
2426                         insert_html_element(t)
2427                         flag_frameset_ok = false
2428                         return
2429                 }
2430                 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2431                         if (!is_in_scope(t.name, NS_HTML)) {
2432                                 parse_error()
2433                                 return
2434                         }
2435                         generate_implied_end_tags()
2436                         if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2437                                 parse_error()
2438                         }
2439                         while (true) {
2440                                 el = open_els.shift()
2441                                 if (el.name === t.name && el.namespace === NS_HTML) {
2442                                         return
2443                                 }
2444                         }
2445                         return
2446                 }
2447                 if (t.type === TYPE_END_TAG && t.name === 'form') {
2448                         if (!template_tag_is_open()) {
2449                                 node = form_element_pointer
2450                                 form_element_pointer = null
2451                                 if (node === null || !el_is_in_scope(node)) {
2452                                         parse_error()
2453                                         return
2454                                 }
2455                                 generate_implied_end_tags()
2456                                 if (open_els[0] !== node) {
2457                                         parse_error()
2458                                 }
2459                                 for (i = 0; i < open_els.length; ++i) {
2460                                         el = open_els[i]
2461                                         if (el === node) {
2462                                                 open_els.splice(i, 1)
2463                                                 break
2464                                         }
2465                                 }
2466                         } else {
2467                                 if (!is_in_scope('form', NS_HTML)) {
2468                                         parse_error()
2469                                         return
2470                                 }
2471                                 generate_implied_end_tags()
2472                                 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2473                                         parse_error()
2474                                 }
2475                                 while (true) {
2476                                         el = open_els.shift()
2477                                         if (el.name === 'form' && el.namespace === NS_HTML) {
2478                                                 break
2479                                         }
2480                                 }
2481                         }
2482                         return
2483                 }
2484                 if (t.type === TYPE_END_TAG && t.name === 'p') {
2485                         if (!is_in_button_scope('p', NS_HTML)) {
2486                                 parse_error()
2487                                 insert_html_element(new_open_tag('p'))
2488                         }
2489                         close_p_element()
2490                         return
2491                 }
2492                 if (t.type === TYPE_END_TAG && t.name === 'li') {
2493                         if (!is_in_li_scope('li', NS_HTML)) {
2494                                 parse_error()
2495                                 return
2496                         }
2497                         generate_implied_end_tags('li') // arg is exception
2498                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2499                                 parse_error()
2500                         }
2501                         while (true) {
2502                                 el = open_els.shift()
2503                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2504                                         break
2505                                 }
2506                         }
2507                         return
2508                 }
2509                 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2510                         if (!is_in_scope(t.name, NS_HTML)) {
2511                                 parse_error()
2512                                 return
2513                         }
2514                         generate_implied_end_tags(t.name) // arg is exception
2515                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2516                                 parse_error()
2517                         }
2518                         while (true) {
2519                                 el = open_els.shift()
2520                                 if (el.name === t.name && el.namespace === NS_HTML) {
2521                                         break
2522                                 }
2523                         }
2524                         return
2525                 }
2526                 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2527                         h_in_scope = false
2528                         for (i = 0; i < open_els.length; ++i) {
2529                                 el = open_els[i]
2530                                 if (h_tags[el.name] === el.namespace) {
2531                                         h_in_scope = true
2532                                         break
2533                                 }
2534                                 if (standard_scopers[el.name] === el.namespace) {
2535                                         break
2536                                 }
2537                         }
2538                         if (!h_in_scope) {
2539                                 parse_error()
2540                                 return
2541                         }
2542                         generate_implied_end_tags()
2543                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2544                                 parse_error()
2545                         }
2546                         while (true) {
2547                                 el = open_els.shift()
2548                                 if (h_tags[el.name] === el.namespace) {
2549                                         break
2550                                 }
2551                         }
2552                         return
2553                 }
2554                 // deep breath!
2555                 if (t.type === TYPE_START_TAG && t.name === 'a') {
2556                         // If the list of active formatting elements contains an a element
2557                         // between the end of the list and the last marker on the list (or
2558                         // the start of the list if there is no marker on the list), then
2559                         // this is a parse error; run the adoption agency algorithm for the
2560                         // tag name "a", then remove that element from the list of active
2561                         // formatting elements and the stack of open elements if the
2562                         // adoption agency algorithm didn't already remove it (it might not
2563                         // have if the element is not in table scope).
2564                         found = false
2565                         for (i = 0; i < afe.length; ++i) {
2566                                 el = afe[i]
2567                                 if (el.type === TYPE_AFE_MARKER) {
2568                                         break
2569                                 }
2570                                 if (el.name === 'a' && el.namespace === NS_HTML) {
2571                                         found = el
2572                                 }
2573                         }
2574                         if (found != null) {
2575                                 parse_error()
2576                                 adoption_agency('a')
2577                                 for (i = 0; i < afe.length; ++i) {
2578                                         el = afe[i]
2579                                         if (el === found) {
2580                                                 afe.splice(i, 1)
2581                                         }
2582                                 }
2583                                 for (i = 0; i < open_els.length; ++i) {
2584                                         el = open_els[i]
2585                                         if (el === found) {
2586                                                 open_els.splice(i, 1)
2587                                         }
2588                                 }
2589                         }
2590                         reconstruct_afe()
2591                         el = insert_html_element(t)
2592                         afe_push(el)
2593                         return
2594                 }
2595                 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2596                         reconstruct_afe()
2597                         el = insert_html_element(t)
2598                         afe_push(el)
2599                         return
2600                 }
2601                 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2602                         reconstruct_afe()
2603                         if (is_in_scope('nobr', NS_HTML)) {
2604                                 parse_error()
2605                                 adoption_agency('nobr')
2606                                 reconstruct_afe()
2607                         }
2608                         el = insert_html_element(t)
2609                         afe_push(el)
2610                         return
2611                 }
2612                 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2613                         adoption_agency(t.name)
2614                         return
2615                 }
2616                 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2617                         reconstruct_afe()
2618                         insert_html_element(t)
2619                         afe_push_marker()
2620                         flag_frameset_ok = false
2621                         return
2622                 }
2623                 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2624                         if (!is_in_scope(t.name, NS_HTML)) {
2625                                 parse_error()
2626                                 return
2627                         }
2628                         generate_implied_end_tags()
2629                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2630                                 parse_error()
2631                         }
2632                         while (true) {
2633                                 el = open_els.shift()
2634                                 if (el.name === t.name && el.namespace === NS_HTML) {
2635                                         break
2636                                 }
2637                         }
2638                         clear_afe_to_marker()
2639                         return
2640                 }
2641                 if (t.type === TYPE_START_TAG && t.name === 'table') {
2642                         if (doc.flag('quirks mode') !== QUIRKS_YES) {
2643                                 close_p_if_in_button_scope() // test
2644                         }
2645                         insert_html_element(t)
2646                         flag_frameset_ok = false
2647                         ins_mode = ins_mode_in_table
2648                         return
2649                 }
2650                 if (t.type === TYPE_END_TAG && t.name === 'br') {
2651                         parse_error()
2652                         // W3C: t.type = TYPE_START_TAG
2653                         t = new_open_tag('br') // WHATWG
2654                         // fall through
2655                 }
2656                 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2657                         reconstruct_afe()
2658                         insert_html_element(t)
2659                         open_els.shift()
2660                         t.acknowledge_self_closing()
2661                         flag_frameset_ok = false
2662                         return
2663                 }
2664                 if (t.type === TYPE_START_TAG && t.name === 'input') {
2665                         reconstruct_afe()
2666                         insert_html_element(t)
2667                         open_els.shift()
2668                         t.acknowledge_self_closing()
2669                         if (!is_input_hidden_tok(t)) {
2670                                 flag_frameset_ok = false
2671                         }
2672                         return
2673                 }
2674                 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2675                         // WHATWG adds 'menuitem' for this block
2676                         insert_html_element(t)
2677                         open_els.shift()
2678                         t.acknowledge_self_closing()
2679                         return
2680                 }
2681                 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2682                         close_p_if_in_button_scope()
2683                         insert_html_element(t)
2684                         open_els.shift()
2685                         t.acknowledge_self_closing()
2686                         flag_frameset_ok = false
2687                         return
2688                 }
2689                 if (t.type === TYPE_START_TAG && t.name === 'image') {
2690                         parse_error()
2691                         t.name = 'img'
2692                         process_token(t)
2693                         return
2694                 }
2695                 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2696                         parse_error()
2697                         if (template_tag_is_open() === false && form_element_pointer !== null) {
2698                                 return
2699                         }
2700                         t.acknowledge_self_closing()
2701                         flag_frameset_ok = false
2702                         close_p_if_in_button_scope()
2703                         el = insert_html_element(new_open_tag('form'))
2704                         if (!template_tag_is_open()) {
2705                                 form_element_pointer = el
2706                         }
2707                         for (i = 0; i < t.attrs_a.length; ++i) {
2708                                 a = t.attrs_a[i]
2709                                 if (a[0] === 'action') {
2710                                         el.attrs['action'] = a[1]
2711                                         break
2712                                 }
2713                         }
2714                         insert_html_element(new_open_tag('hr'))
2715                         open_els.shift()
2716                         reconstruct_afe()
2717                         insert_html_element(new_open_tag('label'))
2718                         // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2719                         input_el = new_open_tag('input')
2720                         prompt = null
2721                         for (i = 0; i < t.attrs_a.length; ++i) {
2722                                 a = t.attrs_a[i]
2723                                 if (a[0] === 'prompt') {
2724                                         prompt = a[1]
2725                                 }
2726                                 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2727                                         input_el.attrs_a.push([a[0], a[1]])
2728                                 }
2729                         }
2730                         input_el.attrs_a.push(['name', 'isindex'])
2731                         // fixfull this next bit is in english... internationalize?
2732                         if (prompt == null) {
2733                                 prompt = "This is a searchable index. Enter search keywords: "
2734                         }
2735                         insert_character(new_character_token(prompt)) // fixfull split
2736                         // TODO submit typo "balue" in spec
2737                         insert_html_element(input_el)
2738                         open_els.shift()
2739                         // insert_character('') // you can put chars here if prompt attr missing
2740                         open_els.shift()
2741                         insert_html_element(new_open_tag('hr'))
2742                         open_els.shift()
2743                         open_els.shift()
2744                         if (!template_tag_is_open()) {
2745                                 form_element_pointer = null
2746                         }
2747                         return
2748                 }
2749                 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2750                         insert_html_element(t)
2751                         eat_next_token_if_newline()
2752                         tok_state = tok_state_rcdata
2753                         original_ins_mode = ins_mode
2754                         flag_frameset_ok = false
2755                         ins_mode = ins_mode_text
2756                         return
2757                 }
2758                 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2759                         close_p_if_in_button_scope()
2760                         reconstruct_afe()
2761                         flag_frameset_ok = false
2762                         parse_generic_raw_text(t)
2763                         return
2764                 }
2765                 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2766                         flag_frameset_ok = false
2767                         parse_generic_raw_text(t)
2768                         return
2769                 }
2770                 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2771                         parse_generic_raw_text(t)
2772                         return
2773                 }
2774                 if (t.type === TYPE_START_TAG && t.name === 'select') {
2775                         reconstruct_afe()
2776                         insert_html_element(t)
2777                         flag_frameset_ok = false
2778                         if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2779                                 ins_mode = ins_mode_in_select_in_table
2780                         } else {
2781                                 ins_mode = ins_mode_in_select
2782                         }
2783                         return
2784                 }
2785                 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2786                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2787                                 open_els.shift()
2788                         }
2789                         reconstruct_afe()
2790                         insert_html_element(t)
2791                         return
2792                 }
2793 // this comment block implements the W3C spec
2794 //              if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2795 //                      if is_in_scope 'ruby', NS_HTML
2796 //                              generate_implied_end_tags()
2797 //                              unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2798 //                                      parse_error()
2799 //                      insert_html_element t
2800 //                      return
2801 //              if t.type === TYPE_START_TAG && t.name === 'rt'
2802 //                      if is_in_scope 'ruby', NS_HTML
2803 //                              generate_implied_end_tags 'rtc' // arg === exception
2804 //                              unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2805 //                                      parse_error()
2806 //                      insert_html_element t
2807 //                      return
2808 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2809                 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2810                         if (is_in_scope('ruby', NS_HTML)) {
2811                                 generate_implied_end_tags()
2812                                 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2813                                         parse_error()
2814                                 }
2815                         }
2816                         insert_html_element(t)
2817                         return
2818                 }
2819                 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2820                         if (is_in_scope('ruby', NS_HTML)) {
2821                                 generate_implied_end_tags('rtc')
2822                                 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2823                                         parse_error()
2824                                 }
2825                         }
2826                         insert_html_element(t)
2827                         return
2828                 }
2829 // end WHATWG chunk
2830                 if (t.type === TYPE_START_TAG && t.name === 'math') {
2831                         reconstruct_afe()
2832                         adjust_mathml_attributes(t)
2833                         adjust_foreign_attributes(t)
2834                         insert_foreign_element(t, NS_MATHML)
2835                         if (t.flag('self-closing')) {
2836                                 open_els.shift()
2837                                 t.acknowledge_self_closing()
2838                         }
2839                         return
2840                 }
2841                 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2842                         reconstruct_afe()
2843                         adjust_svg_attributes(t)
2844                         adjust_foreign_attributes(t)
2845                         insert_foreign_element(t, NS_SVG)
2846                         if (t.flag('self-closing')) {
2847                                 open_els.shift()
2848                                 t.acknowledge_self_closing()
2849                         }
2850                         return
2851                 }
2852                 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2853                         parse_error()
2854                         return
2855                 }
2856                 if (t.type === TYPE_START_TAG) { // any other start tag
2857                         reconstruct_afe()
2858                         insert_html_element(t)
2859                         return
2860                 }
2861                 if (t.type === TYPE_END_TAG) { // any other end tag
2862                         in_body_any_other_end_tag(t.name)
2863                         return
2864                 }
2865         }
2866
2867         // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2868         ins_mode_text = function (t) {
2869                 if (t.type === TYPE_TEXT) {
2870                         insert_character(t)
2871                         return
2872                 }
2873                 if (t.type === TYPE_EOF) {
2874                         parse_error()
2875                         if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
2876                                 open_els[0].flag('already started', true)
2877                         }
2878                         open_els.shift()
2879                         ins_mode = original_ins_mode
2880                         process_token(t)
2881                         return
2882                 }
2883                 if (t.type === TYPE_END_TAG && t.name === 'script') {
2884                         open_els.shift()
2885                         ins_mode = original_ins_mode
2886                         // fixfull the spec seems to assume that I'm going to run the script
2887                         // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2888                         return
2889                 }
2890                 if (t.type === TYPE_END_TAG) {
2891                         open_els.shift()
2892                         ins_mode = original_ins_mode
2893                         return
2894                 }
2895         }
2896
2897         // the functions below implement the tokenizer states described here:
2898         // http://www.w3.org/TR/html5/syntax.html#tokenization
2899
2900         // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2901         ins_mode_in_table_else = function (t) {
2902                 parse_error()
2903                 flag_foster_parenting = true
2904                 ins_mode_in_body(t)
2905                 flag_foster_parenting = false
2906         }
2907         ins_mode_in_table = function (t) {
2908                 var el
2909                 switch (t.type) {
2910                         case TYPE_TEXT:
2911                                 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
2912                                         pending_table_character_tokens = []
2913                                         original_ins_mode = ins_mode
2914                                         ins_mode = ins_mode_in_table_text
2915                                         process_token(t)
2916                                 } else {
2917                                         ins_mode_in_table_else(t)
2918                                 }
2919                         break
2920                         case TYPE_COMMENT:
2921                                 insert_comment(t)
2922                         break
2923                         case TYPE_DOCTYPE:
2924                                 parse_error()
2925                         break
2926                         case TYPE_START_TAG:
2927                                 switch (t.name) {
2928                                         case 'caption':
2929                                                 clear_stack_to_table_context()
2930                                                 afe_push_marker()
2931                                                 insert_html_element(t)
2932                                                 ins_mode = ins_mode_in_caption
2933                                         break
2934                                         case 'colgroup':
2935                                                 clear_stack_to_table_context()
2936                                                 insert_html_element(t)
2937                                                 ins_mode = ins_mode_in_column_group
2938                                         break
2939                                         case 'col':
2940                                                 clear_stack_to_table_context()
2941                                                 insert_html_element(new_open_tag('colgroup'))
2942                                                 ins_mode = ins_mode_in_column_group
2943                                                 process_token(t)
2944                                         break
2945                                         case 'tbody':
2946                                         case 'tfoot':
2947                                         case 'thead':
2948                                                 clear_stack_to_table_context()
2949                                                 insert_html_element(t)
2950                                                 ins_mode = ins_mode_in_table_body
2951                                         break
2952                                         case 'td':
2953                                         case 'th':
2954                                         case 'tr':
2955                                                 clear_stack_to_table_context()
2956                                                 insert_html_element(new_open_tag('tbody'))
2957                                                 ins_mode = ins_mode_in_table_body
2958                                                 process_token(t)
2959                                         break
2960                                         case 'table':
2961                                                 parse_error()
2962                                                 if (is_in_table_scope('table', NS_HTML)) {
2963                                                         while (true) {
2964                                                                 el = open_els.shift()
2965                                                                 if (el.name === 'table' && el.namespace === NS_HTML) {
2966                                                                         break
2967                                                                 }
2968                                                         }
2969                                                         reset_ins_mode()
2970                                                         process_token(t)
2971                                                 }
2972                                         break
2973                                         case 'style':
2974                                         case 'script':
2975                                         case 'template':
2976                                                 ins_mode_in_head(t)
2977                                         break
2978                                         case 'input':
2979                                                 if (!is_input_hidden_tok(t)) {
2980                                                         ins_mode_in_table_else(t)
2981                                                 } else {
2982                                                         parse_error()
2983                                                         el = insert_html_element(t)
2984                                                         open_els.shift()
2985                                                         t.acknowledge_self_closing()
2986                                                 }
2987                                         break
2988                                         case 'form':
2989                                                 parse_error()
2990                                                 if (form_element_pointer != null) {
2991                                                         return
2992                                                 }
2993                                                 if (template_tag_is_open()) {
2994                                                         return
2995                                                 }
2996                                                 form_element_pointer = insert_html_element(t)
2997                                                 open_els.shift()
2998                                         break
2999                                         default:
3000                                                 ins_mode_in_table_else(t)
3001                                 }
3002                         break
3003                         case TYPE_END_TAG:
3004                                 switch (t.name) {
3005                                         case 'table':
3006                                                 if (is_in_table_scope('table', NS_HTML)) {
3007                                                         while (true) {
3008                                                                 el = open_els.shift()
3009                                                                 if (el.name === 'table' && el.namespace === NS_HTML) {
3010                                                                         break
3011                                                                 }
3012                                                         }
3013                                                         reset_ins_mode()
3014                                                 } else {
3015                                                         parse_error()
3016                                                 }
3017                                         break
3018                                         case 'body':
3019                                         case 'caption':
3020                                         case 'col':
3021                                         case 'colgroup':
3022                                         case 'html':
3023                                         case 'tbody':
3024                                         case 'td':
3025                                         case 'tfoot':
3026                                         case 'th':
3027                                         case 'thead':
3028                                         case 'tr':
3029                                                 parse_error()
3030                                         break
3031                                         case 'template':
3032                                                 ins_mode_in_head(t)
3033                                         break
3034                                         default:
3035                                                 ins_mode_in_table_else(t)
3036                                 }
3037                         break
3038                         case TYPE_EOF:
3039                                 ins_mode_in_body(t)
3040                         break
3041                         default:
3042                                 ins_mode_in_table_else(t)
3043                 }
3044         }
3045
3046         // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
3047         ins_mode_in_table_text = function (t) {
3048                 var all_space, i, l, m, old
3049                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3050                         // from javascript?
3051                         parse_error()
3052                         return
3053                 }
3054                 if (t.type === TYPE_TEXT) {
3055                         pending_table_character_tokens.push(t)
3056                         return
3057                 }
3058                 // Anything else
3059                 all_space = true
3060                 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3061                         old = pending_table_character_tokens[i]
3062                         if (!is_space_tok(old)) {
3063                                 all_space = false
3064                                 break
3065                         }
3066                 }
3067                 if (all_space) {
3068                         for (i = 0; i < pending_table_character_tokens.length; ++i) {
3069                                 old = pending_table_character_tokens[i]
3070                                 insert_character(old)
3071                         }
3072                 } else {
3073                         for (i = 0; i < pending_table_character_tokens.length; ++i) {
3074                                 old = pending_table_character_tokens[i]
3075                                 ins_mode_in_table_else(old)
3076                         }
3077                 }
3078                 pending_table_character_tokens = []
3079                 ins_mode = original_ins_mode
3080                 process_token(t)
3081         }
3082
3083         // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
3084         ins_mode_in_caption = function (t) {
3085                 var el
3086                 if (t.type === TYPE_END_TAG && t.name === 'caption') {
3087                         if (is_in_table_scope('caption', NS_HTML)) {
3088                                 generate_implied_end_tags()
3089                                 if (open_els[0].name !== 'caption') {
3090                                         parse_error()
3091                                 }
3092                                 while (true) {
3093                                         el = open_els.shift()
3094                                         if (el.name === 'caption' && el.namespace === NS_HTML) {
3095                                                 break
3096                                         }
3097                                 }
3098                                 clear_afe_to_marker()
3099                                 ins_mode = ins_mode_in_table
3100                         } else {
3101                                 parse_error()
3102                                 // fragment case
3103                         }
3104                         return
3105                 }
3106                 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3107                         parse_error()
3108                         if (is_in_table_scope('caption', NS_HTML)) {
3109                                 while (true) {
3110                                         el = open_els.shift()
3111                                         if (el.name === 'caption' && el.namespace === NS_HTML) {
3112                                                 break
3113                                         }
3114                                 }
3115                                 clear_afe_to_marker()
3116                                 ins_mode = ins_mode_in_table
3117                                 process_token(t)
3118                         }
3119                         // else fragment case
3120                         return
3121                 }
3122                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3123                         parse_error()
3124                         return
3125                 }
3126                 // Anything else
3127                 ins_mode_in_body(t)
3128         }
3129
3130         // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
3131         ins_mode_in_column_group = function (t) {
3132                 var el
3133                 if (is_space_tok(t)) {
3134                         insert_character(t)
3135                         return
3136                 }
3137                 if (t.type === TYPE_COMMENT) {
3138                         insert_comment(t)
3139                         return
3140                 }
3141                 if (t.type === TYPE_DOCTYPE) {
3142                         parse_error()
3143                         return
3144                 }
3145                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3146                         ins_mode_in_body(t)
3147                         return
3148                 }
3149                 if (t.type === TYPE_START_TAG && t.name === 'col') {
3150                         el = insert_html_element(t)
3151                         open_els.shift()
3152                         t.acknowledge_self_closing()
3153                         return
3154                 }
3155                 if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
3156                         if (open_els[0].name === 'colgroup' && open_els.namespace === NS_HTML) {
3157                                 open_els.shift()
3158                                 ins_mode = ins_mode_in_table
3159                         } else {
3160                                 parse_error()
3161                         }
3162                         return
3163                 }
3164                 if (t.type === TYPE_END_TAG && t.name === 'col') {
3165                         parse_error()
3166                         return
3167                 }
3168                 if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
3169                         ins_mode_in_head(t)
3170                         return
3171                 }
3172                 if (t.type === TYPE_EOF) {
3173                         ins_mode_in_body(t)
3174                         return
3175                 }
3176                 // Anything else
3177                 if (open_els[0].name !== 'colgroup') {
3178                         parse_error()
3179                         return
3180                 }
3181                 open_els.shift()
3182                 ins_mode = ins_mode_in_table
3183                 process_token(t)
3184         }
3185
3186         // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
3187         ins_mode_in_table_body = function (t) {
3188                 var el, has, i
3189                 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3190                         clear_stack_to_table_body_context()
3191                         insert_html_element(t)
3192                         ins_mode = ins_mode_in_row
3193                         return
3194                 }
3195                 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3196                         parse_error()
3197                         clear_stack_to_table_body_context()
3198                         insert_html_element(new_open_tag('tr'))
3199                         ins_mode = ins_mode_in_row
3200                         process_token(t)
3201                         return
3202                 }
3203                 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3204                         if (!is_in_table_scope(t.name, NS_HTML)) {
3205                                 parse_error()
3206                                 return
3207                         }
3208                         clear_stack_to_table_body_context()
3209                         open_els.shift()
3210                         ins_mode = ins_mode_in_table
3211                         return
3212                 }
3213                 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) {
3214                         has = false
3215                         for (i = 0; i < open_els.length; ++i) {
3216                                 el = open_els[i]
3217                                 if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) {
3218                                         has = true
3219                                         break
3220                                 }
3221                                 if (table_scopers[el.name] === el.namespace) {
3222                                         break
3223                                 }
3224                         }
3225                         if (!has) {
3226                                 parse_error()
3227                                 return
3228                         }
3229                         clear_stack_to_table_body_context()
3230                         open_els.shift()
3231                         ins_mode = ins_mode_in_table
3232                         process_token(t)
3233                         return
3234                 }
3235                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) {
3236                         parse_error()
3237                         return
3238                 }
3239                 // Anything else
3240                 ins_mode_in_table(t)
3241         }
3242
3243         // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
3244         ins_mode_in_row = function (t) {
3245                 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3246                         clear_stack_to_table_row_context()
3247                         insert_html_element(t)
3248                         ins_mode = ins_mode_in_cell
3249                         afe_push_marker()
3250                         return
3251                 }
3252                 if (t.type === TYPE_END_TAG && t.name === 'tr') {
3253                         if (is_in_table_scope('tr', NS_HTML)) {
3254                                 clear_stack_to_table_row_context()
3255                                 open_els.shift()
3256                                 ins_mode = ins_mode_in_table_body
3257                         } else {
3258                                 parse_error()
3259                         }
3260                         return
3261                 }
3262                 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3263                         if (is_in_table_scope('tr', NS_HTML)) {
3264                                 clear_stack_to_table_row_context()
3265                                 open_els.shift()
3266                                 ins_mode = ins_mode_in_table_body
3267                                 process_token(t)
3268                         } else {
3269                                 parse_error()
3270                         }
3271                         return
3272                 }
3273                 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3274                         if (is_in_table_scope(t.name, NS_HTML)) {
3275                                 if (is_in_table_scope('tr', NS_HTML)) {
3276                                         clear_stack_to_table_row_context()
3277                                         open_els.shift()
3278                                         ins_mode = ins_mode_in_table_body
3279                                         process_token(t)
3280                                 }
3281                         } else {
3282                                 parse_error()
3283                         }
3284                         return
3285                 }
3286                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th')) {
3287                         parse_error()
3288                         return
3289                 }
3290                 // Anything else
3291                 ins_mode_in_table(t)
3292         }
3293
3294         // http://www.w3.org/TR/html5/syntax.html#close-the-cell
3295         close_the_cell = function () {
3296                 var el
3297                 generate_implied_end_tags()
3298                 if (!((open_els[0].name === 'td' || open_els[0] === 'th') && open_els[0].namespace === NS_HTML)) {
3299                         parse_error()
3300                 }
3301                 while (true) {
3302                         el = open_els.shift()
3303                         if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3304                                 break
3305                         }
3306                 }
3307                 clear_afe_to_marker()
3308                 ins_mode = ins_mode_in_row
3309         }
3310
3311         // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
3312         ins_mode_in_cell = function (t) {
3313                 var el, has, i
3314                 if (t.type === TYPE_END_TAG && (t.name === 'td' || t.name === 'th')) {
3315                         if (is_in_table_scope(t.name, NS_HTML)) {
3316                                 generate_implied_end_tags()
3317                                 if (!((open_els[0].name === t.name) && open_els[0].namespace === NS_HTML)) {
3318                                         parse_error()
3319                                 }
3320                                 while (true) {
3321                                         el = open_els.shift()
3322                                         if (el.name === t.name && el.namespace === NS_HTML) {
3323                                                 break
3324                                         }
3325                                 }
3326                                 clear_afe_to_marker()
3327                                 ins_mode = ins_mode_in_row
3328                         } else {
3329                                 parse_error()
3330                         }
3331                         return
3332                 }
3333                 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3334                         has = false
3335                         for (i = 0; i < open_els.length; ++i) {
3336                                 el = open_els[i]
3337                                 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3338                                         has = true
3339                                         break
3340                                 }
3341                                 if (table_scopers[el.name] === el.namespace) {
3342                                         break
3343                                 }
3344                         }
3345                         if (!has) {
3346                                 parse_error()
3347                                 return
3348                         }
3349                         close_the_cell()
3350                         process_token(t)
3351                         return
3352                 }
3353                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html')) {
3354                         parse_error()
3355                         return
3356                 }
3357                 if (t.type === TYPE_END_TAG && (t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) {
3358                         if (is_in_table_scope(t.name, NS_HTML)) {
3359                                 close_the_cell()
3360                                 process_token(t)
3361                         } else {
3362                                 parse_error()
3363                         }
3364                         return
3365                 }
3366                 // Anything Else
3367                 ins_mode_in_body(t)
3368         }
3369
3370         // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
3371         ins_mode_in_select = function (t) {
3372                 var el
3373                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3374                         parse_error()
3375                         return
3376                 }
3377                 if (t.type === TYPE_TEXT) {
3378                         insert_character(t)
3379                         return
3380                 }
3381                 if (t.type === TYPE_COMMENT) {
3382                         insert_comment(t)
3383                         return
3384                 }
3385                 if (t.type === TYPE_DOCTYPE) {
3386                         parse_error()
3387                         return
3388                 }
3389                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3390                         ins_mode_in_body(t)
3391                         return
3392                 }
3393                 if (t.type === TYPE_START_TAG && t.name === 'option') {
3394                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3395                                 open_els.shift()
3396                         }
3397                         insert_html_element(t)
3398                         return
3399                 }
3400                 if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
3401                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3402                                 open_els.shift()
3403                         }
3404                         if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3405                                 open_els.shift()
3406                         }
3407                         insert_html_element(t)
3408                         return
3409                 }
3410                 if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
3411                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3412                                 if (open_els[1].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3413                                         open_els.shift()
3414                                 }
3415                         }
3416                         if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3417                                 open_els.shift()
3418                         } else {
3419                                 parse_error()
3420                         }
3421                         return
3422                 }
3423                 if (t.type === TYPE_END_TAG && t.name === 'option') {
3424                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3425                                 open_els.shift()
3426                         } else {
3427                                 parse_error()
3428                         }
3429                         return
3430                 }
3431                 if (t.type === TYPE_END_TAG && t.name === 'select') {
3432                         if (is_in_select_scope('select', NS_HTML)) {
3433                                 while (true) {
3434                                         el = open_els.shift()
3435                                         if (el.name === 'select' && el.namespace === NS_HTML) {
3436                                                 break
3437                                         }
3438                                 }
3439                                 reset_ins_mode()
3440                         } else {
3441                                 parse_error()
3442                         }
3443                         return
3444                 }
3445                 if (t.type === TYPE_START_TAG && t.name === 'select') {
3446                         parse_error()
3447                         while (true) {
3448                                 el = open_els.shift()
3449                                 if (el.name === 'select' && el.namespace === NS_HTML) {
3450                                         break
3451                                 }
3452                         }
3453                         reset_ins_mode()
3454                         // spec says that this is the same as </select> but it doesn't say
3455                         // to check scope first
3456                         return
3457                 }
3458                 if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
3459                         parse_error()
3460                         if (!is_in_select_scope('select', NS_HTML)) {
3461                                 return
3462                         }
3463                         while (true) {
3464                                 el = open_els.shift()
3465                                 if (el.name === 'select' && el.namespace === NS_HTML) {
3466                                         break
3467                                 }
3468                         }
3469                         reset_ins_mode()
3470                         process_token(t)
3471                         return
3472                 }
3473                 if (t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) {
3474                         ins_mode_in_head(t)
3475                         return
3476                 }
3477                 if (t.type === TYPE_EOF) {
3478                         ins_mode_in_body(t)
3479                         return
3480                 }
3481                 // Anything else
3482                 parse_error()
3483         }
3484
3485         // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// "In select in table" insertion mode. Table-structure start/end tags are a
// parse error that closes the open <select> and reprocesses the token;
// every other token is delegated to ins_mode_in_select.
ins_mode_in_select_in_table = function (t) {
        var is_table_part, close_select_and_reprocess
        // tag names that end the select when seen as a start or end tag
        is_table_part = ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'].indexOf(t.name) !== -1
        // Remove entries from the front of open_els up to and including the
        // HTML <select>, recompute the insertion mode, then handle t again.
        close_select_and_reprocess = function () {
                var popped
                do {
                        popped = open_els.shift()
                } while (popped.name !== 'select' || popped.namespace !== NS_HTML)
                reset_ins_mode()
                process_token(t)
        }
        if (t.type === TYPE_START_TAG && is_table_part) {
                parse_error()
                close_select_and_reprocess()
                return
        }
        if (t.type === TYPE_END_TAG && is_table_part) {
                parse_error()
                // the end tag is ignored unless a matching element is in table scope
                if (is_in_table_scope(t.name, NS_HTML)) {
                        close_select_and_reprocess()
                }
                return
        }
        // Anything else
        ins_mode_in_select(t)
}
3518
3519         // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// "In template" insertion mode. Text, comments and doctypes go to the
// in-body rules; head-level tags go to the in-head rules; any other start
// tag replaces the front entry of template_ins_modes with the mode suited to
// that tag and is then reprocessed.
ins_mode_in_template = function (t) {
        var next_mode, popped
        if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'base':
                        case 'basefont':
                        case 'bgsound':
                        case 'link':
                        case 'meta':
                        case 'noframes':
                        case 'script':
                        case 'style':
                        case 'template':
                        case 'title':
                                ins_mode_in_head(t)
                                return
                        case 'caption':
                        case 'colgroup':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                next_mode = ins_mode_in_table
                                break
                        case 'col':
                                next_mode = ins_mode_in_column_group
                                break
                        case 'tr':
                                next_mode = ins_mode_in_table_body
                                break
                        case 'td':
                        case 'th':
                                next_mode = ins_mode_in_row
                                break
                        default:
                                next_mode = ins_mode_in_body
                }
                // swap the current template insertion mode, then reprocess
                template_ins_modes.shift()
                template_ins_modes.unshift(next_mode)
                ins_mode = next_mode
                process_token(t)
                return
        }
        if (t.type === TYPE_END_TAG) {
                if (t.name === 'template') {
                        ins_mode_in_head(t)
                        return
                }
                // every other end tag is ignored
                parse_error()
                return
        }
        if (t.type !== TYPE_EOF) {
                return
        }
        if (!template_tag_is_open()) {
                stop_parsing()
                return
        }
        parse_error()
        // drop entries from the front of open_els through the HTML <template>
        do {
                popped = open_els.shift()
        } while (popped.name !== 'template' || popped.namespace !== NS_HTML)
        clear_afe_to_marker()
        template_ins_modes.shift()
        reset_ins_mode()
        process_token(t)
}
3587
3588         // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
// "After body" insertion mode. Whitespace and <html> start tags use the
// in-body rules, comments are appended to the last entry of open_els, and
// any unexpected token switches back to the in-body mode and is reprocessed.
ins_mode_after_body = function (t) {
        var root
        if (is_space_tok(t)) {
                ins_mode_in_body(t)
                return
        }
        switch (t.type) {
                case TYPE_COMMENT:
                        // the comment becomes the last child of the last open_els entry
                        root = open_els[open_els.length - 1]
                        insert_comment(t, [root, root.children.length])
                        return
                case TYPE_DOCTYPE:
                        parse_error()
                        return
                case TYPE_START_TAG:
                        if (t.name === 'html') {
                                ins_mode_in_body(t)
                                return
                        }
                        break
                case TYPE_END_TAG:
                        if (t.name === 'html') {
                                if (flag_fragment_parsing) {
                                        parse_error()
                                } else {
                                        ins_mode = ins_mode_after_after_body
                                }
                                return
                        }
                        break
                case TYPE_EOF:
                        stop_parsing()
                        return
        }
        // Anything else
        parse_error()
        ins_mode = ins_mode_in_body
        process_token(t)
}
3625
3626         // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
// "In frameset" insertion mode. Handles whitespace, comments, nested
// <frameset>/<frame>/<noframes> tags and end of file; anything not listed
// is a parse error and is dropped.
ins_mode_in_frameset = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        switch (t.type) {
                case TYPE_COMMENT:
                        insert_comment(t)
                        return
                case TYPE_DOCTYPE:
                        parse_error()
                        return
                case TYPE_START_TAG:
                        switch (t.name) {
                                case 'html':
                                        ins_mode_in_body(t)
                                        return
                                case 'frameset':
                                        insert_html_element(t)
                                        return
                                case 'frame':
                                        // inserted and immediately removed from open_els
                                        insert_html_element(t)
                                        open_els.shift()
                                        t.acknowledge_self_closing()
                                        return
                                case 'noframes':
                                        ins_mode_in_head(t)
                                        return
                        }
                        break
                case TYPE_END_TAG:
                        if (t.name === 'frameset') {
                                if (open_els.length === 1) {
                                        parse_error()
                                        return // fragment case
                                }
                                open_els.shift()
                                if (flag_fragment_parsing !== false) {
                                        return
                                }
                                // leave this mode once the front of open_els is no longer a frameset
                                if (open_els[0].name !== 'frameset') {
                                        ins_mode = ins_mode_after_frameset
                                }
                                return
                        }
                        break
                case TYPE_EOF:
                        if (open_els.length !== 1) {
                                parse_error()
                        }
                        stop_parsing()
                        return
        }
        // Anything else
        parse_error()
}
3679
3680         // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
// "After frameset" insertion mode. Accepts whitespace, comments, <html>,
// </html>, <noframes> and end of file; everything else is a parse error and
// is dropped.
ins_mode_after_frameset = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        switch (t.type) {
                case TYPE_COMMENT:
                        insert_comment(t)
                        return
                case TYPE_DOCTYPE:
                        parse_error()
                        return
                case TYPE_START_TAG:
                        if (t.name === 'html') {
                                ins_mode_in_body(t)
                                return
                        }
                        if (t.name === 'noframes') {
                                ins_mode_in_head(t)
                                return
                        }
                        break
                case TYPE_END_TAG:
                        if (t.name === 'html') {
                                ins_mode = ins_mode_after_after_frameset
                                return
                        }
                        break
                case TYPE_EOF:
                        stop_parsing()
                        return
        }
        // Anything else
        parse_error()
}
3713
3714         // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
// "After after body" insertion mode. Comments are appended to doc;
// doctypes, whitespace and <html> start tags use the in-body rules; any
// other token besides end of file switches back to the in-body mode and is
// reprocessed there.
ins_mode_after_after_body = function (t) {
        var uses_body_rules
        if (t.type === TYPE_COMMENT) {
                insert_comment(t, [doc, doc.children.length])
                return
        }
        uses_body_rules = t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')
        if (uses_body_rules) {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_EOF) {
                stop_parsing()
                return
        }
        // Anything else
        parse_error()
        ins_mode = ins_mode_in_body
        process_token(t)
}
3733
3734         // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
// "After after frameset" insertion mode. Comments are appended to doc;
// doctypes, whitespace and <html> start tags use the in-body rules;
// <noframes> uses the in-head rules; any other token besides end of file is
// a parse error and is dropped.
ins_mode_after_after_frameset = function (t) {
        var uses_body_rules
        if (t.type === TYPE_COMMENT) {
                insert_comment(t, [doc, doc.children.length])
                return
        }
        uses_body_rules = t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')
        if (uses_body_rules) {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_EOF) {
                stop_parsing()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'noframes') {
                ins_mode_in_head(t)
                return
        }
        // Anything else
        parse_error()
}
3756
3757         // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
// Returns true when token t carries an attribute named color, face or size,
// false otherwise. Each entry of t.attrs_a is read with the attribute name
// at index 0.
has_color_face_or_size = function (t) {
        var idx
        for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                switch (t.attrs_a[idx][0]) {
                        case 'color':
                        case 'face':
                        case 'size':
                                return true
                }
        }
        return false
}
// Steps for the end of a <script> element in foreign content. Currently the
// only step performed is removing the entry at the front of open_els.
in_foreign_content_end_script = function () {
        // fixfull: nothing beyond removing the element is implemented yet
        open_els.shift()
}
// Handles a start tag in foreign content that is not an HTML breakout tag:
// adjusts the token for the namespace of the adjusted current node, inserts
// it as a foreign element in that namespace, and resolves a self-closing
// flag right away.
in_foreign_content_other_start = function (t) {
        var parent, fixed_name
        parent = adjusted_current_node()
        if (parent.namespace === NS_MATHML) {
                adjust_mathml_attributes(t)
        }
        if (parent.namespace === NS_SVG) {
                // replace the tag name when svg_name_fixes has an entry for it
                fixed_name = svg_name_fixes[t.name]
                if (fixed_name != null) {
                        t.name = fixed_name
                }
                adjust_svg_attributes(t)
        }
        adjust_foreign_attributes(t)
        insert_foreign_element(t, parent.namespace)
        if (!t.flag('self-closing')) {
                return
        }
        if (t.name === 'script') {
                t.acknowledge_self_closing()
                in_foreign_content_end_script()
                // fixfull
                return
        }
        // any other self-closing element is removed from open_els immediately
        open_els.shift()
        t.acknowledge_self_closing()
}
// Rules for parsing tokens in foreign content. Text and comments are
// inserted directly; a listed HTML start tag (or <font> with a
// color/face/size attribute) breaks out of the foreign content unless the
// fragment flag is set; end tags are matched against open_els, lowercasing
// element names for the comparison.
in_foreign_content = function (t) {
        var html_breakout_tags, is_breakout, depth, candidate, popped
        if (t.type === TYPE_TEXT && t.text === "\u0000") {
                parse_error()
                insert_character(new_character_token("\ufffd"))
                return
        }
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_TEXT) {
                flag_frameset_ok = false
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG) {
                html_breakout_tags = [
                        'b', 'big', 'blockquote', 'body', 'br', 'center', 'code', 'dd',
                        'div', 'dl', 'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5',
                        'h6', 'head', 'hr', 'i', 'img', 'li', 'listing', 'main', 'meta',
                        'nobr', 'ol', 'p', 'pre', 'ruby', 's', 'small', 'span', 'strong',
                        'strike', 'sub', 'sup', 'table', 'tt', 'u', 'ul', 'var'
                ]
                is_breakout = html_breakout_tags.indexOf(t.name) !== -1 || (t.name === 'font' && has_color_face_or_size(t))
                if (is_breakout) {
                        parse_error()
                        if (!flag_fragment_parsing) {
                                // Remove at least one entry, then keep removing until the
                                // front of open_els is an integration point or an HTML
                                // element. The original code asks "is this safe?" here.
                                do {
                                        open_els.shift()
                                } while (!(is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML))
                                process_token(t)
                                return
                        }
                }
                in_foreign_content_other_start(t)
                return
        }
        if (t.type !== TYPE_END_TAG) {
                return
        }
        if (t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
                in_foreign_content_end_script()
                return
        }
        // Walk open_els from the front looking for an element whose lowercased
        // name matches the end tag.
        depth = 0
        candidate = open_els[depth]
        if (candidate.name.toLowerCase() !== t.name) {
                parse_error()
        }
        for (;;) {
                if (candidate === open_els[open_els.length - 1]) {
                        // reached the last entry without a match: ignore the token
                        return
                }
                if (candidate.name.toLowerCase() === t.name) {
                        // remove entries through the matching element
                        do {
                                popped = open_els.shift()
                        } while (popped !== candidate)
                        return
                }
                depth += 1
                candidate = open_els[depth]
                if (candidate.namespace === NS_HTML) {
                        ins_mode(t) // explicitly call HTML insertion mode
                        return
                }
        }
}
3871
3872
3873         // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
// Tokenizer "data" state. Consumes one character from txt and returns the
// token to emit, or null when the character only changes the tokenizer
// state.
tok_state_data = function () {
        var ch
        ch = txt.charAt(cur++)
        if (ch === '&') {
                return new_text_node(parse_character_reference())
        }
        if (ch === '<') {
                tok_state = tok_state_tag_open
                return null
        }
        if (ch === "\u0000") {
                // a NUL is reported but still passed through unchanged
                parse_error()
                return new_text_node(ch)
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        return new_text_node(ch)
}
3895
3896         // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3897         // not needed: tok_state_character_reference_in_data = function () {
3898         // just call parse_character_reference()
3899
3900         // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
// Tokenizer "RCDATA" state. Consumes one character from txt and returns the
// token to emit, or null when the character only changes the tokenizer
// state.
tok_state_rcdata = function () {
        var ch
        ch = txt.charAt(cur++)
        if (ch === '&') {
                return new_text_node(parse_character_reference())
        }
        if (ch === '<') {
                tok_state = tok_state_rcdata_less_than_sign
                return null
        }
        if (ch === "\u0000") {
                // a NUL is reported and replaced with U+FFFD
                parse_error()
                return new_character_token("\ufffd")
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        return new_character_token(ch)
}
3922
3923         // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3924         // not needed: tok_state_character_reference_in_rcdata = function () {
3925         // just call parse_character_reference()
3926
3927         // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
// Tokenizer "RAWTEXT" state. Consumes one character from txt and returns
// the token to emit, or null when the character only changes the tokenizer
// state.
tok_state_rawtext = function () {
        var ch
        ch = txt.charAt(cur++)
        if (ch === '<') {
                tok_state = tok_state_rawtext_less_than_sign
                return null
        }
        if (ch === "\u0000") {
                // a NUL is reported and replaced with U+FFFD
                parse_error()
                return new_character_token("\ufffd")
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        return new_character_token(ch)
}
3946
3947         // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
// Tokenizer "script data" state. Consumes one character from txt and
// returns the token to emit, or null when the character only changes the
// tokenizer state.
tok_state_script_data = function () {
        var ch
        ch = txt.charAt(cur++)
        if (ch === '<') {
                tok_state = tok_state_script_data_less_than_sign
                return null
        }
        if (ch === "\u0000") {
                // a NUL is reported and replaced with U+FFFD
                parse_error()
                return new_character_token("\ufffd")
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        return new_character_token(ch)
}
3966
3967         // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
// Tokenizer "PLAINTEXT" state. Consumes one character from txt and always
// returns a token: an EOF token at end of input, U+FFFD for a NUL, or the
// character itself.
tok_state_plaintext = function () {
        var ch
        ch = txt.charAt(cur++)
        if (ch === "\u0000") {
                parse_error()
                return new_character_token("\ufffd")
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        return new_character_token(ch)
}
3983
3984         // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
// Tokenizer "tag open" state, entered after a '<'. Starts a tag or bogus
// comment and returns nothing (undefined) in those cases; for any other
// character it un-consumes that character and returns a text node holding
// the literal '<'.
tok_state_tag_open = function () {
        var ch, name_start
        ch = txt.charAt(cur++)
        switch (ch) {
                case '!':
                        tok_state = tok_state_markup_declaration_open
                        return
                case '/':
                        tok_state = tok_state_end_tag_open
                        return
        }
        // a letter starts a tag name, stored in lower case
        if (is_uc_alpha(ch)) {
                name_start = ch.toLowerCase()
        } else if (is_lc_alpha(ch)) {
                name_start = ch
        } else {
                name_start = null
        }
        if (name_start !== null) {
                tok_cur_tag = new_open_tag(name_start)
                tok_state = tok_state_tag_name
                return
        }
        if (ch === '?') {
                parse_error()
                tok_cur_tag = new_comment_token('?') // FIXME right?
                tok_state = tok_state_bogus_comment
                return
        }
        // Anything else
        parse_error()
        tok_state = tok_state_data
        cur -= 1 // we didn't parse/handle the char after <
        return new_text_node('<')
}
4018
4019         // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
// Tokenizer "end tag open" state, entered after "</". A letter starts an
// end tag, '>' is reported and dropped, end of input returns the literal
// "</" as text, and anything else starts a bogus comment.
tok_state_end_tag_open = function () {
        var ch, name_start
        ch = txt.charAt(cur++)
        // a letter starts the end tag name, stored in lower case
        if (is_uc_alpha(ch)) {
                name_start = ch.toLowerCase()
        } else if (is_lc_alpha(ch)) {
                name_start = ch
        } else {
                name_start = null
        }
        if (name_start !== null) {
                tok_cur_tag = new_end_tag(name_start)
                tok_state = tok_state_tag_name
                return
        }
        switch (ch) {
                case '>':
                        parse_error()
                        tok_state = tok_state_data
                        return
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        return new_text_node('</')
        }
        // Anything else
        parse_error()
        tok_cur_tag = new_comment_token(ch)
        tok_state = tok_state_bogus_comment
        return null
}
4049
4050         // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
// Tokenizer "tag name" state. Appends characters to tok_cur_tag.name until
// whitespace, '/', '>' or end of input. Returns the finished tag on '>' and
// null in every other case.
tok_state_tag_name = function () {
        var ch, finished, appended
        ch = txt.charAt(cur++)
        if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
                tok_state = tok_state_before_attribute_name
                return null
        }
        if (ch === '/') {
                tok_state = tok_state_self_closing_start_tag
                return null
        }
        if (ch === '>') {
                // hand the tag over and clear the in-progress slot
                tok_state = tok_state_data
                finished = tok_cur_tag
                tok_cur_tag = null
                return finished
        }
        if (ch === "\u0000") {
                parse_error()
                tok_cur_tag.name += "\ufffd"
                return null
        }
        if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                return null
        }
        // any other character extends the name; upper case is lowered
        appended = is_uc_alpha(ch) ? ch.toLowerCase() : ch
        tok_cur_tag.name += appended
        return null
}
4086
4087         // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
// Tokenizer "RCDATA less-than sign" state. A '/' resets temporary_buffer
// and moves on to the end-tag-open state (returns null); any other
// character is put back and the '<' is returned as a character token.
tok_state_rcdata_less_than_sign = function () {
        if (txt.charAt(cur++) !== '/') {
                // Anything else
                tok_state = tok_state_rcdata
                cur -= 1 // reconsume the input character
                return new_character_token('<')
        }
        temporary_buffer = ''
        tok_state = tok_state_rcdata_end_tag_open
        return null
}
4101
4102         // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
// Tokenizer "RCDATA end tag open" state. A letter starts an end tag token
// and is also recorded, in its original case, in temporary_buffer (returns
// null); any other character is put back and "</" is returned as a
// character token.
tok_state_rcdata_end_tag_open = function () {
        var ch, name_start
        ch = txt.charAt(cur++)
        if (is_uc_alpha(ch)) {
                name_start = ch.toLowerCase()
        } else if (is_lc_alpha(ch)) {
                name_start = ch
        } else {
                // Anything else
                tok_state = tok_state_rcdata
                cur -= 1 // reconsume the input character
                return new_character_token("</") // fixfull separate these
        }
        tok_cur_tag = new_end_tag(name_start)
        temporary_buffer += ch
        tok_state = tok_state_rcdata_end_tag_name
        return null
}
4123
4124         // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
// Returns true when t is an end tag token whose name equals the name of the
// entry at the front of open_els.
// fixfull: this stands in for "the tag name of the last start tag to have
// been emitted from this tokenizer" and assumes the two are the same.
is_appropriate_end_tag = function (t) {
        if (t.type !== TYPE_END_TAG) {
                return false
        }
        return t.name === open_els[0].name
}
4130
4131         // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
// Tokenizer "RCDATA end tag name" state. Letters extend both
// tok_cur_tag.name (lowercased) and temporary_buffer (original case).
// Whitespace, '/' and '>' finish the tag only when it is the appropriate
// end tag; otherwise, like any other character, they are put back and
// "</" plus the buffered text is returned as a character token.
tok_state_rcdata_end_tag_name = function () {
        var ch
        ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_before_attribute_name
                                return
                        }
                        break // treated as "Anything else"
                case '/':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
                                return
                        }
                        break // treated as "Anything else"
                case '>':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_data
                                return tok_cur_tag
                        }
                        break // treated as "Anything else"
        }
        if (is_uc_alpha(ch)) {
                tok_cur_tag.name += ch.toLowerCase()
                temporary_buffer += ch
                return null
        }
        if (is_lc_alpha(ch)) {
                tok_cur_tag.name += ch
                temporary_buffer += ch
                return null
        }
        // Anything else
        tok_state = tok_state_rcdata
        cur -= 1 // reconsume the input character
        return new_character_token('</' + temporary_buffer) // fixfull separate these
}
4171
4172         // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
// RAWTEXT less-than sign state: look at the character following a '<'.
//
// Returns null when that character is '/' (temporary_buffer is reset and
// the end-tag-open state takes over); otherwise returns a '<' character
// token and leaves the character to be read again in the RAWTEXT state.
tok_state_rawtext_less_than_sign = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '/') {
                // Anything else
                tok_state = tok_state_rawtext
                cur -= 1 // reconsume the input character
                return new_character_token('<')
        }
        temporary_buffer = ''
        tok_state = tok_state_rawtext_end_tag_open
        return null
}
4186
4187         // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
// RAWTEXT end tag open state: look at the first character after '</'.
//
// A letter starts a new end tag token (name lowercased, character appended
// to temporary_buffer as typed) and returns null. Anything else returns a
// '</' character token and reconsumes the character in the RAWTEXT state.
tok_state_rawtext_end_tag_open = function () {
        // declared locally so the character does not leak into an outer or
        // global `c`
        var c
        c = txt.charAt(cur++)
        if (is_uc_alpha(c)) {
                tok_cur_tag = new_end_tag(c.toLowerCase())
                temporary_buffer += c
                tok_state = tok_state_rawtext_end_tag_name
                return null
        }
        if (is_lc_alpha(c)) {
                tok_cur_tag = new_end_tag(c)
                temporary_buffer += c
                tok_state = tok_state_rawtext_end_tag_name
                return null
        }
        // Anything else
        tok_state = tok_state_rawtext
        cur -= 1 // reconsume the input character
        return new_character_token("</") // fixfull separate these
}
4207
4208         // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
// RAWTEXT end tag name state: consume one character of a candidate end tag
// name.
//
// Returns tok_cur_tag when '>' completes an appropriate end tag, or a
// character token holding '</' followed by temporary_buffer when the
// candidate is abandoned. Otherwise returns null (a letter was accumulated)
// or undefined (the state changed without producing a token).
tok_state_rawtext_end_tag_name = function () {
        var ch, is_space, is_upper
        ch = txt.charAt(cur++)
        is_space = ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' '
        // Whitespace, '/' and '>' only count when the tag being built is an
        // appropriate end tag; otherwise they drop through to "anything else".
        if ((is_space || ch === '/' || ch === '>') && is_appropriate_end_tag(tok_cur_tag)) {
                if (ch === '>') {
                        tok_state = tok_state_data
                        return tok_cur_tag
                }
                if (is_space) {
                        tok_state = tok_state_before_attribute_name
                } else {
                        tok_state = tok_state_self_closing_start_tag
                }
                return
        }
        is_upper = is_uc_alpha(ch)
        if (is_upper || is_lc_alpha(ch)) {
                // the tag name is lowercased; the buffer keeps the character as typed
                tok_cur_tag.name += is_upper ? ch.toLowerCase() : ch
                temporary_buffer += ch
                return null
        }
        // Anything else: give up on the end tag and hand the text back
        tok_state = tok_state_rawtext
        cur -= 1 // reconsume the input character
        return new_character_token('</' + temporary_buffer) // fixfull separate these
}
4248
4249         // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
// Script data less-than sign state: look at the character following a '<'.
//
//   '/'  resets temporary_buffer and moves to the end-tag-open state
//        (returns undefined)
//   '!'  moves to the escape-start state and returns a '<!' character token
//   else returns a '<' character token and reconsumes the character in the
//        script data state
tok_state_script_data_less_than_sign = function () {
        switch (txt.charAt(cur++)) {
                case '/':
                        temporary_buffer = ''
                        tok_state = tok_state_script_data_end_tag_open
                        return
                case '!':
                        tok_state = tok_state_script_data_escape_start
                        return new_character_token('<!') // fixfull split
                default:
                        // Anything else
                        tok_state = tok_state_script_data
                        cur -= 1 // reconsume
                        return new_character_token('<')
        }
}
4267
4268         // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
// Script data end tag open state: look at the first character after '</'.
//
// A letter starts a new end tag token (name lowercased, character appended
// to temporary_buffer as typed) and returns undefined. Anything else
// returns a '</' character token and reconsumes the character in the
// script data state.
tok_state_script_data_end_tag_open = function () {
        var ch, tag_name
        ch = txt.charAt(cur++)
        tag_name = null
        if (is_uc_alpha(ch)) {
                tag_name = ch.toLowerCase()
        } else if (is_lc_alpha(ch)) {
                tag_name = ch
        }
        if (tag_name === null) {
                // Anything else
                tok_state = tok_state_script_data
                cur -= 1 // reconsume
                return new_character_token('</')
        }
        tok_cur_tag = new_end_tag(tag_name)
        temporary_buffer += ch
        tok_state = tok_state_script_data_end_tag_name
        return
}
4289
4290         // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
// Script data end tag name state: consume one character of a candidate end
// tag name.
//
// Returns tok_cur_tag when '>' completes an appropriate end tag, or a
// character token holding "</" followed by temporary_buffer when the
// candidate is abandoned. Every other path returns undefined.
tok_state_script_data_end_tag_name = function () {
        var ch, is_space, is_upper
        ch = txt.charAt(cur++)
        is_space = ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' '
        // Whitespace, '/' and '>' only count when the tag being built is an
        // appropriate end tag; otherwise they drop through to "anything else".
        if ((is_space || ch === '/' || ch === '>') && is_appropriate_end_tag(tok_cur_tag)) {
                if (ch === '>') {
                        tok_state = tok_state_data
                        return tok_cur_tag
                }
                if (is_space) {
                        tok_state = tok_state_before_attribute_name
                } else {
                        tok_state = tok_state_self_closing_start_tag
                }
                return
        }
        is_upper = is_uc_alpha(ch)
        if (is_upper || is_lc_alpha(ch)) {
                // the tag name is lowercased; the buffer keeps the character as typed
                tok_cur_tag.name += is_upper ? ch.toLowerCase() : ch
                temporary_buffer += ch
                return
        }
        // Anything else
        tok_state = tok_state_script_data
        cur -= 1 // reconsume
        return new_character_token("</" + temporary_buffer) // fixfull split
}
4330
4331         // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
// Script data escape start state: a '-' moves on to the escape-start-dash
// state and is returned as a character token; any other character sends
// the tokenizer back to the script data state, where it is read again
// (returns undefined).
tok_state_script_data_escape_start = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '-') {
                // Anything else
                tok_state = tok_state_script_data
                cur -= 1 // reconsume
                return
        }
        tok_state = tok_state_script_data_escape_start_dash
        return new_character_token('-')
}
4343
4344         // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
// Script data escape start dash state: a second '-' enters the escaped
// dash-dash state and is returned as a character token; any other
// character sends the tokenizer back to the script data state, where it is
// read again (returns undefined).
tok_state_script_data_escape_start_dash = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '-') {
                // Anything else
                tok_state = tok_state_script_data
                cur -= 1 // reconsume
                return
        }
        tok_state = tok_state_script_data_escaped_dash_dash
        return new_character_token('-')
}
4356
4357         // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
// Script data escaped state: consume one character of escaped script text.
//
// Returns a character token for every character except '<' and EOF, which
// only change state and return undefined. NUL is reported through
// parse_error() and replaced by U+FFFD.
tok_state_script_data_escaped = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_escaped_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case "\u0000":
                        parse_error()
                        return new_character_token("\ufffd")
                case '': // EOF
                        tok_state = tok_state_data
                        parse_error()
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        return new_character_token(ch)
        }
}
4382
4383         // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
// Script data escaped dash state: consume the character following a single
// '-' in escaped script text.
//
// Returns a character token for every character except '<' and EOF, which
// only change state and return undefined. Anything other than '-', '<' or
// EOF drops back to the plain escaped state.
tok_state_script_data_escaped_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_escaped_dash_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        tok_state = tok_state_data
                        parse_error()
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        tok_state = tok_state_script_data_escaped
                        return new_character_token(ch)
        }
}
4410
4411         // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
// Script data escaped dash dash state: consume the character following
// '--' in escaped script text.
//
// Further '-' characters stay in this state; '>' returns to the script
// data state. Returns a character token for every character except '<'
// and EOF, which only change state and return undefined.
tok_state_script_data_escaped_dash_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case '>':
                        tok_state = tok_state_script_data
                        return new_character_token('>')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        tok_state = tok_state_script_data_escaped
                        return new_character_token(ch)
        }
}
4441
4442         // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
// Script data escaped less-than sign state: look at the character
// following a '<' in escaped script text.
//
//   '/'     resets temporary_buffer and moves to the escaped end-tag-open
//           state (returns undefined)
//   letter  starts temporary_buffer with the lowercased letter, moves to
//           the double-escape-start state and returns '<' plus the letter
//           as typed
//   else    returns '<' and reconsumes the character in the escaped state
tok_state_script_data_escaped_less_than_sign = function () {
        var ch, is_upper
        ch = txt.charAt(cur++)
        if (ch === '/') {
                temporary_buffer = ''
                tok_state = tok_state_script_data_escaped_end_tag_open
                return
        }
        is_upper = is_uc_alpha(ch)
        if (is_upper || is_lc_alpha(ch)) {
                // the buffer is lowercased even though the emitted text is not
                temporary_buffer = is_upper ? ch.toLowerCase() : ch
                tok_state = tok_state_script_data_double_escape_start
                return new_character_token("<" + ch) // fixfull split
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        cur -= 1 // reconsume
        return new_character_token('<')
}
4466
4467         // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
// Script data escaped end tag open state: look at the first character
// after '</' in escaped script text.
//
// A letter starts a new end tag token (name lowercased, character appended
// to temporary_buffer as typed) and returns undefined. Anything else
// returns a '</' character token and reconsumes the character in the
// escaped state.
tok_state_script_data_escaped_end_tag_open = function () {
        var ch, tag_name
        ch = txt.charAt(cur++)
        tag_name = null
        if (is_uc_alpha(ch)) {
                tag_name = ch.toLowerCase()
        } else if (is_lc_alpha(ch)) {
                tag_name = ch
        }
        if (tag_name === null) {
                // Anything else
                tok_state = tok_state_script_data_escaped
                cur -= 1 // reconsume
                return new_character_token('</') // fixfull split
        }
        tok_cur_tag = new_end_tag(tag_name)
        temporary_buffer += ch
        tok_state = tok_state_script_data_escaped_end_tag_name
        return
}
4488
4489         // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
// Script data escaped end tag name state: consume one character of a
// candidate end tag name inside escaped script text.
//
// Returns tok_cur_tag when '>' completes an appropriate end tag, or a
// character token holding "</" followed by temporary_buffer when the
// candidate is abandoned. Every other path returns undefined.
tok_state_script_data_escaped_end_tag_name = function () {
        var c
        c = txt.charAt(cur++)
        if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
                if (is_appropriate_end_tag(tok_cur_tag)) {
                        tok_state = tok_state_before_attribute_name
                        return
                }
                // fall through
        }
        if (c === '/') {
                if (is_appropriate_end_tag(tok_cur_tag)) {
                        tok_state = tok_state_self_closing_start_tag
                        return
                }
                // fall through
        }
        if (c === '>') {
                if (is_appropriate_end_tag(tok_cur_tag)) {
                        tok_state = tok_state_data
                        return tok_cur_tag
                }
                // fall through
        }
        if (is_uc_alpha(c)) {
                tok_cur_tag.name += c.toLowerCase()
                // The buffer must keep the character exactly as typed: it is
                // re-emitted as text below if this turns out not to be an
                // end tag, so lowercasing it here would alter the output.
                temporary_buffer += c
                return
        }
        if (is_lc_alpha(c)) {
                tok_cur_tag.name += c
                temporary_buffer += c
                return
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        cur -= 1 // reconsume
        return new_character_token("</" + temporary_buffer) // fixfull split
}
4529
4530         // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
// Script data double escape start state: accumulate the letters following
// a '<' in escaped script text into temporary_buffer.
//
// Whitespace, '/' or '>' ends the word: the double escaped state is
// entered if the buffer spells 'script', the escaped state otherwise.
// Those characters and all letters are returned as character tokens.
// Anything else is reconsumed in the escaped state (returns undefined).
tok_state_script_data_double_escape_start = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                case '/':
                case '>':
                        tok_state = temporary_buffer === 'script' ? tok_state_script_data_double_escaped : tok_state_script_data_escaped
                        return new_character_token(ch)
        }
        if (is_uc_alpha(ch)) {
                temporary_buffer += ch.toLowerCase() // yes, really lowercase
                return new_character_token(ch)
        }
        if (is_lc_alpha(ch)) {
                temporary_buffer += ch
                return new_character_token(ch)
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        cur -= 1 // reconsume
}
4554
4555         // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
// Script data double escaped state: consume one character of
// double-escaped script text.
//
// Returns a character token for every character except EOF, which reports
// a parse error, switches to the data state and returns undefined. NUL is
// reported through parse_error() and replaced by U+FFFD.
tok_state_script_data_double_escaped = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_double_escaped_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case "\u0000":
                        parse_error()
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        return new_character_token(ch)
        }
}
4580
4581         // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
// Script data double escaped dash state: consume the character following
// a single '-' in double-escaped script text.
//
// Returns a character token for every character except EOF, which reports
// a parse error, switches to the data state and returns undefined.
// Anything other than '-', '<' or EOF drops back to the plain double
// escaped state.
tok_state_script_data_double_escaped_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_double_escaped_dash_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token(ch)
        }
}
4608
4609         // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
// Script data double escaped dash dash state: consume the character
// following '--' in double-escaped script text.
//
// Further '-' characters stay in this state; '>' returns to the script
// data state. Returns a character token for every character except EOF,
// which reports a parse error, switches to the data state and returns
// undefined.
tok_state_script_data_double_escaped_dash_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case '>':
                        tok_state = tok_state_script_data
                        return new_character_token('>')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token(ch)
        }
}
4639
4640         // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
// Script data double escaped less-than sign state: a '/' resets
// temporary_buffer, moves to the double-escape-end state and is returned
// as a character token; any other character is reconsumed in the double
// escaped state (returns undefined).
tok_state_script_data_double_escaped_less_than_sign = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '/') {
                // Anything else
                tok_state = tok_state_script_data_double_escaped
                cur -= 1 // reconsume
                return
        }
        temporary_buffer = ''
        tok_state = tok_state_script_data_double_escape_end
        return new_character_token('/')
}
4653
4654         // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
// Script data double escape end state: accumulate the letters following
// '</' in double-escaped script text into temporary_buffer.
//
// Whitespace, '/' or '>' ends the word: the (single) escaped state is
// entered if the buffer spells 'script', otherwise the tokenizer stays
// double escaped. Those characters and all letters are returned as
// character tokens. Anything else is reconsumed in the double escaped
// state (returns undefined).
tok_state_script_data_double_escape_end = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                case '/':
                case '>':
                        tok_state = temporary_buffer === 'script' ? tok_state_script_data_escaped : tok_state_script_data_double_escaped
                        return new_character_token(ch)
        }
        if (is_uc_alpha(ch)) {
                temporary_buffer += ch.toLowerCase() // yes, really lowercase
                return new_character_token(ch)
        }
        if (is_lc_alpha(ch)) {
                temporary_buffer += ch
                return new_character_token(ch)
        }
        // Anything else
        tok_state = tok_state_script_data_double_escaped
        cur -= 1 // reconsume
}
4678
4679         // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
// Before attribute name state: skip whitespace inside a tag and start a
// new attribute on the first character that can begin a name.
//
// Returns the current tag token (clearing tok_cur_tag) when '>' closes the
// tag; returns null in every other case. A new attribute is stored as a
// [name, value] pair at the front of tok_cur_tag.attrs_a.
tok_state_before_attribute_name = function () {
        var attr_name, c, tmp
        attr_name = null
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        return null
                case '/':
                        tok_state = tok_state_self_closing_start_tag
                        return null
                case '>':
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case "\u0000":
                        parse_error()
                        attr_name = "\ufffd"
                break
                case '"':
                case "'":
                case '<':
                case '=':
                        // not allowed to start a name, but used anyway
                        parse_error()
                        attr_name = c
                break
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        // reconsume the EOF in the data state so `cur` does
                        // not run past the end of the input
                        cur -= 1
                break
                default:
                        if (is_uc_alpha(c)) {
                                attr_name = c.toLowerCase()
                        } else {
                                attr_name = c
                        }
        }
        if (attr_name != null) {
                tok_cur_tag.attrs_a.unshift([attr_name, ''])
                tok_state = tok_state_attribute_name
        }
        return null
}
4728
4729         // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
// Attribute name state: consume one character of the name of the attribute
// at tok_cur_tag.attrs_a[0].
//
// Returns the current tag token (clearing tok_cur_tag) when '>' closes the
// tag; returns null in every other case. Uppercase letters are appended
// lowercased and NUL is appended as U+FFFD.
tok_state_attribute_name = function () {
        var c, tmp
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_after_attribute_name
                break
                case '/':
                        tok_state = tok_state_self_closing_start_tag
                break
                case '=':
                        tok_state = tok_state_before_attribute_value
                break
                case '>':
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][0] += "\ufffd"
                break
                case '"':
                case "'":
                case '<':
                        // not allowed in a name, but kept anyway
                        parse_error()
                        tok_cur_tag.attrs_a[0][0] += c
                break
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        // reconsume the EOF in the data state so `cur` does
                        // not run past the end of the input
                        cur -= 1
                break
                default:
                        if (is_uc_alpha(c)) {
                                tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
                        } else {
                                tok_cur_tag.attrs_a[0][0] += c
                        }
        }
        return null
}
4774
4775         // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
// After attribute name state: decide what follows a finished attribute
// name.
//
// Returns tok_cur_tag when '>' closes the tag and undefined otherwise.
// Whitespace is skipped, '/' and '=' move to their own states, and any
// other character starts a new [name, value] pair at the front of
// tok_cur_tag.attrs_a.
tok_state_after_attribute_name = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        return
                case '/':
                        tok_state = tok_state_self_closing_start_tag
                        return
                case '=':
                        tok_state = tok_state_before_attribute_value
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
        }
        if (is_uc_alpha(ch)) {
                tok_cur_tag.attrs_a.unshift([ch.toLowerCase(), ''])
                tok_state = tok_state_attribute_name
                return
        }
        switch (ch) {
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
                        tok_state = tok_state_attribute_name
                        return
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
                case '"':
                case "'":
                case '<':
                        parse_error()
                        // then handled like anything else, below
                break
        }
        // Anything else
        tok_cur_tag.attrs_a.unshift([ch, ''])
        tok_state = tok_state_attribute_name
}
4819
4820         // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Before attribute value state: skip whitespace after '=' and pick the
// quoted or unquoted value state from the first significant character.
//
// Returns the current tag token (clearing tok_cur_tag) when '>' closes the
// tag; returns null in every other case. Characters that begin an unquoted
// value are appended to the value of the attribute at
// tok_cur_tag.attrs_a[0].
tok_state_before_attribute_value = function () {
        var c, tmp
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        return null
                case '"':
                        tok_state = tok_state_attribute_value_double_quoted
                break
                case '&':
                        tok_state = tok_state_attribute_value_unquoted
                        cur -= 1 // reconsume, so the unquoted state sees the '&'
                break
                case "'":
                        tok_state = tok_state_attribute_value_single_quoted
                break
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                        tok_state = tok_state_attribute_value_unquoted
                break
                case '>':
                        // missing attribute value
                        parse_error()
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case '<':
                case '=':
                case '`':
                        // parse error, then treated like anything else
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += c
                        tok_state = tok_state_attribute_value_unquoted
                break
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        // reconsume the EOF in the data state so `cur` does
                        // not run past the end of the input
                        cur -= 1
                break
                default:
                        tok_cur_tag.attrs_a[0][1] += c
                        tok_state = tok_state_attribute_value_unquoted
        }
        return null
}
4862
// 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
//
// Consumes one character of a double-quoted attribute value and appends it
// to the value of the attribute being built (tok_cur_tag.attrs_a[0][1]).
// For "&", whatever parse_character_reference('"', true) returns is appended
// instead. Always returns null: this state never emits a token.
tok_state_attribute_value_double_quoted = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case '"':
            // closing quote
            tok_state = tok_state_after_attribute_value_quoted
            break
        case '&':
            tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
            break
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so the cursor never runs past the input
            break
        default:
            tok_cur_tag.attrs_a[0][1] += c
    }
    return null
}
4886
// 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
//
// Consumes one character of a single-quoted attribute value and appends it
// to the value of the attribute being built (tok_cur_tag.attrs_a[0][1]).
// For "&", whatever parse_character_reference("'", true) returns is appended
// instead. Always returns null: this state never emits a token.
tok_state_attribute_value_single_quoted = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case "'":
            // closing quote
            tok_state = tok_state_after_attribute_value_quoted
            break
        case '&':
            tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
            break
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so the cursor never runs past the input
            break
        default:
            tok_cur_tag.attrs_a[0][1] += c
    }
    return null
}
4910
// 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
//
// Consumes one character of an unquoted attribute value. Whitespace ends the
// value, ">" ends the whole tag, and everything else is appended to the
// value of the attribute being built (tok_cur_tag.attrs_a[0][1]). For "&",
// whatever parse_character_reference('>', true) returns is appended instead.
//
// Returns the tag token when ">" closes the tag, otherwise null.
tok_state_attribute_value_unquoted = function () {
    var c, tmp
    switch (c = txt.charAt(cur++)) {
        case "\t":
        case "\n":
        case "\u000c":
        case ' ':
            tok_state = tok_state_before_attribute_name
            break
        case '&':
            tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
            break
        case '>':
            tok_state = tok_state_data
            tmp = tok_cur_tag
            tok_cur_tag = null
            return tmp
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so the cursor never runs past the input
            break
        case '"':
        case "'":
        case '<':
        case '=':
        case '`':
            // these are kept as part of the value, but they are an error
            parse_error()
            // fall through to Anything else
        default:
            tok_cur_tag.attrs_a[0][1] += c
    }
    return null
}
4943
// 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
//
// Reads the character right after the closing quote of an attribute value.
// Whitespace, "/" and ">" are the only valid continuations; any other
// character is reported as a parse error and handed back to be read again
// as the start of the next attribute.
//
// Returns the tag token when ">" closes the tag, otherwise null.
tok_state_after_attribute_value_quoted = function () {
    var c, tmp
    switch (c = txt.charAt(cur++)) {
        case "\t":
        case "\n":
        case "\u000c":
        case ' ':
            tok_state = tok_state_before_attribute_name
            break
        case '/':
            tok_state = tok_state_self_closing_start_tag
            break
        case '>':
            tok_state = tok_state_data
            tmp = tok_cur_tag
            tok_cur_tag = null
            return tmp
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so the cursor never runs past the input
            break
        default:
            // missing whitespace between attributes
            parse_error()
            tok_state = tok_state_before_attribute_name
            cur -= 1 // we didn't handle that char
    }
    return null
}
4974
// 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
//
// Reads the character after a "/" inside a tag. A ">" sets the
// 'self-closing' flag on the current tag and emits it. Anything else is a
// parse error and the character is handed back to be read again by the next
// state (data at end of input, before-attribute-name otherwise).
//
// Returns tok_cur_tag when the tag is emitted, otherwise undefined.
tok_state_self_closing_start_tag = function () {
    switch (txt.charAt(cur++)) {
        case '>':
            tok_cur_tag.flag('self-closing', true)
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            break
        default:
            parse_error()
            tok_state = tok_state_before_attribute_name
    }
    // both error paths hand the character back
    cur -= 1 // reconsume
}
4995
// 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
// WARNING: put a comment token in tok_cur_tag before setting this state
//
// Takes everything from the cursor up to (not including) the next ">", or
// up to the end of the input when there is none, replaces each U+0000 with
// U+FFFD and appends the result to tok_cur_tag.text. The cursor ends up
// just past the ">" (or at the end of the input). Returns tok_cur_tag.
tok_state_bogus_comment = function () {
    var gt_at, stop
    gt_at = txt.indexOf('>', cur)
    stop = gt_at === -1 ? txt.length : gt_at
    tok_cur_tag.text += txt.slice(cur, stop).replace(/\u0000/g, "\ufffd")
    // step over the ">" itself when there was one
    cur = gt_at === -1 ? stop : stop + 1
    tok_state = tok_state_data
    return tok_cur_tag
}
5013
// 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
//
// Looks ahead from the cursor (without consuming a character first) to
// decide what kind of markup declaration this is:
//   "--"                        -> start a comment token
//   "doctype" (any letter case) -> doctype state
//   "[CDATA[" while the adjusted current node exists and is outside the
//   HTML namespace              -> CDATA section state
//   anything else               -> parse error, bogus comment
// Never emits a token; always returns undefined.
tok_state_markup_declaration_open = function () {
    var node
    if (txt.slice(cur, cur + 2) === '--') {
        cur += 2
        tok_cur_tag = new_comment_token('')
        tok_state = tok_state_comment_start
    } else if (txt.slice(cur, cur + 7).toLowerCase() === 'doctype') {
        cur += 7
        tok_state = tok_state_doctype
    } else {
        node = adjusted_current_node()
        if (node && node.namespace !== NS_HTML && txt.slice(cur, cur + 7) === '[CDATA[') {
            cur += 7
            tok_state = tok_state_cdata_section
        } else {
            // Otherwise
            parse_error()
            tok_cur_tag = new_comment_token('')
            tok_state = tok_state_bogus_comment
        }
    }
}
5039
// 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
//
// Reads the first character of a comment's data (tok_cur_tag is the comment
// token, its data lives in tok_cur_tag.text).
//
// Returns the comment token when the comment ends here (">" or EOF),
// otherwise null.
tok_state_comment_start = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case '-':
            tok_state = tok_state_comment_start_dash
            break
        case "\u0000":
            // The replacement character belongs to the comment's data; it
            // must not be emitted as a separate character token.
            parse_error()
            tok_cur_tag.text += "\ufffd"
            tok_state = tok_state_comment
            break
        case '>':
            // "<!-->": empty comment, closed too early
            parse_error()
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            tok_cur_tag.text += c
            tok_state = tok_state_comment
    }
    return null
}
5069
// 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
//
// Reads the character after a "-" that opened the comment's data. A second
// "-" may be the start of the closing "-->"; ">" or end of input end the
// comment with a parse error; anything else means the pending "-" was
// ordinary data, so it is appended together with the new character
// (U+0000 becomes U+FFFD and is a parse error).
//
// Returns the comment token when the comment ends here, otherwise null.
tok_state_comment_start_dash = function () {
    var ch = txt.charAt(cur++)
    if (ch === '-') {
        tok_state = tok_state_comment_end
        return null
    }
    if (ch === '>' || ch === '') {
        parse_error()
        tok_state = tok_state_data
        if (ch === '') { // EOF
            cur -= 1 // reconsume
        }
        return tok_cur_tag
    }
    if (ch === "\u0000") {
        parse_error()
        ch = "\ufffd"
    }
    tok_cur_tag.text += "-" + ch
    tok_state = tok_state_comment
    return null
}
5099
// 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
//
// Reads one character of comment data. "-" is held back (it may begin the
// closing "-->"); every other character is appended to tok_cur_tag.text,
// with U+0000 replaced by U+FFFD and reported as a parse error.
//
// Returns the comment token at end of input, otherwise null.
tok_state_comment = function () {
    var ch = txt.charAt(cur++)
    if (ch === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    if (ch === '-') {
        tok_state = tok_state_comment_end_dash
        return null
    }
    if (ch === "\u0000") {
        parse_error()
        ch = "\ufffd"
    }
    tok_cur_tag.text += ch
    return null
}
5122
// 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
//
// Reads the character after a "-" seen inside comment data. A second "-"
// moves on to the comment end state. Otherwise the held-back "-" was plain
// data and is appended together with the new character (U+0000 becomes
// U+FFFD and is a parse error).
//
// Returns the comment token at end of input, otherwise null.
tok_state_comment_end_dash = function () {
    var ch = txt.charAt(cur++)
    if (ch === '-') {
        tok_state = tok_state_comment_end
        return null
    }
    if (ch === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    if (ch === "\u0000") {
        parse_error()
        ch = "\ufffd"
    }
    tok_cur_tag.text += "-" + ch
    tok_state = tok_state_comment
    return null
}
5147
// 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
//
// Reads the character after "--" inside a comment. Only ">" is valid here
// (it closes the comment); every other input is a parse error:
//   "!"    -> comment end bang state
//   "-"    -> one "-" is added to the data, state unchanged
//   EOF    -> the comment is emitted as it stands
//   other  -> "--" plus the character (U+0000 as U+FFFD) goes into the data
//
// Returns the comment token when the comment ends here, otherwise null.
tok_state_comment_end = function () {
    var ch = txt.charAt(cur++)
    if (ch === '>') {
        tok_state = tok_state_data
        return tok_cur_tag
    }
    // everything below is an error of one kind or another
    parse_error()
    if (ch === '') { // EOF
        tok_state = tok_state_data
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    if (ch === '!') {
        tok_state = tok_state_comment_end_bang
        return null
    }
    if (ch === '-') {
        tok_cur_tag.text += '-'
        return null
    }
    if (ch === "\u0000") {
        ch = "\ufffd"
    }
    tok_cur_tag.text += "--" + ch
    tok_state = tok_state_comment
    return null
}
5182
// 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
//
// Reads the character after "--!" inside a comment. ">" closes the comment;
// otherwise the "--!" turns out to be data and is appended to
// tok_cur_tag.text.
//
// Returns the comment token when the comment ends here, otherwise null.
tok_state_comment_end_bang = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case '-':
            // Only "--!" is appended here. The "-" just read is accounted
            // for by the comment end dash state (which either appends it or
            // treats it as part of the closing sequence), so appending it
            // here as well would duplicate it.
            tok_cur_tag.text += "--!"
            tok_state = tok_state_comment_end_dash
            break
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
        case "\u0000":
            parse_error()
            tok_cur_tag.text += "--!\ufffd"
            tok_state = tok_state_comment
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            tok_cur_tag.text += "--!" + c
            tok_state = tok_state_comment
    }
    return null
}
5212
// 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
//
// Reads the character after the "doctype" keyword. Whitespace is expected;
// anything else is a parse error and the character is handed back. At end
// of input an empty-named doctype token with the 'force-quirks' flag is
// created and returned.
//
// Returns that doctype token at end of input, otherwise null.
tok_state_doctype = function () {
    var ch, token
    ch = txt.charAt(cur++)
    if (ch === "\t" || ch === "\u000a" || ch === "\u000c" || ch === ' ') {
        tok_state = tok_state_before_doctype_name
        return null
    }
    // no whitespace after the keyword
    parse_error()
    if (ch === '') { // EOF
        tok_state = tok_state_data
        token = new_doctype_token('')
        token.flag('force-quirks', true)
        cur -= 1 // reconsume
        return token
    }
    tok_state = tok_state_before_doctype_name
    cur -= 1 // reconsume
    return null
}
5238
// 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
//
// Skips whitespace, then starts a doctype token (stored in tok_cur_tag)
// whose name begins with the character read: lower-cased when
// is_uc_alpha() accepts it, U+FFFD for U+0000 (parse error), otherwise the
// character itself. A ">" or end of input before any name is a parse error
// and yields an empty-named token with the 'force-quirks' flag.
//
// Returns that force-quirks token for ">" / EOF, null when a name is
// started by an ordinary character, and undefined on the other paths.
tok_state_before_doctype_name = function () {
    var ch, token
    ch = txt.charAt(cur++)
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            return
    }
    if (is_uc_alpha(ch)) {
        tok_cur_tag = new_doctype_token(ch.toLowerCase())
        tok_state = tok_state_doctype_name
        return
    }
    switch (ch) {
        case "\u0000":
            parse_error()
            tok_cur_tag = new_doctype_token("\ufffd")
            tok_state = tok_state_doctype_name
            return
        case '>':
            parse_error()
            token = new_doctype_token('')
            token.flag('force-quirks', true)
            tok_state = tok_state_data
            return token
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            token = new_doctype_token('')
            token.flag('force-quirks', true)
            cur -= 1 // reconsume
            return token
    }
    // Anything else
    tok_cur_tag = new_doctype_token(ch)
    tok_state = tok_state_doctype_name
    return null
}
5277
// 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
//
// Reads one character of the doctype name and appends it to
// tok_cur_tag.name: lower-cased when is_uc_alpha() accepts it, U+FFFD for
// U+0000 (parse error), otherwise unchanged. Whitespace ends the name and
// ">" ends the whole doctype.
//
// Returns the doctype token for ">" and at end of input (the latter with
// 'force-quirks' set), null after appending an ordinary character, and
// undefined on the other paths.
tok_state_doctype_name = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            tok_state = tok_state_after_doctype_name
            return
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
    }
    if (is_uc_alpha(ch)) {
        tok_cur_tag.name += ch.toLowerCase()
        return
    }
    switch (ch) {
        case "\u0000":
            parse_error()
            tok_cur_tag.name += "\ufffd"
            return
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
    }
    // Anything else
    tok_cur_tag.name += ch
    return null
}
5310
// 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
//
// Skips whitespace after the doctype name, then expects ">" or one of the
// keywords "public" / "system" (any letter case). The keyword check starts
// at the character just read, so the remaining five characters are skipped
// when it matches. Anything else is a parse error: 'force-quirks' is set
// and the rest is handled by the bogus doctype state.
//
// Returns the doctype token for ">" and at end of input, null on the bogus
// path, and undefined on the other paths.
tok_state_after_doctype_name = function () {
    var ch, keyword
    ch = txt.charAt(cur++)
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            return
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
    }
    // Anything else: six characters beginning with the one just consumed
    keyword = txt.slice(cur - 1, cur + 5).toLowerCase()
    if (keyword === 'public') {
        cur += 5
        tok_state = tok_state_after_doctype_public_keyword
        return
    }
    if (keyword === 'system') {
        cur += 5
        tok_state = tok_state_after_doctype_system_keyword
        return
    }
    parse_error()
    tok_cur_tag.flag('force-quirks', true)
    tok_state = tok_state_bogus_doctype
    return null
}
5345
// 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
//
// Reads the character right after the "public" keyword. Whitespace is the
// expected separator. A quote directly after the keyword still starts the
// public identifier (reset to '') but is a parse error. ">", end of input
// and any other character are parse errors that set 'force-quirks'.
//
// Returns the doctype token for ">" and at end of input, null on the bogus
// path, and undefined on the other paths.
tok_state_after_doctype_public_keyword = function () {
    switch (txt.charAt(cur++)) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            tok_state = tok_state_before_doctype_public_identifier
            return
        case '"':
            parse_error()
            tok_cur_tag.public_identifier = ''
            tok_state = tok_state_doctype_public_identifier_double_quoted
            return
        case "'":
            parse_error()
            tok_cur_tag.public_identifier = ''
            tok_state = tok_state_doctype_public_identifier_single_quoted
            return
        case '>':
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
    }
    // Anything else
    parse_error()
    tok_cur_tag.flag('force-quirks', true)
    tok_state = tok_state_bogus_doctype
    return null
}
5385
// 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
//
// Skips whitespace between the "public" keyword and the public identifier.
// An opening quote resets tok_cur_tag.public_identifier to '' and moves to
// the matching quoted state. This is the well-formed path
// (<!DOCTYPE html PUBLIC "...">), so it reports no parse error. ">", end
// of input and any other character are parse errors that set
// 'force-quirks'.
//
// Returns the doctype token for ">" and at end of input, null on the bogus
// path, and undefined on the other paths.
tok_state_before_doctype_public_identifier = function () {
    var c
    c = txt.charAt(cur++)
    if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
        return
    }
    if (c === '"') {
        tok_cur_tag.public_identifier = ''
        tok_state = tok_state_doctype_public_identifier_double_quoted
        return
    }
    if (c === "'") {
        tok_cur_tag.public_identifier = ''
        tok_state = tok_state_doctype_public_identifier_single_quoted
        return
    }
    if (c === '>') {
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_data
        return tok_cur_tag
    }
    if (c === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        tok_cur_tag.flag('force-quirks', true)
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    // Anything else
    parse_error()
    tok_cur_tag.flag('force-quirks', true)
    tok_state = tok_state_bogus_doctype
    return null
}
5424
5425
// 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
//
// Reads one character of a double-quoted public identifier and appends it
// to tok_cur_tag.public_identifier (U+0000 becomes U+FFFD and is a parse
// error). The closing '"' ends the identifier; ">" or end of input inside
// the quotes is a parse error that sets 'force-quirks' and ends the doctype.
//
// Returns the doctype token for ">" and at end of input, null after
// appending an ordinary character, and undefined on the other paths.
tok_state_doctype_public_identifier_double_quoted = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case '"':
            tok_state = tok_state_after_doctype_public_identifier
            return
        case "\u0000":
            parse_error()
            tok_cur_tag.public_identifier += "\ufffd"
            return
        case '>':
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
    }
    // Anything else
    tok_cur_tag.public_identifier += ch
    return null
}
5456
// 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
//
// Reads one character of a single-quoted public identifier and appends it
// to tok_cur_tag.public_identifier (U+0000 becomes U+FFFD and is a parse
// error). The closing "'" ends the identifier; ">" or end of input inside
// the quotes is a parse error that sets 'force-quirks' and ends the doctype.
//
// Returns the doctype token for ">" and at end of input, null after
// appending an ordinary character, and undefined on the other paths.
tok_state_doctype_public_identifier_single_quoted = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case "'":
            tok_state = tok_state_after_doctype_public_identifier
            return
        case "\u0000":
            parse_error()
            tok_cur_tag.public_identifier += "\ufffd"
            return
        case '>':
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
    }
    // Anything else
    tok_cur_tag.public_identifier += ch
    return null
}
5487
// 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
//
// Reads the character right after the closing quote of the public
// identifier. Whitespace and ">" are the valid continuations. A quote with
// no whitespace before it still starts the system identifier (reset to '')
// but is a parse error. End of input and any other character are parse
// errors that set 'force-quirks'.
//
// Returns the doctype token for ">" and at end of input, null on the bogus
// path, and undefined on the other paths.
tok_state_after_doctype_public_identifier = function () {
    switch (txt.charAt(cur++)) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            tok_state = tok_state_between_doctype_public_and_system_identifiers
            return
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
        case '"':
            parse_error()
            tok_cur_tag.system_identifier = ''
            tok_state = tok_state_doctype_system_identifier_double_quoted
            return
        case "'":
            parse_error()
            tok_cur_tag.system_identifier = ''
            tok_state = tok_state_doctype_system_identifier_single_quoted
            return
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
    }
    // Anything else
    parse_error()
    tok_cur_tag.flag('force-quirks', true)
    tok_state = tok_state_bogus_doctype
    return null
}
5525
5526         // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
5527         tok_state_between_doctype_public_and_system_identifiers = function () {
5528                 var c
5529                 c = txt.charAt(cur++)
5530                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5531                         return
5532                 }
5533                 if (c === '>') {
5534                         tok_state = tok_state_data
5535                         return tok_cur_tag
5536                 }
5537                 if (c === '"') {
5538                         parse_error()
5539                         tok_cur_tag.system_identifier = ''
5540                         tok_state = tok_state_doctype_system_identifier_double_quoted
5541                         return
5542                 }
5543                 if (c === "'") {
5544                         parse_error()
5545                         tok_cur_tag.system_identifier = ''
5546                         tok_state = tok_state_doctype_system_identifier_single_quoted
5547                         return
5548                 }
5549                 if (c === '') { // EOF
5550                         parse_error()
5551                         tok_state = tok_state_data
5552                         tok_cur_tag.flag('force-quirks', true)
5553                         cur -= 1 // reconsume
5554                         return tok_cur_tag
5555                 }
5556                 // Anything else
5557                 parse_error()
5558                 tok_cur_tag.flag('force-quirks', true)
5559                 tok_state = tok_state_bogus_doctype
5560                 return null
5561         }
5562
5563         // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
5564         tok_state_after_doctype_system_keyword = function () {
5565                 var c
5566                 c = txt.charAt(cur++)
5567                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5568                         tok_state = tok_state_before_doctype_system_identifier
5569                         return
5570                 }
5571                 if (c === '"') {
5572                         parse_error()
5573                         tok_cur_tag.system_identifier = ''
5574                         tok_state = tok_state_doctype_system_identifier_double_quoted
5575                         return
5576                 }
5577                 if (c === "'") {
5578                         parse_error()
5579                         tok_cur_tag.system_identifier = ''
5580                         tok_state = tok_state_doctype_system_identifier_single_quoted
5581                         return
5582                 }
5583                 if (c === '>') {
5584                         parse_error()
5585                         tok_cur_tag.flag('force-quirks', true)
5586                         tok_state = tok_state_data
5587                         return tok_cur_tag
5588                 }
5589                 if (c === '') { // EOF
5590                         parse_error()
5591                         tok_state = tok_state_data
5592                         tok_cur_tag.flag('force-quirks', true)
5593                         cur -= 1 // reconsume
5594                         return tok_cur_tag
5595                 }
5596                 // Anything else
5597                 parse_error()
5598                 tok_cur_tag.flag('force-quirks', true)
5599                 tok_state = tok_state_bogus_doctype
5600                 return null
5601         }
5602
5603         // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
5604         tok_state_before_doctype_system_identifier = function () {
5605                 var c
5606                 c = txt.charAt(cur++)
5607                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5608                         return
5609                 }
5610                 if (c === '"') {
5611                         tok_cur_tag.system_identifier = ''
5612                         tok_state = tok_state_doctype_system_identifier_double_quoted
5613                         return
5614                 }
5615                 if (c === "'") {
5616                         tok_cur_tag.system_identifier = ''
5617                         tok_state = tok_state_doctype_system_identifier_single_quoted
5618                         return
5619                 }
5620                 if (c === '>') {
5621                         parse_error()
5622                         tok_cur_tag.flag('force-quirks', true)
5623                         tok_state = tok_state_data
5624                         return tok_cur_tag
5625                 }
5626                 if (c === '') { // EOF
5627                         parse_error()
5628                         tok_state = tok_state_data
5629                         tok_cur_tag.flag('force-quirks', true)
5630                         cur -= 1 // reconsume
5631                         return tok_cur_tag
5632                 }
5633                 // Anything else
5634                 parse_error()
5635                 tok_cur_tag.flag('force-quirks', true)
5636                 tok_state = tok_state_bogus_doctype
5637                 return null
5638         }
5639
5640         // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
5641         tok_state_doctype_system_identifier_double_quoted = function () {
5642                 var c
5643                 c = txt.charAt(cur++)
5644                 if (c === '"') {
5645                         tok_state = tok_state_after_doctype_system_identifier
5646                         return
5647                 }
5648                 if (c === "\u0000") {
5649                         parse_error()
5650                         tok_cur_tag.system_identifier += "\ufffd"
5651                         return
5652                 }
5653                 if (c === '>') {
5654                         parse_error()
5655                         tok_cur_tag.flag('force-quirks', true)
5656                         tok_state = tok_state_data
5657                         return tok_cur_tag
5658                 }
5659                 if (c === '') { // EOF
5660                         parse_error()
5661                         tok_state = tok_state_data
5662                         tok_cur_tag.flag('force-quirks', true)
5663                         cur -= 1 // reconsume
5664                         return tok_cur_tag
5665                 }
5666                 // Anything else
5667                 tok_cur_tag.system_identifier += c
5668                 return null
5669         }
5670
5671         // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
5672         tok_state_doctype_system_identifier_single_quoted = function () {
5673                 var c
5674                 c = txt.charAt(cur++)
5675                 if (c === "'") {
5676                         tok_state = tok_state_after_doctype_system_identifier
5677                         return
5678                 }
5679                 if (c === "\u0000") {
5680                         parse_error()
5681                         tok_cur_tag.system_identifier += "\ufffd"
5682                         return
5683                 }
5684                 if (c === '>') {
5685                         parse_error()
5686                         tok_cur_tag.flag('force-quirks', true)
5687                         tok_state = tok_state_data
5688                         return tok_cur_tag
5689                 }
5690                 if (c === '') { // EOF
5691                         parse_error()
5692                         tok_state = tok_state_data
5693                         tok_cur_tag.flag('force-quirks', true)
5694                         cur -= 1 // reconsume
5695                         return tok_cur_tag
5696                 }
5697                 // Anything else
5698                 tok_cur_tag.system_identifier += c
5699                 return null
5700         }
5701
5702         // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
5703         tok_state_after_doctype_system_identifier = function () {
5704                 var c
5705                 c = txt.charAt(cur++)
5706                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5707                         return
5708                 }
5709                 if (c === '>') {
5710                         tok_state = tok_state_data
5711                         return tok_cur_tag
5712                 }
5713                 if (c === '') { // EOF
5714                         parse_error()
5715                         tok_state = tok_state_data
5716                         tok_cur_tag.flag('force-quirks', true)
5717                         cur -= 1 // reconsume
5718                         return tok_cur_tag
5719                 }
5720                 // Anything else
5721                 parse_error()
5722                 // do _not_ tok_cur_tag.flag 'force-quirks', true
5723                 tok_state = tok_state_bogus_doctype
5724                 return null
5725         }
5726
5727         // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
5728         tok_state_bogus_doctype = function () {
5729                 var c
5730                 c = txt.charAt(cur++)
5731                 if (c === '>') {
5732                         tok_state = tok_state_data
5733                         return tok_cur_tag
5734                 }
5735                 if (c === '') { // EOF
5736                         tok_state = tok_state_data
5737                         cur -= 1 // reconsume
5738                         return tok_cur_tag
5739                 }
5740                 // Anything else
5741                 return null
5742         }
5743
5744         // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
5745         tok_state_cdata_section = function () {
5746                 var next_gt, val
5747                 tok_state = tok_state_data
5748                 next_gt = txt.indexOf(']]>', cur)
5749                 if (next_gt === -1) {
5750                         val = txt.substr(cur)
5751                         cur = txt.length
5752                 } else {
5753                         val = txt.substr(cur, next_gt - cur)
5754                         cur = next_gt + 3
5755                 }
5756                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5757                 if (val.length > 0) {
5758                         return new_character_token(val) // fixfull split
5759                 }
5760                 return null
5761         }
5762
5763         // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5764         // Don't set this as a state, just call it
5765         // returns a string (NOT a text node)
5766         parse_character_reference = function (allowed_char, in_attr) {
5767                 var base, c, charset, code_point, decoded, i, max, start
5768                 if (allowed_char == null) {
5769                         allowed_char = null
5770                 }
5771                 if (in_attr == null) {
5772                         in_attr = false
5773                 }
5774                 if (cur >= txt.length) {
5775                         return '&'
5776                 }
5777                 switch (c = txt.charAt(cur)) {
5778                         case "\t":
5779                         case "\n":
5780                         case "\u000c":
5781                         case ' ':
5782                         case '<':
5783                         case '&':
5784                         case '':
5785                         case allowed_char:
5786                                 // explicitly not a parse error
5787                                 return '&'
5788                         break
5789                         case ';':
5790                                 // there has to be "one or more" alnums between & and ; to be a parse error
5791                                 return '&'
5792                         break
5793                         case '#':
5794                                 if (cur + 1 >= txt.length) {
5795                                         return '&'
5796                                 }
5797                                 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
5798                                         base = 16
5799                                         charset = hex_chars
5800                                         start = cur + 2
5801                                 } else {
5802                                         charset = digits
5803                                         start = cur + 1
5804                                         base = 10
5805                                 }
5806                                 i = 0
5807                                 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5808                                         i += 1
5809                                 }
5810                                 if (i === 0) {
5811                                         return '&'
5812                                 }
5813                                 cur = start + i
5814                                 if (txt.charAt(start + i) === ';') {
5815                                         cur += 1
5816                                 } else {
5817                                         parse_error()
5818                                 }
5819                                 code_point = txt.substr(start, i)
5820                                 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5821                                         code_point = code_point.substr(1)
5822                                 }
5823                                 code_point = parseInt(code_point, base)
5824                                 if (unicode_fixes[code_point] != null) {
5825                                         parse_error()
5826                                         return unicode_fixes[code_point]
5827                                 } else {
5828                                         if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
5829                                                 parse_error()
5830                                                 return "\ufffd"
5831                                         } else {
5832                                                 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5833                                                         parse_error()
5834                                                 }
5835                                                 return from_code_point(code_point)
5836                                         }
5837                                 }
5838                                 return
5839                         break
5840                         default:
5841                                 for (i = 0; i < 31; ++i) {
5842                                         if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5843                                                 break
5844                                         }
5845                                 }
5846                                 if (i === 0) {
5847                                         // exit early, because parse_error() below needs at least one alnum
5848                                         return '&'
5849                                 }
5850                                 if (txt.charAt(cur + i) === ';') {
5851                                         decoded = decode_named_char_ref(txt.substr(cur, i))
5852                                         i += 1 // scan past the ';' (after, so we dno't pass it to decode)
5853                                         if (decoded != null) {
5854                                                 cur += i
5855                                                 return decoded
5856                                         }
5857                                         // else FALL THROUGH (check for match without last char(s) or ";")
5858                                 }
5859                                 // no ';' terminator (only legacy char refs)
5860                                 max = i
5861                                 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5862                                         c = legacy_char_refs[txt.substr(cur, i)]
5863                                         if (c != null) {
5864                                                 if (in_attr) {
5865                                                         if (txt.charAt(cur + i) === '=') {
5866                                                                 // "because some legacy user agents will
5867                                                                 // misinterpret the markup in those cases"
5868                                                                 parse_error()
5869                                                                 return '&'
5870                                                         }
5871                                                         if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5872                                                                 // this makes attributes forgiving about url args
5873                                                                 return '&'
5874                                                         }
5875                                                 }
5876                                                 // ok, and besides the weird exceptions for attributes...
5877                                                 // return the matching char
5878                                                 cur += i // consume entity chars
5879                                                 parse_error() // because no terminating ";"
5880                                                 return c
5881                                         }
5882                                 }
5883                                 parse_error()
5884                                 return '&'
5885                 }
5886                 // never reached
5887         }
5888
5889         eat_next_token_if_newline = function () {
5890                 var old_cur, t
5891                 old_cur = cur
5892                 t = null
5893                 while (t == null) {
5894                         t = tok_state()
5895                 }
5896                 if (t.type === TYPE_TEXT) {
5897                         // definition of a newline depends on whether it was a character ref or not
5898                         if (cur - old_cur === 1) {
5899                                 // not a character reference
5900                                 if (t.text === "\u000d" || t.text === "\u000a") {
5901                                         return
5902                                 }
5903                         } else {
5904                                 if (t.text === "\u000a") {
5905                                         return
5906                                 }
5907                         }
5908                 }
5909                 // not a "newline"
5910                 cur = old_cur
5911         }
5912
5913         // tree constructor initialization
5914         // see comments on TYPE_TAG/etc for the structure of this data
5915         txt = args_html
5916         cur = 0
5917         doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5918         doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5919         fragment_root = null // fragment parsing algorithm returns children of this
5920         open_els = []
5921         afe = [] // active formatting elements
5922         template_ins_modes = []
5923         ins_mode = ins_mode_initial
5924         original_ins_mode = ins_mode // TODO check spec
5925         flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5926         flag_frameset_ok = true
5927         flag_parsing = true
5928         flag_foster_parenting = false
5929         form_element_pointer = null
5930         temporary_buffer = null
5931         pending_table_character_tokens = []
5932         head_element_pointer = null
5933         flag_fragment_parsing = false
5934         context_element = null
5935         prev_node_id = 0 // just for debugging
5936
5937         // tokenizer initialization
5938         tok_state = tok_state_data
5939
5940         parse_init = function () {
5941                 var el, f, ns, old_doc, t
5942                 // fragment parsing (text arg)
5943                 if (args.fragment != null) {
5944                         // this handles the fragment from the tests in the format described here:
5945                         // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
5946                         f = args.fragment
5947                         ns = NS_HTML
5948                         if (f.substr(0, 5) === 'math ') {
5949                                 f = f.substr(5)
5950                                 ns = NS_MATHML
5951                         } else if (f.substr(0, 4) === 'svg ') {
5952                                 f = f.substr(4)
5953                                 ns = NS_SVG
5954                         }
5955                         t = new_open_tag(f)
5956                         context_element = token_to_element(t, ns)
5957                         context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5958                         context_element.document.flag('quirks mode', QUIRKS_NO)
5959                 }
5960                 // fragment parsing (Node arg)
5961                 if (args.context != null) {
5962                         context_element = args.context
5963                 }
5964
5965                 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5966                 // fragment parsing algorithm
5967                 if (context_element != null) {
5968                         flag_fragment_parsing = true
5969                         doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5970                         // search up the tree from context, to try to find it's document,
5971                         // because this file only puts a "document" property on the root
5972                         // element.
5973                         old_doc = null
5974                         el = context_element
5975                         while (true) {
5976                                 if (el.document != null) {
5977                                         old_doc = el.document
5978                                         break
5979                                 }
5980                                 if (el.parent) {
5981                                         el = el.parent
5982                                 } else {
5983                                         break
5984                                 }
5985                         }
5986                         if (old_doc) {
5987                                 doc.flag('quirks mode', old_doc.flag('quirks mode'))
5988                         }
5989                         // set tok_state
5990                         if (context_element.namespace === NS_HTML) {
5991                                 switch (context_element.name) {
5992                                         case 'title':
5993                                         case 'textarea':
5994                                                 tok_state = tok_state_rcdata
5995                                         break
5996                                         case 'style':
5997                                         case 'xmp':
5998                                         case 'iframe':
5999                                         case 'noembed':
6000                                         case 'noframes':
6001                                                 tok_state = tok_state_rawtext
6002                                         break
6003                                         case 'script':
6004                                                 tok_state = tok_state_script_data
6005                                         break
6006                                         case 'noscript':
6007                                                 if (flag_scripting) {
6008                                                         tok_state = tok_state_rawtext
6009                                                 }
6010                                         break
6011                                         case 'plaintext':
6012                                                 tok_state = tok_state_plaintext
6013                                 }
6014                         }
6015                         fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
6016                         doc.children.push(fragment_root)
6017                         fragment_root.document = doc
6018                         open_els = [fragment_root]
6019                         if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
6020                                 template_ins_modes.unshift(ins_mode_in_template)
6021                         }
6022                         // fixfull create token for context (it should have it's original one already)
6023                         reset_ins_mode()
6024                         // set form_element pointer... in the foreign doc?!
6025                         el = context_element
6026                         while (true) {
6027                                 if (el.name === 'form' && el.namespace === NS_HTML) {
6028                                         form_element_pointer = el
6029                                         break
6030                                 }
6031                                 if (el.parent) {
6032                                         el = el.parent
6033                                 } else {
6034                                         break
6035                                 }
6036                         }
6037                 }
6038
6039                 // text pre-processing
6040                 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
6041                 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6042                 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6043         }
6044
6045         // http://www.w3.org/TR/html5/syntax.html#tree-construction
6046         parse_main_loop = function () {
6047                 var t
6048                 while (flag_parsing) {
6049                         t = tok_state()
6050                         if (t != null) {
6051                                 process_token(t)
6052                                 // fixfull parse error if has self-closing flag, but it wasn't acknolwedged
6053                         }
6054                 }
6055         }
6056         parse_init()
6057         parse_main_loop()
6058
6059         if (flag_fragment_parsing) {
6060                 return fragment_root.children
6061         }
6062         return doc.children
6063 }
6064
// entry point and node class
exports.parse = parse_html
exports.Node = Node

// debug log helpers
exports.debug_log_reset = debug_log_reset
exports.debug_log_each = debug_log_each

// node type constants
exports.TYPE_TAG = TYPE_TAG
exports.TYPE_TEXT = TYPE_TEXT
exports.TYPE_COMMENT = TYPE_COMMENT
exports.TYPE_DOCTYPE = TYPE_DOCTYPE

// namespace constants
exports.NS_HTML = NS_HTML
exports.NS_MATHML = NS_MATHML
exports.NS_SVG = NS_SVG

// values of the 'quirks mode' flag
exports.QUIRKS_NO = QUIRKS_NO
exports.QUIRKS_LIMITED = QUIRKS_LIMITED
exports.QUIRKS_YES = QUIRKS_YES