JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
parser tests to javascript
[peach-html5-editor.git] / parser.js
1 // todo remove refs and lens, js, ls
2 // run test suite!
3
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
6 //
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
10 // later version.
11 //
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
15 // details.
16 //
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20
21 // This file implements a thorough parser for html5, meant to be used by a
22 // WYSIWYG editor.
23
24 // The implementation is a pretty direct implementation of the parsing algorithm
25 // described here:
26 //
27 //     http://www.w3.org/TR/html5/syntax.html
28 //
29 // except for some places marked "WHATWG" that are implemented as described here:
30 //
31 //     https://html.spec.whatwg.org/multipage/syntax.html
32 //
33 // This code passes all of the tests in the .dat files at:
34 //
35 //     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
36
37
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
41 //
42 // See README.md for how to run this file in the browser or in node.js.
43 //
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
46 //
47 // Call it like this:
48 //
49 //     peach_parser.parse("<p><b>hi</p>")
50 //
51 // Or, if you don't want <html><head><body>/etc, do this:
52 //
53 //     peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
54 //
// The return value is an array of Nodes. A Node contains:
//     type: one of: "tag", "text", "comment", "doctype"
//     name: tag name (also used for the doctype name)
//     text: contents for text/comment nodes
//     attrs: object of attributes, eg {href: "#main"}
//     children: array of Nodes
//     namespace: one of: "html", "mathml", "svg"
//     parent: another Node or null
62
// This code is a work in progress, e.g. try searching this file for "fixfull",
// "TODO" and "FIXME"
65
66
67 // Notes:  stacks/lists
68 //
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
71 // his head straight:
72 //
73 // stacks grow downward (current element is index=0)
74 //
75 // example: open_els = [a, b, c, d, e, f, g]
76 //
77 // "grows downwards" means it's visualized like this: (index: el "names")
78 //
79 //   6: g "start of the list", "topmost", "first"
80 //   5: f
81 //   4: e "previous" (to d), "above", "before"
82 //   3: d   (previous/next are relative to this element)
83 //   2: c "next", "after", "lower", "below"
84 //   1: b
85 //   0: a "end of the list", "current node", "bottommost", "last"
86
// Detect the host environment and pick the object that receives this file's
// public API:
//   - CommonJS (a `module` with non-null `module.exports`): context is
//     'module' and exports is module.exports
//   - otherwise: context is 'browser' and exports is a fresh
//     window.peach_parser object
// Both names are declared with `var` so the assignments below never create
// implicit globals (which throw a ReferenceError in strict mode).
var context, exports
if ((typeof module) !== 'undefined' && (module.exports != null)) {
        context = 'module'
        exports = module.exports
} else {
        context = 'browser'
        window.peach_parser = {}
        exports = window.peach_parser
}
95
// from_code_point(x)
//
// Returns the string for the Unicode code point x (a number).
// Uses String.fromCodePoint when the host provides it. Otherwise falls back
// to String.fromCharCode, encoding code points above 0xffff as a UTF-16
// surrogate pair.
var from_code_point = function (x) {
        var offset
        if (String.fromCodePoint != null) {
                return String.fromCodePoint(x)
        }
        if (x <= 0xffff) {
                return String.fromCharCode(x)
        }
        // high surrogate carries the top 10 bits, low surrogate the bottom 10
        offset = x - 0x10000
        return String.fromCharCode((offset >> 10) + 0xd800, (offset % 0x400) + 0xdc00)
}
107
// Each node is an object of the Node class. Here are the Node types:
var TYPE_TAG = 'tag' // name, {attributes}, [children]
var TYPE_TEXT = 'text' // "text"
var TYPE_COMMENT = 'comment'
var TYPE_DOCTYPE = 'doctype'
// the following types are emitted by the tokenizer, but shouldn't end up in the tree:
var TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
var TYPE_END_TAG = 5 // name
var TYPE_EOF = 6
var TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
var TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm

// namespace constants
var NS_HTML = 'html'
var NS_MATHML = 'mathml'
var NS_SVG = 'svg'

// quirks mode constants
var QUIRKS_NO = 'no'
var QUIRKS_LIMITED = 'limited'
var QUIRKS_YES = 'yes'
129
// queue up debug logs, so eg they can be shown only for tests that fail
var g_debug_log = []

// Discard every queued message.
var debug_log_reset = function () {
        g_debug_log = []
}

// Append one message to the queue.
var debug_log = function (str) {
        g_debug_log.push(str)
}

// Call cb(message) for each queued message, oldest first. The length is
// re-read on every iteration, so messages queued by cb itself are visited too.
var debug_log_each = function (cb) {
        var i
        for (i = 0; i < g_debug_log.length; ++i) {
                cb(g_debug_log[i])
        }
}
144
// Source of automatically assigned Node ids: incremented for every Node that
// is created without an explicit args.id.
var prev_node_id = 0

// Node(type, args)
//
// One object class is used both for tokens coming out of the tokenizer and
// for nodes in the resulting tree.
//
//   type: one of the TYPE_* constants
//   args: optional object; any of the fields below may be omitted (or null)
//         and then get the default shown:
//     name ('')             tag name
//     text ('')             contents for text/comment nodes
//     attrs ({})            object of attributes
//     children ([])         array of Nodes
//     namespace (NS_HTML)
//     parent (null)
//     attrs_a ([])          attrs in progress, TYPE_START_TAG only. The
//                           historical key "attr_k" is still accepted.
//     token (null)          Node that acknowledge_self_closing() forwards to
//     flags ({})            storage behind flag()
//     id                    when given, the new node's id is args.id + "+";
//                           otherwise the next value of the global counter,
//                           as a string
function Node (type, args) {
        if (args == null) {
                args = {}
        }
        this.type = type // one of the TYPE_* constants above
        this.name = args.name != null ? args.name : '' // tag name
        this.text = args.text != null ? args.text : '' // contents for text/comment nodes
        this.attrs = args.attrs != null ? args.attrs : {}
        this.children = args.children != null ? args.children : []
        this.namespace = args.namespace != null ? args.namespace : NS_HTML
        this.parent = args.parent != null ? args.parent : null
        // private:
        // The property is called attrs_a, but this constructor used to read
        // only args.attr_k, so a caller passing "attrs_a" silently got [].
        // Accept the matching key first and keep the old one working.
        if (args.attrs_a != null) {
                this.attrs_a = args.attrs_a
        } else if (args.attr_k != null) {
                this.attrs_a = args.attr_k
        } else {
                this.attrs_a = []
        }
        this.token = args.token != null ? args.token : null
        this.flags = args.flags != null ? args.flags : {}
        if (args.id != null) {
                this.id = args.id + "+"
        } else {
                this.id = "" + (++prev_node_id)
        }
}

// Record the 'did_self_close' flag on this.token when there is one,
// otherwise on this node itself.
Node.prototype.acknowledge_self_closing = function () {
        var target = this.token != null ? this.token : this
        target.flag('did_self_close', true)
}

// flag(key)        -> returns the stored value (undefined if never set)
// flag(key, value) -> stores value, returns undefined
// A null/undefined value is indistinguishable from "no value", so it reads
// the flag rather than clearing it.
Node.prototype.flag = function (key, value) {
        if (value == null) {
                return this.flags[key]
        }
        this.flags[key] = value
}
183
// helpers: (only take args that are normally known when parser creates nodes)
// Each one is a thin wrapper around `new Node(type, ...)` for a fixed type.
var new_open_tag = function (name) {
        return new Node(TYPE_START_TAG, {name: name})
}
var new_end_tag = function (name) {
        return new Node(TYPE_END_TAG, {name: name})
}
var new_element = function (name) {
        return new Node(TYPE_TAG, {name: name})
}
var new_text_node = function (txt) {
        return new Node(TYPE_TEXT, {text: txt})
}
// character tokens and text nodes share the same representation
var new_character_token = new_text_node
var new_comment_token = function (txt) {
        return new Node(TYPE_COMMENT, {text: txt})
}
var new_doctype_token = function (name) {
        return new Node(TYPE_DOCTYPE, {name: name})
}
var new_eof_token = function () {
        return new Node(TYPE_EOF)
}
var new_afe_marker = function () {
        return new Node(TYPE_AFE_MARKER)
}
var new_aaa_bookmark = function () {
        return new Node(TYPE_AAA_BOOKMARK)
}
213
// ASCII character classes, as strings to be searched with indexOf()
var lc_alpha = "abcdefghijklmnopqrstuvwxyz"
var uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
var digits = "0123456789"
var alnum = lc_alpha + uc_alpha + digits
var hex_chars = digits + "abcdefABCDEF"

// True only for a single ASCII uppercase letter. The length check matters:
// indexOf alone would also accept '' and multi-character runs like "AB".
var is_uc_alpha = function (str) {
        return str.length === 1 && uc_alpha.indexOf(str) > -1
}
// True only for a single ASCII lowercase letter.
var is_lc_alpha = function (str) {
        return str.length === 1 && lc_alpha.indexOf(str) > -1
}

// some SVG elements have dashes in them
var tag_name_chars = alnum + "-"
229
// http://www.w3.org/TR/html5/infrastructure.html#space-character
// (tab, line feed, form feed, carriage return, space)
var space_chars = "\u0009\u000a\u000c\u000d\u0020"

// True when txt is exactly one of the space characters above.
var is_space = function (txt) {
        return txt.length === 1 && space_chars.indexOf(txt) > -1
}

// True when t is a TYPE_TEXT token/node whose text is exactly one space
// character.
var is_space_tok = function (t) {
        return t.type === TYPE_TEXT && t.text.length === 1 && space_chars.indexOf(t.text) > -1
}
238
// is_input_hidden_tok(t)
//
// Returns true when t is a start tag token (TYPE_START_TAG) whose "type"
// attribute equals "hidden", compared case-insensitively. Only the first
// "type" entry found in t.attrs_a (an array of [name, value] pairs) is
// considered. Returns false for every other token.
var is_input_hidden_tok = function (t) {
        var i, pair
        if (t.type !== TYPE_START_TAG) {
                return false
        }
        for (i = 0; i < t.attrs_a.length; ++i) {
                pair = t.attrs_a[i]
                if (pair[0] === 'type') {
                        return pair[1].toLowerCase() === 'hidden'
                }
        }
        return false
}
255
// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
// Broader than space_chars: also contains vertical tab, NEL, no-break space
// and the Unicode space/separator characters listed below.
var whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
258
// Replacement characters keyed by code point number: 0x00 maps to U+FFFD,
// and the listed code points in the 0x80-0x9F range map to the characters
// below. (0x81, 0x8D, 0x8F, 0x90 and 0x9D have no entry.)
var unicode_fixes = {
        0x00: "\uFFFD",
        0x80: "\u20AC",
        0x82: "\u201A",
        0x83: "\u0192",
        0x84: "\u201E",
        0x85: "\u2026",
        0x86: "\u2020",
        0x87: "\u2021",
        0x88: "\u02C6",
        0x89: "\u2030",
        0x8A: "\u0160",
        0x8B: "\u2039",
        0x8C: "\u0152",
        0x8E: "\u017D",
        0x91: "\u2018",
        0x92: "\u2019",
        0x93: "\u201C",
        0x94: "\u201D",
        0x95: "\u2022",
        0x96: "\u2013",
        0x97: "\u2014",
        0x98: "\u02DC",
        0x99: "\u2122",
        0x9A: "\u0161",
        0x9B: "\u203A",
        0x9C: "\u0153",
        0x9E: "\u017E",
        0x9F: "\u0178"
}
288
// Lowercased doctype public-identifier prefixes. Judging by the name, a
// doctype whose public identifier starts with one of these selects
// QUIRKS_YES; the matching code is outside this table.
// (No trailing comma after the last entry: old IE counts it as an extra
// undefined element.)
var quirks_yes_pi_prefixes = [
        "+//silmaril//dtd html pro v0r11 19970101//",
        "-//as//dtd html 3.0 aswedit + extensions//",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
        "-//ietf//dtd html 2.0 level 1//",
        "-//ietf//dtd html 2.0 level 2//",
        "-//ietf//dtd html 2.0 strict level 1//",
        "-//ietf//dtd html 2.0 strict level 2//",
        "-//ietf//dtd html 2.0 strict//",
        "-//ietf//dtd html 2.0//",
        "-//ietf//dtd html 2.1e//",
        "-//ietf//dtd html 3.0//",
        "-//ietf//dtd html 3.2 final//",
        "-//ietf//dtd html 3.2//",
        "-//ietf//dtd html 3//",
        "-//ietf//dtd html level 0//",
        "-//ietf//dtd html level 1//",
        "-//ietf//dtd html level 2//",
        "-//ietf//dtd html level 3//",
        "-//ietf//dtd html strict level 0//",
        "-//ietf//dtd html strict level 1//",
        "-//ietf//dtd html strict level 2//",
        "-//ietf//dtd html strict level 3//",
        "-//ietf//dtd html strict//",
        "-//ietf//dtd html//",
        "-//metrius//dtd metrius presentational//",
        "-//microsoft//dtd internet explorer 2.0 html strict//",
        "-//microsoft//dtd internet explorer 2.0 html//",
        "-//microsoft//dtd internet explorer 2.0 tables//",
        "-//microsoft//dtd internet explorer 3.0 html strict//",
        "-//microsoft//dtd internet explorer 3.0 html//",
        "-//microsoft//dtd internet explorer 3.0 tables//",
        "-//netscape comm. corp.//dtd html//",
        "-//netscape comm. corp.//dtd strict html//",
        "-//o'reilly and associates//dtd html 2.0//",
        "-//o'reilly and associates//dtd html extended 1.0//",
        "-//o'reilly and associates//dtd html extended relaxed 1.0//",
        "-//sq//dtd html 2.0 hotmetal + extensions//",
        "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
        "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
        "-//spyglass//dtd html 2.0 extended//",
        "-//sun microsystems corp.//dtd hotjava html//",
        "-//sun microsystems corp.//dtd hotjava strict html//",
        "-//w3c//dtd html 3 1995-03-24//",
        "-//w3c//dtd html 3.2 draft//",
        "-//w3c//dtd html 3.2 final//",
        "-//w3c//dtd html 3.2//",
        "-//w3c//dtd html 3.2s draft//",
        "-//w3c//dtd html 4.0 frameset//",
        "-//w3c//dtd html 4.0 transitional//",
        "-//w3c//dtd html experimental 19960712//",
        "-//w3c//dtd html experimental 970421//",
        "-//w3c//dtd w3 html//",
        "-//w3o//dtd w3 html 3.0//",
        "-//webtechs//dtd mozilla html 2.0//",
        "-//webtechs//dtd mozilla html//"
]
346
// These are the character references that don't need a terminating semicolon
// min length: 2, max: 6, none are a prefix of any other.
// Keys are the reference names without "&"; values are the decoded text.
// The two invisible characters (nbsp, shy) are written as escapes so they
// survive editing.
var legacy_char_refs = {
        Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
        aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
        aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
        Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
        curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
        ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
        euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
        Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
        igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
        lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
        Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
        Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
        Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
        pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
        shy: "\u00ad", sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
        times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
        ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
        yen: '¥', yuml: 'ÿ'
}
369
370 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
371 //raw_text_elements = ['script', 'style']
372 //escapable_raw_text_elements = ['textarea', 'title']
// SVG element names, in their canonical mixed case.
// http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
var svg_elements = [
        'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
        'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
        'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
        'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
        'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
        'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
        'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
        'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
        'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
        'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
        'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
        'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
        'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
        'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
        'view', 'vkern'
]
391
// MathML element names.
// http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
// ('mi' used to be listed twice; every name now appears exactly once.)
var mathml_elements = [
        'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
        'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
        'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
        'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
        'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
        'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
        'determinant', 'diff', 'divergence', 'divide', 'domain',
        'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
        'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
        'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
        'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
        'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
        'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
        'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
        'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
        'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
        'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
        'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
        'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
        'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
        'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
        'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
        'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
        'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
        'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
        'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
        'vectorproduct', 'xor'
]
423 // foreign_elements = [svg_elements..., mathml_elements...]
424 //normal_elements = All other allowed HTML elements are normal elements.
425
// Elements in the "special" category, as a map of tag name -> namespace.
// Test membership with: special_elements[name] === namespace
//
// NOTE: a name can map to only one namespace here. `title` is special in
// both HTML and SVG; this literal used to list it twice (a SyntaxError in
// ES5 strict mode) and the later SVG entry always won. Only that effective
// SVG entry is kept, so an HTML `title` does not match this table.
var special_elements = {
        // HTML:
        address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
        aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
        blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
        caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
        details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
        embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
        footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
        h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
        header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
        img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
        listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,

        menu: NS_HTML, menuitem: NS_HTML, // WHATWG adds these

        meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
        noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
        plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
        select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
        table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
        textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
        tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,

        // MathML:
        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
        'annotation-xml': NS_MATHML,

        // SVG:
        foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
}
457
// Tag names of the formatting elements (used as a set: name -> true)
var formatting_elements = {
        a: true, b: true, big: true, code: true, em: true, font: true, i: true,
        nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
        u: true
}
463
// MathML text integration points, as a map of tag name -> namespace
var mathml_text_integration = {
        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
}
// True when el's name is listed above and el is in the MathML namespace.
// Only reads el.name and el.namespace.
var is_mathml_text_integration_point = function (el) {
        return mathml_text_integration[el.name] === el.namespace
}
// is_html_integration(el)
//
// Returns true when el is one of:
//   - a MathML annotation-xml element whose "encoding" attribute is
//     "text/html" or "application/xhtml+xml" (case-insensitive)
//   - an SVG foreignObject, desc or title element
//
// DON'T PASS A TOKEN: the encoding is read from el.attrs (the attribute
// object), not from the attrs_a array.
var is_html_integration = function (el) {
        var encoding
        if (el.namespace === NS_MATHML) {
                if (el.name !== 'annotation-xml' || el.attrs.encoding == null) {
                        return false
                }
                encoding = el.attrs.encoding.toLowerCase()
                return encoding === 'text/html' || encoding === 'application/xhtml+xml'
        }
        if (el.namespace === NS_SVG) {
                return el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title'
        }
        return false
}
491
// The tables below map tag name -> namespace; test membership with
// table[el.name] === el.namespace

// heading elements
var h_tags = {
        h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
}

// table-related elements listed as foster parenting targets
var foster_parenting_targets = {
        table: NS_HTML,
        tbody: NS_HTML,
        tfoot: NS_HTML,
        thead: NS_HTML,
        tr: NS_HTML
}

// elements whose end tag can be implied
var end_tag_implied = {
        dd: NS_HTML,
        dt: NS_HTML,
        li: NS_HTML,
        option: NS_HTML,
        optgroup: NS_HTML,
        p: NS_HTML,
        rb: NS_HTML,
        rp: NS_HTML,
        rt: NS_HTML,
        rtc: NS_HTML
}
516
// el_is_special(e)
//
// Returns true when e (anything with .name and .namespace) is in the
// "special" category.
//
// special_elements stores a single namespace per tag name, and its `title`
// entry resolves to the SVG namespace, so a table lookup alone never
// recognises the HTML `title` element. It is checked explicitly here.
var el_is_special = function (e) {
        if (special_elements[e.name] === e.namespace) {
                return true
        }
        return e.name === 'title' && e.namespace === NS_HTML
}
520
// address, div and p: the special elements that el_is_special_not_adp()
// excludes
var adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }

// Returns true when el is in the "special" category but is not an HTML
// address, div or p element.
//
// special_elements stores a single namespace per tag name, and its `title`
// entry resolves to the SVG namespace, so the HTML `title` element has to be
// recognised explicitly.
var el_is_special_not_adp = function (el) {
        var special = special_elements[el.name] === el.namespace ||
                (el.name === 'title' && el.namespace === NS_HTML)
        return special && adp_els[el.name] !== el.namespace
}
525
// SVG tag names whose canonical spelling is mixed case.
// Maps the all-lowercase spelling to the canonical one.
var svg_name_fixes = {
        altglyph: 'altGlyph',
        altglyphdef: 'altGlyphDef',
        altglyphitem: 'altGlyphItem',
        animatecolor: 'animateColor',
        animatemotion: 'animateMotion',
        animatetransform: 'animateTransform',
        clippath: 'clipPath',
        feblend: 'feBlend',
        fecolormatrix: 'feColorMatrix',
        fecomponenttransfer: 'feComponentTransfer',
        fecomposite: 'feComposite',
        feconvolvematrix: 'feConvolveMatrix',
        fediffuselighting: 'feDiffuseLighting',
        fedisplacementmap: 'feDisplacementMap',
        fedistantlight: 'feDistantLight',
        fedropshadow: 'feDropShadow',
        feflood: 'feFlood',
        fefunca: 'feFuncA',
        fefuncb: 'feFuncB',
        fefuncg: 'feFuncG',
        fefuncr: 'feFuncR',
        fegaussianblur: 'feGaussianBlur',
        feimage: 'feImage',
        femerge: 'feMerge',
        femergenode: 'feMergeNode',
        femorphology: 'feMorphology',
        feoffset: 'feOffset',
        fepointlight: 'fePointLight',
        fespecularlighting: 'feSpecularLighting',
        fespotlight: 'feSpotLight',
        fetile: 'feTile',
        feturbulence: 'feTurbulence',
        foreignobject: 'foreignObject',
        glyphref: 'glyphRef',
        lineargradient: 'linearGradient',
        radialgradient: 'radialGradient',
        textpath: 'textPath'
}
// SVG attribute names whose canonical spelling is mixed case.
// Maps the all-lowercase spelling to the canonical one; applied by
// adjust_svg_attributes().
var svg_attribute_fixes = {
        attributename: 'attributeName',
        attributetype: 'attributeType',
        basefrequency: 'baseFrequency',
        baseprofile: 'baseProfile',
        calcmode: 'calcMode',
        clippathunits: 'clipPathUnits',
        contentscripttype: 'contentScriptType',
        contentstyletype: 'contentStyleType',
        diffuseconstant: 'diffuseConstant',
        edgemode: 'edgeMode',
        externalresourcesrequired: 'externalResourcesRequired',
        // WHATWG removes this: filterres: 'filterRes',
        filterunits: 'filterUnits',
        glyphref: 'glyphRef',
        gradienttransform: 'gradientTransform',
        gradientunits: 'gradientUnits',
        kernelmatrix: 'kernelMatrix',
        kernelunitlength: 'kernelUnitLength',
        keypoints: 'keyPoints',
        keysplines: 'keySplines',
        keytimes: 'keyTimes',
        lengthadjust: 'lengthAdjust',
        limitingconeangle: 'limitingConeAngle',
        markerheight: 'markerHeight',
        markerunits: 'markerUnits',
        markerwidth: 'markerWidth',
        maskcontentunits: 'maskContentUnits',
        maskunits: 'maskUnits',
        numoctaves: 'numOctaves',
        pathlength: 'pathLength',
        patterncontentunits: 'patternContentUnits',
        patterntransform: 'patternTransform',
        patternunits: 'patternUnits',
        pointsatx: 'pointsAtX',
        pointsaty: 'pointsAtY',
        pointsatz: 'pointsAtZ',
        preservealpha: 'preserveAlpha',
        preserveaspectratio: 'preserveAspectRatio',
        primitiveunits: 'primitiveUnits',
        refx: 'refX',
        refy: 'refY',
        repeatcount: 'repeatCount',
        repeatdur: 'repeatDur',
        requiredextensions: 'requiredExtensions',
        requiredfeatures: 'requiredFeatures',
        specularconstant: 'specularConstant',
        specularexponent: 'specularExponent',
        spreadmethod: 'spreadMethod',
        startoffset: 'startOffset',
        stddeviation: 'stdDeviation',
        stitchtiles: 'stitchTiles',
        surfacescale: 'surfaceScale',
        systemlanguage: 'systemLanguage',
        tablevalues: 'tableValues',
        targetx: 'targetX',
        targety: 'targetY',
        textlength: 'textLength',
        viewbox: 'viewBox',
        viewtarget: 'viewTarget',
        xchannelselector: 'xChannelSelector',
        ychannelselector: 'yChannelSelector',
        zoomandpan: 'zoomAndPan'
}
// Attribute names rewritten by adjust_foreign_attributes(): the ":" after an
// xlink/xml/xmlns prefix becomes a space ("xmlns" itself maps to itself).
var foreign_attr_fixes = {
        'xlink:actuate': 'xlink actuate',
        'xlink:arcrole': 'xlink arcrole',
        'xlink:href': 'xlink href',
        'xlink:role': 'xlink role',
        'xlink:show': 'xlink show',
        'xlink:title': 'xlink title',
        'xlink:type': 'xlink type',
        'xml:base': 'xml base',
        'xml:lang': 'xml lang',
        'xml:space': 'xml space',
        'xmlns': 'xmlns',
        'xmlns:xlink': 'xmlns xlink'
}
// adjust_mathml_attributes(t)
//
// Renames, in place, every attribute of token t called "definitionurl" to
// "definitionURL". t.attrs_a is an array of [name, value] pairs; values are
// left untouched. Returns nothing.
var adjust_mathml_attributes = function (t) {
        var i, pair
        for (i = 0; i < t.attrs_a.length; ++i) {
                pair = t.attrs_a[i]
                if (pair[0] === 'definitionurl') {
                        pair[0] = 'definitionURL'
                }
        }
}
// adjust_svg_attributes(t)
//
// Renames, in place, every attribute of token t whose (lowercase) name is a
// key of svg_attribute_fixes to the mixed-case spelling stored there.
// t.attrs_a is an array of [name, value] pairs. Returns nothing.
//
// Only the table's own keys count. A plain `table[name] != null` test also
// matches names inherited from Object.prototype, so an attribute called
// "constructor" or "__proto__" would have its name replaced by a non-string.
var adjust_svg_attributes = function (t) {
        var i, pair
        var has_own = Object.prototype.hasOwnProperty
        for (i = 0; i < t.attrs_a.length; ++i) {
                pair = t.attrs_a[i]
                if (has_own.call(svg_attribute_fixes, pair[0])) {
                        pair[0] = svg_attribute_fixes[pair[0]]
                }
        }
}
// adjust_foreign_attributes(t)
//
// Renames, in place, every attribute of token t whose name is a key of
// foreign_attr_fixes to the replacement stored there (eg "xlink:href"
// becomes "xlink href"). t.attrs_a is an array of [name, value] pairs.
// Returns nothing.
//
// Only the table's own keys count. A plain `table[name] != null` test also
// matches names inherited from Object.prototype, so an attribute called
// "constructor" or "__proto__" would have its name replaced by a non-string.
var adjust_foreign_attributes = function (t) {
        // fixfull
        var i, pair
        var has_own = Object.prototype.hasOwnProperty
        for (i = 0; i < t.attrs_a.length; ++i) {
                pair = t.attrs_a[i]
                if (has_own.call(foreign_attr_fixes, pair[0])) {
                        pair[0] = foreign_attr_fixes[pair[0]]
                }
        }
}
671
672 // decode_named_char_ref()
673 //
674 // The list of named character references is _huge_ so if we're running in a
675 // browser, we get the browser to decode them, rather than increasing the code
676 // size to include the table.
// _decode_named_char_ref(txt): the uncached decoder.
//   - module context: whatever ./parser_no_browser_helper.js exports
//   - browser context: writes "&" + txt + ";" into a detached <textarea>
//     via innerHTML and reads the parsed text back from .value. If it comes
//     back unchanged the name was not recognised and null is returned.
var _decode_named_char_ref, decode_named_char_ref_el
if (context === 'module') {
        _decode_named_char_ref = require('./parser_no_browser_helper.js')
} else {
        // one scratch element, reused for every lookup
        decode_named_char_ref_el = document.createElement('textarea')
        _decode_named_char_ref = function (txt) {
                var source = "&" + txt + ";"
                var decoded
                decode_named_char_ref_el.innerHTML = source
                decoded = decode_named_char_ref_el.value
                return decoded === source ? null : decoded
        }
}
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Returns whatever _decode_named_char_ref() returns for that name. Results
// are memoized in decode_named_char_ref_cache, including "not found"
// results, so each name is decoded at most once.
//
// The cache is consulted with an own-property check. A bare cache[txt]
// lookup also finds members inherited from Object.prototype, which made names
// such as "constructor" or "toString" look like already-decoded entities.
var decode_named_char_ref_cache = {}
var decode_named_char_ref = function (txt) {
        var decoded
        if (Object.prototype.hasOwnProperty.call(decode_named_char_ref_cache, txt)) {
                return decode_named_char_ref_cache[txt]
        }
        decoded = _decode_named_char_ref(txt)
        decode_named_char_ref_cache[txt] = decoded
        return decoded
}
705
706 parse_html = function (args_html, args) {
707         var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, ref, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
708         if (args == null) {
709                 args = {}
710         }
711         txt = null
712         cur = null // index of next char in txt to be parsed
713         // declare doc and tokenizer variables so they're in scope below
714         doc = null
715         open_els = null // stack of open elements
716         afe = null // active formatting elements
717         template_ins_modes = null
718         ins_mode = null
719         original_ins_mode = null
720         tok_state = null
721         tok_cur_tag = null // partially parsed tag
722         flag_scripting = null
723         flag_frameset_ok = null
724         flag_parsing = null
725         flag_foster_parenting = null
726         form_element_pointer = null
727         temporary_buffer = null
728         pending_table_character_tokens = null
729         head_element_pointer = null
730         flag_fragment_parsing = null
731         context_element = null
732
733         stop_parsing = function () {
734                 flag_parsing = false
735         }
736
737         parse_error = function () {
738                 if (args.error_cb != null) {
739                         args.error_cb(cur)
740                 }
741         }
742
743         // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
744         // "Noah's Ark clause" but with three
745         afe_push = function (new_el) {
746                 var attrs_match, el, i, j, k, len, matches, ref, ref1, v
747                 matches = 0
748                 for (i = 0; i < afe.length; ++i) {
749                         el = afe[i]
750                         if (el.type === TYPE_AFE_MARKER) {
751                                 break
752                         }
753                         if (el.name === new_el.name && el.namespace === new_el.namespace) {
754                                 attrs_match = true
755                                 for (k in el.attrs) {
756                                         v = el.attrs[k]
757                                         if (new_el.attrs[k] !== v) {
758                                                 attrs_match = false
759                                                 break
760                                         }
761                                 }
762                                 if (attrs_match) {
763                                         for (k in new_el.attrs) {
764                                                 v = new_el.attrs[k]
765                                                 if (el.attrs[k] !== v) {
766                                                         attrs_match = false
767                                                         break
768                                                 }
769                                         }
770                                 }
771                                 if (attrs_match) {
772                                         matches += 1
773                                         if (matches === 3) {
774                                                 afe.splice(i, 1)
775                                                 break
776                                         }
777                                 }
778                         }
779                 }
780                 afe.unshift(new_el)
781         }
782
783         afe_push_marker = function () {
784                 afe.unshift(new_afe_marker())
785         }
786
787         // the functions below implement the Tree Construction algorithm
788         // http://www.w3.org/TR/html5/syntax.html#tree-construction
789
790         // But first... the helpers
791         template_tag_is_open = function () {
792                 var i, el
793                 for (i = 0; i < open_els.length; ++i) {
794                         el = open_els[i]
795                         if (el.name === 'template' && el.namespace === NS_HTML) {
796                                 return true
797                         }
798                 }
799                 return false
800         }
801         is_in_scope_x = function (tag_name, scope, namespace) {
802                 var i, el
803                 for (i = 0; i < open_els.length; ++i) {
804                         el = open_els[i]
805                         if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
806                                 return true
807                         }
808                         if (scope[el.name] === el.namespace) {
809                                 return false
810                         }
811                 }
812                 return false
813         }
814         is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
815                 var i, el
816                 for (i = 0; i < open_els.length; ++i) {
817                         el = open_els[i]
818                         if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
819                                 return true
820                         }
821                         if (scope[el.name] === el.namespace) {
822                                 return false
823                         }
824                         if (scope2[el.name] === el.namespace) {
825                                 return false
826                         }
827                 }
828                 return false
829         }
830         standard_scopers = {
831                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
832                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
833                 template: NS_HTML,
834
835                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
836                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
837
838                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
839         }
840         button_scopers = { button: NS_HTML }
841         li_scopers = { ol: NS_HTML, ul: NS_HTML }
842         table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
843         is_in_scope = function (tag_name, namespace) {
844                 if (namespace == null) {
845                         namespace = null
846                 }
847                 return is_in_scope_x(tag_name, standard_scopers, namespace)
848         }
849         is_in_button_scope = function (tag_name, namespace) {
850                 if (namespace == null) {
851                         namespace = null
852                 }
853                 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
854         }
855         is_in_table_scope = function (tag_name, namespace) {
856                 if (namespace == null) {
857                         namespace = null
858                 }
859                 return is_in_scope_x(tag_name, table_scopers, namespace)
860         }
861         // aka is_in_list_item_scope
862         is_in_li_scope = function (tag_name, namespace) {
863                 if (namespace == null) {
864                         namespace = null
865                 }
866                 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
867         }
868         is_in_select_scope = function (tag_name, namespace) {
869                 var i, t
870                 if (namespace == null) {
871                         namespace = null
872                 }
873                 for (i = 0; i < open_els.length; ++i) {
874                         t = open_els[i]
875                         if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
876                                 return true
877                         }
878                         if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
879                                 return false
880                         }
881                 }
882                 return false
883         }
884         // this checks for a particular element, not by name
885         // this requires a namespace match
886         el_is_in_scope = function (needle) {
887                 var i
888                 for (i = 0; i < open_els.length; ++i) {
889                         el = open_els[i]
890                         if (el === needle) {
891                                 return true
892                         }
893                         if (standard_scopers[el.name] === el.namespace) {
894                                 return false
895                         }
896                 }
897                 return false
898         }
899
900         clear_to_table_stopers = {
901                 'table': true,
902                 'template': true,
903                 'html': true
904         }
905         clear_stack_to_table_context = function () {
906                 while (true) {
907                         if (clear_to_table_stopers[open_els[0].name] != null) {
908                                 break
909                         }
910                         open_els.shift()
911                 }
912         }
913         clear_to_table_body_stopers = {
914                 tbody: NS_HTML,
915                 tfoot: NS_HTML,
916                 thead: NS_HTML,
917                 template: NS_HTML,
918                 html: NS_HTML
919         }
920         clear_stack_to_table_body_context = function () {
921                 while (true) {
922                         if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
923                                 break
924                         }
925                         open_els.shift()
926                 }
927         }
928         clear_to_table_row_stopers = {
929                 'tr': true,
930                 'template': true,
931                 'html': true
932         }
933         clear_stack_to_table_row_context = function () {
934                 while (true) {
935                         if (clear_to_table_row_stopers[open_els[0].name] != null) {
936                                 break
937                         }
938                         open_els.shift()
939                 }
940         }
941         clear_afe_to_marker = function () {
942                 var el
943                 while (true) {
944                         if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
945                                 return
946                         }
947                         el = afe.shift()
948                         if (el.type === TYPE_AFE_MARKER) {
949                                 return
950                         }
951                 }
952         }
953
954         // 8.2.3.1 ...
955         // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
956         reset_ins_mode = function () {
957                 var ancestor, ancestor_i, last, node, node_i
958                 // 1. Let last be false.
959                 last = false
960                 // 2. Let node be the last node in the stack of open elements.
961                 node_i = 0
962                 node = open_els[node_i]
963                 // 3. Loop: If node is the first node in the stack of open elements,
964                 // then set last to true, and, if the parser was originally created as
965                 // part of the HTML fragment parsing algorithm (fragment case) set node
966                 // to the context element.
967                 while (true) {
968                         if (node_i === open_els.length - 1) {
969                                 last = true
970                                 if (flag_fragment_parsing) {
971                                         node = context_element
972                                 }
973                         }
974                         // 4. If node is a select element, run these substeps:
975                         if (node.name === 'select' && node.namespace === NS_HTML) {
976                                 // 1. If last is true, jump to the step below labeled done.
977                                 if (!last) {
978                                         // 2. Let ancestor be node.
979                                         ancestor_i = node_i
980                                         ancestor = node
981                                         // 3. Loop: If ancestor is the first node in the stack of
982                                         // open elements, jump to the step below labeled done.
983                                         while (true) {
984                                                 if (ancestor_i === open_els.length - 1) {
985                                                         break
986                                                 }
987                                                 // 4. Let ancestor be the node before ancestor in the stack
988                                                 // of open elements.
989                                                 ancestor_i += 1
990                                                 ancestor = open_els[ancestor_i]
991                                                 // 5. If ancestor is a template node, jump to the step below
992                                                 // labeled done.
993                                                 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
994                                                         break
995                                                 }
996                                                 // 6. If ancestor is a table node, switch the insertion mode
997                                                 // to "in select in table" and abort these steps.
998                                                 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
999                                                         ins_mode = ins_mode_in_select_in_table
1000                                                         return
1001                                                 }
1002                                                 // 7. Jump back to the step labeled loop.
1003                                         }
1004                                 }
1005                                 // 8. Done: Switch the insertion mode to "in select" and abort
1006                                 // these steps.
1007                                 ins_mode = ins_mode_in_select
1008                                 return
1009                         }
1010                         // 5. If node is a td or th element and last is false, then switch
1011                         // the insertion mode to "in cell" and abort these steps.
1012                         if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1013                                 ins_mode = ins_mode_in_cell
1014                                 return
1015                         }
1016                         // 6. If node is a tr element, then switch the insertion mode to "in
1017                         // row" and abort these steps.
1018                         if (node.name === 'tr' && node.namespace === NS_HTML) {
1019                                 ins_mode = ins_mode_in_row
1020                                 return
1021                         }
1022                         // 7. If node is a tbody, thead, or tfoot element, then switch the
1023                         // insertion mode to "in table body" and abort these steps.
1024                         if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1025                                 ins_mode = ins_mode_in_table_body
1026                                 return
1027                         }
1028                         // 8. If node is a caption element, then switch the insertion mode
1029                         // to "in caption" and abort these steps.
1030                         if (node.name === 'caption' && node.namespace === NS_HTML) {
1031                                 ins_mode = ins_mode_in_caption
1032                                 return
1033                         }
1034                         // 9. If node is a colgroup element, then switch the insertion mode
1035                         // to "in column group" and abort these steps.
1036                         if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1037                                 ins_mode = ins_mode_in_column_group
1038                                 return
1039                         }
1040                         // 10. If node is a table element, then switch the insertion mode to
1041                         // "in table" and abort these steps.
1042                         if (node.name === 'table' && node.namespace === NS_HTML) {
1043                                 ins_mode = ins_mode_in_table
1044                                 return
1045                         }
1046                         // 11. If node is a template element, then switch the insertion mode
1047                         // to the current template insertion mode and abort these steps.
1048                         if (node.name === 'template' && node.namespace === NS_HTML) {
1049                                 ins_mode = template_ins_modes[0]
1050                                 return
1051                         }
1052                         // 12. If node is a head element and last is true, then switch the
1053                         // insertion mode to "in body" ("in body"! not "in head"!) and abort
1054                         // these steps. (fragment case)
1055                         if (node.name === 'head' && node.namespace === NS_HTML && last) {
1056                                 ins_mode = ins_mode_in_body
1057                                 return
1058                         }
1059                         // 13. If node is a head element and last is false, then switch the
1060                         // insertion mode to "in head" and abort these steps.
1061                         if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1062                                 ins_mode = ins_mode_in_head
1063                                 return
1064                         }
1065                         // 14. If node is a body element, then switch the insertion mode to
1066                         // "in body" and abort these steps.
1067                         if (node.name === 'body' && node.namespace === NS_HTML) {
1068                                 ins_mode = ins_mode_in_body
1069                                 return
1070                         }
1071                         // 15. If node is a frameset element, then switch the insertion mode
1072                         // to "in frameset" and abort these steps. (fragment case)
1073                         if (node.name === 'frameset' && node.namespace === NS_HTML) {
1074                                 ins_mode = ins_mode_in_frameset
1075                                 return
1076                         }
1077                         // 16. If node is an html element, run these substeps:
1078                         if (node.name === 'html' && node.namespace === NS_HTML) {
1079                                 // 1. If the head element pointer is null, switch the insertion
1080                                 // mode to "before head" and abort these steps. (fragment case)
1081                                 if (head_element_pointer === null) {
1082                                         ins_mode = ins_mode_before_head
1083                                 } else {
1084                                         // 2. Otherwise, the head element pointer is not null,
1085                                         // switch the insertion mode to "after head" and abort these
1086                                         // steps.
1087                                         ins_mode = ins_mode_after_head
1088                                 }
1089                                 return
1090                         }
1091                         // 17. If last is true, then switch the insertion mode to "in body"
1092                         // and abort these steps. (fragment case)
1093                         if (last) {
1094                                 ins_mode = ins_mode_in_body
1095                                 return
1096                         }
1097                         // 18. Let node now be the node before node in the stack of open
1098                         // elements.
1099                         node_i += 1
1100                         node = open_els[node_i]
1101                         // 19. Return to the step labeled loop.
1102                 }
1103         }
1104
1105         // 8.2.3.2
1106
1107         // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
1108         adjusted_current_node = function () {
1109                 if (open_els.length === 1 && flag_fragment_parsing) {
1110                         return context_element
1111                 }
1112                 return open_els[0]
1113         }
1114
1115         // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1116         // this implementation is structured (mostly) as described at the link above.
1117         // capitalized comments are the "labels" described at the link above.
1118         reconstruct_afe = function () {
1119                 var el, i
1120                 if (afe.length === 0) {
1121                         return
1122                 }
1123                 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
1124                         return
1125                 }
1126                 // Rewind
1127                 i = 0
1128                 while (true) {
1129                         if (i === afe.length - 1) {
1130                                 break
1131                         }
1132                         i += 1
1133                         if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
1134                                 i -= 1 // Advance
1135                                 break
1136                         }
1137                 }
1138                 // Create
1139                 while (true) {
1140                         el = insert_html_element(afe[i].token)
1141                         afe[i] = el
1142                         if (i === 0) {
1143                                 break
1144                         }
1145                         i -= 1 // Advance
1146                 }
1147         }
1148
1149         // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1150         // adoption agency algorithm
1151         // overview here:
1152         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1153         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1154         //   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1155         adoption_agency = function (subject) {
1156                 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, ref, ref1, s, t, u, w, y, z
1157 // this block implements tha W3C spec
1158 //              # 1. If the current node is an HTML element whose tag name is subject,
1159 //              # then run these substeps:
1160 //              #
1161 //              # 1. Let element be the current node.
1162 //              #
1163 //              # 2. Pop element off the stack of open elements.
1164 //              #
1165 //              # 3. If element is also in the list of active formatting elements,
1166 //              # remove the element from the list.
1167 //              #
1168 //              # 4. Abort the adoption agency algorithm.
1169 //              if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1170 //                      el = open_els.shift()
1171 //                      # remove it from the list of active formatting elements (if found)
1172 //                      for t, i in afe
1173 //                              if t is el
1174 //                                      afe.splice i, 1
1175 //                                      break
1176 //                      return
1177 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1178                 // If the current node is an HTML element whose tag name is subject, and
1179                 // the current node is not in the list of active formatting elements,
1180                 // then pop the current node off the stack of open elements, and abort
1181                 // these steps.
1182                 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1183                         // remove it from the list of active formatting elements (if found)
1184                         in_afe = false
1185                         for (i = 0; i < afe.length; ++i) {
1186                                 el = afe[i]
1187                                 if (el === open_els[0]) {
1188                                         in_afe = true
1189                                         break
1190                                 }
1191                         }
1192                         if (!in_afe) {
1193                                 open_els.shift()
1194                                 return
1195                         }
1196                         // fall through
1197                 }
1198 // END WHATWG
1199                 outer = 0
1200                 while (true) {
1201                         if (outer >= 8) {
1202                                 return
1203                         }
1204                         outer += 1
1205                         // 5. Let formatting element be the last element in the list of
1206                         // active formatting elements that: is between the end of the list
1207                         // and the last scope marker in the list, if any, or the start of
1208                         // the list otherwise, and  has the tag name subject.
1209                         fe = null
1210                         for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1211                                 t = afe[fe_of_afe]
1212                                 if (t.type === TYPE_AFE_MARKER) {
1213                                         break
1214                                 }
1215                                 if (t.name === subject) {
1216                                         fe = t
1217                                         break
1218                                 }
1219                         }
1220                         // If there is no such element, then abort these steps and instead
1221                         // act as described in the "any other end tag" entry above.
1222                         if (fe === null) {
1223                                 in_body_any_other_end_tag(subject)
1224                                 return
1225                         }
1226                         // 6. If formatting element is not in the stack of open elements,
1227                         // then this is a parse error; remove the element from the list, and
1228                         // abort these steps.
1229                         in_open_els = false
1230                         for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1231                                 t = open_els[fe_of_open_els]
1232                                 if (t === fe) {
1233                                         in_open_els = true
1234                                         break
1235                                 }
1236                         }
1237                         if (!in_open_els) {
1238                                 parse_error()
1239                                 // "remove it from the list" must mean afe, since it's not in open_els
1240                                 afe.splice(fe_of_afe, 1)
1241                                 return
1242                         }
1243                         // 7. If formatting element is in the stack of open elements, but
1244                         // the element is not in scope, then this is a parse error; abort
1245                         // these steps.
1246                         if (!el_is_in_scope(fe)) {
1247                                 parse_error()
1248                                 return
1249                         }
1250                         // 8. If formatting element is not the current node, this is a parse
1251                         // error. (But do not abort these steps.)
1252                         if (open_els[0] !== fe) {
1253                                 parse_error()
1254                                 // continue
1255                         }
1256                         // 9. Let furthest block be the topmost node in the stack of open
1257                         // elements that is lower in the stack than formatting element, and
1258                         // is an element in the special category. There might not be one.
1259                         fb = null
1260                         fb_of_open_els = null
1261                         for (i = 0; i < open_els.length; ++i) {
1262                                 t = open_els[i]
1263                                 if (t === fe) {
1264                                         break
1265                                 }
1266                                 if (el_is_special(t)) {
1267                                         fb = t
1268                                         fb_of_open_els = i
1269                                         // and continue, to see if there's one that's more "topmost"
1270                                 }
1271                         }
1272                         // 10. If there is no furthest block, then the UA must first pop all
1273                         // the nodes from the bottom of the stack of open elements, from the
1274                         // current node up to and including formatting element, then remove
1275                         // formatting element from the list of active formatting elements,
1276                         // and finally abort these steps.
1277                         if (fb === null) {
1278                                 while (true) {
1279                                         t = open_els.shift()
1280                                         if (t === fe) {
1281                                                 afe.splice(fe_of_afe, 1)
1282                                                 return
1283                                         }
1284                                 }
1285                         }
1286                         // 11. Let common ancestor be the element immediately above
1287                         // formatting element in the stack of open elements.
1288                         ca = open_els[fe_of_open_els + 1] // common ancestor
1289
1290                         node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1291                         // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1292                         bookmark = new_aaa_bookmark()
1293                         for (i = 0; i < afe.length; ++i) {
1294                                 t = afe[i]
1295                                 if (t === fe) {
1296                                         afe.splice(i, 0, bookmark)
1297                                         break
1298                                 }
1299                         }
1300                         node = last_node = fb
1301                         inner = 0
1302                         while (true) {
1303                                 inner += 1
1304                                 // 3. Let node be the element immediately above node in the
1305                                 // stack of open elements, or if node is no longer in the stack
1306                                 // of open elements (e.g. because it got removed by this
1307                                 // algorithm), the element that was immediately above node in
1308                                 // the stack of open elements before node was removed.
1309                                 node_next = null
1310                                 for (i = 0; i < open_els.length; ++i) {
1311                                         t = open_els[i]
1312                                         if (t === node) {
1313                                                 node_next = open_els[i + 1]
1314                                                 break
1315                                         }
1316                                 }
1317                                 node = node_next != null ? node_next : node_above
1318                                 // TODO make sure node_above gets re-set if/when node is removed from open_els
1319
1320                                 // 4. If node is formatting element, then go to the next step in
1321                                 // the overall algorithm.
1322                                 if (node === fe) {
1323                                         break
1324                                 }
1325                                 // 5. If inner loop counter is greater than three and node is in
1326                                 // the list of active formatting elements, then remove node from
1327                                 // the list of active formatting elements.
1328                                 node_in_afe = false
1329                                 for (i = 0; i < afe.length; ++i) {
1330                                         t = afe[i]
1331                                         if (t === node) {
1332                                                 if (inner > 3) {
1333                                                         afe.splice(i, 1)
1334                                                 } else {
1335                                                         node_in_afe = true
1336                                                 }
1337                                                 break
1338                                         }
1339                                 }
1340                                 // 6. If node is not in the list of active formatting elements,
1341                                 // then remove node from the stack of open elements and then go
1342                                 // back to the step labeled inner loop.
1343                                 if (!node_in_afe) {
1344                                         for (i = 0; i < open_els.length; ++i) {
1345                                                 t = open_els[i]
1346                                                 if (t === node) {
1347                                                         node_above = open_els[i + 1]
1348                                                         open_els.splice(i, 1)
1349                                                         break
1350                                                 }
1351                                         }
1352                                         continue
1353                                 }
1354                                 // 7. create an element for the token for which the element node
1355                                 // was created, in the HTML namespace, with common ancestor as
1356                                 // the intended parent; replace the entry for node in the list
1357                                 // of active formatting elements with an entry for the new
1358                                 // element, replace the entry for node in the stack of open
1359                                 // elements with an entry for the new element, and let node be
1360                                 // the new element.
1361                                 new_node = token_to_element(node.token, NS_HTML, ca)
1362                                 for (i = 0; i < afe.length; ++i) {
1363                                         t = afe[i]
1364                                         if (t === node) {
1365                                                 afe[i] = new_node
1366                                                 break
1367                                         }
1368                                 }
1369                                 for (i = 0; i < open_els.length; ++i) {
1370                                         t = open_els[i]
1371                                         if (t === node) {
1372                                                 node_above = open_els[i + 1]
1373                                                 open_els[i] = new_node
1374                                                 break
1375                                         }
1376                                 }
1377                                 node = new_node
1378                                 // 8. If last node is furthest block, then move the
1379                                 // aforementioned bookmark to be immediately after the new node
1380                                 // in the list of active formatting elements.
1381                                 if (last_node === fb) {
1382                                         for (i = 0; i < afe.length; ++i) {
1383                                                 t = afe[i]
1384                                                 if (t === bookmark) {
1385                                                         afe.splice(i, 1)
1386                                                         break
1387                                                 }
1388                                         }
1389                                         for (i = 0; i < afe.length; ++i) {
1390                                                 t = afe[i]
1391                                                 if (t === node) {
1392                                                         // "after" means lower
1393                                                         afe.splice(i, 0, bookmark) // "after as <-
1394                                                         break
1395                                                 }
1396                                         }
1397                                 }
1398                                 // 9. Insert last node into node, first removing it from its
1399                                 // previous parent node if any.
1400                                 if (last_node.parent != null) {
1401                                         for (i = 0; i < last_node.parent.children.length; ++i) {
1402                                                 c = last_node.parent.children[i]
1403                                                 if (c === last_node) {
1404                                                         last_node.parent.children.splice(i, 1)
1405                                                         break
1406                                                 }
1407                                         }
1408                                 }
1409                                 node.children.push(last_node)
1410                                 last_node.parent = node
1411                                 // 10. Let last node be node.
1412                                 last_node = node
1413                                 // 11. Return to the step labeled inner loop.
1414                         }
1415                         // 14. Insert whatever last node ended up being in the previous step
1416                         // at the appropriate place for inserting a node, but using common
1417                         // ancestor as the override target.
1418
1419                         // In the case where fe is immediately followed by fb:
1420                         //   * inner loop exits out early (node==fe)
1421                         //   * last_node is fb
1422                         //   * last_node is still in the tree (not a duplicate)
1423                         if (last_node.parent != null) {
1424                                 for (i = 0; i < last_node.parent.children.length; ++i) {
1425                                         c = last_node.parent.children[i]
1426                                         if (c === last_node) {
1427                                                 last_node.parent.children.splice(i, 1)
1428                                                 break
1429                                         }
1430                                 }
1431                         }
1432                         // can't use standard insert token thing, because it's already in
1433                         // open_els and must stay at it's current position in open_els
1434                         dest = adjusted_insertion_location(ca)
1435                         dest[0].children.splice(dest[1], 0, last_node)
1436                         last_node.parent = dest[0]
1437                         // 15. Create an element for the token for which formatting element
1438                         // was created, in the HTML namespace, with furthest block as the
1439                         // intended parent.
1440                         new_element = token_to_element(fe.token, NS_HTML, fb)
1441                         // 16. Take all of the child nodes of furthest block and append them
1442                         // to the element created in the last step.
1443                         while (fb.children.length) {
1444                                 t = fb.children.shift()
1445                                 t.parent = new_element
1446                                 new_element.children.push(t)
1447                         }
1448                         // 17. Append that new element to furthest block.
1449                         new_element.parent = fb
1450                         fb.children.push(new_element)
1451                         // 18. Remove formatting element from the list of active formatting
1452                         // elements, and insert the new element into the list of active
1453                         // formatting elements at the position of the aforementioned
1454                         // bookmark.
1455                         for (i = 0; i < afe.length; ++i) {
1456                                 t = afe[i]
1457                                 if (t === fe) {
1458                                         afe.splice(i, 1)
1459                                         break
1460                                 }
1461                         }
1462                         for (i = 0; i < afe.length; ++i) {
1463                                 t = afe[i]
1464                                 if (t === bookmark) {
1465                                         afe[i] = new_element
1466                                         break
1467                                 }
1468                         }
1469                         // 19. Remove formatting element from the stack of open elements,
1470                         // and insert the new element into the stack of open elements
1471                         // immediately below the position of furthest block in that stack.
1472                         for (i = 0; i < open_els.length; ++i) {
1473                                 t = open_els[i]
1474                                 if (t === fe) {
1475                                         open_els.splice(i, 1)
1476                                         break
1477                                 }
1478                         }
1479                         for (i = 0; i < open_els.length; ++i) {
1480                                 t = open_els[i]
1481                                 if (t === fb) {
1482                                         open_els.splice(i, 0, new_element)
1483                                         break
1484                                 }
1485                         }
1486                         // 20. Jump back to the step labeled outer loop.
1487                 }
1488         }
1489
1490         // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
// Close a <p> element.
//
// Generates implied end tags (passing 'p' as the exception), reports a parse
// error when the current node (open_els[0]) is not an HTML <p>, and then pops
// entries off the stack of open elements up to and including the first HTML
// <p> that is popped.
close_p_element = function () {
        // Declared locally: previously this was assigned without `var`, which
        // leaked the popped element into the enclosing scope.
        var popped
        generate_implied_end_tags('p') // arg is the exception
        if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
                parse_error()
        }
        // Just in case there is no <p> on the stack: never pop the last entry.
        while (open_els.length > 1) {
                popped = open_els.shift()
                if (popped.name === 'p' && popped.namespace === NS_HTML) {
                        return
                }
        }
}
// Closes the open <p> element, but only when is_in_button_scope reports an
// HTML <p>; otherwise this does nothing.
close_p_if_in_button_scope = function () {
        if (!is_in_button_scope('p', NS_HTML)) {
                return
        }
        close_p_element()
}
1508
1509         // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1510         // aka insert_a_character = function (t) {
// Insert character token `t` at the adjusted insertion location.
//
// When the node immediately before that location is a text node, the text of
// `t` is appended to that node instead and `t` itself is not inserted.
insert_character = function (t) {
        var place = adjusted_insertion_location()
        var parent = place[0]
        var index = place[1]
        var before
        // fixfull check for Document node
        if (index > 0) {
                before = parent.children[index - 1]
                if (before.type === TYPE_TEXT) {
                        // merge into the existing text node
                        before.text += t.text
                        return
                }
        }
        parent.children.splice(index, 0, t)
        t.parent = parent
}
1525
1526         // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
// Tree construction dispatcher: hand token `t` either to the current insertion
// mode (ins_mode) or to the foreign content rules (in_foreign_content),
// depending on the adjusted current node and the kind of token.
process_token = function (t) {
        var node = adjusted_current_node()
        var html_rules
        if (node == null || node.namespace === NS_HTML) {
                // nothing open, or an HTML element
                html_rules = true
        } else if (is_mathml_text_integration_point(node) && ((t.type === TYPE_START_TAG && t.name !== 'mglyph' && t.name !== 'malignmark') || t.type === TYPE_TEXT)) {
                // MathML text integration point: start tags (other than
                // mglyph/malignmark) and text
                html_rules = true
        } else if (node.namespace === NS_MATHML && node.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
                // <svg> start tag directly inside MathML annotation-xml
                html_rules = true
        } else if (is_html_integration(node) && (t.type === TYPE_START_TAG || t.type === TYPE_TEXT)) {
                // HTML integration point: start tags and text
                html_rules = true
        } else {
                // otherwise only end-of-file is handled by the insertion mode
                html_rules = t.type === TYPE_EOF
        }
        if (html_rules) {
                ins_mode(t)
        } else {
                in_foreign_content(t)
        }
}
1564
1565         // 8.2.5.1
1566         // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1567         // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
// Compute the "appropriate place for inserting a node".
//
// override_target (optional): node to use as the target instead of the
// current node (open_els[0]).
//
// Returns a two-element array: [parent_node, index_within_parent.children].
//
// open_els is searched from index 0 (the current node) upwards, so the "last"
// (most recently added) template/table in spec terms is the one with the
// SMALLEST index, and the html element is the final entry.
adjusted_insertion_location = function (override_target) {
        var el, i, last_table, last_table_i, last_template, last_template_i, target, target_i
        // 1. If there was an override target specified, then let target be the
        // override target. Otherwise, let target be the current node.
        target = override_target != null ? override_target : open_els[0]
        // 2. Determine the adjusted insertion location using the first matching
        // steps from the following list:
        //
        // If foster parenting is enabled and target is a table, tbody, tfoot,
        // thead, or tr element. (Foster parenting happens when content is
        // misnested in tables.)
        if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
                // 1. Let last template be the last template element in the
                // stack of open elements, if any.
                // 2. Let last table be the last table element in the stack of
                // open elements, if any.
                last_template = null
                last_template_i = null
                last_table = null
                last_table_i = null
                for (i = 0; i < open_els.length; ++i) {
                        el = open_els[i]
                        if (el.namespace !== NS_HTML) {
                                continue
                        }
                        if (el.name === 'template' && last_template === null) {
                                last_template = el
                                last_template_i = i
                        } else if (el.name === 'table' && last_table === null) {
                                last_table = el
                                last_table_i = i
                        }
                }
                if (last_template !== null && (last_table === null || last_template_i < last_table_i)) {
                        // 3. There is a last template and either no last table,
                        // or last template is lower (more recently added) than
                        // last table: insert inside last template's template
                        // contents, after its last child (if any).
                        target = last_template // fixfull should be its contents
                        target_i = target.children.length
                } else if (last_table === null) {
                        // 4. No last table (fragment case): insert inside the
                        // first element in the stack of open elements (the html
                        // element, stored last in open_els), after its last
                        // child (if any).
                        target = open_els[open_els.length - 1]
                        target_i = target.children.length
                } else if (last_table.parent != null) {
                        // 5. Last table has a parent: insert inside that
                        // parent, immediately before last table. If the table
                        // is not found among its parent's children, target is
                        // left as it was and the index stays unset (unchanged
                        // from the previous behaviour).
                        i = last_table.parent.children.indexOf(last_table)
                        if (i !== -1) {
                                target = last_table.parent
                                target_i = i
                        }
                } else {
                        // 6. Let previous element be the element immediately
                        // above last table in the stack of open elements.
                        // 7. Insert inside previous element, after its last
                        // child (if any).
                        //
                        // Note: These steps are involved in part because it's
                        // possible for elements, the table element in this case
                        // in particular, to have been moved by a script around
                        // in the DOM, or indeed removed from the DOM entirely,
                        // after the element was inserted by the parser.
                        target = open_els[last_table_i + 1]
                        target_i = target.children.length
                }
        } else {
                // Otherwise let adjusted insertion location be inside target,
                // after its last child (if any).
                target_i = target.children.length
        }

        // 3. If the adjusted insertion location is inside a template element,
        // let it instead be inside the template element's template contents,
        // after its last child (if any).
        // fixfull (template)

        // 4. Return the adjusted insertion location.
        return [target, target_i]
}
1674
1675         // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1676         // aka create_an_element_for_token
// Create an element node for tag token `t` in `namespace`.
//
// The token's attribute list (t.attrs_a, an array of [name, value] pairs) is
// turned into a name -> value hash, and the token itself is kept on the new
// node as `token`. `intended_parent` is accepted but not used here.
token_to_element = function (t, namespace, intended_parent) {
        var by_name = {}
        var pairs = t.attrs_a
        var n, pair
        // convert attributes into a hash
        for (n = 0; n < pairs.length; n += 1) {
                pair = pairs[n]
                // TODO check what to do with duplicate attrs (a later entry in
                // attrs_a currently overwrites an earlier one)
                by_name[pair[0]] = pair[1]
        }

        // TODO 2. If the newly created element has an xmlns attribute in the
        // XMLNS namespace whose value is not exactly the same as the element's
        // namespace, that is a parse error. Similarly, if the newly created
        // element has an xmlns:xlink attribute in the XMLNS namespace whose
        // value is not the XLink Namespace, that is a parse error.

        // fixfull: the spec says stuff about form pointers and ownerDocument

        return new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: by_name, token: t})
}
1697
1698         // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
// Create an element for `token` in `namespace`, insert it at the adjusted
// insertion location, push it onto the stack of open elements (making it the
// current node, open_els[0]) and return it.
insert_foreign_element = function (token, namespace) {
        var place = adjusted_insertion_location()
        var parent = place[0]
        var index = place[1]
        var created = token_to_element(token, namespace, parent)
        // TODO skip this next step if it's broken (eg parent is document with child already)
        created.parent = parent
        parent.children.splice(index, 0, created)
        open_els.unshift(created)
        return created
}
1711         // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
// Insert an element for `token` in the HTML namespace and return the new
// element (see insert_foreign_element).
insert_html_element = function (token) {
        var inserted = insert_foreign_element(token, NS_HTML)
        return inserted
}
1715
1716         // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1717         // position should be [node, index_within_children]
// Insert comment token `t` into the tree.
//
// position (optional): [node, index_within_children]; when omitted, the
// adjusted insertion location is used.
insert_comment = function (t, position) {
        var where = position != null ? position : adjusted_insertion_location()
        where[0].children.splice(where[1], 0, t)
}
1725
1726         // 8.2.5.2
1727         // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Generic raw text element parsing: insert an HTML element for token `t`,
// remember the current insertion mode in original_ins_mode, then switch the
// insertion mode to ins_mode_text and the tokenizer to tok_state_rawtext.
parse_generic_raw_text = function (t) {
        insert_html_element(t)
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rawtext
}
// Generic RCDATA element parsing: insert an HTML element for token `t`,
// remember the current insertion mode in original_ins_mode, then switch the
// insertion mode to ins_mode_text and the tokenizer to tok_state_rcdata.
parse_generic_rcdata_text = function (t) {
        insert_html_element(t)
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rcdata
}
1740
1741         // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1742         // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
// Pop the current node (open_els[0]) for as long as it is listed in
// end_tag_implied for its namespace.
//
// except (optional): an element name at which popping stops even though its
// end tag could be implied.
generate_implied_end_tags = function (except) {
        var keep = except == null ? null : except
        var top
        for (;;) {
                top = open_els[0]
                if (end_tag_implied[top.name] !== top.namespace || top.name === keep) {
                        return
                }
                open_els.shift()
        }
}
1751
1752         // 8.2.5.4 The rules for parsing tokens in HTML content
1753         // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1754
1755         // 8.2.5.4.1 The "initial" insertion mode
1756         // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
// Returns true when DOCTYPE token `t` puts the document into quirks mode.
//
// That is the case when any of these hold:
//   * the token's 'force-quirks' flag is set
//   * the name is not exactly 'html'
//   * the public identifier (compared case-insensitively) starts with one of
//     quirks_yes_pi_prefixes, or equals one of three fixed identifiers
//   * the system identifier (case-insensitively) is the IBM XHTML 1
//     transitional DTD url
//   * the system identifier is missing and the public identifier starts with
//     the HTML 4.01 Frameset/Transitional prefix
is_quirks_yes_doctype = function (t) {
        var i, pi, starts_with
        // Prefix test whose length comes from the prefix itself, instead of
        // the deprecated substr() with hand-counted lengths.
        starts_with = function (str, prefix) {
                return str.slice(0, prefix.length) === prefix
        }
        if (t.flag('force-quirks')) {
                return true
        }
        if (t.name !== 'html') {
                return true
        }
        if (t.public_identifier != null) {
                pi = t.public_identifier.toLowerCase()
                for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
                        if (starts_with(pi, quirks_yes_pi_prefixes[i])) {
                                return true
                        }
                }
                if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
                        return true
                }
        }
        if (t.system_identifier != null) {
                if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
                        return true
                }
        } else if (t.public_identifier != null) {
                // pi was already lower-cased above
                if (starts_with(pi, '-//w3c//dtd html 4.01 frameset//') || starts_with(pi, '-//w3c//dtd html 4.01 transitional//')) {
                        return true
                }
        }
        return false
}
1789         is_quirks_limited_doctype = function (t) {
                     // Decide whether DOCTYPE token t selects limited quirks mode.
                     // Only t.public_identifier (lowercased) and the presence of
                     // t.system_identifier are examined; a DOCTYPE without a public
                     // identifier never matches. Returns true or false.
1790                 var pi
1791                 if (t.public_identifier != null) {
1792                         pi = t.public_identifier.toLowerCase()
1793                         if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') { // XHTML 1.0 prefixes match with or without a system identifier
1794                                 return true
1795                         }
1796                         if (t.system_identifier != null) { // the HTML 4.01 prefixes only count when a system identifier is present
1797                                 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
1798                                         return true
1799                                 }
1800                         }
1801                 }
1802                 return false
1803         }
1804         ins_mode_initial = function (t) {
                     // Token handler for the "initial" insertion mode.
                     // Tokens accepted by is_space_tok are dropped. Comments and the
                     // DOCTYPE are appended directly to doc.children. A DOCTYPE also sets
                     // the document's 'quirks mode' flag when one of the two doctype
                     // predicates matches. Every path other than space/comment switches
                     // ins_mode to ins_mode_before_html.
1805                 if (is_space_tok(t)) {
1806                         return
1807                 }
1808                 if (t.type === TYPE_COMMENT) {
1809                         // ?fixfull
1810                         doc.children.push(t)
1811                         return
1812                 }
1813                 if (t.type === TYPE_DOCTYPE) {
1814                         // fixfull syntax error from first paragraph and following bullets
1815                         // fixfull set doc.doctype
1816                         // fixfull is the "not an iframe srcdoc" thing relevant?
1817                         if (is_quirks_yes_doctype(t)) { // full quirks is tested first; limited quirks only if that fails
1818                                 doc.flag('quirks mode', QUIRKS_YES)
1819                         } else if (is_quirks_limited_doctype(t)) {
1820                                 doc.flag('quirks mode', QUIRKS_LIMITED)
1821                         }
1822                         doc.children.push(t)
1823                         ins_mode = ins_mode_before_html
1824                         return
1825                 }
1826                 // Anything else
1827                 // fixfull not iframe srcdoc?
1828                 parse_error()
1829                 doc.flag('quirks mode', QUIRKS_YES) // content arrived before any DOCTYPE: flag full quirks
1830                 ins_mode = ins_mode_before_html
1831                 process_token(t) // hand the same token to process_token again now that ins_mode has changed
1832         }
1833
1834         // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1835         ins_mode_before_html = function (t) {
1836                 if (t.type === TYPE_DOCTYPE) {
1837                         parse_error()
1838                         return
1839                 }
1840                 if (t.type === TYPE_COMMENT) {
1841                         doc.children.push(t)
1842                         return
1843                 }
1844                 if (is_space_tok(t)) {
1845                         return
1846                 }
1847                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1848                         el = token_to_element(t, NS_HTML, doc)
1849                         doc.children.push(el)
1850                         el.document = doc
1851                         open_els.unshift(el)
1852                         // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1853                         ins_mode = ins_mode_before_head
1854                         return
1855                 }
1856                 if (t.type === TYPE_END_TAG) {
1857                         if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1858                                 // fall through to "anything else"
1859                         } else {
1860                                 parse_error()
1861                                 return
1862                         }
1863                 }
1864                 // Anything else
1865                 el = token_to_element(new_open_tag('html'), NS_HTML, doc)
1866                 doc.children.push(el)
1867                 el.document = doc
1868                 open_els.unshift(el)
1869                 // ?fixfull browsing context
1870                 ins_mode = ins_mode_before_head
1871                 process_token(t)
1872         }
1873
1874         // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1875         ins_mode_before_head = function (t) {
                     // Token handler for the "before head" insertion mode.
                     // A <head> start tag inserts the element. Any other token that is not
                     // dropped or delegated causes a head element to be synthesized from
                     // new_open_tag('head') and the token to be passed to process_token
                     // again. Either way head_element_pointer is set to the new element and
                     // ins_mode becomes ins_mode_in_head.
1876                 var el
1877                 if (is_space_tok(t)) {
1878                         return
1879                 }
1880                 if (t.type === TYPE_COMMENT) {
1881                         insert_comment(t)
1882                         return
1883                 }
1884                 if (t.type === TYPE_DOCTYPE) {
1885                         parse_error()
1886                         return
1887                 }
1888                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1889                         ins_mode_in_body(t) // <html> start tags are delegated to the in-body handler
1890                         return
1891                 }
1892                 if (t.type === TYPE_START_TAG && t.name === 'head') {
1893                         el = insert_html_element(t)
1894                         head_element_pointer = el
1895                         ins_mode = ins_mode_in_head
1896                         return
1897                 }
1898                 if (t.type === TYPE_END_TAG) {
1899                         if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1900                                 // fall through to Anything else below
1901                         } else {
1902                                 parse_error() // every other end tag is an error and is dropped
1903                                 return
1904                         }
1905                 }
1906                 // Anything else
1907                 el = insert_html_element(new_open_tag('head'))
1908                 head_element_pointer = el
1909                 ins_mode = ins_mode_in_head
1910                 process_token(t)
1911         }
1912
1913         // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1914         ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
                     // "Anything else" branch of the "in head" mode: pop the current node
                     // off open_els, switch to ins_mode_after_head, then pass t to
                     // process_token again.
1915                 open_els.shift() // spec says this will be a 'head' node
1916                 ins_mode = ins_mode_after_head
1917                 process_token(t)
1918         }
1919         ins_mode_in_head = function (t) {
1920                 var ail, el
1921                 if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
1922                         insert_character(t)
1923                         return
1924                 }
1925                 if (t.type === TYPE_COMMENT) {
1926                         insert_comment(t)
1927                         return
1928                 }
1929                 if (t.type === TYPE_DOCTYPE) {
1930                         parse_error()
1931                         return
1932                 }
1933                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1934                         ins_mode_in_body(t)
1935                         return
1936                 }
1937                 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
1938                         el = insert_html_element(t)
1939                         open_els.shift()
1940                         t.acknowledge_self_closing()
1941                         return
1942                 }
1943                 if (t.type === TYPE_START_TAG && t.name === 'meta') {
1944                         el = insert_html_element(t)
1945                         open_els.shift()
1946                         t.acknowledge_self_closing()
1947                         // fixfull encoding stuff
1948                         return
1949                 }
1950                 if (t.type === TYPE_START_TAG && t.name === 'title') {
1951                         parse_generic_rcdata_text(t)
1952                         return
1953                 }
1954                 if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
1955                         parse_generic_raw_text(t)
1956                         return
1957                 }
1958                 if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
1959                         insert_html_element(t)
1960                         ins_mode = ins_mode_in_head_noscript
1961                         return
1962                 }
1963                 if (t.type === TYPE_START_TAG && t.name === 'script') {
1964                         ail = adjusted_insertion_location()
1965                         el = token_to_element(t, NS_HTML, ail)
1966                         el.flag('parser-inserted', true)
1967                         // fixfull frament case
1968                         ail[0].children.splice(ail[1], 0, el)
1969                         open_els.unshift(el)
1970                         tok_state = tok_state_script_data
1971                         original_ins_mode = ins_mode // make sure orig... is defined
1972                         ins_mode = ins_mode_text
1973                         return
1974                 }
1975                 if (t.type === TYPE_END_TAG && t.name === 'head') {
1976                         open_els.shift() // will be a head element... spec says so
1977                         ins_mode = ins_mode_after_head
1978                         return
1979                 }
1980                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
1981                         ins_mode_in_head_else(t)
1982                         return
1983                 }
1984                 if (t.type === TYPE_START_TAG && t.name === 'template') {
1985                         insert_html_element(t)
1986                         afe_push_marker()
1987                         flag_frameset_ok = false
1988                         ins_mode = ins_mode_in_template
1989                         template_ins_modes.unshift(ins_mode_in_template)
1990                         return
1991                 }
1992                 if (t.type === TYPE_END_TAG && t.name === 'template') {
1993                         if (template_tag_is_open()) {
1994                                 generate_implied_end_tags
1995                                 if (open_els[0].name !== 'template') {
1996                                         parse_error()
1997                                 }
1998                                 while (true) {
1999                                         el = open_els.shift()
2000                                         if (el.name === 'template' && el.namespace === NS_HTML) {
2001                                                 break
2002                                         }
2003                                 }
2004                                 clear_afe_to_marker()
2005                                 template_ins_modes.shift()
2006                                 reset_ins_mode()
2007                         } else {
2008                                 parse_error()
2009                         }
2010                         return
2011                 }
2012                 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2013                         parse_error()
2014                         return
2015                 }
2016                 ins_mode_in_head_else(t)
2017         }
2018
2019         // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
2020         ins_mode_in_head_noscript_else = function (t) {
                     // "Anything else" branch of the "in head noscript" mode: report a
                     // parse error, pop the current node off open_els, go back to
                     // ins_mode_in_head and pass t to process_token again.
2021                 parse_error()
2022                 open_els.shift()
2023                 ins_mode = ins_mode_in_head
2024                 process_token(t)
2025         }
2026         ins_mode_in_head_noscript = function (t) {
                     // Token handler for the "in head noscript" insertion mode (entered by
                     // ins_mode_in_head for <noscript> when flag_scripting is false).
                     // t is the token to process; nothing is returned.
2027                 if (t.type === TYPE_DOCTYPE) {
2028                         parse_error()
2029                         return
2030                 }
2031                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2032                         ins_mode_in_body(t)
2033                         return
2034                 }
2035                 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
2036                         open_els.shift() // pop the current node and return to the in-head mode
2037                         ins_mode = ins_mode_in_head
2038                         return
2039                 }
2040                 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
2041                         ins_mode_in_head(t) // these tokens are handled by the in-head rules without leaving this mode
2042                         return
2043                 }
2044                 if (t.type === TYPE_END_TAG && t.name === 'br') {
2045                         ins_mode_in_head_noscript_else(t) // must be tested before the generic end-tag case below
2046                         return
2047                 }
2048                 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
2049                         parse_error()
2050                         return
2051                 }
2052                 // Anything else
2053                 ins_mode_in_head_noscript_else(t)
2054         }
2055
2056         // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
2057         ins_mode_after_head_else = function (t) {
                     // "Anything else" branch of the "after head" mode: insert a body
                     // element built from a synthesized start tag, switch to
                     // ins_mode_in_body and pass t to process_token again.
2058                 var body_tok
2059                 body_tok = new_open_tag('body')
2060                 insert_html_element(body_tok)
2061                 ins_mode = ins_mode_in_body
2062                 process_token(t)
2063         }
2064         ins_mode_after_head = function (t) {
                     // Token handler for the "after head" insertion mode.
                     // t is the token to process; nothing is returned. <body> and
                     // <frameset> start tags insert their element and switch mode; most
                     // other content goes through ins_mode_after_head_else, which
                     // synthesizes a body element first.
                     // Note: j and len are declared below but never used in this function.
2065                 var el, i, j, len
2066                 if (is_space_tok(t)) {
2067                         insert_character(t)
2068                         return
2069                 }
2070                 if (t.type === TYPE_COMMENT) {
2071                         insert_comment(t)
2072                         return
2073                 }
2074                 if (t.type === TYPE_DOCTYPE) {
2075                         parse_error()
2076                         return
2077                 }
2078                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2079                         ins_mode_in_body(t)
2080                         return
2081                 }
2082                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2083                         insert_html_element(t)
2084                         flag_frameset_ok = false
2085                         ins_mode = ins_mode_in_body
2086                         return
2087                 }
2088                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2089                         insert_html_element(t)
2090                         ins_mode = ins_mode_in_frameset
2091                         return
2092                 }
2093                 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
2094                         parse_error()
2095                         open_els.unshift(head_element_pointer) // temporarily put the head element back on top of the stack
2096                         ins_mode_in_head(t)
2097                         for (i = 0; i < open_els.length; ++i) { // then remove it again, searching by identity because the handler may have pushed elements above it
2098                                 el = open_els[i]
2099                                 if (el === head_element_pointer) {
2100                                         open_els.splice(i, 1)
2101                                         return
2102                                 }
2103                         }
2104                         return
2105                 }
2106                 if (t.type === TYPE_END_TAG && t.name === 'template') {
2107                         ins_mode_in_head(t)
2108                         return
2109                 }
2110                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2111                         ins_mode_after_head_else(t) // must be tested before the generic end-tag case below
2112                         return
2113                 }
2114                 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2115                         parse_error()
2116                         return
2117                 }
2118                 // Anything else
2119                 ins_mode_after_head_else(t)
2120         }
2121
2122         // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
2123         in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
                     // Handle an end tag called `name` that has no dedicated in-body rule.
                     // Walks open_els from the current node (index 0) toward the bottom:
                     //  - on the first HTML element named `name`: generate implied end tags
                     //    (except for `name`), report a parse error if that element is not
                     //    the current node, then pop elements up to and including it;
                     //  - on a special element first: report a parse error and ignore the tag.
                     // Returns nothing.
2124                 var el, i, node
2125                 node = open_els[0]
2126                 while (true) {
2127                         if (node.name === name && node.namespace === NS_HTML) {
2128                                 generate_implied_end_tags(name) // arg is exception
2129                                 if (node !== open_els[0]) {
2130                                         parse_error()
2131                                 }
2132                                 while (true) {
2133                                         el = open_els.shift()
2134                                         if (el === node) {
2135                                                 return
2136                                         }
2137                                 }
2138                         }
2139                         if (special_elements[node.name] === node.namespace) { // special_elements is looked up by name and compared with the node's namespace
2140                                 parse_error()
2141                                 return
2142                         }
2143                         for (i = 0; i < open_els.length; ++i) { // locate node's index so the walk can step to the next entry down the stack
2144                                 el = open_els[i]
2145                                 if (node === el) {
2146                                         node = open_els[i + 1] // NOTE(review): undefined if node was the last entry; presumably a special element (root html) always stops the walk first - confirm
2147                                         break
2148                                 }
2149                         }
2150                 }
2151         }
2152         ins_mode_in_body = function (t) {
2153                 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, ref, ref1, ref2, ref3, ref4, root_attrs, s, second, second_i, u, w, y, z
2154                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2155                         parse_error()
2156                         return
2157                 }
2158                 if (is_space_tok(t)) {
2159                         reconstruct_afe()
2160                         insert_character(t)
2161                         return
2162                 }
2163                 if (t.type === TYPE_TEXT) {
2164                         reconstruct_afe()
2165                         insert_character(t)
2166                         flag_frameset_ok = false
2167                         return
2168                 }
2169                 if (t.type === TYPE_COMMENT) {
2170                         insert_comment(t)
2171                         return
2172                 }
2173                 if (t.type === TYPE_DOCTYPE) {
2174                         parse_error()
2175                         return
2176                 }
2177                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2178                         parse_error()
2179                         if (template_tag_is_open()) {
2180                                 return
2181                         }
2182                         root_attrs = open_els[open_els.length - 1].attrs
2183                         for (i = 0; i < t.attrs_a.length; ++i) {
2184                                 a = t.attrs_a[i]
2185                                 if (root_attrs[a[0]] == null) {
2186                                         root_attrs[a[0]] = a[1]
2187                                 }
2188                         }
2189                         return
2190                 }
2191
2192                 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2193                         ins_mode_in_head(t)
2194                         return
2195                 }
2196                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2197                         parse_error()
2198                         if (open_els.length < 2) {
2199                                 return
2200                         }
2201                         second = open_els[open_els.length - 2]
2202                         if (second.namespace !== NS_HTML) {
2203                                 return
2204                         }
2205                         if (second.name !== 'body') {
2206                                 return
2207                         }
2208                         if (template_tag_is_open()) {
2209                                 return
2210                         }
2211                         flag_frameset_ok = false
2212                         for (i = 0; i < t.attrs_a.length; ++i) {
2213                                 a = t.attrs_a[i]
2214                                 if (second.attrs[a[0]] == null) {
2215                                         second.attrs[a[0]] = a[1]
2216                                 }
2217                         }
2218                         return
2219                 }
2220                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2221                         parse_error()
2222                         if (open_els.length < 2) {
2223                                 return
2224                         }
2225                         second_i = open_els.length - 2
2226                         second = open_els[second_i]
2227                         if (second.namespace !== NS_HTML) {
2228                                 return
2229                         }
2230                         if (second.name !== 'body') {
2231                                 return
2232                         }
2233                         if (flag_frameset_ok === false) {
2234                                 return
2235                         }
2236                         if (second.parent != null) {
2237                                 for (i = 0; i < second.parent.children.length; ++i) {
2238                                         el = second.parent.children[i]
2239                                         if (el === second) {
2240                                                 second.parent.children.splice(i, 1)
2241                                                 break
2242                                         }
2243                                 }
2244                         }
2245                         open_els.splice(second_i, 1)
2246                         // pop everything except the "root html element"
2247                         while (open_els.length > 1) {
2248                                 open_els.shift()
2249                         }
2250                         insert_html_element(t)
2251                         ins_mode = ins_mode_in_frameset
2252                         return
2253                 }
2254                 if (t.type === TYPE_EOF) {
2255                         ok_tags = {
2256                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2257                                 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2258                                 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2259                         }
2260                         for (i = 0; i < open_els.length; ++i) {
2261                                 el = open_els[i]
2262                                 if (ok_tags[t.name] !== el.namespace) {
2263                                         parse_error()
2264                                         break
2265                                 }
2266                         }
2267                         if (template_ins_modes.length > 0) {
2268                                 ins_mode_in_template(t)
2269                         } else {
2270                                 stop_parsing()
2271                         }
2272                         return
2273                 }
2274                 if (t.type === TYPE_END_TAG && t.name === 'body') {
2275                         if (!is_in_scope('body', NS_HTML)) {
2276                                 parse_error()
2277                                 return
2278                         }
2279                         ok_tags = {
2280                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2281                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2282                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2283                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2284                                 html: NS_HTML
2285                         }
2286                         for (i = 0; i < open_els.length; ++i) {
2287                                 el = open_els[i]
2288                                 if (ok_tags[t.name] !== el.namespace) {
2289                                         parse_error()
2290                                         break
2291                                 }
2292                         }
2293                         ins_mode = ins_mode_after_body
2294                         return
2295                 }
2296                 if (t.type === TYPE_END_TAG && t.name === 'html') {
2297                         if (!is_in_scope('body', NS_HTML)) {
2298                                 parse_error()
2299                                 return
2300                         }
2301                         ok_tags = {
2302                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2303                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2304                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2305                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2306                                 html: NS_HTML
2307                         }
2308                         for (i = 0; i < open_els.length; ++i) {
2309                                 el = open_els[i]
2310                                 if (ok_tags[t.name] !== el.namespace) {
2311                                         parse_error()
2312                                         break
2313                                 }
2314                         }
2315                         ins_mode = ins_mode_after_body
2316                         process_token(t)
2317                         return
2318                 }
2319                 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2320                         close_p_if_in_button_scope()
2321                         insert_html_element(t)
2322                         return
2323                 }
2324                 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2325                         close_p_if_in_button_scope()
2326                         if (h_tags[open_els[0].name] === open_els[0].namespace) {
2327                                 parse_error()
2328                                 open_els.shift()
2329                         }
2330                         insert_html_element(t)
2331                         return
2332                 }
2333                 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2334                         close_p_if_in_button_scope()
2335                         insert_html_element(t)
2336                         eat_next_token_if_newline()
2337                         flag_frameset_ok = false
2338                         return
2339                 }
2340                 if (t.type === TYPE_START_TAG && t.name === 'form') {
2341                         if (!(form_element_pointer === null || template_tag_is_open())) {
2342                                 parse_error()
2343                                 return
2344                         }
2345                         close_p_if_in_button_scope()
2346                         el = insert_html_element(t)
2347                         if (!template_tag_is_open()) {
2348                                 form_element_pointer = el
2349                         }
2350                         return
2351                 }
2352                 if (t.type === TYPE_START_TAG && t.name === 'li') {
2353                         flag_frameset_ok = false
2354                         for (i = 0; i < open_els.length; ++i) {
2355                                 node = open_els[i]
2356                                 if (node.name === 'li' && node.namespace === NS_HTML) {
2357                                         generate_implied_end_tags('li') // arg is exception
2358                                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2359                                                 parse_error()
2360                                         }
2361                                         while (true) {
2362                                                 el = open_els.shift()
2363                                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2364                                                         break
2365                                                 }
2366                                         }
2367                                         break
2368                                 }
2369                                 if (el_is_special_not_adp(node)) {
2370                                         break
2371                                 }
2372                         }
2373                         close_p_if_in_button_scope()
2374                         insert_html_element(t)
2375                         return
2376                 }
2377                 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2378                         flag_frameset_ok = false
2379                         for (i = 0; i < open_els.length; ++i) {
2380                                 node = open_els[i]
2381                                 if (node.name === 'dd' && node.namespace === NS_HTML) {
2382                                         generate_implied_end_tags('dd') // arg is exception
2383                                         if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2384                                                 parse_error()
2385                                         }
2386                                         while (true) {
2387                                                 el = open_els.shift()
2388                                                 if (el.name === 'dd' && el.namespace === NS_HTML) {
2389                                                         break
2390                                                 }
2391                                         }
2392                                         break
2393                                 }
2394                                 if (node.name === 'dt' && node.namespace === NS_HTML) {
2395                                         generate_implied_end_tags('dt') // arg is exception
2396                                         if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2397                                                 parse_error()
2398                                         }
2399                                         while (true) {
2400                                                 el = open_els.shift()
2401                                                 if (el.name === 'dt' && el.namespace === NS_HTML) {
2402                                                         break
2403                                                 }
2404                                         }
2405                                         break
2406                                 }
2407                                 if (el_is_special_not_adp(node)) {
2408                                         break
2409                                 }
2410                         }
2411                         close_p_if_in_button_scope()
2412                         insert_html_element(t)
2413                         return
2414                 }
2415                 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2416                         close_p_if_in_button_scope()
2417                         insert_html_element(t)
2418                         tok_state = tok_state_plaintext
2419                         return
2420                 }
2421                 if (t.type === TYPE_START_TAG && t.name === 'button') {
2422                         if (is_in_scope('button', NS_HTML)) {
2423                                 parse_error()
2424                                 generate_implied_end_tags()
2425                                 while (true) {
2426                                         el = open_els.shift()
2427                                         if (el.name === 'button' && el.namespace === NS_HTML) {
2428                                                 break
2429                                         }
2430                                 }
2431                         }
2432                         reconstruct_afe()
2433                         insert_html_element(t)
2434                         flag_frameset_ok = false
2435                         return
2436                 }
2437                 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2438                         if (!is_in_scope(t.name, NS_HTML)) {
2439                                 parse_error()
2440                                 return
2441                         }
2442                         generate_implied_end_tags()
2443                         if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2444                                 parse_error()
2445                         }
2446                         while (true) {
2447                                 el = open_els.shift()
2448                                 if (el.name === t.name && el.namespace === NS_HTML) {
2449                                         return
2450                                 }
2451                         }
2452                         return
2453                 }
2454                 if (t.type === TYPE_END_TAG && t.name === 'form') {
2455                         if (!template_tag_is_open()) {
2456                                 node = form_element_pointer
2457                                 form_element_pointer = null
2458                                 if (node === null || !el_is_in_scope(node)) {
2459                                         parse_error()
2460                                         return
2461                                 }
2462                                 generate_implied_end_tags()
2463                                 if (open_els[0] !== node) {
2464                                         parse_error()
2465                                 }
2466                                 for (i = 0; i < open_els.length; ++i) {
2467                                         el = open_els[i]
2468                                         if (el === node) {
2469                                                 open_els.splice(i, 1)
2470                                                 break
2471                                         }
2472                                 }
2473                         } else {
2474                                 if (!is_in_scope('form', NS_HTML)) {
2475                                         parse_error()
2476                                         return
2477                                 }
2478                                 generate_implied_end_tags()
2479                                 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2480                                         parse_error()
2481                                 }
2482                                 while (true) {
2483                                         el = open_els.shift()
2484                                         if (el.name === 'form' && el.namespace === NS_HTML) {
2485                                                 break
2486                                         }
2487                                 }
2488                         }
2489                         return
2490                 }
2491                 if (t.type === TYPE_END_TAG && t.name === 'p') {
2492                         if (!is_in_button_scope('p', NS_HTML)) {
2493                                 parse_error()
2494                                 insert_html_element(new_open_tag('p'))
2495                         }
2496                         close_p_element()
2497                         return
2498                 }
2499                 if (t.type === TYPE_END_TAG && t.name === 'li') {
2500                         if (!is_in_li_scope('li', NS_HTML)) {
2501                                 parse_error()
2502                                 return
2503                         }
2504                         generate_implied_end_tags('li') // arg is exception
2505                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2506                                 parse_error()
2507                         }
2508                         while (true) {
2509                                 el = open_els.shift()
2510                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2511                                         break
2512                                 }
2513                         }
2514                         return
2515                 }
2516                 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2517                         if (!is_in_scope(t.name, NS_HTML)) {
2518                                 parse_error()
2519                                 return
2520                         }
2521                         generate_implied_end_tags(t.name) // arg is exception
2522                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2523                                 parse_error()
2524                         }
2525                         while (true) {
2526                                 el = open_els.shift()
2527                                 if (el.name === t.name && el.namespace === NS_HTML) {
2528                                         break
2529                                 }
2530                         }
2531                         return
2532                 }
2533                 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2534                         h_in_scope = false
2535                         for (i = 0; i < open_els.length; ++i) {
2536                                 el = open_els[i]
2537                                 if (h_tags[el.name] === el.namespace) {
2538                                         h_in_scope = true
2539                                         break
2540                                 }
2541                                 if (standard_scopers[el.name] === el.namespace) {
2542                                         break
2543                                 }
2544                         }
2545                         if (!h_in_scope) {
2546                                 parse_error()
2547                                 return
2548                         }
2549                         generate_implied_end_tags()
2550                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2551                                 parse_error()
2552                         }
2553                         while (true) {
2554                                 el = open_els.shift()
2555                                 if (h_tags[el.name] === el.namespace) {
2556                                         break
2557                                 }
2558                         }
2559                         return
2560                 }
2561                 // deep breath!
2562                 if (t.type === TYPE_START_TAG && t.name === 'a') {
2563                         // If the list of active formatting elements contains an a element
2564                         // between the end of the list and the last marker on the list (or
2565                         // the start of the list if there is no marker on the list), then
2566                         // this is a parse error; run the adoption agency algorithm for the
2567                         // tag name "a", then remove that element from the list of active
2568                         // formatting elements and the stack of open elements if the
2569                         // adoption agency algorithm didn't already remove it (it might not
2570                         // have if the element is not in table scope).
2571                         found = false
2572                         for (i = 0; i < afe.length; ++i) {
2573                                 el = afe[i]
2574                                 if (el.type === TYPE_AFE_MARKER) {
2575                                         break
2576                                 }
2577                                 if (el.name === 'a' && el.namespace === NS_HTML) {
2578                                         found = el
2579                                 }
2580                         }
2581                         if (found != null) {
2582                                 parse_error()
2583                                 adoption_agency('a')
2584                                 for (i = 0; i < afe.length; ++i) {
2585                                         el = afe[i]
2586                                         if (el === found) {
2587                                                 afe.splice(i, 1)
2588                                         }
2589                                 }
2590                                 for (i = 0; i < open_els.length; ++i) {
2591                                         el = open_els[i]
2592                                         if (el === found) {
2593                                                 open_els.splice(i, 1)
2594                                         }
2595                                 }
2596                         }
2597                         reconstruct_afe()
2598                         el = insert_html_element(t)
2599                         afe_push(el)
2600                         return
2601                 }
2602                 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2603                         reconstruct_afe()
2604                         el = insert_html_element(t)
2605                         afe_push(el)
2606                         return
2607                 }
2608                 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2609                         reconstruct_afe()
2610                         if (is_in_scope('nobr', NS_HTML)) {
2611                                 parse_error()
2612                                 adoption_agency('nobr')
2613                                 reconstruct_afe()
2614                         }
2615                         el = insert_html_element(t)
2616                         afe_push(el)
2617                         return
2618                 }
2619                 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2620                         adoption_agency(t.name)
2621                         return
2622                 }
2623                 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2624                         reconstruct_afe()
2625                         insert_html_element(t)
2626                         afe_push_marker()
2627                         flag_frameset_ok = false
2628                         return
2629                 }
2630                 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2631                         if (!is_in_scope(t.name, NS_HTML)) {
2632                                 parse_error()
2633                                 return
2634                         }
2635                         generate_implied_end_tags()
2636                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2637                                 parse_error()
2638                         }
2639                         while (true) {
2640                                 el = open_els.shift()
2641                                 if (el.name === t.name && el.namespace === NS_HTML) {
2642                                         break
2643                                 }
2644                         }
2645                         clear_afe_to_marker()
2646                         return
2647                 }
2648                 if (t.type === TYPE_START_TAG && t.name === 'table') {
2649                         if (doc.flag('quirks mode') !== QUIRKS_YES) {
2650                                 close_p_if_in_button_scope() // test
2651                         }
2652                         insert_html_element(t)
2653                         flag_frameset_ok = false
2654                         ins_mode = ins_mode_in_table
2655                         return
2656                 }
2657                 if (t.type === TYPE_END_TAG && t.name === 'br') {
2658                         parse_error()
2659                         // W3C: t.type = TYPE_START_TAG
2660                         t = new_open_tag('br') // WHATWG
2661                         // fall through
2662                 }
2663                 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2664                         reconstruct_afe()
2665                         insert_html_element(t)
2666                         open_els.shift()
2667                         t.acknowledge_self_closing()
2668                         flag_frameset_ok = false
2669                         return
2670                 }
2671                 if (t.type === TYPE_START_TAG && t.name === 'input') {
2672                         reconstruct_afe()
2673                         insert_html_element(t)
2674                         open_els.shift()
2675                         t.acknowledge_self_closing()
2676                         if (!is_input_hidden_tok(t)) {
2677                                 flag_frameset_ok = false
2678                         }
2679                         return
2680                 }
2681                 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2682                         // WHATWG adds 'menuitem' for this block
2683                         insert_html_element(t)
2684                         open_els.shift()
2685                         t.acknowledge_self_closing()
2686                         return
2687                 }
2688                 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2689                         close_p_if_in_button_scope()
2690                         insert_html_element(t)
2691                         open_els.shift()
2692                         t.acknowledge_self_closing()
2693                         flag_frameset_ok = false
2694                         return
2695                 }
2696                 if (t.type === TYPE_START_TAG && t.name === 'image') {
2697                         parse_error()
2698                         t.name = 'img'
2699                         process_token(t)
2700                         return
2701                 }
2702                 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2703                         parse_error()
2704                         if (template_tag_is_open() === false && form_element_pointer !== null) {
2705                                 return
2706                         }
2707                         t.acknowledge_self_closing()
2708                         flag_frameset_ok = false
2709                         close_p_if_in_button_scope()
2710                         el = insert_html_element(new_open_tag('form'))
2711                         if (!template_tag_is_open()) {
2712                                 form_element_pointer = el
2713                         }
2714                         for (i = 0; i < t.attrs_a.length; ++i) {
2715                                 a = t.attrs_a[i]
2716                                 if (a[0] === 'action') {
2717                                         el.attrs['action'] = a[1]
2718                                         break
2719                                 }
2720                         }
2721                         insert_html_element(new_open_tag('hr'))
2722                         open_els.shift()
2723                         reconstruct_afe()
2724                         insert_html_element(new_open_tag('label'))
2725                         // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2726                         input_el = new_open_tag('input')
2727                         prompt = null
2728                         for (i = 0; i < t.attrs_a.length; ++i) {
2729                                 a = t.attrs_a[i]
2730                                 if (a[0] === 'prompt') {
2731                                         prompt = a[1]
2732                                 }
2733                                 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2734                                         input_el.attrs_a.push([a[0], a[1]])
2735                                 }
2736                         }
2737                         input_el.attrs_a.push(['name', 'isindex'])
2738                         // fixfull this next bit is in english... internationalize?
2739                         if (prompt == null) {
2740                                 prompt = "This is a searchable index. Enter search keywords: "
2741                         }
2742                         insert_character(new_character_token(prompt)) // fixfull split
2743                         // TODO submit typo "balue" in spec
2744                         insert_html_element(input_el)
2745                         open_els.shift()
2746                         // insert_character('') // you can put chars here if prompt attr missing
2747                         open_els.shift()
2748                         insert_html_element(new_open_tag('hr'))
2749                         open_els.shift()
2750                         open_els.shift()
2751                         if (!template_tag_is_open()) {
2752                                 form_element_pointer = null
2753                         }
2754                         return
2755                 }
2756                 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2757                         insert_html_element(t)
2758                         eat_next_token_if_newline()
2759                         tok_state = tok_state_rcdata
2760                         original_ins_mode = ins_mode
2761                         flag_frameset_ok = false
2762                         ins_mode = ins_mode_text
2763                         return
2764                 }
2765                 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2766                         close_p_if_in_button_scope()
2767                         reconstruct_afe()
2768                         flag_frameset_ok = false
2769                         parse_generic_raw_text(t)
2770                         return
2771                 }
2772                 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2773                         flag_frameset_ok = false
2774                         parse_generic_raw_text(t)
2775                         return
2776                 }
2777                 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2778                         parse_generic_raw_text(t)
2779                         return
2780                 }
2781                 if (t.type === TYPE_START_TAG && t.name === 'select') {
2782                         reconstruct_afe()
2783                         insert_html_element(t)
2784                         flag_frameset_ok = false
2785                         if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2786                                 ins_mode = ins_mode_in_select_in_table
2787                         } else {
2788                                 ins_mode = ins_mode_in_select
2789                         }
2790                         return
2791                 }
2792                 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2793                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2794                                 open_els.shift()
2795                         }
2796                         reconstruct_afe()
2797                         insert_html_element(t)
2798                         return
2799                 }
2800 // this comment block implements the W3C spec
2801 //              if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2802 //                      if is_in_scope 'ruby', NS_HTML
2803 //                              generate_implied_end_tags()
2804 //                              unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2805 //                                      parse_error()
2806 //                      insert_html_element t
2807 //                      return
2808 //              if t.type === TYPE_START_TAG && t.name === 'rt'
2809 //                      if is_in_scope 'ruby', NS_HTML
2810 //                              generate_implied_end_tags 'rtc' // arg === exception
2811 //                              unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2812 //                                      parse_error()
2813 //                      insert_html_element t
2814 //                      return
2815 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2816                 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2817                         if (is_in_scope('ruby', NS_HTML)) {
2818                                 generate_implied_end_tags()
2819                                 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2820                                         parse_error()
2821                                 }
2822                         }
2823                         insert_html_element(t)
2824                         return
2825                 }
2826                 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2827                         if (is_in_scope('ruby', NS_HTML)) {
2828                                 generate_implied_end_tags('rtc')
2829                                 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2830                                         parse_error()
2831                                 }
2832                         }
2833                         insert_html_element(t)
2834                         return
2835                 }
2836 // end WHATWG chunk
2837                 if (t.type === TYPE_START_TAG && t.name === 'math') {
2838                         reconstruct_afe()
2839                         adjust_mathml_attributes(t)
2840                         adjust_foreign_attributes(t)
2841                         insert_foreign_element(t, NS_MATHML)
2842                         if (t.flag('self-closing')) {
2843                                 open_els.shift()
2844                                 t.acknowledge_self_closing()
2845                         }
2846                         return
2847                 }
2848                 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2849                         reconstruct_afe()
2850                         adjust_svg_attributes(t)
2851                         adjust_foreign_attributes(t)
2852                         insert_foreign_element(t, NS_SVG)
2853                         if (t.flag('self-closing')) {
2854                                 open_els.shift()
2855                                 t.acknowledge_self_closing()
2856                         }
2857                         return
2858                 }
2859                 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2860                         parse_error()
2861                         return
2862                 }
2863                 if (t.type === TYPE_START_TAG) { // any other start tag
2864                         reconstruct_afe()
2865                         insert_html_element(t)
2866                         return
2867                 }
2868                 if (t.type === TYPE_END_TAG) { // any other end tag
2869                         in_body_any_other_end_tag(t.name)
2870                         return
2871                 }
2872         }
2873
2874         // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
// Tree-construction handler for the "text" insertion mode.
//
// t: the token to process. Only TYPE_TEXT, TYPE_EOF and TYPE_END_TAG tokens
// are acted on; a token of any other type is ignored.
//
// Returns nothing. Its effects are on open_els and ins_mode (plus whatever
// insert_character, parse_error and process_token do).
ins_mode_text = function (t) {
        var current
        switch (t.type) {
                case TYPE_TEXT:
                        insert_character(t)
                break
                case TYPE_EOF:
                        parse_error()
                        // open_els[0] is the element that shift() removes below
                        current = open_els[0]
                        if (current.name === 'script' && current.namespace === NS_HTML) {
                                current.flag('already started', true)
                        }
                        open_els.shift()
                        // go back to the mode that was saved in original_ins_mode and
                        // hand the same EOF token to process_token again
                        // NOTE(review): presumably process_token dispatches to the
                        // restored ins_mode -- confirm
                        ins_mode = original_ins_mode
                        process_token(t)
                break
                case TYPE_END_TAG:
                        // </script> is handled exactly like every other end tag:
                        // remove open_els[0] and restore the saved insertion mode.
                        // fixfull the spec seems to assume that I'm going to run the script
                        // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
                        open_els.shift()
                        ins_mode = original_ins_mode
                break
        }
}
2903
2904         // the functions below implement the tokenizer states described here:
2905         // http://www.w3.org/TR/html5/syntax.html#tokenization
2906
2907         // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
// Fallback used by ins_mode_in_table for tokens it does not handle itself
// (for example text tokens when open_els[0] is not a table-structure
// element): report a parse error, then run the token through
// ins_mode_in_body with flag_foster_parenting switched on.
//
// t: the token to process; it is passed to ins_mode_in_body unchanged.
//
// The flag is cleared in a finally block so it is switched off again even
// when ins_mode_in_body throws; the exception itself still propagates to the
// caller. The flag is always left false afterwards, not restored to whatever
// value it had before the call.
ins_mode_in_table_else = function (t) {
        parse_error()
        flag_foster_parenting = true
        try {
                ins_mode_in_body(t)
        } finally {
                flag_foster_parenting = false
        }
}
// Handles one token according to the "in table" insertion mode.
//
// Text that arrives while the current node is an HTML table, tbody, tfoot,
// thead or tr is handed to the "in table text" mode (after remembering the
// current mode in original_ins_mode). Tokens with no dedicated rule here go
// through ins_mode_in_table_else.
ins_mode_in_table = function (t) {
        var current, name, close_table
        var table_parts = ['table', 'tbody', 'tfoot', 'thead', 'tr']
        var ignored_end_tags = ['body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']

        // Pops open elements up to and including the nearest HTML <table>,
        // then recomputes the insertion mode.
        close_table = function () {
                var popped
                do {
                        popped = open_els.shift()
                } while (!(popped.name === 'table' && popped.namespace === NS_HTML))
                reset_ins_mode()
        }

        if (t.type === TYPE_TEXT) {
                current = open_els[0]
                if (table_parts.indexOf(current.name) !== -1 && current.namespace === NS_HTML) {
                        // buffer the text; "in table text" decides later how to insert it
                        pending_table_character_tokens = []
                        original_ins_mode = ins_mode
                        ins_mode = ins_mode_in_table_text
                        process_token(t)
                } else {
                        ins_mode_in_table_else(t)
                }
                return
        }

        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }

        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }

        if (t.type === TYPE_START_TAG) {
                name = t.name
                if (name === 'caption') {
                        clear_stack_to_table_context()
                        afe_push_marker()
                        insert_html_element(t)
                        ins_mode = ins_mode_in_caption
                } else if (name === 'colgroup') {
                        clear_stack_to_table_context()
                        insert_html_element(t)
                        ins_mode = ins_mode_in_column_group
                } else if (name === 'col') {
                        // imply the missing <colgroup>, then let that mode see the <col>
                        clear_stack_to_table_context()
                        insert_html_element(new_open_tag('colgroup'))
                        ins_mode = ins_mode_in_column_group
                        process_token(t)
                } else if (name === 'tbody' || name === 'tfoot' || name === 'thead') {
                        clear_stack_to_table_context()
                        insert_html_element(t)
                        ins_mode = ins_mode_in_table_body
                } else if (name === 'td' || name === 'th' || name === 'tr') {
                        // imply the missing <tbody>, then let that mode see the token
                        clear_stack_to_table_context()
                        insert_html_element(new_open_tag('tbody'))
                        ins_mode = ins_mode_in_table_body
                        process_token(t)
                } else if (name === 'table') {
                        // nested <table>: close the open one and reprocess, unless
                        // no table is in table scope (then the token is dropped)
                        parse_error()
                        if (is_in_table_scope('table', NS_HTML)) {
                                close_table()
                                process_token(t)
                        }
                } else if (name === 'style' || name === 'script' || name === 'template') {
                        ins_mode_in_head(t)
                } else if (name === 'input') {
                        if (is_input_hidden_tok(t)) {
                                // hidden inputs stay in the table: insert and pop right away
                                parse_error()
                                insert_html_element(t)
                                open_els.shift()
                                t.acknowledge_self_closing()
                        } else {
                                ins_mode_in_table_else(t)
                        }
                } else if (name === 'form') {
                        parse_error()
                        // dropped when a form is already being tracked or a template is open
                        if (form_element_pointer == null && !template_tag_is_open()) {
                                form_element_pointer = insert_html_element(t)
                                open_els.shift()
                        }
                } else {
                        ins_mode_in_table_else(t)
                }
                return
        }

        if (t.type === TYPE_END_TAG) {
                name = t.name
                if (name === 'table') {
                        if (is_in_table_scope('table', NS_HTML)) {
                                close_table()
                        } else {
                                parse_error()
                        }
                } else if (ignored_end_tags.indexOf(name) !== -1) {
                        parse_error()
                } else if (name === 'template') {
                        ins_mode_in_head(t)
                } else {
                        ins_mode_in_table_else(t)
                }
                return
        }

        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }

        ins_mode_in_table_else(t)
}
3052
3053         // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
// Handles one token according to the "in table text" insertion mode.
//
// Text tokens are collected in pending_table_character_tokens (a lone
// U+0000 is reported as a parse error and dropped). The first non-text
// token flushes the buffer, restores original_ins_mode and is reprocessed.
ins_mode_in_table_text = function (t) {
        var idx, only_space, buffered

        if (t.type === TYPE_TEXT) {
                if (t.text === "\u0000") {
                        parse_error()
                } else {
                        pending_table_character_tokens.push(t)
                }
                return
        }

        // Anything else: find out whether the buffer holds nothing but whitespace
        only_space = true
        for (idx = 0; idx < pending_table_character_tokens.length; idx += 1) {
                if (!is_space_tok(pending_table_character_tokens[idx])) {
                        only_space = false
                        break
                }
        }

        // whitespace-only buffers are inserted normally; otherwise every
        // buffered token takes the "in table" anything-else route
        for (idx = 0; idx < pending_table_character_tokens.length; idx += 1) {
                buffered = pending_table_character_tokens[idx]
                if (only_space) {
                        insert_character(buffered)
                } else {
                        ins_mode_in_table_else(buffered)
                }
        }

        pending_table_character_tokens = []
        ins_mode = original_ins_mode
        process_token(t)
}
3089
3090         // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
// Handles one token according to the "in caption" insertion mode.
// Tokens without a rule of their own are processed with the "in body" rules.
ins_mode_in_caption = function (t) {
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG
        var closing_start_tags = ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
        var ignored_end_tags = ['body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']

        // Pops open elements up to and including the HTML <caption>, clears
        // the active formatting elements to the last marker and switches
        // back to "in table".
        var close_caption = function () {
                var popped
                do {
                        popped = open_els.shift()
                } while (!(popped.name === 'caption' && popped.namespace === NS_HTML))
                clear_afe_to_marker()
                ins_mode = ins_mode_in_table
        }

        if (is_end && t.name === 'caption') {
                if (!is_in_table_scope('caption', NS_HTML)) {
                        // fragment case
                        parse_error()
                        return
                }
                generate_implied_end_tags()
                if (open_els[0].name !== 'caption') {
                        parse_error()
                }
                close_caption()
                return
        }

        if ((is_start && closing_start_tags.indexOf(t.name) !== -1) || (is_end && t.name === 'table')) {
                parse_error()
                if (is_in_table_scope('caption', NS_HTML)) {
                        close_caption()
                        process_token(t)
                }
                // else fragment case: token is dropped
                return
        }

        if (is_end && ignored_end_tags.indexOf(t.name) !== -1) {
                parse_error()
                return
        }

        // Anything else
        ins_mode_in_body(t)
}
3136
3137         // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
// Handles one token according to the "in column group" insertion mode.
//
// Whitespace and comments are inserted in place, <col> is inserted and
// popped immediately, and </colgroup> closes the current colgroup. Any
// token without a rule here closes the colgroup (when it is the current
// node) and is reprocessed in the "in table" mode.
ins_mode_in_column_group = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'col') {
                // <col> never has children: insert it and pop it right away
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
                // The namespace must be read from the current node (open_els[0]).
                // Reading it from the open_els array itself is always undefined,
                // which made every </colgroup> a parse error that never popped.
                if (open_els[0].name === 'colgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                        ins_mode = ins_mode_in_table
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'col') {
                parse_error()
                return
        }
        if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
                ins_mode_in_head(t)
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        // Anything else
        if (open_els[0].name !== 'colgroup') {
                parse_error()
                return
        }
        open_els.shift()
        ins_mode = ins_mode_in_table
        process_token(t)
}
3192
3193         // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
// Handles one token according to the "in table body" insertion mode.
// Tokens without a rule of their own are processed with the "in table" rules.
ins_mode_in_table_body = function (t) {
        var idx, node, section_in_scope
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG
        var sections = ['tbody', 'tfoot', 'thead']
        var section_closers = ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead']
        var ignored_end_tags = ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr']

        // Clears back to a table body context, pops the current node and
        // returns to the "in table" mode.
        var leave_section = function () {
                clear_stack_to_table_body_context()
                open_els.shift()
                ins_mode = ins_mode_in_table
        }

        if (is_start && t.name === 'tr') {
                clear_stack_to_table_body_context()
                insert_html_element(t)
                ins_mode = ins_mode_in_row
                return
        }

        if (is_start && (t.name === 'th' || t.name === 'td')) {
                // a cell without a row: imply the <tr>, then reprocess the cell
                parse_error()
                clear_stack_to_table_body_context()
                insert_html_element(new_open_tag('tr'))
                ins_mode = ins_mode_in_row
                process_token(t)
                return
        }

        if (is_end && sections.indexOf(t.name) !== -1) {
                if (is_in_table_scope(t.name, NS_HTML)) {
                        leave_section()
                } else {
                        parse_error()
                }
                return
        }

        if ((is_start && section_closers.indexOf(t.name) !== -1) || (is_end && t.name === 'table')) {
                // look for an HTML tbody/tfoot/thead on the stack, giving up at
                // the first entry that table_scopers marks as a scope boundary
                section_in_scope = false
                for (idx = 0; idx < open_els.length; idx += 1) {
                        node = open_els[idx]
                        if (node.namespace === NS_HTML && sections.indexOf(node.name) !== -1) {
                                section_in_scope = true
                                break
                        }
                        if (table_scopers[node.name] === node.namespace) {
                                break
                        }
                }
                if (section_in_scope) {
                        leave_section()
                        process_token(t)
                } else {
                        parse_error()
                }
                return
        }

        if (is_end && ignored_end_tags.indexOf(t.name) !== -1) {
                parse_error()
                return
        }

        // Anything else
        ins_mode_in_table(t)
}
3249
3250         // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
// Handles one token according to the "in row" insertion mode.
// Tokens without a rule of their own are processed with the "in table" rules.
ins_mode_in_row = function (t) {
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG
        var row_closers = ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr']
        var sections = ['tbody', 'tfoot', 'thead']
        var ignored_end_tags = ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th']

        // Clears back to a table row context, pops the current node and
        // returns to the "in table body" mode.
        var leave_row = function () {
                clear_stack_to_table_row_context()
                open_els.shift()
                ins_mode = ins_mode_in_table_body
        }

        if (is_start && (t.name === 'th' || t.name === 'td')) {
                clear_stack_to_table_row_context()
                insert_html_element(t)
                ins_mode = ins_mode_in_cell
                afe_push_marker()
                return
        }

        if (is_end && t.name === 'tr') {
                if (is_in_table_scope('tr', NS_HTML)) {
                        leave_row()
                } else {
                        parse_error()
                }
                return
        }

        if ((is_start && row_closers.indexOf(t.name) !== -1) || (is_end && t.name === 'table')) {
                // close the row first, then let "in table body" see the token
                if (is_in_table_scope('tr', NS_HTML)) {
                        leave_row()
                        process_token(t)
                } else {
                        parse_error()
                }
                return
        }

        if (is_end && sections.indexOf(t.name) !== -1) {
                if (!is_in_table_scope(t.name, NS_HTML)) {
                        parse_error()
                        return
                }
                // with no tr in table scope the token is dropped silently
                if (is_in_table_scope('tr', NS_HTML)) {
                        leave_row()
                        process_token(t)
                }
                return
        }

        if (is_end && ignored_end_tags.indexOf(t.name) !== -1) {
                parse_error()
                return
        }

        // Anything else
        ins_mode_in_table(t)
}
3300
3301         // http://www.w3.org/TR/html5/syntax.html#close-the-cell
// Implements the "close the cell" steps: generates implied end tags,
// reports a parse error when the current node is not an HTML td or th,
// pops the stack of open elements through the nearest HTML td/th, clears
// the active formatting elements to the last marker and switches to the
// "in row" insertion mode.
close_the_cell = function () {
        var el
        generate_implied_end_tags()
        // Compare the element's name for both cell types. Comparing the element
        // object itself to 'th' is never true, which flagged every <th> as an error.
        if (!((open_els[0].name === 'td' || open_els[0].name === 'th') && open_els[0].namespace === NS_HTML)) {
                parse_error()
        }
        while (true) {
                el = open_els.shift()
                if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
                        break
                }
        }
        clear_afe_to_marker()
        ins_mode = ins_mode_in_row
}
3317
3318         // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
// Handles one token according to the "in cell" insertion mode.
// Tokens without a rule of their own are processed with the "in body" rules.
ins_mode_in_cell = function (t) {
        var idx, node, popped, cell_in_scope
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG
        var cell_closers = ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
        var ignored_end_tags = ['body', 'caption', 'col', 'colgroup', 'html']
        var table_end_tags = ['table', 'tbody', 'tfoot', 'thead', 'tr']

        if (is_end && (t.name === 'td' || t.name === 'th')) {
                if (!is_in_table_scope(t.name, NS_HTML)) {
                        parse_error()
                        return
                }
                generate_implied_end_tags()
                if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
                        parse_error()
                }
                // pop through the HTML cell element this end tag names
                do {
                        popped = open_els.shift()
                } while (!(popped.name === t.name && popped.namespace === NS_HTML))
                clear_afe_to_marker()
                ins_mode = ins_mode_in_row
                return
        }

        if (is_start && cell_closers.indexOf(t.name) !== -1) {
                // look for an HTML td/th on the stack, giving up at the first
                // entry that table_scopers marks as a scope boundary
                cell_in_scope = false
                for (idx = 0; idx < open_els.length; idx += 1) {
                        node = open_els[idx]
                        if (node.namespace === NS_HTML && (node.name === 'td' || node.name === 'th')) {
                                cell_in_scope = true
                                break
                        }
                        if (table_scopers[node.name] === node.namespace) {
                                break
                        }
                }
                if (cell_in_scope) {
                        close_the_cell()
                        process_token(t)
                } else {
                        parse_error()
                }
                return
        }

        if (is_end && ignored_end_tags.indexOf(t.name) !== -1) {
                parse_error()
                return
        }

        if (is_end && table_end_tags.indexOf(t.name) !== -1) {
                if (is_in_table_scope(t.name, NS_HTML)) {
                        close_the_cell()
                        process_token(t)
                } else {
                        parse_error()
                }
                return
        }

        // Anything else
        ins_mode_in_body(t)
}
3376
3377         // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
// Handles one token according to the "in select" insertion mode.
//
// Text and comments are inserted, <option>/<optgroup> open and close each
// other as the rules below describe, and tags that cannot live inside a
// select (<input>, <keygen>, <textarea>, a nested <select>) close the
// select. Any token without a rule here is a parse error and is dropped.
ins_mode_in_select = function (t) {
        var el
        if (t.type === TYPE_TEXT && t.text === "\u0000") {
                parse_error()
                return
        }
        if (t.type === TYPE_TEXT) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'option') {
                // a new <option> implicitly closes the one that is still open
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                insert_html_element(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
                // a new <optgroup> implicitly closes an open <option> and <optgroup>
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                insert_html_element(t)
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        // The element right below the option is open_els[1], so its
                        // namespace is the one to test (open_els[0] was checked above).
                        if (open_els[1].name === 'optgroup' && open_els[1].namespace === NS_HTML) {
                                open_els.shift()
                        }
                }
                if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'option') {
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'select') {
                if (is_in_select_scope('select', NS_HTML)) {
                        while (true) {
                                el = open_els.shift()
                                if (el.name === 'select' && el.namespace === NS_HTML) {
                                        break
                                }
                        }
                        reset_ins_mode()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'select') {
                parse_error()
                // WHATWG: ignore the token when no select is in select scope
                // (fragment case). Without this check the loop below would pop
                // past the bottom of the stack and throw.
                if (!is_in_select_scope('select', NS_HTML)) {
                        return
                }
                while (true) {
                        el = open_els.shift()
                        if (el.name === 'select' && el.namespace === NS_HTML) {
                                break
                        }
                }
                reset_ins_mode()
                return
        }
        if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
                parse_error()
                if (!is_in_select_scope('select', NS_HTML)) {
                        return
                }
                // close the select, then reprocess the token in the new mode
                while (true) {
                        el = open_els.shift()
                        if (el.name === 'select' && el.namespace === NS_HTML) {
                                break
                        }
                }
                reset_ins_mode()
                process_token(t)
                return
        }
        // <script>, <template> and </template> use the "in head" rules. The
        // </template> end tag used to fall through to "anything else" and be dropped.
        if ((t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
                ins_mode_in_head(t)
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        // Anything else
        parse_error()
}
3491
3492         // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// Handles one token according to the "in select in table" insertion mode.
//
// Table-structure start and end tags close the open select and are then
// reprocessed (end tags only when the named element is in table scope).
// Every other token is processed with the "in select" rules.
ins_mode_in_select_in_table = function (t) {
        var table_tags = ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th']
        var is_table_tag = table_tags.indexOf(t.name) !== -1

        // Pops open elements up to and including the HTML <select>, recomputes
        // the insertion mode and reprocesses the current token.
        var close_select_and_reprocess = function () {
                var popped
                do {
                        popped = open_els.shift()
                } while (!(popped.name === 'select' && popped.namespace === NS_HTML))
                reset_ins_mode()
                process_token(t)
        }

        if (t.type === TYPE_START_TAG && is_table_tag) {
                parse_error()
                close_select_and_reprocess()
                return
        }

        if (t.type === TYPE_END_TAG && is_table_tag) {
                parse_error()
                if (is_in_table_scope(t.name, NS_HTML)) {
                        close_select_and_reprocess()
                }
                return
        }

        // Anything else
        ins_mode_in_select(t)
}
3525
3526         // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
3527         ins_mode_in_template = function (t) {
3528                 var el
3529                 if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
3530                         ins_mode_in_body(t)
3531                         return
3532                 }
3533                 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
3534                         ins_mode_in_head(t)
3535                         return
3536                 }
3537                 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3538                         template_ins_modes.shift()
3539                         template_ins_modes.unshift(ins_mode_in_table)
3540                         ins_mode = ins_mode_in_table
3541                         process_token(t)
3542                         return
3543                 }
3544                 if (t.type === TYPE_START_TAG && t.name === 'col') {
3545                         template_ins_modes.shift()
3546                         template_ins_modes.unshift(ins_mode_in_column_group)
3547                         ins_mode = ins_mode_in_column_group
3548                         process_token(t)
3549                         return
3550                 }
3551                 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3552                         template_ins_modes.shift()
3553                         template_ins_modes.unshift(ins_mode_in_table_body)
3554                         ins_mode = ins_mode_in_table_body
3555                         process_token(t)
3556                         return
3557                 }
3558                 if (t.type === TYPE_START_TAG && (t.name === 'td' || t.name === 'th')) {
3559                         template_ins_modes.shift()
3560                         template_ins_modes.unshift(ins_mode_in_row)
3561                         ins_mode = ins_mode_in_row
3562                         process_token(t)
3563                         return
3564                 }
3565                 if (t.type === TYPE_START_TAG) {
3566                         template_ins_modes.shift()
3567                         template_ins_modes.unshift(ins_mode_in_body)
3568                         ins_mode = ins_mode_in_body
3569                         process_token(t)
3570                         return
3571                 }
3572                 if (t.type === TYPE_END_TAG) {
3573                         parse_error()
3574                         return
3575                 }
3576                 if (t.type === TYPE_EOF) {
3577                         if (!template_tag_is_open()) {
3578                                 stop_parsing()
3579                                 return
3580                         }
3581                         parse_error()
3582                         while (true) {
3583                                 el = open_els.shift()
3584                                 if (el.name === 'template' && el.namespace === NS_HTML) {
3585                                         break
3586                                 }
3587                         }
3588                         clear_afe_to_marker()
3589                         template_ins_modes.shift()
3590                         reset_ins_mode()
3591                         process_token(t)
3592                 }
3593         }
3594
3595         // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
3596         ins_mode_after_body = function (t) {
3597                 var first
3598                 if (is_space_tok(t)) {
3599                         ins_mode_in_body(t)
3600                         return
3601                 }
3602                 if (t.type === TYPE_COMMENT) {
3603                         first = open_els[open_els.length - 1]
3604                         insert_comment(t, [first, first.children.length])
3605                         return
3606                 }
3607                 if (t.type === TYPE_DOCTYPE) {
3608                         parse_error()
3609                         return
3610                 }
3611                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3612                         ins_mode_in_body(t)
3613                         return
3614                 }
3615                 if (t.type === TYPE_END_TAG && t.name === 'html') {
3616                         if (flag_fragment_parsing) {
3617                                 parse_error()
3618                                 return
3619                         }
3620                         ins_mode = ins_mode_after_after_body
3621                         return
3622                 }
3623                 if (t.type === TYPE_EOF) {
3624                         stop_parsing()
3625                         return
3626                 }
3627                 // Anything ELse
3628                 parse_error()
3629                 ins_mode = ins_mode_in_body
3630                 process_token(t)
3631         }
3632
3633         // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
3634         ins_mode_in_frameset = function (t) {
3635                 if (is_space_tok(t)) {
3636                         insert_character(t)
3637                         return
3638                 }
3639                 if (t.type === TYPE_COMMENT) {
3640                         insert_comment(t)
3641                         return
3642                 }
3643                 if (t.type === TYPE_DOCTYPE) {
3644                         parse_error()
3645                         return
3646                 }
3647                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3648                         ins_mode_in_body(t)
3649                         return
3650                 }
3651                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
3652                         insert_html_element(t)
3653                         return
3654                 }
3655                 if (t.type === TYPE_END_TAG && t.name === 'frameset') {
3656                         if (open_els.length === 1) {
3657                                 parse_error()
3658                                 return // fragment case
3659                         }
3660                         open_els.shift()
3661                         if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
3662                                 ins_mode = ins_mode_after_frameset
3663                         }
3664                         return
3665                 }
3666                 if (t.type === TYPE_START_TAG && t.name === 'frame') {
3667                         insert_html_element(t)
3668                         open_els.shift()
3669                         t.acknowledge_self_closing()
3670                         return
3671                 }
3672                 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3673                         ins_mode_in_head(t)
3674                         return
3675                 }
3676                 if (t.type === TYPE_EOF) {
3677                         if (open_els.length !== 1) {
3678                                 parse_error()
3679                         }
3680                         stop_parsing()
3681                         return
3682                 }
3683                 // Anything else
3684                 parse_error()
3685         }
3686
3687         // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
3688         ins_mode_after_frameset = function (t) {
3689                 if (is_space_tok(t)) {
3690                         insert_character(t)
3691                         return
3692                 }
3693                 if (t.type === TYPE_COMMENT) {
3694                         insert_comment(t)
3695                         return
3696                 }
3697                 if (t.type === TYPE_DOCTYPE) {
3698                         parse_error()
3699                         return
3700                 }
3701                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3702                         ins_mode_in_body(t)
3703                         return
3704                 }
3705                 if (t.type === TYPE_END_TAG && t.name === 'html') {
3706                         ins_mode = ins_mode_after_after_frameset
3707                         return
3708                 }
3709                 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3710                         ins_mode_in_head(t)
3711                         return
3712                 }
3713                 if (t.type === TYPE_EOF) {
3714                         stop_parsing()
3715                         return
3716                 }
3717                 // Anything else
3718                 parse_error()
3719         }
3720
3721         // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
3722         ins_mode_after_after_body = function (t) {
3723                 if (t.type === TYPE_COMMENT) {
3724                         insert_comment(t, [doc, doc.children.length])
3725                         return
3726                 }
3727                 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3728                         ins_mode_in_body(t)
3729                         return
3730                 }
3731                 if (t.type === TYPE_EOF) {
3732                         stop_parsing()
3733                         return
3734                 }
3735                 // Anything else
3736                 parse_error()
3737                 ins_mode = ins_mode_in_body
3738                 process_token(t)
3739         }
3740
3741         // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
3742         ins_mode_after_after_frameset = function (t) {
3743                 if (t.type === TYPE_COMMENT) {
3744                         insert_comment(t, [doc, doc.children.length])
3745                         return
3746                 }
3747                 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3748                         ins_mode_in_body(t)
3749                         return
3750                 }
3751                 if (t.type === TYPE_EOF) {
3752                         stop_parsing()
3753                         return
3754                 }
3755                 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3756                         ins_mode_in_head(t)
3757                         return
3758                 }
3759                 // Anything else
3760                 parse_error()
3761                 return
3762         }
3763
3764         // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
3765         has_color_face_or_size = function (t) {
3766                 var a, i
3767                 for (i = 0; i < t.attrs_a.length; ++i) {
3768                         a = t.attrs_a[i]
3769                         if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
3770                                 return true
3771                         }
3772                 }
3773                 return false
3774         }
3775         in_foreign_content_end_script = function () {
3776                 open_els.shift()
3777                 // fixfull
3778         }
3779         in_foreign_content_other_start = function (t) {
3780                 var acn
3781                 acn = adjusted_current_node()
3782                 if (acn.namespace === NS_MATHML) {
3783                         adjust_mathml_attributes(t)
3784                 }
3785                 if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra perens because Coffeescript did
3786                         t.name = svg_name_fixes[t.name]
3787                 }
3788                 if (acn.namespace === NS_SVG) {
3789                         adjust_svg_attributes(t)
3790                 }
3791                 adjust_foreign_attributes(t)
3792                 insert_foreign_element(t, acn.namespace)
3793                 if (t.flag('self-closing')) {
3794                         if (t.name === 'script') {
3795                                 t.acknowledge_self_closing()
3796                                 in_foreign_content_end_script()
3797                                 // fixfull
3798                         } else {
3799                                 open_els.shift()
3800                                 t.acknowledge_self_closing()
3801                         }
3802                 }
3803         }
3804         in_foreign_content = function (t) {
3805                 var el, i, node
3806                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3807                         parse_error()
3808                         insert_character(new_character_token("\ufffd"))
3809                         return
3810                 }
3811                 if (is_space_tok(t)) {
3812                         insert_character(t)
3813                         return
3814                 }
3815                 if (t.type === TYPE_TEXT) {
3816                         flag_frameset_ok = false
3817                         insert_character(t)
3818                         return
3819                 }
3820                 if (t.type === TYPE_COMMENT) {
3821                         insert_comment(t)
3822                         return
3823                 }
3824                 if (t.type === TYPE_DOCTYPE) {
3825                         parse_error()
3826                         return
3827                 }
3828                 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
3829                         parse_error()
3830                         if (flag_fragment_parsing) {
3831                                 in_foreign_content_other_start(t)
3832                                 return
3833                         }
3834                         while (true) { // is this safe?
3835                                 open_els.shift()
3836                                 if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
3837                                         break
3838                                 }
3839                         }
3840                         process_token(t)
3841                         return
3842                 }
3843                 if (t.type === TYPE_START_TAG) {
3844                         in_foreign_content_other_start(t)
3845                         return
3846                 }
3847                 if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
3848                         in_foreign_content_end_script()
3849                         return
3850                 }
3851                 if (t.type === TYPE_END_TAG) {
3852                         i = 0
3853                         node = open_els[i]
3854                         if (node.name.toLowerCase() !== t.name) {
3855                                 parse_error()
3856                         }
3857                         while (true) {
3858                                 if (node === open_els[open_els.length - 1]) {
3859                                         return
3860                                 }
3861                                 if (node.name.toLowerCase() === t.name) {
3862                                         while (true) {
3863                                                 el = open_els.shift()
3864                                                 if (el === node) {
3865                                                         return
3866                                                 }
3867                                         }
3868                                 }
3869                                 i += 1
3870                                 node = open_els[i]
3871                                 if (node.namespace === NS_HTML) {
3872                                         break
3873                                 }
3874                         }
3875                         ins_mode(t) // explicitly call HTML insertion mode
3876                 }
3877         }
3878
3879
3880         // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
3881         tok_state_data = function () {
3882                 var c
3883                 switch (c = txt.charAt(cur++)) {
3884                         case '&':
3885                                 return new_text_node(parse_character_reference())
3886                         break
3887                         case '<':
3888                                 tok_state = tok_state_tag_open
3889                         break
3890                         case "\u0000":
3891                                 parse_error()
3892                                 return new_text_node(c)
3893                         break
3894                         case '': // EOF
3895                                 return new_eof_token()
3896                         break
3897                         default:
3898                                 return new_text_node(c)
3899                 }
3900                 return null
3901         }
3902
3903         // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3904         // not needed: tok_state_character_reference_in_data = function () {
3905         // just call parse_character_reference()
3906
3907         // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
3908         tok_state_rcdata = function () {
3909                 var c
3910                 switch (c = txt.charAt(cur++)) {
3911                         case '&':
3912                                 return new_text_node(parse_character_reference())
3913                         break
3914                         case '<':
3915                                 tok_state = tok_state_rcdata_less_than_sign
3916                         break
3917                         case "\u0000":
3918                                 parse_error()
3919                                 return new_character_token("\ufffd")
3920                         break
3921                         case '': // EOF
3922                                 return new_eof_token()
3923                         break
3924                         default:
3925                                 return new_character_token(c)
3926                 }
3927                 return null
3928         }
3929
3930         // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3931         // not needed: tok_state_character_reference_in_rcdata = function () {
3932         // just call parse_character_reference()
3933
3934         // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3935         tok_state_rawtext = function () {
3936                 var c
3937                 switch (c = txt.charAt(cur++)) {
3938                         case '<':
3939                                 tok_state = tok_state_rawtext_less_than_sign
3940                         break
3941                         case "\u0000":
3942                                 parse_error()
3943                                 return new_character_token("\ufffd")
3944                         break
3945                         case '': // EOF
3946                                 return new_eof_token()
3947                         break
3948                         default:
3949                                 return new_character_token(c)
3950                 }
3951                 return null
3952         }
3953
3954         // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3955         tok_state_script_data = function () {
3956                 var c
3957                 switch (c = txt.charAt(cur++)) {
3958                         case '<':
3959                                 tok_state = tok_state_script_data_less_than_sign
3960                         break
3961                         case "\u0000":
3962                                 parse_error()
3963                                 return new_character_token("\ufffd")
3964                         break
3965                         case '': // EOF
3966                                 return new_eof_token()
3967                         break
3968                         default:
3969                                 return new_character_token(c)
3970                 }
3971                 return null
3972         }
3973
3974         // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3975         tok_state_plaintext = function () {
3976                 var c
3977                 switch (c = txt.charAt(cur++)) {
3978                         case "\u0000":
3979                                 parse_error()
3980                                 return new_character_token("\ufffd")
3981                         break
3982                         case '': // EOF
3983                                 return new_eof_token()
3984                         break
3985                         default:
3986                                 return new_character_token(c)
3987                 }
3988                 return null
3989         }
3990
3991         // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3992         tok_state_tag_open = function () {
3993                 var c
3994                 c = txt.charAt(cur++)
3995                 if (c === '!') {
3996                         tok_state = tok_state_markup_declaration_open
3997                         return
3998                 }
3999                 if (c === '/') {
4000                         tok_state = tok_state_end_tag_open
4001                         return
4002                 }
4003                 if (is_uc_alpha(c)) {
4004                         tok_cur_tag = new_open_tag(c.toLowerCase())
4005                         tok_state = tok_state_tag_name
4006                         return
4007                 }
4008                 if (is_lc_alpha(c)) {
4009                         tok_cur_tag = new_open_tag(c)
4010                         tok_state = tok_state_tag_name
4011                         return
4012                 }
4013                 if (c === '?') {
4014                         parse_error()
4015                         tok_cur_tag = new_comment_token('?') // FIXME right?
4016                         tok_state = tok_state_bogus_comment
4017                         return
4018                 }
4019                 // Anything else
4020                 parse_error()
4021                 tok_state = tok_state_data
4022                 cur -= 1 // we didn't parse/handle the char after <
4023                 return new_text_node('<')
4024         }
4025
4026         // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
4027         tok_state_end_tag_open = function () {
4028                 var c
4029                 c = txt.charAt(cur++)
4030                 if (is_uc_alpha(c)) {
4031                         tok_cur_tag = new_end_tag(c.toLowerCase())
4032                         tok_state = tok_state_tag_name
4033                         return
4034                 }
4035                 if (is_lc_alpha(c)) {
4036                         tok_cur_tag = new_end_tag(c)
4037                         tok_state = tok_state_tag_name
4038                         return
4039                 }
4040                 if (c === '>') {
4041                         parse_error()
4042                         tok_state = tok_state_data
4043                         return
4044                 }
4045                 if (c === '') { // EOF
4046                         parse_error()
4047                         tok_state = tok_state_data
4048                         return new_text_node('</')
4049                 }
4050                 // Anything else
4051                 parse_error()
4052                 tok_cur_tag = new_comment_token(c)
4053                 tok_state = tok_state_bogus_comment
4054                 return null
4055         }
4056
4057         // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
4058         tok_state_tag_name = function () {
4059                 var c, tmp
4060                 switch (c = txt.charAt(cur++)) {
4061                         case "\t":
4062                         case "\n":
4063                         case "\u000c":
4064                         case ' ':
4065                                 tok_state = tok_state_before_attribute_name
4066                         break
4067                         case '/':
4068                                 tok_state = tok_state_self_closing_start_tag
4069                         break
4070                         case '>':
4071                                 tok_state = tok_state_data
4072                                 tmp = tok_cur_tag
4073                                 tok_cur_tag = null
4074                                 return tmp
4075                         break
4076                         case "\u0000":
4077                                 parse_error()
4078                                 tok_cur_tag.name += "\ufffd"
4079                         break
4080                         case '': // EOF
4081                                 parse_error()
4082                                 tok_state = tok_state_data
4083                         break
4084                         default:
4085                                 if (is_uc_alpha(c)) {
4086                                         tok_cur_tag.name += c.toLowerCase()
4087                                 } else {
4088                                         tok_cur_tag.name += c
4089                                 }
4090                 }
4091                 return null
4092         }
4093
4094         // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
4095         tok_state_rcdata_less_than_sign = function () {
4096                 var c
4097                 c = txt.charAt(cur++)
4098                 if (c === '/') {
4099                         temporary_buffer = ''
4100                         tok_state = tok_state_rcdata_end_tag_open
4101                         return null
4102                 }
4103                 // Anything else
4104                 tok_state = tok_state_rcdata
4105                 cur -= 1 // reconsume the input character
4106                 return new_character_token('<')
4107         }
4108
4109         // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
4110         tok_state_rcdata_end_tag_open = function () {
4111                 var c
4112                 c = txt.charAt(cur++)
4113                 if (is_uc_alpha(c)) {
4114                         tok_cur_tag = new_end_tag(c.toLowerCase())
4115                         temporary_buffer += c
4116                         tok_state = tok_state_rcdata_end_tag_name
4117                         return null
4118                 }
4119                 if (is_lc_alpha(c)) {
4120                         tok_cur_tag = new_end_tag(c)
4121                         temporary_buffer += c
4122                         tok_state = tok_state_rcdata_end_tag_name
4123                         return null
4124                 }
4125                 // Anything else
4126                 tok_state = tok_state_rcdata
4127                 cur -= 1 // reconsume the input character
4128                 return new_character_token("</") // fixfull separate these
4129         }
4130
4131         // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
4132         is_appropriate_end_tag = function (t) {
4133                 // fixfull: this assumes that open_els[0].name is "the tag name of the last
4134                 // start tag to have been emitted from this tokenizer"
4135                 return t.type === TYPE_END_TAG && t.name === open_els[0].name
4136         }
4137
4138         // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
4139         tok_state_rcdata_end_tag_name = function () {
4140                 var c
4141                 c = txt.charAt(cur++)
4142                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4143                         if (is_appropriate_end_tag(tok_cur_tag)) {
4144                                 tok_state = tok_state_before_attribute_name
4145                                 return
4146                         }
4147                         // else fall through to "Anything else"
4148                 }
4149                 if (c === '/') {
4150                         if (is_appropriate_end_tag(tok_cur_tag)) {
4151                                 tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
4152                                 return
4153                         }
4154                         // else fall through to "Anything else"
4155                 }
4156                 if (c === '>') {
4157                         if (is_appropriate_end_tag(tok_cur_tag)) {
4158                                 tok_state = tok_state_data
4159                                 return tok_cur_tag
4160                         }
4161                         // else fall through to "Anything else"
4162                 }
4163                 if (is_uc_alpha(c)) {
4164                         tok_cur_tag.name += c.toLowerCase()
4165                         temporary_buffer += c
4166                         return null
4167                 }
4168                 if (is_lc_alpha(c)) {
4169                         tok_cur_tag.name += c
4170                         temporary_buffer += c
4171                         return null
4172                 }
4173                 // Anything else
4174                 tok_state = tok_state_rcdata
4175                 cur -= 1 // reconsume the input character
4176                 return new_character_token('</' + temporary_buffer) // fixfull separate these
4177         }
4178
4179         // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
4180         tok_state_rawtext_less_than_sign = function () {
4181                 var c
4182                 c = txt.charAt(cur++)
4183                 if (c === '/') {
4184                         temporary_buffer = ''
4185                         tok_state = tok_state_rawtext_end_tag_open
4186                         return null
4187                 }
4188                 // Anything else
4189                 tok_state = tok_state_rawtext
4190                 cur -= 1 // reconsume the input character
4191                 return new_character_token('<')
4192         }
4193
4194         // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
4195         tok_state_rawtext_end_tag_open = function () {
4196                 c = txt.charAt(cur++)
4197                 if (is_uc_alpha(c)) {
4198                         tok_cur_tag = new_end_tag(c.toLowerCase())
4199                         temporary_buffer += c
4200                         tok_state = tok_state_rawtext_end_tag_name
4201                         return null
4202                 }
4203                 if (is_lc_alpha(c)) {
4204                         tok_cur_tag = new_end_tag(c)
4205                         temporary_buffer += c
4206                         tok_state = tok_state_rawtext_end_tag_name
4207                         return null
4208                 }
4209                 // Anything else
4210                 tok_state = tok_state_rawtext
4211                 cur -= 1 // reconsume the input character
4212                 return new_character_token("</") // fixfull separate these
4213         }
4214
4215         // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
4216         tok_state_rawtext_end_tag_name = function () {
4217                 var c
4218                 c = txt.charAt(cur++)
4219                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4220                         if (is_appropriate_end_tag(tok_cur_tag)) {
4221                                 tok_state = tok_state_before_attribute_name
4222                                 return
4223                         }
4224                         // else fall through to "Anything else"
4225                 }
4226                 if (c === '/') {
4227                         if (is_appropriate_end_tag(tok_cur_tag)) {
4228                                 tok_state = tok_state_self_closing_start_tag
4229                                 return
4230                         }
4231                         // else fall through to "Anything else"
4232                 }
4233                 if (c === '>') {
4234                         if (is_appropriate_end_tag(tok_cur_tag)) {
4235                                 tok_state = tok_state_data
4236                                 return tok_cur_tag
4237                         }
4238                         // else fall through to "Anything else"
4239                 }
4240                 if (is_uc_alpha(c)) {
4241                         tok_cur_tag.name += c.toLowerCase()
4242                         temporary_buffer += c
4243                         return null
4244                 }
4245                 if (is_lc_alpha(c)) {
4246                         tok_cur_tag.name += c
4247                         temporary_buffer += c
4248                         return null
4249                 }
4250                 // Anything else
4251                 tok_state = tok_state_rawtext
4252                 cur -= 1 // reconsume the input character
4253                 return new_character_token('</' + temporary_buffer) // fixfull separate these
4254         }
4255
4256         // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
4257         tok_state_script_data_less_than_sign = function () {
4258                 var c
4259                 c = txt.charAt(cur++)
4260                 if (c === '/') {
4261                         temporary_buffer = ''
4262                         tok_state = tok_state_script_data_end_tag_open
4263                         return
4264                 }
4265                 if (c === '!') {
4266                         tok_state = tok_state_script_data_escape_start
4267                         return new_character_token('<!') // fixfull split
4268                 }
4269                 // Anything else
4270                 tok_state = tok_state_script_data
4271                 cur -= 1 // reconsume
4272                 return new_character_token('<')
4273         }
4274
4275         // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
4276         tok_state_script_data_end_tag_open = function () {
4277                 var c
4278                 c = txt.charAt(cur++)
4279                 if (is_uc_alpha(c)) {
4280                         tok_cur_tag = new_end_tag(c.toLowerCase())
4281                         temporary_buffer += c
4282                         tok_state = tok_state_script_data_end_tag_name
4283                         return
4284                 }
4285                 if (is_lc_alpha(c)) {
4286                         tok_cur_tag = new_end_tag(c)
4287                         temporary_buffer += c
4288                         tok_state = tok_state_script_data_end_tag_name
4289                         return
4290                 }
4291                 // Anything else
4292                 tok_state = tok_state_script_data
4293                 cur -= 1 // reconsume
4294                 return new_character_token('</')
4295         }
4296
        // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
4298         tok_state_script_data_end_tag_name = function () {
4299                 var c
4300                 c = txt.charAt(cur++)
4301                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4302                         if (is_appropriate_end_tag(tok_cur_tag)) {
4303                                 tok_state = tok_state_before_attribute_name
4304                                 return
4305                         }
4306                         // fall through
4307                 }
4308                 if (c === '/') {
4309                         if (is_appropriate_end_tag(tok_cur_tag)) {
4310                                 tok_state = tok_state_self_closing_start_tag
4311                                 return
4312                         }
4313                         // fall through
4314                 }
4315                 if (c === '>') {
4316                         if (is_appropriate_end_tag(tok_cur_tag)) {
4317                                 tok_state = tok_state_data
4318                                 return tok_cur_tag
4319                         }
4320                         // fall through
4321                 }
4322                 if (is_uc_alpha(c)) {
4323                         tok_cur_tag.name += c.toLowerCase()
4324                         temporary_buffer += c
4325                         return
4326                 }
4327                 if (is_lc_alpha(c)) {
4328                         tok_cur_tag.name += c
4329                         temporary_buffer += c
4330                         return
4331                 }
4332                 // Anything else
4333                 tok_state = tok_state_script_data
4334                 cur -= 1 // reconsume
4335                 return new_character_token("</" + temporary_buffer) // fixfull split
4336         }
4337
4338         // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
4339         tok_state_script_data_escape_start = function () {
4340                 var c
4341                 c = txt.charAt(cur++)
4342                 if (c === '-') {
4343                         tok_state = tok_state_script_data_escape_start_dash
4344                         return new_character_token('-')
4345                 }
4346                 // Anything else
4347                 tok_state = tok_state_script_data
4348                 cur -= 1 // reconsume
4349         }
4350
4351         // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
4352         tok_state_script_data_escape_start_dash = function () {
4353                 var c
4354                 c = txt.charAt(cur++)
4355                 if (c === '-') {
4356                         tok_state = tok_state_script_data_escaped_dash_dash
4357                         return new_character_token('-')
4358                 }
4359                 // Anything else
4360                 tok_state = tok_state_script_data
4361                 cur -= 1 // reconsume
4362         }
4363
4364         // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
4365         tok_state_script_data_escaped = function () {
4366                 var c
4367                 c = txt.charAt(cur++)
4368                 if (c === '-') {
4369                         tok_state = tok_state_script_data_escaped_dash
4370                         return new_character_token('-')
4371                 }
4372                 if (c === '<') {
4373                         tok_state = tok_state_script_data_escaped_less_than_sign
4374                         return
4375                 }
4376                 if (c === "\u0000") {
4377                         parse_error()
4378                         return new_character_token("\ufffd")
4379                 }
4380                 if (c === '') { // EOF
4381                         tok_state = tok_state_data
4382                         parse_error()
4383                         cur -= 1 // reconsume
4384                         return
4385                 }
4386                 // Anything else
4387                 return new_character_token(c)
4388         }
4389
4390         // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
4391         tok_state_script_data_escaped_dash = function () {
4392                 var c
4393                 c = txt.charAt(cur++)
4394                 if (c === '-') {
4395                         tok_state = tok_state_script_data_escaped_dash_dash
4396                         return new_character_token('-')
4397                 }
4398                 if (c === '<') {
4399                         tok_state = tok_state_script_data_escaped_less_than_sign
4400                         return
4401                 }
4402                 if (c === "\u0000") {
4403                         parse_error()
4404                         tok_state = tok_state_script_data_escaped
4405                         return new_character_token("\ufffd")
4406                 }
4407                 if (c === '') { // EOF
4408                         tok_state = tok_state_data
4409                         parse_error()
4410                         cur -= 1 // reconsume
4411                         return
4412                 }
4413                 // Anything else
4414                 tok_state = tok_state_script_data_escaped
4415                 return new_character_token(c)
4416         }
4417
4418         // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
4419         tok_state_script_data_escaped_dash_dash = function () {
4420                 var c
4421                 c = txt.charAt(cur++)
4422                 if (c === '-') {
4423                         return new_character_token('-')
4424                 }
4425                 if (c === '<') {
4426                         tok_state = tok_state_script_data_escaped_less_than_sign
4427                         return
4428                 }
4429                 if (c === '>') {
4430                         tok_state = tok_state_script_data
4431                         return new_character_token('>')
4432                 }
4433                 if (c === "\u0000") {
4434                         parse_error()
4435                         tok_state = tok_state_script_data_escaped
4436                         return new_character_token("\ufffd")
4437                 }
4438                 if (c === '') { // EOF
4439                         parse_error()
4440                         tok_state = tok_state_data
4441                         cur -= 1 // reconsume
4442                         return
4443                 }
4444                 // Anything else
4445                 tok_state = tok_state_script_data_escaped
4446                 return new_character_token(c)
4447         }
4448
4449         // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
4450         tok_state_script_data_escaped_less_than_sign = function () {
4451                 var c
4452                 c = txt.charAt(cur++)
4453                 if (c === '/') {
4454                         temporary_buffer = ''
4455                         tok_state = tok_state_script_data_escaped_end_tag_open
4456                         return
4457                 }
4458                 if (is_uc_alpha(c)) {
4459                         temporary_buffer = c.toLowerCase() // yes, really
4460                         tok_state = tok_state_script_data_double_escape_start
4461                         return new_character_token("<" + c) // fixfull split
4462                 }
4463                 if (is_lc_alpha(c)) {
4464                         temporary_buffer = c
4465                         tok_state = tok_state_script_data_double_escape_start
4466                         return new_character_token("<" + c) // fixfull split
4467                 }
4468                 // Anything else
4469                 tok_state = tok_state_script_data_escaped
4470                 cur -= 1 // reconsume
4471                 return new_character_token('<')
4472         }
4473
4474         // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
4475         tok_state_script_data_escaped_end_tag_open = function () {
4476                 var c
4477                 c = txt.charAt(cur++)
4478                 if (is_uc_alpha(c)) {
4479                         tok_cur_tag = new_end_tag(c.toLowerCase())
4480                         temporary_buffer += c
4481                         tok_state = tok_state_script_data_escaped_end_tag_name
4482                         return
4483                 }
4484                 if (is_lc_alpha(c)) {
4485                         tok_cur_tag = new_end_tag(c)
4486                         temporary_buffer += c
4487                         tok_state = tok_state_script_data_escaped_end_tag_name
4488                         return
4489                 }
4490                 // Anything else
4491                 tok_state = tok_state_script_data_escaped
4492                 cur -= 1 // reconsume
4493                 return new_character_token('</') // fixfull split
4494         }
4495
4496         // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
4497         tok_state_script_data_escaped_end_tag_name = function () {
4498                 var c
4499                 c = txt.charAt(cur++)
4500                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
4501                         if (is_appropriate_end_tag(tok_cur_tag)) {
4502                                 tok_state = tok_state_before_attribute_name
4503                                 return
4504                         }
4505                         // fall through
4506                 }
4507                 if (c === '/') {
4508                         if (is_appropriate_end_tag(tok_cur_tag)) {
4509                                 tok_state = tok_state_self_closing_start_tag
4510                                 return
4511                         }
4512                         // fall through
4513                 }
4514                 if (c === '>') {
4515                         if (is_appropriate_end_tag(tok_cur_tag)) {
4516                                 tok_state = tok_state_data
4517                                 return tok_cur_tag
4518                         }
4519                         // fall through
4520                 }
4521                 if (is_uc_alpha(c)) {
4522                         tok_cur_tag.name += c.toLowerCase()
4523                         temporary_buffer += c.toLowerCase()
4524                         return
4525                 }
4526                 if (is_lc_alpha(c)) {
4527                         tok_cur_tag.name += c
4528                         temporary_buffer += c.toLowerCase()
4529                         return
4530                 }
4531                 // Anything else
4532                 tok_state = tok_state_script_data_escaped
4533                 cur -= 1 // reconsume
4534                 return new_character_token("</" + temporary_buffer) // fixfull split
4535         }
4536
4537         // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
4538         tok_state_script_data_double_escape_start = function () {
4539                 var c
4540                 c = txt.charAt(cur++)
4541                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4542                         if (temporary_buffer === 'script') {
4543                                 tok_state = tok_state_script_data_double_escaped
4544                         } else {
4545                                 tok_state = tok_state_script_data_escaped
4546                         }
4547                         return new_character_token(c)
4548                 }
4549                 if (is_uc_alpha(c)) {
4550                         temporary_buffer += c.toLowerCase() // yes, really lowercase
4551                         return new_character_token(c)
4552                 }
4553                 if (is_lc_alpha(c)) {
4554                         temporary_buffer += c
4555                         return new_character_token(c)
4556                 }
4557                 // Anything else
4558                 tok_state = tok_state_script_data_escaped
4559                 cur -= 1 // reconsume
4560         }
4561
4562         // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
4563         tok_state_script_data_double_escaped = function () {
4564                 var c
4565                 c = txt.charAt(cur++)
4566                 if (c === '-') {
4567                         tok_state = tok_state_script_data_double_escaped_dash
4568                         return new_character_token('-')
4569                 }
4570                 if (c === '<') {
4571                         tok_state = tok_state_script_data_double_escaped_less_than_sign
4572                         return new_character_token('<')
4573                 }
4574                 if (c === "\u0000") {
4575                         parse_error()
4576                         return new_character_token("\ufffd")
4577                 }
4578                 if (c === '') { // EOF
4579                         parse_error()
4580                         tok_state = tok_state_data
4581                         cur -= 1 // reconsume
4582                         return
4583                 }
4584                 // Anything else
4585                 return new_character_token(c)
4586         }
4587
4588         // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
4589         tok_state_script_data_double_escaped_dash = function () {
4590                 var c
4591                 c = txt.charAt(cur++)
4592                 if (c === '-') {
4593                         tok_state = tok_state_script_data_double_escaped_dash_dash
4594                         return new_character_token('-')
4595                 }
4596                 if (c === '<') {
4597                         tok_state = tok_state_script_data_double_escaped_less_than_sign
4598                         return new_character_token('<')
4599                 }
4600                 if (c === "\u0000") {
4601                         parse_error()
4602                         tok_state = tok_state_script_data_double_escaped
4603                         return new_character_token("\ufffd")
4604                 }
4605                 if (c === '') { // EOF
4606                         parse_error()
4607                         tok_state = tok_state_data
4608                         cur -= 1 // reconsume
4609                         return
4610                 }
4611                 // Anything else
4612                 tok_state = tok_state_script_data_double_escaped
4613                 return new_character_token(c)
4614         }
4615
4616         // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
4617         tok_state_script_data_double_escaped_dash_dash = function () {
4618                 var c
4619                 c = txt.charAt(cur++)
4620                 if (c === '-') {
4621                         return new_character_token('-')
4622                 }
4623                 if (c === '<') {
4624                         tok_state = tok_state_script_data_double_escaped_less_than_sign
4625                         return new_character_token('<')
4626                 }
4627                 if (c === '>') {
4628                         tok_state = tok_state_script_data
4629                         return new_character_token('>')
4630                 }
4631                 if (c === "\u0000") {
4632                         parse_error()
4633                         tok_state = tok_state_script_data_double_escaped
4634                         return new_character_token("\ufffd")
4635                 }
4636                 if (c === '') { // EOF
4637                         parse_error()
4638                         tok_state = tok_state_data
4639                         cur -= 1 // reconsume
4640                         return
4641                 }
4642                 // Anything else
4643                 tok_state = tok_state_script_data_double_escaped
4644                 return new_character_token(c)
4645         }
4646
4647         // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
4648         tok_state_script_data_double_escaped_less_than_sign = function () {
4649                 var c
4650                 c = txt.charAt(cur++)
4651                 if (c === '/') {
4652                         temporary_buffer = ''
4653                         tok_state = tok_state_script_data_double_escape_end
4654                         return new_character_token('/')
4655                 }
4656                 // Anything else
4657                 tok_state = tok_state_script_data_double_escaped
4658                 cur -= 1 // reconsume
4659         }
4660
4661         // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
4662         tok_state_script_data_double_escape_end = function () {
4663                 var c
4664                 c = txt.charAt(cur++)
4665                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4666                         if (temporary_buffer === 'script') {
4667                                 tok_state = tok_state_script_data_escaped
4668                         } else {
4669                                 tok_state = tok_state_script_data_double_escaped
4670                         }
4671                         return new_character_token(c)
4672                 }
4673                 if (is_uc_alpha(c)) {
4674                         temporary_buffer += c.toLowerCase() // yes, really lowercase
4675                         return new_character_token(c)
4676                 }
4677                 if (is_lc_alpha(c)) {
4678                         temporary_buffer += c
4679                         return new_character_token(c)
4680                 }
4681                 // Anything else
4682                 tok_state = tok_state_script_data_double_escaped
4683                 cur -= 1 // reconsume
4684         }
4685
4686         // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
4687         tok_state_before_attribute_name = function () {
4688                 var attr_name, c, tmp
4689                 attr_name = null
4690                 switch (c = txt.charAt(cur++)) {
4691                         case "\t":
4692                         case "\n":
4693                         case "\u000c":
4694                         case ' ':
4695                                 return null
4696                         break
4697                         case '/':
4698                                 tok_state = tok_state_self_closing_start_tag
4699                                 return null
4700                         break
4701                         case '>':
4702                                 tok_state = tok_state_data
4703                                 tmp = tok_cur_tag
4704                                 tok_cur_tag = null
4705                                 return tmp
4706                         break
4707                         case "\u0000":
4708                                 parse_error()
4709                                 attr_name = "\ufffd"
4710                         break
4711                         case '"':
4712                         case "'":
4713                         case '<':
4714                         case '=':
4715                                 parse_error()
4716                                 attr_name = c
4717                         break
4718                         case '': // EOF
4719                                 parse_error()
4720                                 tok_state = tok_state_data
4721                         break
4722                         default:
4723                                 if (is_uc_alpha(c)) {
4724                                         attr_name = c.toLowerCase()
4725                                 } else {
4726                                         attr_name = c
4727                                 }
4728                 }
4729                 if (attr_name != null) {
4730                         tok_cur_tag.attrs_a.unshift([attr_name, ''])
4731                         tok_state = tok_state_attribute_name
4732                 }
4733                 return null
4734         }
4735
4736         // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
4737         tok_state_attribute_name = function () {
4738                 var c, tmp
4739                 switch (c = txt.charAt(cur++)) {
4740                         case "\t":
4741                         case "\n":
4742                         case "\u000c":
4743                         case ' ':
4744                                 tok_state = tok_state_after_attribute_name
4745                         break
4746                         case '/':
4747                                 tok_state = tok_state_self_closing_start_tag
4748                         break
4749                         case '=':
4750                                 tok_state = tok_state_before_attribute_value
4751                         break
4752                         case '>':
4753                                 tok_state = tok_state_data
4754                                 tmp = tok_cur_tag
4755                                 tok_cur_tag = null
4756                                 return tmp
4757                         break
4758                         case "\u0000":
4759                                 parse_error()
4760                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
4761                         break
4762                         case '"':
4763                         case "'":
4764                         case '<':
4765                                 parse_error()
4766                                 tok_cur_tag.attrs_a[0][0] += c
4767                         break
4768                         case '': // EOF
4769                                 parse_error()
4770                                 tok_state = tok_state_data
4771                         break
4772                         default:
4773                                 if (is_uc_alpha(c)) {
4774                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
4775                                 } else {
4776                                         tok_cur_tag.attrs_a[0][0] += c
4777                                 }
4778                 }
4779                 return null
4780         }
4781
4782         // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
4783         tok_state_after_attribute_name = function () {
4784                 var c
4785                 c = txt.charAt(cur++)
4786                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4787                         return
4788                 }
4789                 if (c === '/') {
4790                         tok_state = tok_state_self_closing_start_tag
4791                         return
4792                 }
4793                 if (c === '=') {
4794                         tok_state = tok_state_before_attribute_value
4795                         return
4796                 }
4797                 if (c === '>') {
4798                         tok_state = tok_state_data
4799                         return tok_cur_tag
4800                 }
4801                 if (is_uc_alpha(c)) {
4802                         tok_cur_tag.attrs_a.unshift([c.toLowerCase(), ''])
4803                         tok_state = tok_state_attribute_name
4804                         return
4805                 }
4806                 if (c === "\u0000") {
4807                         parse_error()
4808                         tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
4809                         tok_state = tok_state_attribute_name
4810                         return
4811                 }
4812                 if (c === '') { // EOF
4813                         parse_error()
4814                         tok_state = tok_state_data
4815                         cur -= 1 // reconsume
4816                         return
4817                 }
4818                 if (c === '"' || c === "'" || c === '<') {
4819                         parse_error()
4820                         // fall through to Anything else
4821                 }
4822                 // Anything else
4823                 tok_cur_tag.attrs_a.unshift([c, ''])
4824                 tok_state = tok_state_attribute_name
4825         }
4826
4827         // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Tokenizer state "before attribute value". Consumes one character of txt at
// cur and selects the state that will read the value of the newest attribute
// (tok_cur_tag.attrs_a[0]); a character that already belongs to an unquoted
// value is appended to that attribute's value.
// Returns the current tag token when '>' ends the tag, otherwise null.
tok_state_before_attribute_value = function () {
    var c, tmp
    switch (c = txt.charAt(cur++)) {
        case "\t":
        case "\n":
        case "\u000c":
        case ' ':
            // whitespace is skipped; stay in this state
            return null
        case '"':
            tok_state = tok_state_attribute_value_double_quoted
            break
        case '&':
            // hand the '&' itself to the unquoted state
            tok_state = tok_state_attribute_value_unquoted
            cur -= 1 // reconsume
            break
        case "'":
            tok_state = tok_state_attribute_value_single_quoted
            break
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            tok_state = tok_state_attribute_value_unquoted
            break
        case '>':
            // the tag ends before any value was seen: report it, emit the tag
            parse_error()
            tok_state = tok_state_data
            tmp = tok_cur_tag
            tok_cur_tag = null
            return tmp
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so cur does not run past the end of txt
            break
        default:
            if (c === '<' || c === '=' || c === '`') {
                // reported, but the character still becomes part of the value
                parse_error()
            }
            tok_cur_tag.attrs_a[0][1] += c
            tok_state = tok_state_attribute_value_unquoted
    }
    return null
}
4869
4870         // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
// Tokenizer state "attribute value (double-quoted)". Consumes one character
// of txt at cur and appends it to the value of the newest attribute
// (tok_cur_tag.attrs_a[0]) until the closing '"' is seen.
// Always returns null; no token is emitted from this state.
tok_state_attribute_value_double_quoted = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case '"':
            tok_state = tok_state_after_attribute_value_quoted
            break
        case '&':
            // whatever parse_character_reference returns is appended verbatim
            tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
            break
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so cur does not run past the end of txt
            break
        default:
            tok_cur_tag.attrs_a[0][1] += c
    }
    return null
}
4893
4894         // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Tokenizer state "attribute value (single-quoted)". Consumes one character
// of txt at cur and appends it to the value of the newest attribute
// (tok_cur_tag.attrs_a[0]) until the closing "'" is seen.
// Always returns null; no token is emitted from this state.
tok_state_attribute_value_single_quoted = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case "'":
            tok_state = tok_state_after_attribute_value_quoted
            break
        case '&':
            // whatever parse_character_reference returns is appended verbatim
            tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
            break
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so cur does not run past the end of txt
            break
        default:
            tok_cur_tag.attrs_a[0][1] += c
    }
    return null
}
4917
4918         // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// Tokenizer state "attribute value (unquoted)". Consumes one character of txt
// at cur and appends it to the value of the newest attribute
// (tok_cur_tag.attrs_a[0]) until whitespace or '>' ends the value.
// Returns the current tag token when '>' ends the tag, otherwise null.
tok_state_attribute_value_unquoted = function () {
    var c, tmp
    switch (c = txt.charAt(cur++)) {
        case "\t":
        case "\n":
        case "\u000c":
        case ' ':
            tok_state = tok_state_before_attribute_name
            break
        case '&':
            // whatever parse_character_reference returns is appended verbatim
            tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
            break
        case '>':
            tok_state = tok_state_data
            tmp = tok_cur_tag
            tok_cur_tag = null
            return tmp
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so cur does not run past the end of txt
            break
        default:
            if (c === '"' || c === "'" || c === '<' || c === '=' || c === '`') {
                // reported, but the character still becomes part of the value
                parse_error()
            }
            tok_cur_tag.attrs_a[0][1] += c
    }
    return null
}
4950
4951         // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
// Tokenizer state "after attribute value (quoted)". Consumes the character
// that follows a closing quote and decides what comes next in the tag.
// Returns the current tag token when '>' ends the tag, otherwise null.
tok_state_after_attribute_value_quoted = function () {
    var c, tmp
    switch (c = txt.charAt(cur++)) {
        case "\t":
        case "\n":
        case "\u000c":
        case ' ':
            tok_state = tok_state_before_attribute_name
            break
        case '/':
            tok_state = tok_state_self_closing_start_tag
            break
        case '>':
            tok_state = tok_state_data
            tmp = tok_cur_tag
            tok_cur_tag = null
            return tmp
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume, so cur does not run past the end of txt
            break
        default:
            // something directly after the quote: report it, then let the
            // before-attribute-name state look at the same character
            parse_error()
            tok_state = tok_state_before_attribute_name
            cur -= 1 // reconsume
    }
    return null
}
4981
4982         // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
// Tokenizer state "self-closing start tag". Consumes one character:
// '>' flags the current tag as self-closing and returns it. Any other input,
// EOF included, is a parse error; the character is handed back for
// reconsumption and nothing is returned.
tok_state_self_closing_start_tag = function () {
    var ch = txt.charAt(cur++)
    if (ch === '>') {
        tok_cur_tag.flag('self-closing', true)
        tok_state = tok_state_data
        return tok_cur_tag
    }
    parse_error()
    // EOF goes back to the data state; anything else is retried in the
    // before-attribute-name state
    tok_state = (ch === '') ? tok_state_data : tok_state_before_attribute_name
    cur -= 1 // reconsume
}
5002
5003         // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
5004         // WARNING: put a comment token in tok_cur_tag before setting this state
// Tokenizer state "bogus comment". Takes everything from cur up to, but not
// including, the next '>' (or up to the end of txt when there is none),
// replaces NUL with U+FFFD and appends it to tok_cur_tag.text. cur is moved
// past the consumed text, the state becomes data, and tok_cur_tag is returned.
tok_state_bogus_comment = function () {
    var gt_pos = txt.indexOf('>', cur)
    var stop = (gt_pos === -1) ? txt.length : gt_pos
    var chunk = txt.substr(cur, stop - cur)
    // when a '>' was found it is consumed as well
    cur = (gt_pos === -1) ? stop : stop + 1
    tok_cur_tag.text += chunk.replace(new RegExp("\u0000", 'g'), "\ufffd")
    tok_state = tok_state_data
    return tok_cur_tag
}
5020
5021         // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
// Tokenizer state "markup declaration open". Looks ahead from cur without
// consuming a single character first and dispatches on what follows:
//   "--"                      -> new empty comment token, comment-start state
//   "doctype" (any case)      -> doctype state
//   "[CDATA[" while the adjusted current node exists and is not in NS_HTML
//                             -> CDATA section state
//   anything else             -> parse error, new empty comment token,
//                                bogus-comment state (nothing consumed)
// Never returns a token.
tok_state_markup_declaration_open = function () {
    var ahead, node
    if (txt.substr(cur, 2) === '--') {
        cur += 2
        tok_cur_tag = new_comment_token('')
        tok_state = tok_state_comment_start
    } else {
        ahead = txt.substr(cur, 7)
        if (ahead.toLowerCase() === 'doctype') {
            cur += 7
            tok_state = tok_state_doctype
        } else {
            node = adjusted_current_node()
            if (node && node.namespace !== NS_HTML && ahead === '[CDATA[') {
                cur += 7
                tok_state = tok_state_cdata_section
            } else {
                parse_error()
                tok_cur_tag = new_comment_token('')
                tok_state = tok_state_bogus_comment
            }
        }
    }
}
5046
5047         // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
// Tokenizer state "comment start": handles the first character after "<!--".
// The comment being built is tok_cur_tag; its data accumulates in .text.
// Returns the comment token when the comment ends here ('>' or EOF),
// otherwise null.
tok_state_comment_start = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case '-':
            tok_state = tok_state_comment_start_dash
            break
        case "\u0000":
            // NUL becomes U+FFFD inside the comment's own data; it must not
            // be emitted as a separate token
            parse_error()
            tok_cur_tag.text += "\ufffd"
            tok_state = tok_state_comment
            break
        case '>':
            // "<!-->": an empty comment closed too early
            parse_error()
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            tok_cur_tag.text += c
            tok_state = tok_state_comment
    }
    return null
}
5076
5077         // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
// Tokenizer state "comment start dash": the comment opened with "<!--" and
// one further '-' has been seen. Returns the comment token (tok_cur_tag)
// when the comment ends here ('>' or EOF), otherwise null.
tok_state_comment_start_dash = function () {
    var ch = txt.charAt(cur++)
    if (ch === '-') {
        tok_state = tok_state_comment_end
        return null
    }
    if (ch === '>' || ch === '') {
        parse_error()
        tok_state = tok_state_data
        if (ch === '') {
            cur -= 1 // reconsume the EOF
        }
        return tok_cur_tag
    }
    if (ch === "\u0000") {
        parse_error()
        ch = "\ufffd"
    }
    // ordinary text: append '-' followed by the character
    tok_cur_tag.text += "-" + ch
    tok_state = tok_state_comment
    return null
}
5106
5107         // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
// Tokenizer state "comment": accumulates comment data in tok_cur_tag.text one
// character per call. Returns the comment token at EOF, otherwise null.
tok_state_comment = function () {
    var ch = txt.charAt(cur++)
    if (ch === '-') {
        // might be the start of "-->"; decided by the next state
        tok_state = tok_state_comment_end_dash
        return null
    }
    if (ch === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    if (ch === "\u0000") {
        parse_error()
        ch = "\ufffd"
    }
    tok_cur_tag.text += ch
    return null
}
5129
5130         // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
// Tokenizer state "comment end dash". A second '-' moves on to the
// comment-end state; any other character is appended to tok_cur_tag.text
// after a '-' and the comment state resumes.
// Returns the comment token at EOF, otherwise null.
tok_state_comment_end_dash = function () {
    var ch = txt.charAt(cur++)
    if (ch === '-') {
        tok_state = tok_state_comment_end
        return null
    }
    if (ch === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    if (ch === "\u0000") {
        parse_error()
        ch = "\ufffd"
    }
    tok_cur_tag.text += "-" + ch
    tok_state = tok_state_comment
    return null
}
5154
5155         // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
// Tokenizer state "comment end". Only '>' is a clean way out of this state;
// every other input is reported through parse_error().
// Returns the comment token (tok_cur_tag) on '>' and at EOF, otherwise null.
tok_state_comment_end = function () {
    var ch = txt.charAt(cur++)
    if (ch === '>') {
        tok_state = tok_state_data
        return tok_cur_tag
    }
    parse_error()
    if (ch === '') { // EOF
        tok_state = tok_state_data
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    if (ch === '!') {
        tok_state = tok_state_comment_end_bang
    } else if (ch === '-') {
        // one more dash: append a single '-' and stay in this state
        tok_cur_tag.text += '-'
    } else {
        // not an end after all: append "--" plus the character
        // (NUL is replaced by U+FFFD)
        tok_cur_tag.text += "--" + (ch === "\u0000" ? "\ufffd" : ch)
        tok_state = tok_state_comment
    }
    return null
}
5189
5190         // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
// Tokenizer state "comment end bang": the input so far ends in "--!".
// Returns the comment token (tok_cur_tag) on '>' and at EOF, otherwise null.
tok_state_comment_end_bang = function () {
    var c
    switch (c = txt.charAt(cur++)) {
        case '-':
            // Only "--!" is appended here. The '-' just read is not appended:
            // the comment-end-dash state either appends it later or treats it
            // as part of the closing "-->".
            tok_cur_tag.text += "--!"
            tok_state = tok_state_comment_end_dash
            break
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
        case "\u0000":
            parse_error()
            tok_cur_tag.text += "--!\ufffd"
            tok_state = tok_state_comment
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            tok_cur_tag.text += "--!" + c
            tok_state = tok_state_comment
    }
    return null
}
5219
5220         // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
// Tokenizer state "doctype". Whitespace moves straight on to the
// before-doctype-name state. Anything else is a parse error and is handed
// back for reconsumption; at EOF a new doctype token with an empty name and
// the 'force-quirks' flag set is returned. All other paths return null.
tok_state_doctype = function () {
    var ch = txt.charAt(cur++)
    var tok
    if (ch === "\t" || ch === "\u000a" || ch === "\u000c" || ch === ' ') {
        tok_state = tok_state_before_doctype_name
        return null
    }
    parse_error()
    if (ch === '') { // EOF
        tok_state = tok_state_data
        tok = new_doctype_token('')
        tok.flag('force-quirks', true)
        cur -= 1 // reconsume
        return tok
    }
    tok_state = tok_state_before_doctype_name
    cur -= 1 // reconsume
    return null
}
5245
// 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
// Tokenizer state "before doctype name". Skips whitespace, then creates the
// doctype token (stored in tok_cur_tag) from the first character of the name:
// uppercase letters are lowercased and NUL becomes U+FFFD.
// On '>' or EOF a fresh doctype token with an empty name and 'force-quirks'
// set is returned instead. Other paths return nothing, or null for the
// plain-character case.
tok_state_before_doctype_name = function () {
    var ch, tok
    ch = txt.charAt(cur++)
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            return
    }
    if (is_uc_alpha(ch)) {
        tok_cur_tag = new_doctype_token(ch.toLowerCase())
        tok_state = tok_state_doctype_name
        return
    }
    switch (ch) {
        case "\u0000":
            parse_error()
            tok_cur_tag = new_doctype_token("\ufffd")
            tok_state = tok_state_doctype_name
            return
        case '>':
            parse_error()
            tok = new_doctype_token('')
            tok.flag('force-quirks', true)
            tok_state = tok_state_data
            return tok
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok = new_doctype_token('')
            tok.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok
    }
    // any other character starts the name unchanged
    tok_cur_tag = new_doctype_token(ch)
    tok_state = tok_state_doctype_name
    return null
}
5284
5285         // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
// Tokenizer state "doctype name". Appends one character per call to
// tok_cur_tag.name (uppercase letters lowercased, NUL replaced by U+FFFD)
// until whitespace or '>' ends the name.
// Returns the doctype token on '>' and, with 'force-quirks' set, at EOF.
tok_state_doctype_name = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            tok_state = tok_state_after_doctype_name
            return
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
    }
    if (is_uc_alpha(ch)) {
        tok_cur_tag.name += ch.toLowerCase()
        return
    }
    if (ch === "\u0000") {
        parse_error()
        tok_cur_tag.name += "\ufffd"
        return
    }
    if (ch === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        tok_cur_tag.flag('force-quirks', true)
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    tok_cur_tag.name += ch
    return null
}
5317
5318         // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
// Tokenizer state "after doctype name". Skips whitespace and then expects
// '>', or the keyword "public" or "system" in any letter case. Anything else
// is a parse error that sets 'force-quirks' and switches to the bogus-doctype
// state. Returns the doctype token on '>' and at EOF.
tok_state_after_doctype_name = function () {
    var ch = txt.charAt(cur++)
    var keyword
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            return
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
    }
    // cur is already one past ch, so the candidate keyword starts at cur - 1
    keyword = txt.substr(cur - 1, 6).toLowerCase()
    if (keyword === 'public' || keyword === 'system') {
        cur += 5 // the first letter is consumed already; skip the other five
        tok_state = (keyword === 'public')
            ? tok_state_after_doctype_public_keyword
            : tok_state_after_doctype_system_keyword
        return
    }
    parse_error()
    tok_cur_tag.flag('force-quirks', true)
    tok_state = tok_state_bogus_doctype
    return null
}
5352
5353         // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
// Tokenizer state "after doctype public keyword". Whitespace is the only
// input that is not a parse error here. A quote directly after the keyword is
// reported but still starts the public identifier (reset to '').
// Returns the doctype token, with 'force-quirks' set, on '>' and at EOF.
tok_state_after_doctype_public_keyword = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            tok_state = tok_state_before_doctype_public_identifier
            return
        case '"':
        case "'":
            parse_error()
            tok_cur_tag.public_identifier = ''
            tok_state = (ch === '"')
                ? tok_state_doctype_public_identifier_double_quoted
                : tok_state_doctype_public_identifier_single_quoted
            return
        case '>':
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_bogus_doctype
            return null
    }
}
5392
5393         // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
// Tokenizer state "before doctype public identifier". Skips whitespace after
// the PUBLIC keyword and expects the opening quote of the public identifier.
// An opening quote is the valid input here, so it is not reported as a parse
// error; it resets tok_cur_tag.public_identifier to '' and selects the
// matching quoted state.
// Returns the doctype token, with 'force-quirks' set, on '>' and at EOF;
// quote and whitespace paths return nothing, the fallback returns null.
tok_state_before_doctype_public_identifier = function () {
    var c
    c = txt.charAt(cur++)
    if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
        return
    }
    if (c === '"') {
        tok_cur_tag.public_identifier = ''
        tok_state = tok_state_doctype_public_identifier_double_quoted
        return
    }
    if (c === "'") {
        tok_cur_tag.public_identifier = ''
        tok_state = tok_state_doctype_public_identifier_single_quoted
        return
    }
    if (c === '>') {
        // the identifier is missing altogether
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_data
        return tok_cur_tag
    }
    if (c === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        tok_cur_tag.flag('force-quirks', true)
        cur -= 1 // reconsume
        return tok_cur_tag
    }
    // Anything else
    parse_error()
    tok_cur_tag.flag('force-quirks', true)
    tok_state = tok_state_bogus_doctype
    return null
}
5431
5432
5433         // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
// Tokenizer state "doctype public identifier (double-quoted)". Appends one
// character per call to tok_cur_tag.public_identifier (NUL becomes U+FFFD)
// until the closing '"'. A '>' or EOF inside the quotes is a parse error that
// sets 'force-quirks' and returns the doctype token.
tok_state_doctype_public_identifier_double_quoted = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case '"':
            tok_state = tok_state_after_doctype_public_identifier
            return
        case "\u0000":
            parse_error()
            tok_cur_tag.public_identifier += "\ufffd"
            return
        case '>':
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            tok_cur_tag.public_identifier += ch
            return null
    }
}
5463
5464         // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
// Tokenizer state "doctype public identifier (single-quoted)". Appends one
// character per call to tok_cur_tag.public_identifier (NUL becomes U+FFFD)
// until the closing "'". A '>' or EOF inside the quotes is a parse error that
// sets 'force-quirks' and returns the doctype token.
tok_state_doctype_public_identifier_single_quoted = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case "'":
            tok_state = tok_state_after_doctype_public_identifier
            return
        case "\u0000":
            parse_error()
            tok_cur_tag.public_identifier += "\ufffd"
            return
        case '>':
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_data
            return tok_cur_tag
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            tok_cur_tag.public_identifier += ch
            return null
    }
}
5494
5495         // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
// Tokenizer state "after doctype public identifier". Whitespace leads to the
// state between the public and system identifiers, and '>' ends the doctype.
// A quote directly after the public identifier is reported as a parse error
// but still starts the system identifier (reset to '').
// Returns the doctype token on '>' and, with 'force-quirks' set, at EOF.
tok_state_after_doctype_public_identifier = function () {
    var ch = txt.charAt(cur++)
    switch (ch) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
            tok_state = tok_state_between_doctype_public_and_system_identifiers
            return
        case '>':
            tok_state = tok_state_data
            return tok_cur_tag
        case '"':
        case "'":
            parse_error()
            tok_cur_tag.system_identifier = ''
            tok_state = (ch === '"')
                ? tok_state_doctype_system_identifier_double_quoted
                : tok_state_doctype_system_identifier_single_quoted
            return
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            tok_cur_tag.flag('force-quirks', true)
            cur -= 1 // reconsume
            return tok_cur_tag
        default:
            parse_error()
            tok_cur_tag.flag('force-quirks', true)
            tok_state = tok_state_bogus_doctype
            return null
    }
}
5532
5533         // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
// Tokenizer state 8.2.4.61 (between DOCTYPE public and system identifiers).
//
// Consumes one character of txt. Whitespace is skipped; a quote opens the
// system identifier; '>' or EOF finishes the token.
//
// Returns tok_cur_tag when the token is finished, otherwise nothing/null (the
// tokenizer then continues from tok_state).
tok_state_between_doctype_public_and_system_identifiers = function () {
        var c
        c = txt.charAt(cur++)
        if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
                return
        }
        if (c === '>') {
                tok_state = tok_state_data
                return tok_cur_tag
        }
        if (c === '"') {
                // Not a parse error in this state: we only get here after
                // whitespace separated the two identifiers. (The missing-
                // whitespace error belongs to state 8.2.4.60.)
                tok_cur_tag.system_identifier = ''
                tok_state = tok_state_doctype_system_identifier_double_quoted
                return
        }
        if (c === "'") {
                // same as above: well-formed, so no parse_error()
                tok_cur_tag.system_identifier = ''
                tok_state = tok_state_doctype_system_identifier_single_quoted
                return
        }
        if (c === '') { // EOF (charAt past the end of txt yields '')
                parse_error()
                tok_state = tok_state_data
                tok_cur_tag.flag('force-quirks', true)
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        // Anything else
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5569
5570         // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
// Tokenizer state 8.2.4.62 (after DOCTYPE system keyword).
//
// Consumes one character of txt. Returns tok_cur_tag when the token is
// finished ('>' or EOF); otherwise returns nothing/null.
tok_state_after_doctype_system_keyword = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_doctype_system_identifier
                        return
                case '"':
                        // quote directly after the keyword (no whitespace)
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_double_quoted
                        return
                case "'":
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_single_quoted
                        return
                case '>':
                        // keyword without any identifier
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5609
5610         // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
// Tokenizer state 8.2.4.63 (before DOCTYPE system identifier).
//
// Consumes one character of txt: skips whitespace and waits for the quote
// that opens the system identifier. Returns tok_cur_tag when the token is
// finished ('>' or EOF); otherwise returns nothing/null.
tok_state_before_doctype_system_identifier = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        return // stay in this state
                case '"':
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_double_quoted
                        return
                case "'":
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_single_quoted
                        return
                case '>':
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5646
5647         // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
// Tokenizer state 8.2.4.64 (DOCTYPE system identifier, double-quoted).
//
// Consumes one character of txt and appends it to
// tok_cur_tag.system_identifier until the closing '"' is seen. Returns
// tok_cur_tag when the token is cut short by '>' or EOF; otherwise returns
// nothing/null.
tok_state_doctype_system_identifier_double_quoted = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '"':
                        tok_state = tok_state_after_doctype_system_identifier
                        return
                case "\u0000":
                        // NULL is replaced by U+FFFD
                        parse_error()
                        tok_cur_tag.system_identifier += "\ufffd"
                        return
                case '>':
                        // identifier was never closed
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.system_identifier += ch
                        return null
        }
}
5677
5678         // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
// Tokenizer state 8.2.4.65 (DOCTYPE system identifier, single-quoted).
//
// Consumes one character of txt and appends it to
// tok_cur_tag.system_identifier until the closing "'" is seen. Returns
// tok_cur_tag when the token is cut short by '>' or EOF; otherwise returns
// nothing/null.
tok_state_doctype_system_identifier_single_quoted = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "'":
                        tok_state = tok_state_after_doctype_system_identifier
                        return
                case "\u0000":
                        // NULL is replaced by U+FFFD
                        parse_error()
                        tok_cur_tag.system_identifier += "\ufffd"
                        return
                case '>':
                        // identifier was never closed
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.system_identifier += ch
                        return null
        }
}
5708
5709         // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
// Tokenizer state 8.2.4.66 (after DOCTYPE system identifier).
//
// Consumes one character of txt. Returns tok_cur_tag when the token is
// finished ('>' or EOF); otherwise returns nothing/null.
tok_state_after_doctype_system_identifier = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        return // trailing whitespace is skipped
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // junk after the identifier: a parse error, but unlike the
                        // other DOCTYPE states this one does _not_ set force-quirks
                        parse_error()
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5733
5734         // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
// Tokenizer state 8.2.4.67 (bogus DOCTYPE).
//
// Discards characters one at a time until '>' or EOF, then switches back to
// the data state and returns tok_cur_tag. Returns null for every discarded
// character.
tok_state_bogus_doctype = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '>' && ch !== '') {
                // Anything else: ignored
                return null
        }
        if (ch === '') {
                cur -= 1 // EOF: reconsume
        }
        tok_state = tok_state_data
        return tok_cur_tag
}
5750
5751         // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
// Tokenizer state 8.2.4.68 (CDATA section).
//
// Consumes everything from cur up to the next "]]>" (or to the end of txt
// when there is none), skipping the "]]>" itself, and switches back to the
// data state. NULL characters in the consumed text become U+FFFD.
//
// Returns a character token holding the consumed text, or null when the
// section is empty.
tok_state_cdata_section = function () {
        var chunk, resume_at, stop
        tok_state = tok_state_data
        stop = txt.indexOf(']]>', cur)
        if (stop === -1) {
                // unterminated: the section runs to the end of the input
                stop = txt.length
                resume_at = stop
        } else {
                resume_at = stop + 3 // step over "]]>"
        }
        chunk = txt.slice(cur, stop)
        cur = resume_at
        chunk = chunk.replace(new RegExp("\u0000", 'g'), "\ufffd")
        if (chunk.length === 0) {
                return null
        }
        return new_character_token(chunk) // fixfull split
}
5769
5770         // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5771         // Don't set this as a state, just call it
5772         // returns a string (NOT a text node)
// Consume a character reference (HTML5 spec 8.2.4.69).
//
// This is a helper, not a tokenizer state: it inspects txt starting at cur
// (the code expects cur to sit just after the "&") and advances cur past
// whatever it consumes.
//
// allowed_char: optional "additional allowed character". When the next
//         character equals it, nothing is consumed. Omit it (or pass null)
//         when there is none.
// in_attr: optional; truthy when the reference is part of an attribute
//         value, which turns on the exceptions for unterminated legacy names.
//
// Returns a string (NOT a text node): the decoded character(s), or '&' with
// cur left untouched when no reference was consumed.
parse_character_reference = function (allowed_char, in_attr) {
        var base, c, charset, code_point, decoded, has_own, i, max, name, next, start
        if (cur >= txt.length) {
                return '&'
        }
        switch (c = txt.charAt(cur)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                case '<':
                case '&':
                case allowed_char: // never matches when allowed_char is null/undefined
                        // explicitly not a parse error
                        return '&'
                case ';':
                        // there has to be "one or more" alnums between & and ; to be a parse error
                        return '&'
                case '#':
                        if (txt.charAt(cur + 1).toLowerCase() === 'x') {
                                base = 16
                                charset = hex_chars
                                start = cur + 2
                        } else {
                                base = 10
                                charset = digits
                                start = cur + 1
                        }
                        // count the digits, never looking past the end of the input
                        i = 0
                        while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
                                i += 1
                        }
                        if (i === 0) {
                                // "&#" / "&#x" without any digit (including at EOF): nothing
                                // is consumed, but the spec says this is a parse error
                                parse_error()
                                return '&'
                        }
                        cur = start + i
                        if (txt.charAt(cur) === ';') {
                                cur += 1
                        } else {
                                parse_error() // missing terminating ";"
                        }
                        // parseInt with an explicit radix copes with leading zeros
                        code_point = parseInt(txt.substr(start, i), base)
                        if (unicode_fixes[code_point] != null) {
                                parse_error()
                                return unicode_fixes[code_point]
                        }
                        if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
                                // surrogates and out-of-range values become U+FFFD
                                parse_error()
                                return "\ufffd"
                        }
                        if (
                                (code_point >= 0x0001 && code_point <= 0x0008) ||
                                (code_point >= 0x000d && code_point <= 0x001f) ||
                                (code_point >= 0x007f && code_point <= 0x009f) ||
                                (code_point >= 0xfdd0 && code_point <= 0xfdef) ||
                                code_point === 0x000b ||
                                // U+xFFFE and U+xFFFF in every plane (code_point is
                                // at most 0x10ffff here, so the bit test is safe)
                                (code_point & 0xfffe) === 0xfffe
                        ) {
                                // control characters and noncharacters: a parse error,
                                // but the character is still returned
                                parse_error()
                        }
                        return from_code_point(code_point)
                default:
                        // Count the leading alphanumerics, at most 31 (presumably the
                        // longest entity name -- TODO confirm). The bounds check keeps
                        // the '' that charAt() returns past the end of txt from being
                        // looked up in alnum.
                        for (i = 0; i < 31 && cur + i < txt.length; ++i) {
                                if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
                                        break
                                }
                        }
                        if (i === 0) {
                                // exit early, because parse_error() below needs at least one alnum
                                return '&'
                        }
                        if (txt.charAt(cur + i) === ';') {
                                decoded = decode_named_char_ref(txt.substr(cur, i))
                                i += 1 // scan past the ';' (after, so we don't pass it to decode)
                                if (decoded != null) {
                                        cur += i
                                        return decoded
                                }
                                // else fall through (check for match without last char(s) or ";")
                        }
                        // no ';' terminator (only legacy char refs)
                        has_own = Object.prototype.hasOwnProperty
                        max = i
                        for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
                                name = txt.substr(cur, i)
                                // own properties only: "constructor", "toString", etc. must
                                // not resolve to members inherited from Object.prototype
                                if (!has_own.call(legacy_char_refs, name)) {
                                        continue
                                }
                                c = legacy_char_refs[name]
                                if (c == null) {
                                        continue
                                }
                                if (in_attr) {
                                        next = txt.charAt(cur + i)
                                        if (next === '=') {
                                                // "because some legacy user agents will
                                                // misinterpret the markup in those cases"
                                                parse_error()
                                                return '&'
                                        }
                                        if (next !== '' && alnum.indexOf(next) > -1) {
                                                // this makes attributes forgiving about url args
                                                return '&'
                                        }
                                }
                                // ok, and besides the weird exceptions for attributes...
                                // return the matching char
                                cur += i // consume entity chars
                                parse_error() // because no terminating ";"
                                return c
                        }
                        parse_error()
                        return '&'
        }
}
5895
// Drops the next token from the input if (and only if) it is a newline.
//
// Runs the tokenizer until it yields a token. A text token that counts as a
// newline stays consumed; for anything else cur is rewound to where it was on
// entry (only cur is rewound, nothing else the tokenizer touched).
eat_next_token_if_newline = function () {
        var start_pos = cur
        var is_raw_char, tok
        do {
                tok = tok_state()
        } while (tok == null)
        if (tok.type === TYPE_TEXT) {
                // What counts as a newline depends on whether the text came from a
                // character reference: exactly one consumed character means it
                // did not, and then a carriage return qualifies too.
                is_raw_char = cur - start_pos === 1
                if (tok.text === "\u000a" || (is_raw_char && tok.text === "\u000d")) {
                        return
                }
        }
        // not a "newline": put it back
        cur = start_pos
}
5919
5920         // tree constructor initialization
5921         // see comments on TYPE_TAG/etc for the structure of this data
             // Everything assigned below is state shared by the tokenizer and tree
             // construction functions of this parser run. The statements are kept in
             // this order on purpose (Node objects are constructed before
             // prev_node_id is reset).
5922         txt = args_html // the text being parsed; the tokenizer reads it with txt.charAt(cur++)
5923         cur = 0 // index into txt of the next character to consume
5924         doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML}) // its children are the result of a non-fragment parse
5925         doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5926         fragment_root = null // fragment parsing algorithm returns children of this
5927         open_els = [] // presumably the spec's "stack of open elements" -- TODO confirm
5928         afe = [] // active formatting elements
5929         template_ins_modes = [] // parse_init unshifts ins_mode_in_template onto this for a <template> context
5930         ins_mode = ins_mode_initial
5931         original_ins_mode = ins_mode // TODO check spec
5932         flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5933         flag_frameset_ok = true
5934         flag_parsing = true // parse_main_loop keeps running while this is true
5935         flag_foster_parenting = false
5936         form_element_pointer = null
5937         temporary_buffer = null
5938         pending_table_character_tokens = []
5939         head_element_pointer = null
5940         flag_fragment_parsing = false // set by parse_init when there is a context element
5941         context_element = null // set by parse_init from args.fragment / args.context
5942         prev_node_id = 0 // just for debugging
5943
5944         // tokenizer initialization
5945         tok_state = tok_state_data // the current tokenizer state function; called once per step
5946
// One-time setup before the main loop runs.
//
// - args.fragment (string): builds a context element from a name in the
//   html5lib-tests fragment format ("td", "svg title", "math mi", ...).
// - args.context (Node): uses that node as the context element; it takes
//   precedence over args.fragment.
// - With a context element, sets up the fragment parsing algorithm:
//   http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
// - Always normalizes line endings in txt.
parse_init = function () {
        var ancestor, frag_name, frag_ns, name, source_doc

        // fragment parsing (text arg)
        if (args.fragment != null) {
                // format described here:
                // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
                frag_name = args.fragment
                frag_ns = NS_HTML
                if (frag_name.substr(0, 5) === 'math ') {
                        frag_name = frag_name.substr(5)
                        frag_ns = NS_MATHML
                } else if (frag_name.substr(0, 4) === 'svg ') {
                        frag_name = frag_name.substr(4)
                        frag_ns = NS_SVG
                }
                context_element = token_to_element(new_open_tag(frag_name), frag_ns)
                context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
                context_element.document.flag('quirks mode', QUIRKS_NO)
        }

        // fragment parsing (Node arg)
        if (args.context != null) {
                context_element = args.context
        }

        // fragment parsing algorithm
        if (context_element != null) {
                flag_fragment_parsing = true
                doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})

                // Walk up from the context element looking for its document: this
                // file only puts a "document" property on the root element.
                source_doc = null
                for (ancestor = context_element; ancestor; ancestor = ancestor.parent) {
                        if (ancestor.document != null) {
                                source_doc = ancestor.document
                                break
                        }
                }
                if (source_doc) {
                        // inherit the quirks mode of the context's document
                        doc.flag('quirks mode', source_doc.flag('quirks mode'))
                }

                // pick the initial tokenizer state from the context element
                if (context_element.namespace === NS_HTML) {
                        name = context_element.name
                        if (name === 'title' || name === 'textarea') {
                                tok_state = tok_state_rcdata
                        } else if (name === 'style' || name === 'xmp' || name === 'iframe' || name === 'noembed' || name === 'noframes') {
                                tok_state = tok_state_rawtext
                        } else if (name === 'script') {
                                tok_state = tok_state_script_data
                        } else if (name === 'noscript') {
                                if (flag_scripting) {
                                        tok_state = tok_state_rawtext
                                }
                        } else if (name === 'plaintext') {
                                tok_state = tok_state_plaintext
                        }
                }

                // synthetic <html> root; its children are what the parse returns
                fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
                doc.children.push(fragment_root)
                fragment_root.document = doc
                open_els = [fragment_root]
                if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
                        template_ins_modes.unshift(ins_mode_in_template)
                }
                // fixfull create token for context (it should have its original one already)
                reset_ins_mode()

                // set form_element pointer... in the foreign doc?!
                // (nearest <form>, starting with the context element itself)
                for (ancestor = context_element; ancestor; ancestor = ancestor.parent) {
                        if (ancestor.name === 'form' && ancestor.namespace === NS_HTML) {
                                form_element_pointer = ancestor
                                break
                        }
                }
        }

        // text pre-processing
        // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
        // CRLF pairs first, then any remaining lone CR, each become one LF
        txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
        txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
}
6051
6052         // http://www.w3.org/TR/html5/syntax.html#tree-construction
// Drives the parse (http://www.w3.org/TR/html5/syntax.html#tree-construction).
//
// Repeatedly runs the current tokenizer state and hands every token it
// produces to process_token, until flag_parsing is turned off.
parse_main_loop = function () {
        var token
        while (flag_parsing) {
                token = tok_state()
                if (token == null) {
                        // this step didn't complete a token
                        continue
                }
                process_token(token)
                // fixfull parse error if has self-closing flag, but it wasn't acknowledged
        }
}
6063         parse_init() // apply args.fragment / args.context and normalize line endings in txt
6064         parse_main_loop() // tokenize and process tokens until flag_parsing is turned off
6065
             // Result of the parse: an array of top-level nodes.
6066         if (flag_fragment_parsing) {
             // fragment parse: only the nodes under the synthetic root created by parse_init
6067                 return fragment_root.children
6068         }
             // document parse: the children of the document node
6069         return doc.children
6070 }
6071
6072 exports.parse = parse_html // the parser entry point; see "how to use this code" at the top of this file
6073 exports.Node = Node // constructor of the nodes that parse() returns
     // presumably debugging helpers (defined elsewhere in this file) -- TODO confirm
6074 exports.debug_log_reset = debug_log_reset
6075 exports.debug_log_each = debug_log_each
     // node type constants (the first argument to `new Node`, compared against a node's .type)
6076 exports.TYPE_TAG = TYPE_TAG
6077 exports.TYPE_TEXT = TYPE_TEXT
6078 exports.TYPE_COMMENT = TYPE_COMMENT
6079 exports.TYPE_DOCTYPE = TYPE_DOCTYPE
     // namespace constants (a node's .namespace)
6080 exports.NS_HTML = NS_HTML
6081 exports.NS_MATHML = NS_MATHML
6082 exports.NS_SVG = NS_SVG
     // values of a document node's 'quirks mode' flag
6083 exports.QUIRKS_NO = QUIRKS_NO
6084 exports.QUIRKS_LIMITED = QUIRKS_LIMITED
6085 exports.QUIRKS_YES = QUIRKS_YES