JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
change parser api again
[peach-html5-editor.git] / parser.js
1 // todo remove unused variables
2 // todo remove debug log, or make a way to access it
3
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
6 //
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
10 // later version.
11 //
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
15 // details.
16 //
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20
21 // This file implements a thorough parser for html5, meant to be used by a
22 // WYSIWYG editor.
23
24 // The implementation is a pretty direct implementation of the parsing algorithm
25 // described here:
26 //
27 //     http://www.w3.org/TR/html5/syntax.html
28 //
29 // except for some places marked "WHATWG" that are implemented as described here:
30 //
31 //     https://html.spec.whatwg.org/multipage/syntax.html
32 //
33 // This code passes all of the tests in the .dat files at:
34 //
35 //     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
36
37
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
41 //
42 // See README.md for how to run this file in the browser or in node.js.
43 //
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
46 //
47 // Call it like this:
48 //
49 //     peach_parser.parse("<p><b>hi</p>")
50 //
51 // Or, if you don't want <html><head><body>/etc, do this:
52 //
53 //     peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
54 //
55 // The return value is an array of Nodes. A Node contains:
56 //     type: one of: "tag", "text", "comment", "doctype"
57 //     text: contents for text/comment nodes
58 //     attrs: object of attributes, eg {href: "#main"}
59 //     children: array of Nodes
60 //     namespace: one of: "html", "mathml", "svg"
61 //     parent: another Node or null
62
63 // This code is a work in progress, e.g. try searching this file for "fixfull",
64 // "TODO" and "FIXME".
65
66
67 // Notes:  stacks/lists
68 //
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
71 // his head straight:
72 //
73 // stacks grow downward (current element is index=0)
74 //
75 // example: open_els = [a, b, c, d, e, f, g]
76 //
77 // "grows downwards" means it's visualized like this: (index: el "names")
78 //
79 //   6: g "start of the list", "topmost", "first"
80 //   5: f
81 //   4: e "previous" (to d), "above", "before"
82 //   3: d   (previous/next are relative to this element)
83 //   2: c "next", "after", "lower", "below"
84 //   1: b
85 //   0: a "end of the list", "current node", "bottommost", "last"
86 (function () {
87
// Every name assigned at the top level of the wrapper function is declared
// here, so the assignments further down create locals rather than globals.

// constants
var NS_HTML, NS_MATHML, NS_SVG
var QUIRKS_LIMITED, QUIRKS_NO, QUIRKS_YES
var TYPE_AAA_BOOKMARK, TYPE_AFE_MARKER, TYPE_COMMENT, TYPE_DOCTYPE,
        TYPE_END_TAG, TYPE_EOF, TYPE_START_TAG, TYPE_TAG, TYPE_TEXT

// functions, tables and state (alphabetical)
var _decode_named_char_ref, adjust_foreign_attributes,
        adjust_mathml_attributes, adjust_svg_attributes, adp_els, alnum,
        context, debug_log, debug_log_each, debug_log_reset,
        decode_named_char_ref, decode_named_char_ref_cache,
        decode_named_char_ref_el, digits, el_is_special,
        el_is_special_not_adp, end_tag_implied, exports, foreign_attr_fixes,
        formatting_elements, foster_parenting_targets, from_code_point,
        g_debug_log, h_tags, hex_chars, is_html_integration,
        is_input_hidden_tok, is_lc_alpha, is_mathml_text_integration_point,
        is_space, is_space_tok, is_uc_alpha, lc_alpha, legacy_char_refs,
        mathml_elements, mathml_text_integration, new_aaa_bookmark,
        new_afe_marker, new_character_token, new_comment_token,
        new_doctype_token, new_element, new_end_tag, new_eof_token,
        new_open_tag, new_text_node, parse_html, prev_node_id,
        quirks_yes_pi_prefixes, space_chars, special_elements,
        svg_attribute_fixes, svg_elements, svg_name_fixes, tag_name_chars,
        uc_alpha, unicode_fixes, whitespace_chars
89
// Work out where we are running: 'module' when a CommonJS-style
// `module.exports` exists, otherwise 'browser'.
context = ((typeof module) === 'undefined' || module.exports == null) ? 'browser' : 'module'
if (context === 'browser') {
        // In the browser, start with an empty object on window.
        window.peach_parser = {}
}
96
// Return the string for Unicode code point `x`.
// Uses String.fromCodePoint when the engine has it; otherwise builds the
// string by hand, using a UTF-16 surrogate pair for code points above 0xffff.
from_code_point = function (x) {
        var offset, high_surrogate, low_surrogate
        if (String.fromCodePoint != null) {
                return String.fromCodePoint(x)
        }
        if (x <= 0xffff) {
                return String.fromCharCode(x)
        }
        offset = x - 0x10000
        high_surrogate = 0xd800 + (offset >> 10) // top bits
        low_surrogate = 0xdc00 + (offset % 0x400) // low 10 bits
        return String.fromCharCode(high_surrogate, low_surrogate)
}
108
// Node types. Each node is an object of the Node class, and its `type` is one
// of the values below.
//
// These are the types found in the finished tree:
TYPE_TAG = 'tag' // name, {attributes}, [children]
TYPE_TEXT = 'text' // "text"
TYPE_COMMENT = 'comment'
TYPE_DOCTYPE = 'doctype'
// The following types are emitted by the tokenizer, but shouldn't end up in
// the tree:
TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
TYPE_END_TAG = 5 // name
TYPE_EOF = 6
TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm

// Namespaces a node can belong to
NS_HTML = 'html'
NS_MATHML = 'mathml'
NS_SVG = 'svg'

// Quirks mode settings
QUIRKS_NO = 'no'
QUIRKS_LIMITED = 'limited'
QUIRKS_YES = 'yes'
130
// Debug messages are collected in an array instead of being printed, so that
// (for example) they can be shown only for the tests that fail.
g_debug_log = []

// Throw away everything logged so far.
debug_log_reset = function () {
        g_debug_log = []
}

// Append one message to the log.
debug_log = function (str) {
        g_debug_log.push(str)
}

// Call cb(message) for each logged message, oldest first.
debug_log_each = function (cb) {
        var pos = 0
        while (pos < g_debug_log.length) {
                cb(g_debug_log[pos])
                pos += 1
        }
}
145
// Counter behind the automatically assigned node ids.
prev_node_id = 0

// Node: used both for nodes in the parsed tree and for tokenizer tokens.
//
// type: one of the TYPE_* constants
// args: (optional) object with initial values for any of the fields below;
//       fields that are missing or null get the defaults shown here.
function Node (type, args) {
        if (args == null) {
                args = {}
        }
        this.type = type // one of the TYPE_* constants above
        this.name = args.name != null ? args.name : '' // tag name
        this.text = args.text != null ? args.text : '' // contents for text/comment nodes
        this.attrs = args.attrs != null ? args.attrs : {}
        this.children = args.children != null ? args.children : []
        this.namespace = args.namespace != null ? args.namespace : NS_HTML
        this.parent = args.parent != null ? args.parent : null
        // private:
        // attrs in progress, TYPE_START_TAG only.  The option is named after
        // the field (attrs_a); the old misspelled option name (attr_k) is
        // still honoured so existing callers keep working.
        if (args.attrs_a != null) {
                this.attrs_a = args.attrs_a
        } else if (args.attr_k != null) {
                this.attrs_a = args.attr_k
        } else {
                this.attrs_a = []
        }
        this.token = args.token != null ? args.token : null
        this.flags = args.flags != null ? args.flags : {}
        if (args.id != null) {
                // derived id: the given id with a "+" appended
                this.id = args.id + "+"
        } else {
                // fresh id: next value of the counter, as a string
                this.id = "" + (++prev_node_id)
        }
}

// Record that the self-closing flag was acknowledged.  The flag is set on
// this.token when there is one, otherwise on this node itself.
Node.prototype.acknowledge_self_closing = function () {
        if (this.token != null) {
                this.token.flag('did_self_close', true)
        } else {
                this.flag('did_self_close', true)
        }
}

// Combined getter/setter for this.flags:
//     flag(key, value)  sets the flag (value must not be null/undefined)
//     flag(key)         returns the flag's current value
Node.prototype.flag = function (key, value) {
        if (value != null) {
                this.flags[key] = value
        } else {
                return this.flags[key]
        }
}
184
// Node factories.  Each one takes only the arguments that are normally known
// at the moment the parser creates that kind of node.

// start tag token
new_open_tag = function (tag_name) {
        return new Node(TYPE_START_TAG, {name: tag_name})
}
// end tag token
new_end_tag = function (tag_name) {
        return new Node(TYPE_END_TAG, {name: tag_name})
}
// element (tree node)
new_element = function (tag_name) {
        return new Node(TYPE_TAG, {name: tag_name})
}
// text node
new_text_node = function (contents) {
        return new Node(TYPE_TEXT, {text: contents})
}
// character tokens are the same thing as text nodes
new_character_token = new_text_node
// comment
new_comment_token = function (contents) {
        return new Node(TYPE_COMMENT, {text: contents})
}
// doctype
new_doctype_token = function (doctype_name) {
        return new Node(TYPE_DOCTYPE, {name: doctype_name})
}
// end of file
new_eof_token = function () {
        return new Node(TYPE_EOF)
}
// marker for the list of active formatting elements
new_afe_marker = function () {
        return new Node(TYPE_AFE_MARKER)
}
// bookmark for the adoption agency algorithm
new_aaa_bookmark = function () {
        return new Node(TYPE_AAA_BOOKMARK)
}
214
// Character classes, stored as strings of the characters they contain.
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = lc_alpha + uc_alpha + digits
hex_chars = digits + "abcdefABCDEF"

// true if str is exactly one upper case ASCII letter
is_uc_alpha = function (str) {
        if (str.length !== 1) {
                return false
        }
        return uc_alpha.indexOf(str) !== -1
}
// true if str is exactly one lower case ASCII letter
is_lc_alpha = function (str) {
        if (str.length !== 1) {
                return false
        }
        return lc_alpha.indexOf(str) !== -1
}

// some SVG elements have dashes in them
tag_name_chars = alnum + "-"

// http://www.w3.org/TR/html5/infrastructure.html#space-character
space_chars = "\u0009\u000a\u000c\u000d\u0020"
// true if txt is exactly one space character
is_space = function (txt) {
        if (txt.length !== 1) {
                return false
        }
        return space_chars.indexOf(txt) !== -1
}
// true if t is a text token/node holding exactly one space character
is_space_tok = function (t) {
        if (t.type !== TYPE_TEXT) {
                return false
        }
        return is_space(t.text)
}
239
// Returns true if t is a start tag token whose "type" attribute is "hidden"
// (compared case-insensitively).  Only the first "type" entry found in
// t.attrs_a is considered.  Returns false for anything else, including start
// tags with no "type" attribute.
is_input_hidden_tok = function (t) {
        var idx, attr
        if (t.type === TYPE_START_TAG) {
                for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                        attr = t.attrs_a[idx] // [name, value]
                        if (attr[0] === 'type') {
                                return attr[1].toLowerCase() === 'hidden'
                        }
                }
        }
        return false
}
256
// Unicode whitespace characters, see:
// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
whitespace_chars =
        "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680" +
        "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" +
        "\u2028\u2029\u202f\u205f\u3000"

// Replacement characters, keyed by code point (0x00 and part of 0x80-0x9F).
// NOTE(review): looks like the replacement table for numeric character
// references - confirm against where the tokenizer uses it.
unicode_fixes = {
        0x00: "\uFFFD",
        0x80: "\u20AC",
        0x82: "\u201A",
        0x83: "\u0192",
        0x84: "\u201E",
        0x85: "\u2026",
        0x86: "\u2020",
        0x87: "\u2021",
        0x88: "\u02C6",
        0x89: "\u2030",
        0x8A: "\u0160",
        0x8B: "\u2039",
        0x8C: "\u0152",
        0x8E: "\u017D",
        0x91: "\u2018",
        0x92: "\u2019",
        0x93: "\u201C",
        0x94: "\u201D",
        0x95: "\u2022",
        0x96: "\u2013",
        0x97: "\u2014",
        0x98: "\u02DC",
        0x99: "\u2122",
        0x9A: "\u0161",
        0x9B: "\u203A",
        0x9C: "\u0153",
        0x9E: "\u017E",
        0x9F: "\u0178"
}
289
// Lower case doctype public identifier prefixes, one per entry.
// NOTE(review): judging by the name these select full quirks mode - confirm
// against is_quirks_yes_doctype.
quirks_yes_pi_prefixes = [
        '+//silmaril//dtd html pro v0r11 19970101//',
        '-//as//dtd html 3.0 aswedit + extensions//',
        '-//advasoft ltd//dtd html 3.0 aswedit + extensions//',
        '-//ietf//dtd html 2.0 level 1//',
        '-//ietf//dtd html 2.0 level 2//',
        '-//ietf//dtd html 2.0 strict level 1//',
        '-//ietf//dtd html 2.0 strict level 2//',
        '-//ietf//dtd html 2.0 strict//',
        '-//ietf//dtd html 2.0//',
        '-//ietf//dtd html 2.1e//',
        '-//ietf//dtd html 3.0//',
        '-//ietf//dtd html 3.2 final//',
        '-//ietf//dtd html 3.2//',
        '-//ietf//dtd html 3//',
        '-//ietf//dtd html level 0//',
        '-//ietf//dtd html level 1//',
        '-//ietf//dtd html level 2//',
        '-//ietf//dtd html level 3//',
        '-//ietf//dtd html strict level 0//',
        '-//ietf//dtd html strict level 1//',
        '-//ietf//dtd html strict level 2//',
        '-//ietf//dtd html strict level 3//',
        '-//ietf//dtd html strict//',
        '-//ietf//dtd html//',
        '-//metrius//dtd metrius presentational//',
        '-//microsoft//dtd internet explorer 2.0 html strict//',
        '-//microsoft//dtd internet explorer 2.0 html//',
        '-//microsoft//dtd internet explorer 2.0 tables//',
        '-//microsoft//dtd internet explorer 3.0 html strict//',
        '-//microsoft//dtd internet explorer 3.0 html//',
        '-//microsoft//dtd internet explorer 3.0 tables//',
        '-//netscape comm. corp.//dtd html//',
        '-//netscape comm. corp.//dtd strict html//',
        "-//o'reilly and associates//dtd html 2.0//",
        "-//o'reilly and associates//dtd html extended 1.0//",
        "-//o'reilly and associates//dtd html extended relaxed 1.0//",
        '-//sq//dtd html 2.0 hotmetal + extensions//',
        '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
        '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',
        '-//spyglass//dtd html 2.0 extended//',
        '-//sun microsystems corp.//dtd hotjava html//',
        '-//sun microsystems corp.//dtd hotjava strict html//',
        '-//w3c//dtd html 3 1995-03-24//',
        '-//w3c//dtd html 3.2 draft//',
        '-//w3c//dtd html 3.2 final//',
        '-//w3c//dtd html 3.2//',
        '-//w3c//dtd html 3.2s draft//',
        '-//w3c//dtd html 4.0 frameset//',
        '-//w3c//dtd html 4.0 transitional//',
        '-//w3c//dtd html experimental 19960712//',
        '-//w3c//dtd html experimental 970421//',
        '-//w3c//dtd w3 html//',
        '-//w3o//dtd w3 html 3.0//',
        '-//webtechs//dtd mozilla html 2.0//',
        '-//webtechs//dtd mozilla html//'
]
347
// These are the character references that don't need a terminating semicolon
// min length: 2, max: 6, none are a prefix of any other.
//
// Keys are the reference names (without "&"); values are the characters they
// decode to.  The two invisible characters (nbsp, shy) are written as
// escapes so they cannot be lost or overlooked when the file is edited.
legacy_char_refs = {
        Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
        aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
        aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
        Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
        curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
        ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
        euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
        Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
        igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
        lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
        Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
        Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
        Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
        pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
        shy: "\u00ad", sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
        times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
        ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
        yen: '¥', yuml: 'ÿ'
}
370
371 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
372 //raw_text_elements = ['script', 'style']
373 //escapable_raw_text_elements = ['textarea', 'title']
// SVG element names, from http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
// Written as space separated names (none of them contains a space) and
// split into an array.
svg_elements = [
        'a altGlyph altGlyphDef altGlyphItem animate animateColor',
        'animateMotion animateTransform circle clipPath color-profile',
        'cursor defs desc ellipse feBlend feColorMatrix',
        'feComponentTransfer feComposite feConvolveMatrix',
        'feDiffuseLighting feDisplacementMap feDistantLight feFlood',
        'feFuncA feFuncB feFuncG feFuncR feGaussianBlur feImage',
        'feMerge feMergeNode feMorphology feOffset fePointLight',
        'feSpecularLighting feSpotLight feTile feTurbulence filter',
        'font font-face font-face-format font-face-name font-face-src',
        'font-face-uri foreignObject g glyph glyphRef hkern',
        'image line linearGradient marker mask metadata',
        'missing-glyph mpath path pattern polygon polyline',
        'radialGradient rect script set stop style svg',
        'switch symbol text textPath title tref tspan use',
        'view vkern'
].join(' ').split(' ')
392
// MathML element names, from http://www.w3.org/TR/MathML/ Version 3.0 2nd
// Edition.  Alphabetical, each name listed once.
mathml_elements = [
        'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
        'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
        'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
        'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
        'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
        'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
        'determinant', 'diff', 'divergence', 'divide', 'domain',
        'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
        'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
        'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
        'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
        'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
        'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
        'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
        'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
        'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
        'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
        'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
        'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
        'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
        'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
        'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
        'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
        'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
        'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
        'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
        'vectorproduct', 'xor'
]
424 // foreign_elements = [svg_elements..., mathml_elements...]
425 //normal_elements = All other allowed HTML elements are normal elements.
426
// Maps element name -> the namespace in which an element of that name is
// "special".
//
// NOTE: the table holds one namespace per name.  "title" is listed for both
// HTML and SVG; the SVG entry is applied last, so special_elements.title is
// NS_SVG.
special_elements = (function () {
        var table = {}
        var mark_special = function (names, namespace) {
                var list = names.split(' ')
                var i
                for (i = 0; i < list.length; ++i) {
                        table[list[i]] = namespace
                }
        }

        // HTML:
        mark_special([
                'address applet area article aside base basefont bgsound',
                'blockquote body br button caption center col colgroup dd',
                'details dir div dl dt embed fieldset figcaption figure',
                'footer form frame frameset h1 h2 h3 h4 h5 h6 head',
                'header hgroup hr html iframe img input isindex li link',
                'listing main marquee',
                'menu menuitem', // WHATWG adds these
                'meta nav noembed noframes noscript object ol p param',
                'plaintext pre script section select source style summary',
                'table tbody td template textarea tfoot th thead title',
                'tr track ul wbr xmp'
        ].join(' '), NS_HTML)

        // MathML:
        mark_special('mi mo mn ms mtext annotation-xml', NS_MATHML)

        // SVG:
        mark_special('foreignObject desc title', NS_SVG)

        return table
})()
458
// Tag names of the formatting elements; the names are the keys, every value
// is true.
formatting_elements = {
        a: true,
        b: true,
        big: true,
        code: true,
        em: true,
        font: true,
        i: true,
        nobr: true,
        s: true,
        small: true,
        strike: true,
        strong: true,
        tt: true,
        u: true
}
464
// Names of the MathML text integration point elements, mapped to the
// namespace they must be in.
mathml_text_integration = {
        mi: NS_MATHML,
        mo: NS_MATHML,
        mn: NS_MATHML,
        ms: NS_MATHML,
        mtext: NS_MATHML
}
// true if el's name is in the table above and el is in the namespace listed
// for that name
is_mathml_text_integration_point = function (el) {
        var required_namespace = mathml_text_integration[el.name]
        return required_namespace === el.namespace
}
// Returns true if element el is one of:
//   - a MathML annotation-xml element whose "encoding" attribute is
//     "text/html" or "application/xhtml+xml" (compared case-insensitively)
//   - an SVG foreignObject, desc or title element
// and false otherwise.
//
// el.attrs is consulted, so DON'T PASS A TOKEN
is_html_integration = function (el) {
        var encoding
        if (el.namespace === NS_MATHML) {
                if (el.name !== 'annotation-xml' || el.attrs.encoding == null) {
                        return false
                }
                encoding = el.attrs.encoding.toLowerCase()
                return encoding === 'text/html' || encoding === 'application/xhtml+xml'
        }
        if (el.namespace !== NS_SVG) {
                return false
        }
        return el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title'
}
492
// The three tables below map a tag name to the namespace (always NS_HTML)
// that an element of that name must be in to count as a member.

// heading elements
h_tags = {
        h1: NS_HTML,
        h2: NS_HTML,
        h3: NS_HTML,
        h4: NS_HTML,
        h5: NS_HTML,
        h6: NS_HTML
}

// table related elements used when foster parenting
foster_parenting_targets = {
        table: NS_HTML, tbody: NS_HTML, tfoot: NS_HTML, thead: NS_HTML,
        tr: NS_HTML
}

// elements whose end tag can be implied
end_tag_implied = {
        dd: NS_HTML, dt: NS_HTML, li: NS_HTML, option: NS_HTML,
        optgroup: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
        rtc: NS_HTML
}
517
// Returns true if element e is "special", i.e. special_elements lists e's
// name for e's namespace.
el_is_special = function (e) {
        // "title" is special in both HTML and SVG, but special_elements is
        // keyed by name and so can only hold one namespace for it.  Check
        // the HTML one here so neither is missed.
        if (e.name === 'title' && e.namespace === NS_HTML) {
                return true
        }
        return special_elements[e.name] === e.namespace
}

// The special elements that el_is_special_not_adp() excludes: (html)
// address, div and p
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }

// Returns true if el is special (see el_is_special) and is not an html
// address, div or p element.
el_is_special_not_adp = function (el) {
        return el_is_special(el) && adp_els[el.name] !== el.namespace
}
526
// Maps the all-lower-case spelling of an SVG element name to its proper
// mixed case spelling.  Only the proper spellings are listed; each key is
// the lower cased form of its value.
svg_name_fixes = (function (proper_names) {
        var fixes = {}
        var i
        for (i = 0; i < proper_names.length; ++i) {
                fixes[proper_names[i].toLowerCase()] = proper_names[i]
        }
        return fixes
})([
        'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animateColor',
        'animateMotion', 'animateTransform', 'clipPath', 'feBlend',
        'feColorMatrix', 'feComponentTransfer', 'feComposite',
        'feConvolveMatrix', 'feDiffuseLighting', 'feDisplacementMap',
        'feDistantLight', 'feDropShadow', 'feFlood', 'feFuncA', 'feFuncB',
        'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage', 'feMerge',
        'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
        'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence',
        'foreignObject', 'glyphRef', 'linearGradient', 'radialGradient',
        'textPath'
])
// Maps the all-lower-case spelling of an SVG attribute name to its proper
// mixed case spelling.  Only the proper spellings are listed; each key is
// the lower cased form of its value.
svg_attribute_fixes = (function (proper_names) {
        var fixes = {}
        var i
        for (i = 0; i < proper_names.length; ++i) {
                fixes[proper_names[i].toLowerCase()] = proper_names[i]
        }
        return fixes
})([
        'attributeName', 'attributeType', 'baseFrequency', 'baseProfile',
        'calcMode', 'clipPathUnits', 'contentScriptType', 'contentStyleType',
        'diffuseConstant', 'edgeMode', 'externalResourcesRequired',
        // WHATWG removes this: 'filterRes',
        'filterUnits', 'glyphRef', 'gradientTransform', 'gradientUnits',
        'kernelMatrix', 'kernelUnitLength', 'keyPoints', 'keySplines',
        'keyTimes', 'lengthAdjust', 'limitingConeAngle', 'markerHeight',
        'markerUnits', 'markerWidth', 'maskContentUnits', 'maskUnits',
        'numOctaves', 'pathLength', 'patternContentUnits',
        'patternTransform', 'patternUnits', 'pointsAtX', 'pointsAtY',
        'pointsAtZ', 'preserveAlpha', 'preserveAspectRatio',
        'primitiveUnits', 'refX', 'refY', 'repeatCount', 'repeatDur',
        'requiredExtensions', 'requiredFeatures', 'specularConstant',
        'specularExponent', 'spreadMethod', 'startOffset', 'stdDeviation',
        'stitchTiles', 'surfaceScale', 'systemLanguage', 'tableValues',
        'targetX', 'targetY', 'textLength', 'viewBox', 'viewTarget',
        'xChannelSelector', 'yChannelSelector', 'zoomAndPan'
])
// Maps the attribute names that get special treatment on foreign elements
// to their adjusted form: "prefix:local" becomes "prefix local", and plain
// "xmlns" stays as it is.
foreign_attr_fixes = {
        // xlink
        'xlink:actuate': 'xlink actuate', 'xlink:arcrole': 'xlink arcrole',
        'xlink:href': 'xlink href', 'xlink:role': 'xlink role',
        'xlink:show': 'xlink show', 'xlink:title': 'xlink title',
        'xlink:type': 'xlink type',
        // xml
        'xml:base': 'xml base', 'xml:lang': 'xml lang',
        'xml:space': 'xml space',
        // xmlns
        'xmlns': 'xmlns', 'xmlns:xlink': 'xmlns xlink'
}
// Fix the case of MathML attribute names on start tag token t, in place:
// "definitionurl" becomes "definitionURL".  Entries of t.attrs_a are
// [name, value] pairs; only names are touched.
adjust_mathml_attributes = function (t) {
        var n, pair
        for (n = 0; n < t.attrs_a.length; n += 1) {
                pair = t.attrs_a[n]
                if (pair[0] !== 'definitionurl') {
                        continue
                }
                pair[0] = 'definitionURL'
        }
}
// Fix the case of SVG attribute names on start tag token t, in place, using
// the svg_attribute_fixes table.  Entries of t.attrs_a are [name, value]
// pairs; only names are touched.
adjust_svg_attributes = function (t) {
        var i, a
        for (i = 0; i < t.attrs_a.length; ++i) {
                a = t.attrs_a[i]
                // Attribute names come from the document being parsed, so only
                // the table's own keys may match.  A plain lookup would also
                // find members inherited from Object.prototype (eg
                // "constructor") and replace the name with a function.
                if (Object.prototype.hasOwnProperty.call(svg_attribute_fixes, a[0])) {
                        a[0] = svg_attribute_fixes[a[0]]
                }
        }
}
// Rename the attributes of start tag token t that are listed in
// foreign_attr_fixes (eg "xlink:href" becomes "xlink href"), in place.
// Entries of t.attrs_a are [name, value] pairs; only names are touched.
adjust_foreign_attributes = function (t) {
        // fixfull
        var i, a
        for (i = 0; i < t.attrs_a.length; ++i) {
                a = t.attrs_a[i]
                // Attribute names come from the document being parsed, so only
                // the table's own keys may match.  A plain lookup would also
                // find members inherited from Object.prototype (eg
                // "constructor") and replace the name with a function.
                if (Object.prototype.hasOwnProperty.call(foreign_attr_fixes, a[0])) {
                        a[0] = foreign_attr_fixes[a[0]]
                }
        }
}
672
// _decode_named_char_ref()
//
// The table of named character references is _huge_, so it is not included
// in this file.  In a browser we get the browser to do the decoding (keeping
// the code size down); otherwise the decoder is loaded from a separate file.
if (context !== 'module') {
        // Browser: put "&name;" into a textarea and read back what the
        // browser made of it.
        decode_named_char_ref_el = document.createElement('textarea')
        _decode_named_char_ref = function (txt) {
                var markup = "&" + txt + ";"
                var result
                decode_named_char_ref_el.innerHTML = markup
                result = decode_named_char_ref_el.value
                // text that comes back unchanged was not a known reference
                return result === markup ? null : result
        }
} else {
        _decode_named_char_ref = require('./parser_no_browser_helper.js')
}
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Returns whatever _decode_named_char_ref() returns for that name.  Results
// are cached, so each distinct name is decoded at most once.
//
// The cache has no prototype, so that names taken from the document (eg
// "constructor") can never match something inherited from Object.prototype.
decode_named_char_ref_cache = Object.create(null)
decode_named_char_ref = function (txt) {
        var decoded
        if (txt in decode_named_char_ref_cache) {
                return decode_named_char_ref_cache[txt]
        }
        decoded = _decode_named_char_ref(txt)
        decode_named_char_ref_cache[txt] = decoded
        return decoded
}
706
707 parse_html = function (args_html, args) {
708         var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
// fall back to an empty options object when the caller supplied none
if (args == null) {
        args = {}
}

// Every piece of parser state starts out as null. The names are assigned
// here so that the helper functions defined below can close over them.

// input text and read position
txt = null
cur = null // index of next char in txt to be parsed

// document and tree-construction lists
doc = null
open_els = null // stack of open elements
afe = null // active formatting elements
template_ins_modes = null

// insertion modes
ins_mode = null
original_ins_mode = null

// tokenizer
tok_state = null
tok_cur_tag = null // partially parsed tag
temporary_buffer = null
pending_table_character_tokens = null

// flags
flag_scripting = null
flag_frameset_ok = null
flag_parsing = null
flag_foster_parenting = null
flag_fragment_parsing = null

// element pointers and fragment context
form_element_pointer = null
head_element_pointer = null
context_element = null
733
// Clears flag_parsing. Takes no arguments and returns nothing.
stop_parsing = function () { flag_parsing = false }
737
// Reports a parse error to the caller-supplied callback, if there is one.
// args.error_cb is invoked with the current value of cur; when no callback
// was supplied this is a no-op.
parse_error = function () {
        if (args.error_cb == null) {
                return
        }
        args.error_cb(cur)
}
743
// http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
// "Noah's Ark clause" but with three
//
// Adds new_el to the front of afe. Before doing so, entries are scanned from
// the front up to the first marker; when a third entry with the same name,
// namespace and attributes as new_el is found, that entry is removed.
afe_push = function (new_el) {
        var candidate, idx, same_attrs, seen
        // true when every enumerable attribute of `from` has an identical
        // (===) value in `to`
        same_attrs = function (from, to) {
                var key
                for (key in from) {
                        if (to[key] !== from[key]) {
                                return false
                        }
                }
                return true
        }
        seen = 0
        for (idx = 0; idx < afe.length; idx += 1) {
                candidate = afe[idx]
                if (candidate.type === TYPE_AFE_MARKER) {
                        break
                }
                if (candidate.name !== new_el.name || candidate.namespace !== new_el.namespace) {
                        continue
                }
                // attributes have to match in both directions
                if (!same_attrs(candidate.attrs, new_el.attrs)) {
                        continue
                }
                if (!same_attrs(new_el.attrs, candidate.attrs)) {
                        continue
                }
                seen += 1
                if (seen === 3) {
                        afe.splice(idx, 1)
                        break
                }
        }
        afe.unshift(new_el)
}
783
// Puts a freshly created marker (from new_afe_marker) at the front of afe.
afe_push_marker = function () {
        var marker
        marker = new_afe_marker()
        afe.unshift(marker)
}
787
788         // the functions below implement the Tree Construction algorithm
789         // http://www.w3.org/TR/html5/syntax.html#tree-construction
790
791         // But first... the helpers
// Returns true when any element in open_els is an HTML-namespace
// "template" element, false otherwise.
template_tag_is_open = function () {
        return open_els.some(function (node) {
                return node.name === 'template' && node.namespace === NS_HTML
        })
}
// Scans open_els from index 0 for an element named tag_name.
//
// tag_name:  element name to look for
// scope:     map of element name -> namespace; reaching one of these
//            elements before a match ends the search
// namespace: required namespace of the match, or null to accept any
//
// Returns true on the first match, false if a scope element is reached first
// or the list is exhausted.
is_in_scope_x = function (tag_name, scope, namespace) {
        var depth, node, ns_ok
        for (depth = 0; depth < open_els.length; depth += 1) {
                node = open_els[depth]
                ns_ok = namespace === null || namespace === node.namespace
                if (ns_ok && node.name === tag_name) {
                        return true
                }
                if (scope[node.name] === node.namespace) {
                        return false
                }
        }
        return false
}
// Same search as is_in_scope_x, but with two scope maps: an element listed
// in either scope or scope2 (name -> namespace) ends the search.
//
// Returns true when an element named tag_name (in `namespace`, or in any
// namespace when `namespace` is null) is found before any scope element.
is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
        var depth, node, ns_ok
        for (depth = 0; depth < open_els.length; depth += 1) {
                node = open_els[depth]
                ns_ok = namespace === null || namespace === node.namespace
                if (ns_ok && node.name === tag_name) {
                        return true
                }
                if (scope[node.name] === node.namespace || scope2[node.name] === node.namespace) {
                        return false
                }
        }
        return false
}
// Scope tables: element name -> namespace of the elements that end a scope
// search in is_in_scope_x / is_in_scope_x_y.
standard_scopers = {
        // HTML
        applet: NS_HTML,
        caption: NS_HTML,
        html: NS_HTML,
        table: NS_HTML,
        td: NS_HTML,
        th: NS_HTML,
        marquee: NS_HTML,
        object: NS_HTML,
        template: NS_HTML,
        // MathML
        mi: NS_MATHML,
        mo: NS_MATHML,
        mn: NS_MATHML,
        ms: NS_MATHML,
        mtext: NS_MATHML,
        'annotation-xml': NS_MATHML,
        // SVG
        foreignObject: NS_SVG,
        desc: NS_SVG,
        title: NS_SVG
}
// used together with standard_scopers for button scope
button_scopers = {
        button: NS_HTML
}
// used together with standard_scopers for list item scope
li_scopers = {
        ol: NS_HTML,
        ul: NS_HTML
}
// used on its own for table scope
table_scopers = {
        html: NS_HTML,
        table: NS_HTML,
        template: NS_HTML
}
// Scope check against standard_scopers.
// namespace is optional; when omitted (or null) any namespace matches.
is_in_scope = function (tag_name, namespace) {
        return is_in_scope_x(tag_name, standard_scopers, namespace == null ? null : namespace)
}
// Scope check against standard_scopers plus button_scopers.
// namespace is optional; when omitted (or null) any namespace matches.
is_in_button_scope = function (tag_name, namespace) {
        return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace == null ? null : namespace)
}
// Scope check against table_scopers only.
// namespace is optional; when omitted (or null) any namespace matches.
is_in_table_scope = function (tag_name, namespace) {
        return is_in_scope_x(tag_name, table_scopers, namespace == null ? null : namespace)
}
// aka is_in_list_item_scope
// Scope check against standard_scopers plus li_scopers.
// namespace is optional; when omitted (or null) any namespace matches.
is_in_li_scope = function (tag_name, namespace) {
        return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace == null ? null : namespace)
}
// "has an element in select scope"
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope
//
// Scans open_els from index 0 for an element named tag_name.
//
// tag_name:  element name to look for
// namespace: optional; required namespace of the match. When omitted or
//            null, any namespace matches.
//
// Select scope is ended by every element EXCEPT HTML optgroup and HTML
// option, so the search returns false as soon as it passes anything else
// without having found a match.
is_in_select_scope = function (tag_name, namespace) {
        var i, t, is_transparent
        if (namespace == null) {
                namespace = null
        }
        for (i = 0; i < open_els.length; ++i) {
                t = open_els[i]
                if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
                        return true
                }
                // only HTML optgroup/option elements may be stepped over
                is_transparent = t.namespace === NS_HTML && (t.name === 'optgroup' || t.name === 'option')
                if (!is_transparent) {
                        return false
                }
        }
        return false
}
// this checks for a particular element, not by name
// this requires a namespace match
//
// needle: the element object to look for (compared by identity)
//
// Returns true when needle is reached in open_els (scanning from index 0)
// before any element whose name/namespace pair is listed in
// standard_scopers; false otherwise.
el_is_in_scope = function (needle) {
        // el is declared locally: without this declaration the assignment
        // below would write to a variable outside this function
        var i, el
        for (i = 0; i < open_els.length; ++i) {
                el = open_els[i]
                if (el === needle) {
                        return true
                }
                if (standard_scopers[el.name] === el.namespace) {
                        return false
                }
        }
        return false
}
900
// names of the HTML elements that stop "clear the stack back to a table
// context"
clear_to_table_stopers = {
        'table': true,
        'template': true,
        'html': true
}
// "clear the stack back to a table context"
// https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context
//
// Removes elements from the front of open_els until the first one is an
// HTML-namespace table, template or html element.
//
// The lookup is compared with === true so that names inherited from
// Object.prototype (e.g. an element called "constructor") are not mistaken
// for stoppers, and the namespace is checked so that a same-named foreign
// element does not stop the clearing. The loop also ends if open_els runs
// empty instead of reading a property of undefined.
clear_stack_to_table_context = function () {
        var top
        while (open_els.length > 0) {
                top = open_els[0]
                if (top.namespace === NS_HTML && clear_to_table_stopers[top.name] === true) {
                        break
                }
                open_els.shift()
        }
}
// element name -> namespace of the elements that stop
// clear_stack_to_table_body_context
clear_to_table_body_stopers = {
        'tbody': NS_HTML,
        'tfoot': NS_HTML,
        'thead': NS_HTML,
        'template': NS_HTML,
        'html': NS_HTML
}
// Removes elements from the front of open_els until the first one is
// listed (name and namespace) in clear_to_table_body_stopers.
clear_stack_to_table_body_context = function () {
        var top
        top = open_els[0]
        while (clear_to_table_body_stopers[top.name] !== top.namespace) {
                open_els.shift()
                top = open_els[0]
        }
}
// names of the HTML elements that stop "clear the stack back to a table row
// context"
clear_to_table_row_stopers = {
        'tr': true,
        'template': true,
        'html': true
}
// "clear the stack back to a table row context"
// https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context
//
// Removes elements from the front of open_els until the first one is an
// HTML-namespace tr, template or html element.
//
// The lookup is compared with === true so that names inherited from
// Object.prototype (e.g. an element called "constructor") are not mistaken
// for stoppers, and the namespace is checked so that a same-named foreign
// element does not stop the clearing. The loop also ends if open_els runs
// empty instead of reading a property of undefined.
clear_stack_to_table_row_context = function () {
        var top
        while (open_els.length > 0) {
                top = open_els[0]
                if (top.namespace === NS_HTML && clear_to_table_row_stopers[top.name] === true) {
                        break
                }
                open_els.shift()
        }
}
// Removes entries from the front of afe up to and including the first
// marker. If afe runs out before a marker is found it simply stops
// (this happens in fragment case, ?spec error).
clear_afe_to_marker = function () {
        while (afe.length > 0) {
                if (afe.shift().type === TYPE_AFE_MARKER) {
                        return
                }
        }
}
954
// 8.2.3.1 ...
// http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
//
// Walks open_els starting at index 0 (the current node) and sets ins_mode
// according to the first element that determines it. Step numbers in the
// comments refer to the algorithm at the link above.
reset_ins_mode = function () {
        var above, depth, is_last, node
        // 1, 2. last starts out false; node starts as the current node
        is_last = false
        depth = 0
        node = open_els[depth]
        for (;;) {
                // 3. on reaching the end of open_els: remember that, and in the
                // fragment case examine the context element instead
                if (depth === open_els.length - 1) {
                        is_last = true
                        if (flag_fragment_parsing) {
                                node = context_element
                        }
                }
                // steps 4 - 16 only ever match HTML-namespace elements
                if (node.namespace === NS_HTML) {
                        switch (node.name) {
                        case 'select':
                                // 4. look at the elements further along in open_els:
                                // a template ends the search, a table means
                                // "in select in table"
                                if (!is_last) {
                                        for (above = depth + 1; above < open_els.length; above += 1) {
                                                if (open_els[above].namespace !== NS_HTML) {
                                                        continue
                                                }
                                                if (open_els[above].name === 'template') {
                                                        break
                                                }
                                                if (open_els[above].name === 'table') {
                                                        ins_mode = ins_mode_in_select_in_table
                                                        return
                                                }
                                        }
                                }
                                // 4.8 done
                                ins_mode = ins_mode_in_select
                                return
                        case 'td':
                        case 'th':
                                // 5. only when last is false; otherwise step 17 applies
                                if (!is_last) {
                                        ins_mode = ins_mode_in_cell
                                        return
                                }
                                break
                        case 'tr':
                                // 6.
                                ins_mode = ins_mode_in_row
                                return
                        case 'tbody':
                        case 'thead':
                        case 'tfoot':
                                // 7.
                                ins_mode = ins_mode_in_table_body
                                return
                        case 'caption':
                                // 8.
                                ins_mode = ins_mode_in_caption
                                return
                        case 'colgroup':
                                // 9.
                                ins_mode = ins_mode_in_column_group
                                return
                        case 'table':
                                // 10.
                                ins_mode = ins_mode_in_table
                                return
                        case 'template':
                                // 11. the current template insertion mode
                                ins_mode = template_ins_modes[0]
                                return
                        case 'head':
                                // 12. last is true: "in body" ("in body"! not "in
                                // head"!) (fragment case)
                                // 13. last is false: "in head"
                                ins_mode = is_last ? ins_mode_in_body : ins_mode_in_head
                                return
                        case 'body':
                                // 14.
                                ins_mode = ins_mode_in_body
                                return
                        case 'frameset':
                                // 15. (fragment case)
                                ins_mode = ins_mode_in_frameset
                                return
                        case 'html':
                                // 16. "before head" when the head element pointer is
                                // null (fragment case), "after head" otherwise
                                ins_mode = head_element_pointer === null ? ins_mode_before_head : ins_mode_after_head
                                return
                        }
                }
                // 17. (fragment case)
                if (is_last) {
                        ins_mode = ins_mode_in_body
                        return
                }
                // 18, 19. move on to the next element in open_els and loop
                depth += 1
                node = open_els[depth]
        }
}
1105
1106         // 8.2.3.2
1107
// http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
//
// Returns context_element when open_els holds exactly one element and
// flag_fragment_parsing is set; otherwise returns open_els[0].
adjusted_current_node = function () {
        var use_context
        use_context = open_els.length === 1 && flag_fragment_parsing
        return use_context ? context_element : open_els[0]
}
1115
// http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
// this implementation is structured (mostly) as described at the link above.
// capitalized comments are the "labels" described at the link above.
//
// Re-inserts (via insert_html_element) the entries at the front of afe that
// are neither markers nor present in open_els, replacing each afe entry with
// the newly inserted element.
reconstruct_afe = function () {
        var i, is_settled
        // an entry needs no work if it is a marker or is already in open_els
        is_settled = function (entry) {
                return entry.type === TYPE_AFE_MARKER || open_els.indexOf(entry) >= 0
        }
        if (afe.length === 0 || is_settled(afe[0])) {
                return
        }
        // Rewind: move away from the front for as long as the next entry still
        // needs to be created
        i = 0
        while (i + 1 < afe.length && !is_settled(afe[i + 1])) {
                i += 1
        }
        // Create (and Advance): work back toward the front of the list
        for (; i >= 0; i -= 1) {
                afe[i] = insert_html_element(afe[i].token)
        }
}
1149
1150         // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1151         // adoption agency algorithm
1152         // overview here:
1153         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1154         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1155         //   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1156         adoption_agency = function (subject) {
1157                 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, s, t, u, w, y, z
1158 // this block implements the W3C spec
1159 //              # 1. If the current node is an HTML element whose tag name is subject,
1160 //              # then run these substeps:
1161 //              #
1162 //              # 1. Let element be the current node.
1163 //              #
1164 //              # 2. Pop element off the stack of open elements.
1165 //              #
1166 //              # 3. If element is also in the list of active formatting elements,
1167 //              # remove the element from the list.
1168 //              #
1169 //              # 4. Abort the adoption agency algorithm.
1170 //              if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1171 //                      el = open_els.shift()
1172 //                      # remove it from the list of active formatting elements (if found)
1173 //                      for t, i in afe
1174 //                              if t is el
1175 //                                      afe.splice i, 1
1176 //                                      break
1177 //                      return
1178 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1179                 // If the current node is an HTML element whose tag name is subject, and
1180                 // the current node is not in the list of active formatting elements,
1181                 // then pop the current node off the stack of open elements, and abort
1182                 // these steps.
1183                 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1184                         // remove it from the list of active formatting elements (if found)
1185                         in_afe = false
1186                         for (i = 0; i < afe.length; ++i) {
1187                                 el = afe[i]
1188                                 if (el === open_els[0]) {
1189                                         in_afe = true
1190                                         break
1191                                 }
1192                         }
1193                         if (!in_afe) {
1194                                 open_els.shift()
1195                                 return
1196                         }
1197                         // fall through
1198                 }
1199 // END WHATWG
1200                 outer = 0
1201                 while (true) {
1202                         if (outer >= 8) {
1203                                 return
1204                         }
1205                         outer += 1
1206                         // 5. Let formatting element be the last element in the list of
1207                         // active formatting elements that: is between the end of the list
1208                         // and the last scope marker in the list, if any, or the start of
1209                         // the list otherwise, and  has the tag name subject.
1210                         fe = null
1211                         for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1212                                 t = afe[fe_of_afe]
1213                                 if (t.type === TYPE_AFE_MARKER) {
1214                                         break
1215                                 }
1216                                 if (t.name === subject) {
1217                                         fe = t
1218                                         break
1219                                 }
1220                         }
1221                         // If there is no such element, then abort these steps and instead
1222                         // act as described in the "any other end tag" entry above.
1223                         if (fe === null) {
1224                                 in_body_any_other_end_tag(subject)
1225                                 return
1226                         }
1227                         // 6. If formatting element is not in the stack of open elements,
1228                         // then this is a parse error; remove the element from the list, and
1229                         // abort these steps.
1230                         in_open_els = false
1231                         for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1232                                 t = open_els[fe_of_open_els]
1233                                 if (t === fe) {
1234                                         in_open_els = true
1235                                         break
1236                                 }
1237                         }
1238                         if (!in_open_els) {
1239                                 parse_error()
1240                                 // "remove it from the list" must mean afe, since it's not in open_els
1241                                 afe.splice(fe_of_afe, 1)
1242                                 return
1243                         }
1244                         // 7. If formatting element is in the stack of open elements, but
1245                         // the element is not in scope, then this is a parse error; abort
1246                         // these steps.
1247                         if (!el_is_in_scope(fe)) {
1248                                 parse_error()
1249                                 return
1250                         }
1251                         // 8. If formatting element is not the current node, this is a parse
1252                         // error. (But do not abort these steps.)
1253                         if (open_els[0] !== fe) {
1254                                 parse_error()
1255                                 // continue
1256                         }
1257                         // 9. Let furthest block be the topmost node in the stack of open
1258                         // elements that is lower in the stack than formatting element, and
1259                         // is an element in the special category. There might not be one.
1260                         fb = null
1261                         fb_of_open_els = null
1262                         for (i = 0; i < open_els.length; ++i) {
1263                                 t = open_els[i]
1264                                 if (t === fe) {
1265                                         break
1266                                 }
1267                                 if (el_is_special(t)) {
1268                                         fb = t
1269                                         fb_of_open_els = i
1270                                         // and continue, to see if there's one that's more "topmost"
1271                                 }
1272                         }
1273                         // 10. If there is no furthest block, then the UA must first pop all
1274                         // the nodes from the bottom of the stack of open elements, from the
1275                         // current node up to and including formatting element, then remove
1276                         // formatting element from the list of active formatting elements,
1277                         // and finally abort these steps.
1278                         if (fb === null) {
1279                                 while (true) {
1280                                         t = open_els.shift()
1281                                         if (t === fe) {
1282                                                 afe.splice(fe_of_afe, 1)
1283                                                 return
1284                                         }
1285                                 }
1286                         }
1287                         // 11. Let common ancestor be the element immediately above
1288                         // formatting element in the stack of open elements.
1289                         ca = open_els[fe_of_open_els + 1] // common ancestor
1290
1291                         node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1292                         // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1293                         bookmark = new_aaa_bookmark()
1294                         for (i = 0; i < afe.length; ++i) {
1295                                 t = afe[i]
1296                                 if (t === fe) {
1297                                         afe.splice(i, 0, bookmark)
1298                                         break
1299                                 }
1300                         }
1301                         node = last_node = fb
1302                         inner = 0
1303                         while (true) {
1304                                 inner += 1
1305                                 // 3. Let node be the element immediately above node in the
1306                                 // stack of open elements, or if node is no longer in the stack
1307                                 // of open elements (e.g. because it got removed by this
1308                                 // algorithm), the element that was immediately above node in
1309                                 // the stack of open elements before node was removed.
1310                                 node_next = null
1311                                 for (i = 0; i < open_els.length; ++i) {
1312                                         t = open_els[i]
1313                                         if (t === node) {
1314                                                 node_next = open_els[i + 1]
1315                                                 break
1316                                         }
1317                                 }
1318                                 node = node_next != null ? node_next : node_above
1319                                 // TODO make sure node_above gets re-set if/when node is removed from open_els
1320
1321                                 // 4. If node is formatting element, then go to the next step in
1322                                 // the overall algorithm.
1323                                 if (node === fe) {
1324                                         break
1325                                 }
1326                                 // 5. If inner loop counter is greater than three and node is in
1327                                 // the list of active formatting elements, then remove node from
1328                                 // the list of active formatting elements.
1329                                 node_in_afe = false
1330                                 for (i = 0; i < afe.length; ++i) {
1331                                         t = afe[i]
1332                                         if (t === node) {
1333                                                 if (inner > 3) {
1334                                                         afe.splice(i, 1)
1335                                                 } else {
1336                                                         node_in_afe = true
1337                                                 }
1338                                                 break
1339                                         }
1340                                 }
1341                                 // 6. If node is not in the list of active formatting elements,
1342                                 // then remove node from the stack of open elements and then go
1343                                 // back to the step labeled inner loop.
1344                                 if (!node_in_afe) {
1345                                         for (i = 0; i < open_els.length; ++i) {
1346                                                 t = open_els[i]
1347                                                 if (t === node) {
1348                                                         node_above = open_els[i + 1]
1349                                                         open_els.splice(i, 1)
1350                                                         break
1351                                                 }
1352                                         }
1353                                         continue
1354                                 }
1355                                 // 7. create an element for the token for which the element node
1356                                 // was created, in the HTML namespace, with common ancestor as
1357                                 // the intended parent; replace the entry for node in the list
1358                                 // of active formatting elements with an entry for the new
1359                                 // element, replace the entry for node in the stack of open
1360                                 // elements with an entry for the new element, and let node be
1361                                 // the new element.
1362                                 new_node = token_to_element(node.token, NS_HTML, ca)
1363                                 for (i = 0; i < afe.length; ++i) {
1364                                         t = afe[i]
1365                                         if (t === node) {
1366                                                 afe[i] = new_node
1367                                                 break
1368                                         }
1369                                 }
1370                                 for (i = 0; i < open_els.length; ++i) {
1371                                         t = open_els[i]
1372                                         if (t === node) {
1373                                                 node_above = open_els[i + 1]
1374                                                 open_els[i] = new_node
1375                                                 break
1376                                         }
1377                                 }
1378                                 node = new_node
1379                                 // 8. If last node is furthest block, then move the
1380                                 // aforementioned bookmark to be immediately after the new node
1381                                 // in the list of active formatting elements.
1382                                 if (last_node === fb) {
1383                                         for (i = 0; i < afe.length; ++i) {
1384                                                 t = afe[i]
1385                                                 if (t === bookmark) {
1386                                                         afe.splice(i, 1)
1387                                                         break
1388                                                 }
1389                                         }
1390                                         for (i = 0; i < afe.length; ++i) {
1391                                                 t = afe[i]
1392                                                 if (t === node) {
1393                                                         // "after" means lower
1394                                                         afe.splice(i, 0, bookmark) // "after as <-
1395                                                         break
1396                                                 }
1397                                         }
1398                                 }
1399                                 // 9. Insert last node into node, first removing it from its
1400                                 // previous parent node if any.
1401                                 if (last_node.parent != null) {
1402                                         for (i = 0; i < last_node.parent.children.length; ++i) {
1403                                                 c = last_node.parent.children[i]
1404                                                 if (c === last_node) {
1405                                                         last_node.parent.children.splice(i, 1)
1406                                                         break
1407                                                 }
1408                                         }
1409                                 }
1410                                 node.children.push(last_node)
1411                                 last_node.parent = node
1412                                 // 10. Let last node be node.
1413                                 last_node = node
1414                                 // 11. Return to the step labeled inner loop.
1415                         }
1416                         // 14. Insert whatever last node ended up being in the previous step
1417                         // at the appropriate place for inserting a node, but using common
1418                         // ancestor as the override target.
1419
1420                         // In the case where fe is immediately followed by fb:
1421                         //   * inner loop exits out early (node==fe)
1422                         //   * last_node is fb
1423                         //   * last_node is still in the tree (not a duplicate)
1424                         if (last_node.parent != null) {
1425                                 for (i = 0; i < last_node.parent.children.length; ++i) {
1426                                         c = last_node.parent.children[i]
1427                                         if (c === last_node) {
1428                                                 last_node.parent.children.splice(i, 1)
1429                                                 break
1430                                         }
1431                                 }
1432                         }
1433                         // can't use standard insert token thing, because it's already in
1434                         // open_els and must stay at it's current position in open_els
1435                         dest = adjusted_insertion_location(ca)
1436                         dest[0].children.splice(dest[1], 0, last_node)
1437                         last_node.parent = dest[0]
1438                         // 15. Create an element for the token for which formatting element
1439                         // was created, in the HTML namespace, with furthest block as the
1440                         // intended parent.
1441                         new_element = token_to_element(fe.token, NS_HTML, fb)
1442                         // 16. Take all of the child nodes of furthest block and append them
1443                         // to the element created in the last step.
1444                         while (fb.children.length) {
1445                                 t = fb.children.shift()
1446                                 t.parent = new_element
1447                                 new_element.children.push(t)
1448                         }
1449                         // 17. Append that new element to furthest block.
1450                         new_element.parent = fb
1451                         fb.children.push(new_element)
1452                         // 18. Remove formatting element from the list of active formatting
1453                         // elements, and insert the new element into the list of active
1454                         // formatting elements at the position of the aforementioned
1455                         // bookmark.
1456                         for (i = 0; i < afe.length; ++i) {
1457                                 t = afe[i]
1458                                 if (t === fe) {
1459                                         afe.splice(i, 1)
1460                                         break
1461                                 }
1462                         }
1463                         for (i = 0; i < afe.length; ++i) {
1464                                 t = afe[i]
1465                                 if (t === bookmark) {
1466                                         afe[i] = new_element
1467                                         break
1468                                 }
1469                         }
1470                         // 19. Remove formatting element from the stack of open elements,
1471                         // and insert the new element into the stack of open elements
1472                         // immediately below the position of furthest block in that stack.
1473                         for (i = 0; i < open_els.length; ++i) {
1474                                 t = open_els[i]
1475                                 if (t === fe) {
1476                                         open_els.splice(i, 1)
1477                                         break
1478                                 }
1479                         }
1480                         for (i = 0; i < open_els.length; ++i) {
1481                                 t = open_els[i]
1482                                 if (t === fb) {
1483                                         open_els.splice(i, 0, new_element)
1484                                         break
1485                                 }
1486                         }
1487                         // 20. Jump back to the step labeled outer loop.
1488                 }
1489         }
1490
1491         // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1492         close_p_element = function () {
1493                 generate_implied_end_tags('p') // arg is exception
1494                 if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
1495                         parse_error()
1496                 }
1497                 while (open_els.length > 1) { // just in case
1498                         el = open_els.shift()
1499                         if (el.name === 'p' && el.namespace === NS_HTML) {
1500                                 return
1501                         }
1502                 }
1503         }
1504         close_p_if_in_button_scope = function () {
1505                 if (is_in_button_scope('p', NS_HTML)) {
1506                         close_p_element()
1507                 }
1508         }
1509
1510         // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1511         // aka insert_a_character = function (t) {
1512         insert_character = function (t) {
1513                 var dest, prev
1514                 dest = adjusted_insertion_location()
1515                 // fixfull check for Document node
1516                 if (dest[1] > 0) {
1517                         prev = dest[0].children[dest[1] - 1]
1518                         if (prev.type === TYPE_TEXT) {
1519                                 prev.text += t.text
1520                                 return
1521                         }
1522                 }
1523                 dest[0].children.splice(dest[1], 0, t)
1524                 t.parent = dest[0]
1525         }
1526
1527         // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1528         process_token = function (t) {
1529                 var acn
1530                 acn = adjusted_current_node()
1531                 if (acn == null) {
1532                         ins_mode(t)
1533                         return
1534                 }
1535                 if (acn.namespace === NS_HTML) {
1536                         ins_mode(t)
1537                         return
1538                 }
1539                 if (is_mathml_text_integration_point(acn)) {
1540                         if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
1541                                 ins_mode(t)
1542                                 return
1543                         }
1544                         if (t.type === TYPE_TEXT) {
1545                                 ins_mode(t)
1546                                 return
1547                         }
1548                 }
1549                 if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
1550                         ins_mode(t)
1551                         return
1552                 }
1553                 if (is_html_integration(acn)) {
1554                         if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
1555                                 ins_mode(t)
1556                                 return
1557                         }
1558                 }
1559                 if (t.type === TYPE_EOF) {
1560                         ins_mode(t)
1561                         return
1562                 }
1563                 in_foreign_content(t)
1564         }
1565
1566         // 8.2.5.1
1567         // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1568         // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1569         adjusted_insertion_location = function (override_target) {
1570                 var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, target, target_i
1571                 // 1. If there was an override target specified, then let target be the
1572                 // override target.
1573                 if (override_target != null) {
1574                         target = override_target
1575                 } else { // Otherwise, let target be the current node.
1576                         target = open_els[0]
1577                 }
1578                 // 2. Determine the adjusted insertion location using the first matching
1579                 // steps from the following list:
1580                 //
1581                 // If foster parenting is enabled and target is a table, tbody, tfoot,
1582                 // thead, or tr element Foster parenting happens when content is
1583                 // misnested in tables.
1584                 if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
1585                         while (true) { // once. this is here so we can ``break`` to "abort these substeps"
1586                                 // 1. Let last template be the last template element in the
1587                                 // stack of open elements, if any.
1588                                 last_template = null
1589                                 last_template_i = null
1590                                 for (i = 0; i < open_els.length; ++i) {
1591                                         el = open_els[i]
1592                                         if (el.name === 'template' && el.namespace === NS_HTML) {
1593                                                 last_template = el
1594                                                 last_template_i = i
1595                                                 break
1596                                         }
1597                                 }
1598                                 // 2. Let last table be the last table element in the stack of
1599                                 // open elements, if any.
1600                                 last_table = null
1601                                 last_table_i
1602                                 for (i = 0; i < open_els.length; ++i) {
1603                                         el = open_els[i]
1604                                         if (el.name === 'table' && el.namespace === NS_HTML) {
1605                                                 last_table = el
1606                                                 last_table_i = i
1607                                                 break
1608                                         }
1609                                 }
1610                                 // 3. If there is a last template and either there is no last
1611                                 // table, or there is one, but last template is lower (more
1612                                 // recently added) than last table in the stack of open
1613                                 // elements, then: let adjusted insertion location be inside
1614                                 // last template's template contents, after its last child (if
1615                                 // any), and abort these substeps.
1616                                 if (last_template && (last_table === null || last_template_i < last_table_i)) {
1617                                         target = last_template // fixfull should be it's contents
1618                                         target_i = target.children.length
1619                                         break
1620                                 }
1621                                 // 4. If there is no last table, then let adjusted insertion
1622                                 // location be inside the first element in the stack of open
1623                                 // elements (the html element), after its last child (if any),
1624                                 // and abort these substeps. (fragment case)
1625                                 if (last_table === null) {
1626                                         // this is odd
1627                                         target = open_els[open_els.length - 1]
1628                                         target_i = target.children.length
1629                                         break
1630                                 }
1631                                 // 5. If last table has a parent element, then let adjusted
1632                                 // insertion location be inside last table's parent element,
1633                                 // immediately before last table, and abort these substeps.
1634                                 if (last_table.parent != null) {
1635                                         for (i = 0; i < last_table.parent.children.length; ++i) {
1636                                                 c = last_table.parent.children[i]
1637                                                 if (c === last_table) {
1638                                                         target = last_table.parent
1639                                                         target_i = i
1640                                                         break
1641                                                 }
1642                                         }
1643                                         break
1644                                 }
1645                                 // 6. Let previous element be the element immediately above last
1646                                 // table in the stack of open elements.
1647                                 //
1648                                 // huh? how could it not have a parent?
1649                                 previous_element = open_els[last_table_i + 1]
1650                                 // 7. Let adjusted insertion location be inside previous
1651                                 // element, after its last child (if any).
1652                                 target = previous_element
1653                                 target_i = target.children.length
1654                                 // Note: These steps are involved in part because it's possible
1655                                 // for elements, the table element in this case in particular,
1656                                 // to have been moved by a script around in the DOM, or indeed
1657                                 // removed from the DOM entirely, after the element was inserted
1658                                 // by the parser.
1659                                 break // don't really loop
1660                         }
1661                 } else {
1662                         // Otherwise Let adjusted insertion location be inside target, after
1663                         // its last child (if any).
1664                         target_i = target.children.length
1665                 }
1666
1667                 // 3. If the adjusted insertion location is inside a template element,
1668                 // let it instead be inside the template element's template contents,
1669                 // after its last child (if any).
1670                 // fixfull (template)
1671
1672                 // 4. Return the adjusted insertion location.
1673                 return [target, target_i]
1674         }
1675
1676         // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1677         // aka create_an_element_for_token
1678         token_to_element = function (t, namespace, intended_parent) {
1679                 var a, attrs, el, i
1680                 // convert attributes into a hash
1681                 attrs = {}
1682                 for (i = 0; i < t.attrs_a.length; ++i) {
1683                         a = t.attrs_a[i]
1684                         attrs[a[0]] = a[1] // TODO check what to do with dupilcate attrs
1685                 }
1686                 el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
1687
1688                 // TODO 2. If the newly created element has an xmlns attribute in the
1689                 // XMLNS namespace whose value is not exactly the same as the element's
1690                 // namespace, that is a parse error. Similarly, if the newly created
1691                 // element has an xmlns:xlink attribute in the XMLNS namespace whose
1692                 // value is not the XLink Namespace, that is a parse error.
1693
1694                 // fixfull: the spec says stuff about form pointers and ownerDocument
1695
1696                 return el
1697         }
1698
1699         // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1700         insert_foreign_element = function (token, namespace) {
1701                 var ail, ail_el, ail_i, el
1702                 ail = adjusted_insertion_location()
1703                 ail_el = ail[0]
1704                 ail_i = ail[1]
1705                 el = token_to_element(token, namespace, ail_el)
1706                 // TODO skip this next step if it's broken (eg ail_el is document with child already)
1707                 el.parent = ail_el
1708                 ail_el.children.splice(ail_i, 0, el)
1709                 open_els.unshift(el)
1710                 return el
1711         }
1712         // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1713         insert_html_element = function (token) {
1714                 return insert_foreign_element(token, NS_HTML)
1715         }
1716
1717         // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1718         // position should be [node, index_within_children]
1719         insert_comment = function (t, position) {
1720                 if (position == null) {
1721                         position = adjusted_insertion_location()
1722                 }
1723                 position[0].children.splice(position[1], 0, t)
1724                 return
1725         }
1726
1727         // 8.2.5.2
1728         // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1729         parse_generic_raw_text = function (t) {
1730                 insert_html_element(t)
1731                 tok_state = tok_state_rawtext
1732                 original_ins_mode = ins_mode
1733                 ins_mode = ins_mode_text
1734         }
1735         parse_generic_rcdata_text = function (t) {
1736                 insert_html_element(t)
1737                 tok_state = tok_state_rcdata
1738                 original_ins_mode = ins_mode
1739                 ins_mode = ins_mode_text
1740         }
1741
1742         // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1743         // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1744         generate_implied_end_tags = function (except) {
1745                 if (except == null) {
1746                         except = null
1747                 }
1748                 while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
1749                         open_els.shift()
1750                 }
1751         }
1752
1753         // 8.2.5.4 The rules for parsing tokens in HTML content
1754         // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1755
1756         // 8.2.5.4.1 The "initial" insertion mode
1757         // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
// Returns true when DOCTYPE token `t` calls for full quirks mode: its
// force-quirks flag is set, its name is not "html", or its public/system
// identifier matches one of the values tested below. Identifiers are
// lowercased before they are compared.
is_quirks_yes_doctype = function (t) {
        var public_id, k, prefix
        if (t.flag('force-quirks') || t.name !== 'html') {
                return true
        }
        if (t.public_identifier != null) {
                public_id = t.public_identifier.toLowerCase()
                // a public identifier that starts with any listed prefix
                for (k = 0; k < quirks_yes_pi_prefixes.length; ++k) {
                        prefix = quirks_yes_pi_prefixes[k]
                        if (public_id.slice(0, prefix.length) === prefix) {
                                return true
                        }
                }
                // public identifiers that have to match in full
                switch (public_id) {
                case '-//w3o//dtd w3 html strict 3.0//en//':
                case '-/w3c/dtd html 4.0 transitional/en':
                case 'html':
                        return true
                }
        }
        if (t.system_identifier != null) {
                return t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
        }
        if (t.public_identifier != null) {
                // these two prefixes only count when the system identifier is missing
                return public_id.slice(0, 32) === '-//w3c//dtd html 4.01 frameset//' || public_id.slice(0, 36) === '-//w3c//dtd html 4.01 transitional//'
        }
        return false
}
// Returns true when DOCTYPE token `t` calls for limited-quirks mode. Only the
// public identifier (lowercased) is inspected; the system identifier matters
// solely through being present or absent.
is_quirks_limited_doctype = function (t) {
        var public_id
        if (t.public_identifier == null) {
                return false
        }
        public_id = t.public_identifier.toLowerCase()
        // XHTML 1.0 Frameset/Transitional: limited quirks regardless of system identifier
        if (public_id.slice(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//') {
                return true
        }
        if (public_id.slice(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
                return true
        }
        // HTML 4.01 Frameset/Transitional: limited quirks only with a system identifier
        if (t.system_identifier == null) {
                return false
        }
        return public_id.slice(0, 32) === '-//w3c//dtd html 4.01 frameset//' || public_id.slice(0, 36) === '-//w3c//dtd html 4.01 transitional//'
}
// "initial" insertion mode: handles token `t` before anything has been added
// to the document. Whitespace is dropped, comments and doctypes are appended
// to doc.children, and any other token puts the document into quirks mode and
// is reprocessed by the "before html" mode.
ins_mode_initial = function (t) {
        if (is_space_tok(t)) {
                return
        }
        switch (t.type) {
        case TYPE_COMMENT:
                // ?fixfull
                doc.children.push(t)
                return
        case TYPE_DOCTYPE:
                // fixfull syntax error from first paragraph and following bullets
                // fixfull set doc.doctype
                // fixfull is the "not an iframe srcdoc" thing relevant?
                if (is_quirks_yes_doctype(t)) {
                        doc.flag('quirks mode', QUIRKS_YES)
                } else if (is_quirks_limited_doctype(t)) {
                        doc.flag('quirks mode', QUIRKS_LIMITED)
                }
                doc.children.push(t)
                ins_mode = ins_mode_before_html
                return
        }
        // Anything else: no doctype was seen, so this is a parse error and
        // the document is flagged as quirks mode.
        // fixfull not iframe srcdoc?
        parse_error()
        doc.flag('quirks mode', QUIRKS_YES)
        ins_mode = ins_mode_before_html
        process_token(t)
}
1834
1835         // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
// "before html" insertion mode: handles token `t` while the document has no
// root element yet. Doctypes and most end tags are parse errors and ignored,
// comments are appended to doc.children, whitespace is dropped. An <html>
// start tag becomes the root element; any other token causes a root <html>
// element to be created implicitly, after which the token is reprocessed in
// the "before head" mode.
ins_mode_before_html = function (t) {
        // `el` must be local: without this declaration the assignments below
        // write to whatever `el` binding happens to exist in an outer scope.
        var el
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_COMMENT) {
                doc.children.push(t)
                return
        }
        if (is_space_tok(t)) {
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                el = token_to_element(t, NS_HTML, doc)
                doc.children.push(el)
                el.document = doc
                open_els.unshift(el)
                // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
                ins_mode = ins_mode_before_head
                return
        }
        if (t.type === TYPE_END_TAG) {
                if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
                        // fall through to "anything else"
                } else {
                        parse_error()
                        return
                }
        }
        // Anything else: create the root element from a synthetic <html>
        // start tag, then hand the current token to the next mode.
        el = token_to_element(new_open_tag('html'), NS_HTML, doc)
        doc.children.push(el)
        el.document = doc
        open_els.unshift(el)
        // ?fixfull browsing context
        ins_mode = ins_mode_before_head
        process_token(t)
}
1874
1875         // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
// "before head" insertion mode: handles token `t` after the root element
// exists but before a head element does. A <head> start tag is inserted and
// remembered in head_element_pointer; any token not handled explicitly causes
// a head element to be inserted implicitly and is then reprocessed in the
// "in head" mode.
ins_mode_before_head = function (t) {
        if (is_space_tok(t)) {
                return
        }
        switch (t.type) {
        case TYPE_COMMENT:
                insert_comment(t)
                return
        case TYPE_DOCTYPE:
                parse_error()
                return
        case TYPE_START_TAG:
                if (t.name === 'html') {
                        // handled by the "in body" rules
                        ins_mode_in_body(t)
                        return
                }
                if (t.name === 'head') {
                        head_element_pointer = insert_html_element(t)
                        ins_mode = ins_mode_in_head
                        return
                }
                break
        case TYPE_END_TAG:
                // only </head>, </body>, </html> and </br> reach "anything else"
                if (!(t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br')) {
                        parse_error()
                        return
                }
                break
        }
        // Anything else
        head_element_pointer = insert_html_element(new_open_tag('head'))
        ins_mode = ins_mode_in_head
        process_token(t)
}
1913
1914         // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
// "Anything else" branch of the "in head" mode, kept as its own function so
// the flow control can mirror the spec: leave the head element, switch to the
// "after head" mode and reprocess token `t` there.
ins_mode_in_head_else = function (t) {
        ins_mode = ins_mode_after_head
        // the spec says the current node is the head element at this point
        open_els.shift()
        process_token(t)
}
// "in head" insertion mode: handles token `t` while the head element is the
// current node. Other insertion modes in this file also call it directly for
// tokens the spec says to process "using the rules for the in head insertion
// mode". Tokens with no specific rule go to ins_mode_in_head_else(), which
// pops the current node and reprocesses the token in the "after head" mode.
ins_mode_in_head = function (t) {
        var ail, el
        if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
                // insert the element and pop it again right away
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'meta') {
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                // fixfull encoding stuff
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'title') {
                parse_generic_rcdata_text(t)
                return
        }
        if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
                parse_generic_raw_text(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
                insert_html_element(t)
                ins_mode = ins_mode_in_head_noscript
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'script') {
                // the script element is created and inserted by hand (not via
                // insert_html_element) so it can be flagged before insertion
                ail = adjusted_insertion_location()
                el = token_to_element(t, NS_HTML, ail)
                el.flag('parser-inserted', true)
                // fixfull fragment case
                ail[0].children.splice(ail[1], 0, el)
                open_els.unshift(el)
                tok_state = tok_state_script_data
                original_ins_mode = ins_mode // make sure orig... is defined
                ins_mode = ins_mode_text
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'head') {
                open_els.shift() // will be a head element... spec says so
                ins_mode = ins_mode_after_head
                return
        }
        if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
                ins_mode_in_head_else(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'template') {
                insert_html_element(t)
                afe_push_marker()
                flag_frameset_ok = false
                ins_mode = ins_mode_in_template
                template_ins_modes.unshift(ins_mode_in_template)
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'template') {
                if (template_tag_is_open()) {
                        // Implied end tags must be generated before the
                        // current node is checked; otherwise an element that
                        // would have been closed implicitly is reported as a
                        // parse error.
                        generate_implied_end_tags()
                        if (open_els[0].name !== 'template') {
                                parse_error()
                        }
                        // pop up to and including the html template element
                        while (true) {
                                el = open_els.shift()
                                if (el.name === 'template' && el.namespace === NS_HTML) {
                                        break
                                }
                        }
                        clear_afe_to_marker()
                        template_ins_modes.shift()
                        reset_ins_mode()
                } else {
                        parse_error()
                }
                return
        }
        if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
                parse_error()
                return
        }
        ins_mode_in_head_else(t)
}
2019
2020         // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
// "Anything else" branch of the "in head noscript" mode: report a parse
// error, pop the current node off the stack of open elements, return to the
// "in head" mode and reprocess token `t` there.
ins_mode_in_head_noscript_else = function (t) {
        parse_error()
        ins_mode = ins_mode_in_head
        open_els.shift()
        process_token(t)
}
// "in head noscript" insertion mode: handles token `t` inside a <noscript>
// element in the head. This mode is entered from the "in head" mode when
// flag_scripting is false.
ins_mode_in_head_noscript = function (t) {
        var is_start_tag, is_end_tag, in_head_start_tags
        is_start_tag = t.type === TYPE_START_TAG
        is_end_tag = t.type === TYPE_END_TAG
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (is_start_tag && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (is_end_tag && t.name === 'noscript') {
                open_els.shift()
                ins_mode = ins_mode_in_head
                return
        }
        // start tags that are handed to the "in head" rules unchanged
        in_head_start_tags = {
                basefont: true, bgsound: true, link: true,
                meta: true, noframes: true, style: true
        }
        if (is_space_tok(t) || t.type === TYPE_COMMENT || (is_start_tag && in_head_start_tags[t.name] === true)) {
                ins_mode_in_head(t)
                return
        }
        if (is_end_tag) {
                if (t.name === 'br') {
                        // </br> is the one end tag treated as "anything else"
                        ins_mode_in_head_noscript_else(t)
                } else {
                        parse_error()
                }
                return
        }
        if (is_start_tag && (t.name === 'head' || t.name === 'noscript')) {
                parse_error()
                return
        }
        // Anything else
        ins_mode_in_head_noscript_else(t)
}
2056
2057         // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
// "Anything else" branch of the "after head" mode: insert a body element
// built from a synthetic <body> start tag, switch to the "in body" mode and
// reprocess token `t` there.
ins_mode_after_head_else = function (t) {
        insert_html_element(new_open_tag('body'))
        ins_mode = ins_mode_in_body
        process_token(t)
}
// "after head" insertion mode: handles token `t` between the end of the head
// element and the start of <body> or <frameset>. Tokens with no specific rule
// go to ins_mode_after_head_else(), which inserts a body element and
// reprocesses the token.
ins_mode_after_head = function (t) {
        var head_i
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        switch (t.type) {
        case TYPE_COMMENT:
                insert_comment(t)
                return
        case TYPE_DOCTYPE:
                parse_error()
                return
        case TYPE_START_TAG:
                switch (t.name) {
                case 'html':
                        ins_mode_in_body(t)
                        return
                case 'body':
                        insert_html_element(t)
                        flag_frameset_ok = false
                        ins_mode = ins_mode_in_body
                        return
                case 'frameset':
                        insert_html_element(t)
                        ins_mode = ins_mode_in_frameset
                        return
                case 'base':
                case 'basefont':
                case 'bgsound':
                case 'link':
                case 'meta':
                case 'noframes':
                case 'script':
                case 'style':
                case 'template':
                case 'title':
                        // Head content after </head>: put the head element
                        // back on the stack while the "in head" rules run,
                        // then take it out again (it may no longer be on top).
                        parse_error()
                        open_els.unshift(head_element_pointer)
                        ins_mode_in_head(t)
                        head_i = open_els.indexOf(head_element_pointer)
                        if (head_i !== -1) {
                                open_els.splice(head_i, 1)
                        }
                        return
                case 'head':
                        parse_error()
                        return
                }
                break
        case TYPE_END_TAG:
                switch (t.name) {
                case 'template':
                        ins_mode_in_head(t)
                        return
                case 'body':
                case 'html':
                case 'br':
                        // these three end tags are treated as "anything else"
                        break
                default:
                        parse_error()
                        return
                }
                break
        }
        // Anything else
        ins_mode_after_head_else(t)
}
2122
2123         // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
// "Any other end tag" steps of the "in body" insertion mode, factored out
// because the adoption agency algorithm calls them too.
//
// name: tag name of the end tag being processed.
//
// Walks the stack of open elements from the current node (open_els[0])
// downward. The first HTML element named `name` is closed: implied end tags
// are generated (except for `name`), a parse error is reported if that element
// was not the current node, and everything up to and including it is popped.
// If a special element is reached first, the end tag is a parse error and is
// ignored.
in_body_any_other_end_tag = function (name) {
        var i, node
        // Walking by index is safe: the stack is only modified in the branch
        // that returns.
        for (i = 0; i < open_els.length; ++i) {
                node = open_els[i]
                if (node.name === name && node.namespace === NS_HTML) {
                        generate_implied_end_tags(name) // arg is exception
                        if (node !== open_els[0]) {
                                parse_error()
                        }
                        // pop up to and including node
                        while (open_els.length > 0) {
                                if (open_els.shift() === node) {
                                        break
                                }
                        }
                        return
                }
                if (special_elements[node.name] === node.namespace) {
                        parse_error()
                        return
                }
        }
        // Defensive: the stack was empty or held neither a match nor a special
        // element. Treat the end tag like the special-element case (parse
        // error, token ignored) instead of dereferencing undefined.
        parse_error()
}
2153         ins_mode_in_body = function (t) {
2154                 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, root_attrs, s, second, second_i, u, w, y, z
2155                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2156                         parse_error()
2157                         return
2158                 }
2159                 if (is_space_tok(t)) {
2160                         reconstruct_afe()
2161                         insert_character(t)
2162                         return
2163                 }
2164                 if (t.type === TYPE_TEXT) {
2165                         reconstruct_afe()
2166                         insert_character(t)
2167                         flag_frameset_ok = false
2168                         return
2169                 }
2170                 if (t.type === TYPE_COMMENT) {
2171                         insert_comment(t)
2172                         return
2173                 }
2174                 if (t.type === TYPE_DOCTYPE) {
2175                         parse_error()
2176                         return
2177                 }
2178                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2179                         parse_error()
2180                         if (template_tag_is_open()) {
2181                                 return
2182                         }
2183                         root_attrs = open_els[open_els.length - 1].attrs
2184                         for (i = 0; i < t.attrs_a.length; ++i) {
2185                                 a = t.attrs_a[i]
2186                                 if (root_attrs[a[0]] == null) {
2187                                         root_attrs[a[0]] = a[1]
2188                                 }
2189                         }
2190                         return
2191                 }
2192
2193                 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2194                         ins_mode_in_head(t)
2195                         return
2196                 }
2197                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2198                         parse_error()
2199                         if (open_els.length < 2) {
2200                                 return
2201                         }
2202                         second = open_els[open_els.length - 2]
2203                         if (second.namespace !== NS_HTML) {
2204                                 return
2205                         }
2206                         if (second.name !== 'body') {
2207                                 return
2208                         }
2209                         if (template_tag_is_open()) {
2210                                 return
2211                         }
2212                         flag_frameset_ok = false
2213                         for (i = 0; i < t.attrs_a.length; ++i) {
2214                                 a = t.attrs_a[i]
2215                                 if (second.attrs[a[0]] == null) {
2216                                         second.attrs[a[0]] = a[1]
2217                                 }
2218                         }
2219                         return
2220                 }
2221                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2222                         parse_error()
2223                         if (open_els.length < 2) {
2224                                 return
2225                         }
2226                         second_i = open_els.length - 2
2227                         second = open_els[second_i]
2228                         if (second.namespace !== NS_HTML) {
2229                                 return
2230                         }
2231                         if (second.name !== 'body') {
2232                                 return
2233                         }
2234                         if (flag_frameset_ok === false) {
2235                                 return
2236                         }
2237                         if (second.parent != null) {
2238                                 for (i = 0; i < second.parent.children.length; ++i) {
2239                                         el = second.parent.children[i]
2240                                         if (el === second) {
2241                                                 second.parent.children.splice(i, 1)
2242                                                 break
2243                                         }
2244                                 }
2245                         }
2246                         open_els.splice(second_i, 1)
2247                         // pop everything except the "root html element"
2248                         while (open_els.length > 1) {
2249                                 open_els.shift()
2250                         }
2251                         insert_html_element(t)
2252                         ins_mode = ins_mode_in_frameset
2253                         return
2254                 }
2255                 if (t.type === TYPE_EOF) {
2256                         ok_tags = {
2257                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2258                                 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2259                                 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2260                         }
2261                         for (i = 0; i < open_els.length; ++i) {
2262                                 el = open_els[i]
2263                                 if (ok_tags[t.name] !== el.namespace) {
2264                                         parse_error()
2265                                         break
2266                                 }
2267                         }
2268                         if (template_ins_modes.length > 0) {
2269                                 ins_mode_in_template(t)
2270                         } else {
2271                                 stop_parsing()
2272                         }
2273                         return
2274                 }
2275                 if (t.type === TYPE_END_TAG && t.name === 'body') {
2276                         if (!is_in_scope('body', NS_HTML)) {
2277                                 parse_error()
2278                                 return
2279                         }
2280                         ok_tags = {
2281                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2282                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2283                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2284                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2285                                 html: NS_HTML
2286                         }
2287                         for (i = 0; i < open_els.length; ++i) {
2288                                 el = open_els[i]
2289                                 if (ok_tags[t.name] !== el.namespace) {
2290                                         parse_error()
2291                                         break
2292                                 }
2293                         }
2294                         ins_mode = ins_mode_after_body
2295                         return
2296                 }
2297                 if (t.type === TYPE_END_TAG && t.name === 'html') {
2298                         if (!is_in_scope('body', NS_HTML)) {
2299                                 parse_error()
2300                                 return
2301                         }
2302                         ok_tags = {
2303                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2304                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2305                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2306                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2307                                 html: NS_HTML
2308                         }
2309                         for (i = 0; i < open_els.length; ++i) {
2310                                 el = open_els[i]
2311                                 if (ok_tags[t.name] !== el.namespace) {
2312                                         parse_error()
2313                                         break
2314                                 }
2315                         }
2316                         ins_mode = ins_mode_after_body
2317                         process_token(t)
2318                         return
2319                 }
2320                 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2321                         close_p_if_in_button_scope()
2322                         insert_html_element(t)
2323                         return
2324                 }
2325                 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2326                         close_p_if_in_button_scope()
2327                         if (h_tags[open_els[0].name] === open_els[0].namespace) {
2328                                 parse_error()
2329                                 open_els.shift()
2330                         }
2331                         insert_html_element(t)
2332                         return
2333                 }
2334                 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2335                         close_p_if_in_button_scope()
2336                         insert_html_element(t)
2337                         eat_next_token_if_newline()
2338                         flag_frameset_ok = false
2339                         return
2340                 }
2341                 if (t.type === TYPE_START_TAG && t.name === 'form') {
2342                         if (!(form_element_pointer === null || template_tag_is_open())) {
2343                                 parse_error()
2344                                 return
2345                         }
2346                         close_p_if_in_button_scope()
2347                         el = insert_html_element(t)
2348                         if (!template_tag_is_open()) {
2349                                 form_element_pointer = el
2350                         }
2351                         return
2352                 }
2353                 if (t.type === TYPE_START_TAG && t.name === 'li') {
2354                         flag_frameset_ok = false
2355                         for (i = 0; i < open_els.length; ++i) {
2356                                 node = open_els[i]
2357                                 if (node.name === 'li' && node.namespace === NS_HTML) {
2358                                         generate_implied_end_tags('li') // arg is exception
2359                                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2360                                                 parse_error()
2361                                         }
2362                                         while (true) {
2363                                                 el = open_els.shift()
2364                                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2365                                                         break
2366                                                 }
2367                                         }
2368                                         break
2369                                 }
2370                                 if (el_is_special_not_adp(node)) {
2371                                         break
2372                                 }
2373                         }
2374                         close_p_if_in_button_scope()
2375                         insert_html_element(t)
2376                         return
2377                 }
2378                 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2379                         flag_frameset_ok = false
2380                         for (i = 0; i < open_els.length; ++i) {
2381                                 node = open_els[i]
2382                                 if (node.name === 'dd' && node.namespace === NS_HTML) {
2383                                         generate_implied_end_tags('dd') // arg is exception
2384                                         if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2385                                                 parse_error()
2386                                         }
2387                                         while (true) {
2388                                                 el = open_els.shift()
2389                                                 if (el.name === 'dd' && el.namespace === NS_HTML) {
2390                                                         break
2391                                                 }
2392                                         }
2393                                         break
2394                                 }
2395                                 if (node.name === 'dt' && node.namespace === NS_HTML) {
2396                                         generate_implied_end_tags('dt') // arg is exception
2397                                         if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2398                                                 parse_error()
2399                                         }
2400                                         while (true) {
2401                                                 el = open_els.shift()
2402                                                 if (el.name === 'dt' && el.namespace === NS_HTML) {
2403                                                         break
2404                                                 }
2405                                         }
2406                                         break
2407                                 }
2408                                 if (el_is_special_not_adp(node)) {
2409                                         break
2410                                 }
2411                         }
2412                         close_p_if_in_button_scope()
2413                         insert_html_element(t)
2414                         return
2415                 }
2416                 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2417                         close_p_if_in_button_scope()
2418                         insert_html_element(t)
2419                         tok_state = tok_state_plaintext
2420                         return
2421                 }
2422                 if (t.type === TYPE_START_TAG && t.name === 'button') {
2423                         if (is_in_scope('button', NS_HTML)) {
2424                                 parse_error()
2425                                 generate_implied_end_tags()
2426                                 while (true) {
2427                                         el = open_els.shift()
2428                                         if (el.name === 'button' && el.namespace === NS_HTML) {
2429                                                 break
2430                                         }
2431                                 }
2432                         }
2433                         reconstruct_afe()
2434                         insert_html_element(t)
2435                         flag_frameset_ok = false
2436                         return
2437                 }
2438                 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2439                         if (!is_in_scope(t.name, NS_HTML)) {
2440                                 parse_error()
2441                                 return
2442                         }
2443                         generate_implied_end_tags()
2444                         if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2445                                 parse_error()
2446                         }
2447                         while (true) {
2448                                 el = open_els.shift()
2449                                 if (el.name === t.name && el.namespace === NS_HTML) {
2450                                         return
2451                                 }
2452                         }
2453                         return
2454                 }
2455                 if (t.type === TYPE_END_TAG && t.name === 'form') {
2456                         if (!template_tag_is_open()) {
2457                                 node = form_element_pointer
2458                                 form_element_pointer = null
2459                                 if (node === null || !el_is_in_scope(node)) {
2460                                         parse_error()
2461                                         return
2462                                 }
2463                                 generate_implied_end_tags()
2464                                 if (open_els[0] !== node) {
2465                                         parse_error()
2466                                 }
2467                                 for (i = 0; i < open_els.length; ++i) {
2468                                         el = open_els[i]
2469                                         if (el === node) {
2470                                                 open_els.splice(i, 1)
2471                                                 break
2472                                         }
2473                                 }
2474                         } else {
2475                                 if (!is_in_scope('form', NS_HTML)) {
2476                                         parse_error()
2477                                         return
2478                                 }
2479                                 generate_implied_end_tags()
2480                                 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2481                                         parse_error()
2482                                 }
2483                                 while (true) {
2484                                         el = open_els.shift()
2485                                         if (el.name === 'form' && el.namespace === NS_HTML) {
2486                                                 break
2487                                         }
2488                                 }
2489                         }
2490                         return
2491                 }
2492                 if (t.type === TYPE_END_TAG && t.name === 'p') {
2493                         if (!is_in_button_scope('p', NS_HTML)) {
2494                                 parse_error()
2495                                 insert_html_element(new_open_tag('p'))
2496                         }
2497                         close_p_element()
2498                         return
2499                 }
2500                 if (t.type === TYPE_END_TAG && t.name === 'li') {
2501                         if (!is_in_li_scope('li', NS_HTML)) {
2502                                 parse_error()
2503                                 return
2504                         }
2505                         generate_implied_end_tags('li') // arg is exception
2506                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2507                                 parse_error()
2508                         }
2509                         while (true) {
2510                                 el = open_els.shift()
2511                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2512                                         break
2513                                 }
2514                         }
2515                         return
2516                 }
2517                 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2518                         if (!is_in_scope(t.name, NS_HTML)) {
2519                                 parse_error()
2520                                 return
2521                         }
2522                         generate_implied_end_tags(t.name) // arg is exception
2523                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2524                                 parse_error()
2525                         }
2526                         while (true) {
2527                                 el = open_els.shift()
2528                                 if (el.name === t.name && el.namespace === NS_HTML) {
2529                                         break
2530                                 }
2531                         }
2532                         return
2533                 }
2534                 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2535                         h_in_scope = false
2536                         for (i = 0; i < open_els.length; ++i) {
2537                                 el = open_els[i]
2538                                 if (h_tags[el.name] === el.namespace) {
2539                                         h_in_scope = true
2540                                         break
2541                                 }
2542                                 if (standard_scopers[el.name] === el.namespace) {
2543                                         break
2544                                 }
2545                         }
2546                         if (!h_in_scope) {
2547                                 parse_error()
2548                                 return
2549                         }
2550                         generate_implied_end_tags()
2551                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2552                                 parse_error()
2553                         }
2554                         while (true) {
2555                                 el = open_els.shift()
2556                                 if (h_tags[el.name] === el.namespace) {
2557                                         break
2558                                 }
2559                         }
2560                         return
2561                 }
2562                 // deep breath!
2563                 if (t.type === TYPE_START_TAG && t.name === 'a') {
2564                         // If the list of active formatting elements contains an a element
2565                         // between the end of the list and the last marker on the list (or
2566                         // the start of the list if there is no marker on the list), then
2567                         // this is a parse error; run the adoption agency algorithm for the
2568                         // tag name "a", then remove that element from the list of active
2569                         // formatting elements and the stack of open elements if the
2570                         // adoption agency algorithm didn't already remove it (it might not
2571                         // have if the element is not in table scope).
2572                         found = false
2573                         for (i = 0; i < afe.length; ++i) {
2574                                 el = afe[i]
2575                                 if (el.type === TYPE_AFE_MARKER) {
2576                                         break
2577                                 }
2578                                 if (el.name === 'a' && el.namespace === NS_HTML) {
2579                                         found = el
2580                                 }
2581                         }
2582                         if (found != null) {
2583                                 parse_error()
2584                                 adoption_agency('a')
2585                                 for (i = 0; i < afe.length; ++i) {
2586                                         el = afe[i]
2587                                         if (el === found) {
2588                                                 afe.splice(i, 1)
2589                                         }
2590                                 }
2591                                 for (i = 0; i < open_els.length; ++i) {
2592                                         el = open_els[i]
2593                                         if (el === found) {
2594                                                 open_els.splice(i, 1)
2595                                         }
2596                                 }
2597                         }
2598                         reconstruct_afe()
2599                         el = insert_html_element(t)
2600                         afe_push(el)
2601                         return
2602                 }
2603                 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2604                         reconstruct_afe()
2605                         el = insert_html_element(t)
2606                         afe_push(el)
2607                         return
2608                 }
2609                 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2610                         reconstruct_afe()
2611                         if (is_in_scope('nobr', NS_HTML)) {
2612                                 parse_error()
2613                                 adoption_agency('nobr')
2614                                 reconstruct_afe()
2615                         }
2616                         el = insert_html_element(t)
2617                         afe_push(el)
2618                         return
2619                 }
2620                 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2621                         adoption_agency(t.name)
2622                         return
2623                 }
2624                 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2625                         reconstruct_afe()
2626                         insert_html_element(t)
2627                         afe_push_marker()
2628                         flag_frameset_ok = false
2629                         return
2630                 }
2631                 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2632                         if (!is_in_scope(t.name, NS_HTML)) {
2633                                 parse_error()
2634                                 return
2635                         }
2636                         generate_implied_end_tags()
2637                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2638                                 parse_error()
2639                         }
2640                         while (true) {
2641                                 el = open_els.shift()
2642                                 if (el.name === t.name && el.namespace === NS_HTML) {
2643                                         break
2644                                 }
2645                         }
2646                         clear_afe_to_marker()
2647                         return
2648                 }
2649                 if (t.type === TYPE_START_TAG && t.name === 'table') {
2650                         if (doc.flag('quirks mode') !== QUIRKS_YES) {
2651                                 close_p_if_in_button_scope() // test
2652                         }
2653                         insert_html_element(t)
2654                         flag_frameset_ok = false
2655                         ins_mode = ins_mode_in_table
2656                         return
2657                 }
2658                 if (t.type === TYPE_END_TAG && t.name === 'br') {
2659                         parse_error()
2660                         // W3C: t.type = TYPE_START_TAG
2661                         t = new_open_tag('br') // WHATWG
2662                         // fall through
2663                 }
2664                 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2665                         reconstruct_afe()
2666                         insert_html_element(t)
2667                         open_els.shift()
2668                         t.acknowledge_self_closing()
2669                         flag_frameset_ok = false
2670                         return
2671                 }
2672                 if (t.type === TYPE_START_TAG && t.name === 'input') {
2673                         reconstruct_afe()
2674                         insert_html_element(t)
2675                         open_els.shift()
2676                         t.acknowledge_self_closing()
2677                         if (!is_input_hidden_tok(t)) {
2678                                 flag_frameset_ok = false
2679                         }
2680                         return
2681                 }
2682                 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2683                         // WHATWG adds 'menuitem' for this block
2684                         insert_html_element(t)
2685                         open_els.shift()
2686                         t.acknowledge_self_closing()
2687                         return
2688                 }
2689                 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2690                         close_p_if_in_button_scope()
2691                         insert_html_element(t)
2692                         open_els.shift()
2693                         t.acknowledge_self_closing()
2694                         flag_frameset_ok = false
2695                         return
2696                 }
2697                 if (t.type === TYPE_START_TAG && t.name === 'image') {
2698                         parse_error()
2699                         t.name = 'img'
2700                         process_token(t)
2701                         return
2702                 }
2703                 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2704                         parse_error()
2705                         if (template_tag_is_open() === false && form_element_pointer !== null) {
2706                                 return
2707                         }
2708                         t.acknowledge_self_closing()
2709                         flag_frameset_ok = false
2710                         close_p_if_in_button_scope()
2711                         el = insert_html_element(new_open_tag('form'))
2712                         if (!template_tag_is_open()) {
2713                                 form_element_pointer = el
2714                         }
2715                         for (i = 0; i < t.attrs_a.length; ++i) {
2716                                 a = t.attrs_a[i]
2717                                 if (a[0] === 'action') {
2718                                         el.attrs['action'] = a[1]
2719                                         break
2720                                 }
2721                         }
2722                         insert_html_element(new_open_tag('hr'))
2723                         open_els.shift()
2724                         reconstruct_afe()
2725                         insert_html_element(new_open_tag('label'))
2726                         // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2727                         input_el = new_open_tag('input')
2728                         prompt = null
2729                         for (i = 0; i < t.attrs_a.length; ++i) {
2730                                 a = t.attrs_a[i]
2731                                 if (a[0] === 'prompt') {
2732                                         prompt = a[1]
2733                                 }
2734                                 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2735                                         input_el.attrs_a.push([a[0], a[1]])
2736                                 }
2737                         }
2738                         input_el.attrs_a.push(['name', 'isindex'])
2739                         // fixfull this next bit is in english... internationalize?
2740                         if (prompt == null) {
2741                                 prompt = "This is a searchable index. Enter search keywords: "
2742                         }
2743                         insert_character(new_character_token(prompt)) // fixfull split
2744                         // TODO submit typo "balue" in spec
2745                         insert_html_element(input_el)
2746                         open_els.shift()
2747                         // insert_character('') // you can put chars here if prompt attr missing
2748                         open_els.shift()
2749                         insert_html_element(new_open_tag('hr'))
2750                         open_els.shift()
2751                         open_els.shift()
2752                         if (!template_tag_is_open()) {
2753                                 form_element_pointer = null
2754                         }
2755                         return
2756                 }
2757                 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2758                         insert_html_element(t)
2759                         eat_next_token_if_newline()
2760                         tok_state = tok_state_rcdata
2761                         original_ins_mode = ins_mode
2762                         flag_frameset_ok = false
2763                         ins_mode = ins_mode_text
2764                         return
2765                 }
2766                 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2767                         close_p_if_in_button_scope()
2768                         reconstruct_afe()
2769                         flag_frameset_ok = false
2770                         parse_generic_raw_text(t)
2771                         return
2772                 }
2773                 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2774                         flag_frameset_ok = false
2775                         parse_generic_raw_text(t)
2776                         return
2777                 }
2778                 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2779                         parse_generic_raw_text(t)
2780                         return
2781                 }
2782                 if (t.type === TYPE_START_TAG && t.name === 'select') {
2783                         reconstruct_afe()
2784                         insert_html_element(t)
2785                         flag_frameset_ok = false
2786                         if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2787                                 ins_mode = ins_mode_in_select_in_table
2788                         } else {
2789                                 ins_mode = ins_mode_in_select
2790                         }
2791                         return
2792                 }
2793                 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2794                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2795                                 open_els.shift()
2796                         }
2797                         reconstruct_afe()
2798                         insert_html_element(t)
2799                         return
2800                 }
2801 // this comment block implements the W3C spec
2802 //              if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2803 //                      if is_in_scope 'ruby', NS_HTML
2804 //                              generate_implied_end_tags()
2805 //                              unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2806 //                                      parse_error()
2807 //                      insert_html_element t
2808 //                      return
2809 //              if t.type === TYPE_START_TAG && t.name === 'rt'
2810 //                      if is_in_scope 'ruby', NS_HTML
2811 //                              generate_implied_end_tags 'rtc' // arg === exception
2812 //                              unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2813 //                                      parse_error()
2814 //                      insert_html_element t
2815 //                      return
2816 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2817                 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2818                         if (is_in_scope('ruby', NS_HTML)) {
2819                                 generate_implied_end_tags()
2820                                 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2821                                         parse_error()
2822                                 }
2823                         }
2824                         insert_html_element(t)
2825                         return
2826                 }
2827                 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2828                         if (is_in_scope('ruby', NS_HTML)) {
2829                                 generate_implied_end_tags('rtc')
2830                                 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2831                                         parse_error()
2832                                 }
2833                         }
2834                         insert_html_element(t)
2835                         return
2836                 }
2837 // end WHATWG chunk
2838                 if (t.type === TYPE_START_TAG && t.name === 'math') {
2839                         reconstruct_afe()
2840                         adjust_mathml_attributes(t)
2841                         adjust_foreign_attributes(t)
2842                         insert_foreign_element(t, NS_MATHML)
2843                         if (t.flag('self-closing')) {
2844                                 open_els.shift()
2845                                 t.acknowledge_self_closing()
2846                         }
2847                         return
2848                 }
2849                 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2850                         reconstruct_afe()
2851                         adjust_svg_attributes(t)
2852                         adjust_foreign_attributes(t)
2853                         insert_foreign_element(t, NS_SVG)
2854                         if (t.flag('self-closing')) {
2855                                 open_els.shift()
2856                                 t.acknowledge_self_closing()
2857                         }
2858                         return
2859                 }
2860                 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2861                         parse_error()
2862                         return
2863                 }
2864                 if (t.type === TYPE_START_TAG) { // any other start tag
2865                         reconstruct_afe()
2866                         insert_html_element(t)
2867                         return
2868                 }
2869                 if (t.type === TYPE_END_TAG) { // any other end tag
2870                         in_body_any_other_end_tag(t.name)
2871                         return
2872                 }
2873         }
2874
2875         // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2876         ins_mode_text = function (t) {
2877                 if (t.type === TYPE_TEXT) {
2878                         insert_character(t)
2879                         return
2880                 }
2881                 if (t.type === TYPE_EOF) {
2882                         parse_error()
2883                         if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
2884                                 open_els[0].flag('already started', true)
2885                         }
2886                         open_els.shift()
2887                         ins_mode = original_ins_mode
2888                         process_token(t)
2889                         return
2890                 }
2891                 if (t.type === TYPE_END_TAG && t.name === 'script') {
2892                         open_els.shift()
2893                         ins_mode = original_ins_mode
2894                         // fixfull the spec seems to assume that I'm going to run the script
2895                         // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2896                         return
2897                 }
2898                 if (t.type === TYPE_END_TAG) {
2899                         open_els.shift()
2900                         ins_mode = original_ins_mode
2901                         return
2902                 }
2903         }
2904
        // the functions below implement the tokenizer states described here:
2906         // http://www.w3.org/TR/html5/syntax.html#tokenization
2907
2908         // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2909         ins_mode_in_table_else = function (t) {
2910                 parse_error()
2911                 flag_foster_parenting = true
2912                 ins_mode_in_body(t)
2913                 flag_foster_parenting = false
2914         }
// Insertion mode "in table".  Dispatches first on the token type and then,
// for start and end tags, on the tag name.  Every token that has no rule of
// its own is passed to ins_mode_in_table_else().
//
// t: the token to process
ins_mode_in_table = function (t) {
        var cur, tag, popped

        if (t.type === TYPE_TEXT) {
                cur = open_els[0]
                if (cur.namespace === NS_HTML && (cur.name === 'table' || cur.name === 'tbody' || cur.name === 'tfoot' || cur.name === 'thead' || cur.name === 'tr')) {
                        // start collecting text in the "in table text" mode and
                        // remember which mode to return to afterwards
                        pending_table_character_tokens = []
                        original_ins_mode = ins_mode
                        ins_mode = ins_mode_in_table_text
                        process_token(t)
                        return
                }
                ins_mode_in_table_else(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        if (t.type !== TYPE_START_TAG && t.type !== TYPE_END_TAG) {
                ins_mode_in_table_else(t)
                return
        }

        tag = t.name

        if (t.type === TYPE_START_TAG) {
                if (tag === 'caption') {
                        clear_stack_to_table_context()
                        afe_push_marker()
                        insert_html_element(t)
                        ins_mode = ins_mode_in_caption
                } else if (tag === 'colgroup') {
                        clear_stack_to_table_context()
                        insert_html_element(t)
                        ins_mode = ins_mode_in_column_group
                } else if (tag === 'col') {
                        // imply the missing <colgroup>, then handle the <col> there
                        clear_stack_to_table_context()
                        insert_html_element(new_open_tag('colgroup'))
                        ins_mode = ins_mode_in_column_group
                        process_token(t)
                } else if (tag === 'tbody' || tag === 'tfoot' || tag === 'thead') {
                        clear_stack_to_table_context()
                        insert_html_element(t)
                        ins_mode = ins_mode_in_table_body
                } else if (tag === 'td' || tag === 'th' || tag === 'tr') {
                        // imply the missing <tbody>, then handle the token there
                        clear_stack_to_table_context()
                        insert_html_element(new_open_tag('tbody'))
                        ins_mode = ins_mode_in_table_body
                        process_token(t)
                } else if (tag === 'table') {
                        // a nested <table> start tag closes the open table first
                        parse_error()
                        if (is_in_table_scope('table', NS_HTML)) {
                                do {
                                        popped = open_els.shift()
                                } while (popped.name !== 'table' || popped.namespace !== NS_HTML)
                                reset_ins_mode()
                                process_token(t)
                        }
                } else if (tag === 'style' || tag === 'script' || tag === 'template') {
                        ins_mode_in_head(t)
                } else if (tag === 'input') {
                        if (is_input_hidden_tok(t)) {
                                // inserted and immediately popped again
                                parse_error()
                                insert_html_element(t)
                                open_els.shift()
                                t.acknowledge_self_closing()
                        } else {
                                ins_mode_in_table_else(t)
                        }
                } else if (tag === 'form') {
                        parse_error()
                        // ignored when a form is already being tracked or a
                        // template is open
                        if (form_element_pointer == null && !template_tag_is_open()) {
                                form_element_pointer = insert_html_element(t)
                                open_els.shift()
                        }
                } else {
                        ins_mode_in_table_else(t)
                }
                return
        }

        // end tags
        if (tag === 'table') {
                if (is_in_table_scope('table', NS_HTML)) {
                        do {
                                popped = open_els.shift()
                        } while (popped.name !== 'table' || popped.namespace !== NS_HTML)
                        reset_ins_mode()
                } else {
                        parse_error()
                }
        } else if (tag === 'body' || tag === 'caption' || tag === 'col' || tag === 'colgroup' || tag === 'html' || tag === 'tbody' || tag === 'td' || tag === 'tfoot' || tag === 'th' || tag === 'thead' || tag === 'tr') {
                parse_error()
        } else if (tag === 'template') {
                ins_mode_in_head(t)
        } else {
                ins_mode_in_table_else(t)
        }
}
3053
3054         // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
// Insertion mode "in table text".  Text tokens are buffered in
// pending_table_character_tokens (a lone U+0000 token is a parse error and
// is dropped).  The first non-text token flushes the buffer, restores
// original_ins_mode and is then processed again.
//
// t: the token to process
ins_mode_in_table_text = function (t) {
        var idx, flush

        if (t.type === TYPE_TEXT) {
                if (t.text === "\u0000") {
                        parse_error()
                } else {
                        pending_table_character_tokens.push(t)
                }
                return
        }

        // Anything else: find out whether every buffered token is whitespace
        idx = 0
        while (idx < pending_table_character_tokens.length && is_space_tok(pending_table_character_tokens[idx])) {
                idx += 1
        }
        // all whitespace: insert the tokens as they are;
        // otherwise: run each one through the "in table" anything-else rule
        if (idx === pending_table_character_tokens.length) {
                flush = insert_character
        } else {
                flush = ins_mode_in_table_else
        }
        for (idx = 0; idx < pending_table_character_tokens.length; idx += 1) {
                flush(pending_table_character_tokens[idx])
        }

        pending_table_character_tokens = []
        ins_mode = original_ins_mode
        process_token(t)
}
3090
3091         // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
// Insertion mode "in caption".  Handles </caption>, the table-structure
// tags that implicitly close the caption, and the end tags that are simply
// ignored; everything else is processed with ins_mode_in_body().
//
// t: the token to process
ins_mode_in_caption = function (t) {
        var popped
        var closes_caption = false

        if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'caption':
                                if (!is_in_table_scope('caption', NS_HTML)) {
                                        // fragment case
                                        parse_error()
                                        return
                                }
                                generate_implied_end_tags()
                                if (open_els[0].name !== 'caption') {
                                        parse_error()
                                }
                                do {
                                        popped = open_els.shift()
                                } while (popped.name !== 'caption' || popped.namespace !== NS_HTML)
                                clear_afe_to_marker()
                                ins_mode = ins_mode_in_table
                                return
                        case 'table':
                                closes_caption = true
                                break
                        case 'body':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
                        case 'th':
                        case 'thead':
                        case 'tr':
                                parse_error()
                                return
                }
        } else if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
                        case 'th':
                        case 'thead':
                        case 'tr':
                                closes_caption = true
                }
        }

        if (!closes_caption) {
                // Anything else
                ins_mode_in_body(t)
                return
        }

        // the token implies </caption>: close the caption, then process the
        // token again in the "in table" mode
        parse_error()
        if (is_in_table_scope('caption', NS_HTML)) {
                do {
                        popped = open_els.shift()
                } while (popped.name !== 'caption' || popped.namespace !== NS_HTML)
                clear_afe_to_marker()
                ins_mode = ins_mode_in_table
                process_token(t)
        }
        // else fragment case: the token is ignored
}
3137
3138         // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
// Insertion mode "in column group".
//
// Whitespace and comments are inserted, <col> is inserted and popped
// straight away, and </colgroup> pops the current colgroup and switches to
// ins_mode_in_table.  Any other token closes the colgroup (when the current
// node is one) and is processed again in the "in table" mode.
//
// t: the token to process
ins_mode_in_column_group = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'col') {
                // <col> never stays on the stack of open elements
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
                // the namespace has to be read from the current node
                // (open_els[0]), not from the open_els array itself
                if (open_els[0].name === 'colgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                        ins_mode = ins_mode_in_table
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'col') {
                parse_error()
                return
        }
        if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
                ins_mode_in_head(t)
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        // Anything else
        if (!(open_els[0].name === 'colgroup' && open_els[0].namespace === NS_HTML)) {
                parse_error()
                return
        }
        open_els.shift()
        ins_mode = ins_mode_in_table
        process_token(t)
}
3193
3194         // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
// Insertion mode "in table body".  Opens rows, closes the current
// tbody/tfoot/thead, and passes everything without a rule of its own to
// ins_mode_in_table().
//
// t: the token to process
ins_mode_in_table_body = function (t) {
        var idx, node, found
        var leaves_section = false

        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'tr':
                                clear_stack_to_table_body_context()
                                insert_html_element(t)
                                ins_mode = ins_mode_in_row
                                return
                        case 'th':
                        case 'td':
                                // a cell directly inside a section: imply the <tr>
                                parse_error()
                                clear_stack_to_table_body_context()
                                insert_html_element(new_open_tag('tr'))
                                ins_mode = ins_mode_in_row
                                process_token(t)
                                return
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                leaves_section = true
                }
        } else if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                if (is_in_table_scope(t.name, NS_HTML)) {
                                        clear_stack_to_table_body_context()
                                        open_els.shift()
                                        ins_mode = ins_mode_in_table
                                } else {
                                        parse_error()
                                }
                                return
                        case 'table':
                                leaves_section = true
                                break
                        case 'body':
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                        case 'td':
                        case 'th':
                        case 'tr':
                                parse_error()
                                return
                }
        }

        if (!leaves_section) {
                // Anything else
                ins_mode_in_table(t)
                return
        }

        // look for an open tbody/tfoot/thead, giving up at the first element
        // listed in table_scopers
        found = false
        for (idx = 0; idx < open_els.length && !found; idx += 1) {
                node = open_els[idx]
                if (node.namespace === NS_HTML && (node.name === 'tbody' || node.name === 'tfoot' || node.name === 'thead')) {
                        found = true
                } else if (table_scopers[node.name] === node.namespace) {
                        break
                }
        }
        if (!found) {
                parse_error()
                return
        }
        // close the section and let the "in table" mode handle the token
        clear_stack_to_table_body_context()
        open_els.shift()
        ins_mode = ins_mode_in_table
        process_token(t)
}
3250
3251         // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
// Insertion mode "in row".  Opens cells, closes the current row, and passes
// everything without a rule of its own to ins_mode_in_table().
//
// t: the token to process
ins_mode_in_row = function (t) {
        // reprocess: process the token again after the row has been closed
        // quiet: no parse error when there is no tr in table scope
        var reprocess = false
        var quiet = false

        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'th':
                        case 'td':
                                clear_stack_to_table_row_context()
                                insert_html_element(t)
                                ins_mode = ins_mode_in_cell
                                afe_push_marker()
                                return
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                        case 'tr':
                                reprocess = true
                                break
                        default:
                                ins_mode_in_table(t)
                                return
                }
        } else if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'tr':
                                break
                        case 'table':
                                reprocess = true
                                break
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                if (!is_in_table_scope(t.name, NS_HTML)) {
                                        parse_error()
                                        return
                                }
                                reprocess = true
                                quiet = true
                                break
                        case 'body':
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                        case 'td':
                        case 'th':
                                parse_error()
                                return
                        default:
                                ins_mode_in_table(t)
                                return
                }
        } else {
                // Anything else
                ins_mode_in_table(t)
                return
        }

        // every path that gets here closes the current row
        if (!is_in_table_scope('tr', NS_HTML)) {
                if (!quiet) {
                        parse_error()
                }
                return
        }
        clear_stack_to_table_row_context()
        open_els.shift()
        ins_mode = ins_mode_in_table_body
        if (reprocess) {
                process_token(t)
        }
}
3301
3302         // http://www.w3.org/TR/html5/syntax.html#close-the-cell
// "Close the cell": generates implied end tags, reports a parse error
// unless the current node is an HTML td or th element, pops elements up to
// and including the nearest HTML td/th, clears the list of active
// formatting elements up to the last marker and switches to
// ins_mode_in_row.
//
// Callers must make sure a td or th is on the stack of open elements,
// otherwise the pop loop runs off the end of open_els.
close_the_cell = function () {
        var el
        generate_implied_end_tags()
        // compare the current node's name (not the element object itself)
        // against both cell tag names
        if (!((open_els[0].name === 'td' || open_els[0].name === 'th') && open_els[0].namespace === NS_HTML)) {
                parse_error()
        }
        while (true) {
                el = open_els.shift()
                if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
                        break
                }
        }
        clear_afe_to_marker()
        ins_mode = ins_mode_in_row
}
3318
3319         // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
// Insertion mode "in cell".  Handles the end of the current cell and the
// table-structure tags that implicitly close it; everything else is
// processed with ins_mode_in_body().
//
// t: the token to process
ins_mode_in_cell = function (t) {
        var idx, node, popped, cell_open

        if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'td':
                        case 'th':
                                if (!is_in_table_scope(t.name, NS_HTML)) {
                                        parse_error()
                                        return
                                }
                                generate_implied_end_tags()
                                if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
                                        parse_error()
                                }
                                do {
                                        popped = open_els.shift()
                                } while (popped.name !== t.name || popped.namespace !== NS_HTML)
                                clear_afe_to_marker()
                                ins_mode = ins_mode_in_row
                                return
                        case 'body':
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                                parse_error()
                                return
                        case 'table':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                        case 'tr':
                                if (is_in_table_scope(t.name, NS_HTML)) {
                                        close_the_cell()
                                        process_token(t)
                                } else {
                                        parse_error()
                                }
                                return
                }
        } else if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
                        case 'th':
                        case 'thead':
                        case 'tr':
                                // look for an open td/th, giving up at the first
                                // element listed in table_scopers
                                cell_open = false
                                for (idx = 0; idx < open_els.length && !cell_open; idx += 1) {
                                        node = open_els[idx]
                                        if (node.namespace === NS_HTML && (node.name === 'td' || node.name === 'th')) {
                                                cell_open = true
                                        } else if (table_scopers[node.name] === node.namespace) {
                                                break
                                        }
                                }
                                if (!cell_open) {
                                        parse_error()
                                        return
                                }
                                close_the_cell()
                                process_token(t)
                                return
                }
        }

        // Anything else
        ins_mode_in_body(t)
}
3377
3378         // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
// Insertion mode "in select".
//
// Text and comments are inserted, <option>/<optgroup> tags manage the
// option/optgroup elements on top of the stack of open elements, and
// anything that ends the select pops through the select element and
// resets the insertion mode.  Tokens without a rule are parse errors and
// are ignored.
//
// t: the token to process
ins_mode_in_select = function (t) {
        var el
        if (t.type === TYPE_TEXT && t.text === "\u0000") {
                parse_error()
                return
        }
        if (t.type === TYPE_TEXT) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'option') {
                // a new option implicitly closes the previous one
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                insert_html_element(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
                // a new optgroup implicitly closes an open option and optgroup
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                insert_html_element(t)
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
                // when the current node is an option sitting directly inside an
                // optgroup, close the option first; name and namespace are both
                // checked on that same parent node (open_els[1])
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        if (open_els.length > 1 && open_els[1].name === 'optgroup' && open_els[1].namespace === NS_HTML) {
                                open_els.shift()
                        }
                }
                if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'option') {
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'select') {
                if (is_in_select_scope('select', NS_HTML)) {
                        do {
                                el = open_els.shift()
                        } while (!(el.name === 'select' && el.namespace === NS_HTML))
                        reset_ins_mode()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'select') {
                // treated like </select>, but always a parse error
                parse_error()
                // WHATWG: ignore the token when no select is in select scope
                // (fragment case); without this check the pop loop below would
                // run off the end of open_els
                if (!is_in_select_scope('select', NS_HTML)) {
                        return
                }
                do {
                        el = open_els.shift()
                } while (!(el.name === 'select' && el.namespace === NS_HTML))
                reset_ins_mode()
                return
        }
        if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
                parse_error()
                if (!is_in_select_scope('select', NS_HTML)) {
                        return
                }
                // close the select, then process the token again
                do {
                        el = open_els.shift()
                } while (!(el.name === 'select' && el.namespace === NS_HTML))
                reset_ins_mode()
                process_token(t)
                return
        }
        // <script>, <template> and </template> use the "in head" rules
        if ((t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
                ins_mode_in_head(t)
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        // Anything else
        parse_error()
}
3492
3493         // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// Insertion mode "in select in table".  Table-structure start and end tags
// are parse errors that close the select and are then processed again;
// everything else is handled by ins_mode_in_select().
//
// t: the token to process
ins_mode_in_select_in_table = function (t) {
        var popped

        if (t.type !== TYPE_START_TAG && t.type !== TYPE_END_TAG) {
                ins_mode_in_select(t)
                return
        }
        switch (t.name) {
                case 'caption':
                case 'table':
                case 'tbody':
                case 'tfoot':
                case 'thead':
                case 'tr':
                case 'td':
                case 'th':
                        break
                default:
                        // Anything else
                        ins_mode_in_select(t)
                        return
        }

        parse_error()
        // an end tag only closes the select when a matching element is open
        if (t.type === TYPE_END_TAG && !is_in_table_scope(t.name, NS_HTML)) {
                return
        }
        do {
                popped = open_els.shift()
        } while (popped.name !== 'select' || popped.namespace !== NS_HTML)
        reset_ins_mode()
        process_token(t)
}
3526
3527         // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// "in template" insertion mode.
// Text, comments and doctypes go through ins_mode_in_body. Head-level start
// tags and </template> go through ins_mode_in_head. Any other start tag
// replaces the first entry of template_ins_modes with the mode suited to
// that tag, switches to it and reprocesses the token. Other end tags are
// parse errors. EOF either stops parsing or closes the open <template>
// and reprocesses.
ins_mode_in_template = function (t) {
        var next_mode, popped
        if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'base':
                        case 'basefont':
                        case 'bgsound':
                        case 'link':
                        case 'meta':
                        case 'noframes':
                        case 'script':
                        case 'style':
                        case 'template':
                        case 'title':
                                ins_mode_in_head(t)
                                return
                        case 'caption':
                        case 'colgroup':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                next_mode = ins_mode_in_table
                                break
                        case 'col':
                                next_mode = ins_mode_in_column_group
                                break
                        case 'tr':
                                next_mode = ins_mode_in_table_body
                                break
                        case 'td':
                        case 'th':
                                next_mode = ins_mode_in_row
                                break
                        default:
                                next_mode = ins_mode_in_body
                }
                // swap the first template insertion mode for the chosen one
                template_ins_modes.splice(0, 1, next_mode)
                ins_mode = next_mode
                process_token(t)
                return
        }
        if (t.type === TYPE_END_TAG) {
                if (t.name === 'template') {
                        ins_mode_in_head(t)
                } else {
                        parse_error()
                }
                return
        }
        if (t.type !== TYPE_EOF) {
                return
        }
        if (!template_tag_is_open()) {
                stop_parsing()
                return
        }
        parse_error()
        // remove entries from the front of open_els up to and including
        // the html <template>
        do {
                popped = open_els.shift()
        } while (popped.name !== 'template' || popped.namespace !== NS_HTML)
        clear_afe_to_marker()
        template_ins_modes.shift()
        reset_ins_mode()
        process_token(t)
}
3595
3596         // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
// "after body" insertion mode.
// Whitespace and <html> start tags are handled by ins_mode_in_body, comments
// are appended to the last entry of open_els, </html> moves on to
// ins_mode_after_after_body (unless parsing a fragment), EOF stops parsing.
// Any other token is a parse error that is reprocessed in ins_mode_in_body.
ins_mode_after_body = function (t) {
        var root
        if (is_space_tok(t)) {
                ins_mode_in_body(t)
                return
        }
        switch (t.type) {
                case TYPE_COMMENT:
                        // the comment becomes the last child of the element stored at
                        // the end of open_els
                        root = open_els[open_els.length - 1]
                        insert_comment(t, [root, root.children.length])
                        return
                case TYPE_DOCTYPE:
                        parse_error()
                        return
                case TYPE_EOF:
                        stop_parsing()
                        return
                case TYPE_START_TAG:
                        if (t.name === 'html') {
                                ins_mode_in_body(t)
                                return
                        }
                        break
                case TYPE_END_TAG:
                        if (t.name === 'html') {
                                if (flag_fragment_parsing) {
                                        parse_error()
                                } else {
                                        ins_mode = ins_mode_after_after_body
                                }
                                return
                        }
                        break
        }
        // Anything else
        parse_error()
        ins_mode = ins_mode_in_body
        process_token(t)
}
3633
3634         // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
// "in frameset" insertion mode.
// Handles whitespace, comments, <html>, <frameset>, </frameset>, <frame>,
// <noframes> and EOF; doctypes and every other token are parse errors that
// are otherwise ignored.
ins_mode_in_frameset = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'html':
                                ins_mode_in_body(t)
                                return
                        case 'frameset':
                                insert_html_element(t)
                                return
                        case 'frame':
                                // inserted and immediately removed from open_els again
                                insert_html_element(t)
                                open_els.shift()
                                t.acknowledge_self_closing()
                                return
                        case 'noframes':
                                ins_mode_in_head(t)
                                return
                }
        } else if (t.type === TYPE_END_TAG && t.name === 'frameset') {
                if (open_els.length === 1) {
                        parse_error()
                        return // fragment case
                }
                open_els.shift()
                // leave this mode once the front of open_els is no longer a
                // frameset (never when parsing a fragment)
                if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
                        ins_mode = ins_mode_after_frameset
                }
                return
        } else if (t.type === TYPE_EOF) {
                if (open_els.length !== 1) {
                        parse_error()
                }
                stop_parsing()
                return
        }
        // doctype, and anything else
        parse_error()
}
3687
3688         // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
// "after frameset" insertion mode.
// Whitespace and comments are inserted, <html> goes through
// ins_mode_in_body, </html> switches to ins_mode_after_after_frameset,
// <noframes> goes through ins_mode_in_head, EOF stops parsing. Doctypes and
// everything else are parse errors and are ignored.
ins_mode_after_frameset = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
        } else if (t.type === TYPE_COMMENT) {
                insert_comment(t)
        } else if (t.type === TYPE_DOCTYPE) {
                parse_error()
        } else if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
        } else if (t.type === TYPE_END_TAG && t.name === 'html') {
                ins_mode = ins_mode_after_after_frameset
        } else if (t.type === TYPE_START_TAG && t.name === 'noframes') {
                ins_mode_in_head(t)
        } else if (t.type === TYPE_EOF) {
                stop_parsing()
        } else {
                // Anything else
                parse_error()
        }
}
3721
3722         // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
// "after after body" insertion mode.
// Comments are appended to doc; doctypes, whitespace and <html> start tags
// are handled by ins_mode_in_body; EOF stops parsing. Any other token is a
// parse error that switches back to ins_mode_in_body and is reprocessed.
ins_mode_after_after_body = function (t) {
        var body_handles_it
        if (t.type === TYPE_COMMENT) {
                insert_comment(t, [doc, doc.children.length])
                return
        }
        body_handles_it = t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')
        if (body_handles_it) {
                ins_mode_in_body(t)
        } else if (t.type === TYPE_EOF) {
                stop_parsing()
        } else {
                // Anything else
                parse_error()
                ins_mode = ins_mode_in_body
                process_token(t)
        }
}
3741
3742         // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
// "after after frameset" insertion mode.
// Comments are appended to doc; doctypes, whitespace and <html> start tags
// are handled by ins_mode_in_body; EOF stops parsing; <noframes> is handled
// by ins_mode_in_head. Every other token is a parse error and is ignored.
ins_mode_after_after_frameset = function (t) {
        var body_handles_it
        if (t.type === TYPE_COMMENT) {
                insert_comment(t, [doc, doc.children.length])
                return
        }
        body_handles_it = t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')
        if (body_handles_it) {
                ins_mode_in_body(t)
        } else if (t.type === TYPE_EOF) {
                stop_parsing()
        } else if (t.type === TYPE_START_TAG && t.name === 'noframes') {
                ins_mode_in_head(t)
        } else {
                // Anything else
                parse_error()
        }
}
3764
3765         // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
// Returns true when token t carries a "color", "face" or "size" attribute.
// t.attrs_a is read as a list of entries whose element 0 is the attribute
// name.
has_color_face_or_size = function (t) {
        var attrs, idx
        attrs = t.attrs_a
        idx = 0
        while (idx < attrs.length) {
                switch (attrs[idx][0]) {
                        case 'color':
                        case 'face':
                        case 'size':
                                return true
                }
                idx += 1
        }
        return false
}
// Closes a <script> element met in foreign content by removing the front
// entry of open_els. Nothing else is done here yet.
in_foreign_content_end_script = function () {
        open_els.shift()
        // fixfull: the remaining steps for this case are not implemented
}
// Handles an ordinary start tag inside foreign content: fixes up the
// token's name and attributes for the namespace of the adjusted current
// node, inserts the element in that namespace, and closes it again
// straight away when the token is flagged self-closing.
in_foreign_content_other_start = function (t) {
        var context, fixed_name
        context = adjusted_current_node()
        if (context.namespace === NS_MATHML) {
                adjust_mathml_attributes(t)
        } else if (context.namespace === NS_SVG) {
                // replace the tag name when svg_name_fixes has an entry for it
                fixed_name = svg_name_fixes[t.name]
                if (fixed_name != null) {
                        t.name = fixed_name
                }
                adjust_svg_attributes(t)
        }
        adjust_foreign_attributes(t)
        insert_foreign_element(t, context.namespace)
        if (!t.flag('self-closing')) {
                return
        }
        if (t.name === 'script') {
                t.acknowledge_self_closing()
                in_foreign_content_end_script()
                // fixfull
        } else {
                open_els.shift()
                t.acknowledge_self_closing()
        }
}
// Token dispatcher for foreign (non-HTML namespace) content.
// Character data, comments and doctypes are handled directly. A start tag
// from the fixed list of HTML element names "breaks out": elements are
// removed from open_els until an integration point or an HTML element is
// at the front, then the token is reprocessed. Other start tags are
// inserted as foreign elements. End tags close the nearest matching
// foreign element or are passed on to the current insertion mode.
in_foreign_content = function (t) {
        var breaks_out, candidate, depth, popped
        if (t.type === TYPE_TEXT && t.text === "\u0000") {
                parse_error()
                insert_character(new_character_token("\ufffd"))
                return
        }
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_TEXT) {
                flag_frameset_ok = false
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'b':
                        case 'big':
                        case 'blockquote':
                        case 'body':
                        case 'br':
                        case 'center':
                        case 'code':
                        case 'dd':
                        case 'div':
                        case 'dl':
                        case 'dt':
                        case 'em':
                        case 'embed':
                        case 'h1':
                        case 'h2':
                        case 'h3':
                        case 'h4':
                        case 'h5':
                        case 'h6':
                        case 'head':
                        case 'hr':
                        case 'i':
                        case 'img':
                        case 'li':
                        case 'listing':
                        case 'main':
                        case 'meta':
                        case 'nobr':
                        case 'ol':
                        case 'p':
                        case 'pre':
                        case 'ruby':
                        case 's':
                        case 'small':
                        case 'span':
                        case 'strong':
                        case 'strike':
                        case 'sub':
                        case 'sup':
                        case 'table':
                        case 'tt':
                        case 'u':
                        case 'ul':
                        case 'var':
                                breaks_out = true
                                break
                        case 'font':
                                // <font> only breaks out with a color, face or size attribute
                                breaks_out = has_color_face_or_size(t)
                                break
                        default:
                                breaks_out = false
                }
                if (breaks_out) {
                        parse_error()
                        if (!flag_fragment_parsing) {
                                // always removes at least one element, then keeps going
                                // until the front of open_els is an integration point or
                                // an HTML element
                                do {
                                        open_els.shift()
                                } while (!(is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML))
                                process_token(t)
                                return
                        }
                }
                in_foreign_content_other_start(t)
                return
        }
        if (t.type !== TYPE_END_TAG) {
                return
        }
        if (t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
                in_foreign_content_end_script()
                return
        }
        depth = 0
        candidate = open_els[depth]
        if (candidate.name.toLowerCase() !== t.name) {
                parse_error()
        }
        // walk from the front of open_els looking for an element whose
        // lowercased name matches the end tag
        do {
                if (candidate === open_els[open_els.length - 1]) {
                        // reached the last entry of open_els: ignore the token
                        return
                }
                if (candidate.name.toLowerCase() === t.name) {
                        // remove everything up to and including the match
                        do {
                                popped = open_els.shift()
                        } while (popped !== candidate)
                        return
                }
                depth += 1
                candidate = open_els[depth]
        } while (candidate.namespace !== NS_HTML)
        ins_mode(t) // explicitly call HTML insertion mode
}
3879
3880
3881         // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
// Tokenizer "data" state: consumes one character of txt.
// Returns a token for text, a character reference or EOF, or null when the
// character was '<' and only the tokenizer state changed.
tok_state_data = function () {
        var ch
        ch = txt.charAt(cur)
        cur += 1
        if (ch === '<') {
                tok_state = tok_state_tag_open
                return null
        }
        if (ch === '&') {
                return new_text_node(parse_character_reference())
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        if (ch === "\u0000") {
                // reported as an error, but the character is still emitted as-is
                parse_error()
        }
        return new_text_node(ch)
}
3903
3904         // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3905         // not needed: tok_state_character_reference_in_data = function () {
3906         // just call parse_character_reference()
3907
3908         // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
// Tokenizer "RCDATA" state: consumes one character of txt.
// '&' yields a character-reference text node, '<' only switches state (null
// is returned), U+0000 is a parse error emitted as U+FFFD, end of input
// yields an EOF token, anything else is emitted as a character token.
tok_state_rcdata = function () {
        var ch
        ch = txt.charAt(cur)
        cur += 1
        if (ch === '<') {
                tok_state = tok_state_rcdata_less_than_sign
                return null
        }
        if (ch === '&') {
                return new_text_node(parse_character_reference())
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        if (ch === "\u0000") {
                parse_error()
                return new_character_token("\ufffd")
        }
        return new_character_token(ch)
}
3930
3931         // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3932         // not needed: tok_state_character_reference_in_rcdata = function () {
3933         // just call parse_character_reference()
3934
3935         // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
// Tokenizer "RAWTEXT" state: consumes one character of txt.
// '<' only switches state (null is returned), U+0000 is a parse error
// emitted as U+FFFD, end of input yields an EOF token, anything else is
// emitted as a character token.
tok_state_rawtext = function () {
        var ch
        ch = txt.charAt(cur)
        cur += 1
        if (ch === '<') {
                tok_state = tok_state_rawtext_less_than_sign
                return null
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        if (ch === "\u0000") {
                parse_error()
                ch = "\ufffd"
        }
        return new_character_token(ch)
}
3954
3955         // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
// Tokenizer "script data" state: consumes one character of txt.
// '<' only switches state (null is returned), U+0000 is a parse error
// emitted as U+FFFD, end of input yields an EOF token, anything else is
// emitted as a character token.
tok_state_script_data = function () {
        var ch
        ch = txt.charAt(cur)
        cur += 1
        if (ch === '<') {
                tok_state = tok_state_script_data_less_than_sign
                return null
        }
        if (ch === '') { // EOF
                return new_eof_token()
        }
        if (ch === "\u0000") {
                parse_error()
                ch = "\ufffd"
        }
        return new_character_token(ch)
}
3974
3975         // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
// Tokenizer "PLAINTEXT" state: consumes one character of txt.
// U+0000 is a parse error emitted as U+FFFD, end of input yields an EOF
// token, every other character is emitted unchanged. The state never
// changes from here.
tok_state_plaintext = function () {
        var ch
        ch = txt.charAt(cur)
        cur += 1
        if (ch === '') { // EOF
                return new_eof_token()
        }
        if (ch === "\u0000") {
                parse_error()
                ch = "\ufffd"
        }
        return new_character_token(ch)
}
3991
3992         // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
// Tokenizer "tag open" state: decides what follows a '<'.
// '!' and '/' switch state, a letter starts a new open tag, '?' starts a
// bogus comment. Anything else means the '<' was plain text: it is
// returned as a text node and the character after it is left unconsumed.
// Branches that only change state return undefined.
tok_state_tag_open = function () {
        var ch, is_upper
        ch = txt.charAt(cur)
        cur += 1
        if (ch === '!') {
                tok_state = tok_state_markup_declaration_open
                return
        }
        if (ch === '/') {
                tok_state = tok_state_end_tag_open
                return
        }
        is_upper = is_uc_alpha(ch)
        if (is_upper || is_lc_alpha(ch)) {
                // tag names are stored lowercased
                tok_cur_tag = new_open_tag(is_upper ? ch.toLowerCase() : ch)
                tok_state = tok_state_tag_name
                return
        }
        if (ch === '?') {
                parse_error()
                tok_cur_tag = new_comment_token('?') // FIXME right?
                tok_state = tok_state_bogus_comment
                return
        }
        // Anything else
        parse_error()
        tok_state = tok_state_data
        cur -= 1 // we didn't parse/handle the char after <
        return new_text_node('<')
}
4026
4027         // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
// Tokenizer "end tag open" state: decides what follows "</".
// A letter starts a new end tag. '>' is a parse error that returns to the
// data state. End of input is a parse error that emits "</" as text.
// Anything else is a parse error that starts a bogus comment.
tok_state_end_tag_open = function () {
        var ch, is_upper
        ch = txt.charAt(cur)
        cur += 1
        is_upper = is_uc_alpha(ch)
        if (is_upper || is_lc_alpha(ch)) {
                // tag names are stored lowercased
                tok_cur_tag = new_end_tag(is_upper ? ch.toLowerCase() : ch)
                tok_state = tok_state_tag_name
                return
        }
        if (ch === '>' || ch === '') {
                parse_error()
                tok_state = tok_state_data
                if (ch === '') { // EOF
                        return new_text_node('</')
                }
                return
        }
        // Anything else
        parse_error()
        tok_cur_tag = new_comment_token(ch)
        tok_state = tok_state_bogus_comment
        return null
}
4057
4058         // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
// Tokenizer "tag name" state: consumes one character of a tag name.
// Returns the finished tag token when '>' is reached and null otherwise
// (the character was either appended to tok_cur_tag.name or only changed
// the tokenizer state).
tok_state_tag_name = function () {
        var ch, finished, suffix
        ch = txt.charAt(cur)
        cur += 1
        if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
                tok_state = tok_state_before_attribute_name
        } else if (ch === '/') {
                tok_state = tok_state_self_closing_start_tag
        } else if (ch === '>') {
                tok_state = tok_state_data
                finished = tok_cur_tag
                tok_cur_tag = null
                return finished
        } else if (ch === "\u0000") {
                parse_error()
                tok_cur_tag.name += "\ufffd"
        } else if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
        } else {
                // tag names are stored lowercased
                suffix = is_uc_alpha(ch) ? ch.toLowerCase() : ch
                tok_cur_tag.name += suffix
        }
        return null
}
4094
4095         // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
// Tokenizer "RCDATA less-than sign" state.
// A following '/' is consumed, temporary_buffer is reset and the end tag
// open state takes over (returns null). Otherwise nothing is consumed and
// the '<' is emitted as a character token in the RCDATA state.
tok_state_rcdata_less_than_sign = function () {
        if (txt.charAt(cur) === '/') {
                cur += 1
                temporary_buffer = ''
                tok_state = tok_state_rcdata_end_tag_open
                return null
        }
        // Anything else: leave the character for the RCDATA state
        tok_state = tok_state_rcdata
        return new_character_token('<')
}
4109
4110         // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
// Tokenizer "RCDATA end tag open" state.
// A letter starts a new end tag (name lowercased) and is also recorded, in
// its original case, in temporary_buffer; null is returned. Anything else
// is put back, and "</" is emitted as a character token in the RCDATA
// state.
tok_state_rcdata_end_tag_open = function () {
        var ch, is_upper
        ch = txt.charAt(cur)
        cur += 1
        is_upper = is_uc_alpha(ch)
        if (is_upper || is_lc_alpha(ch)) {
                tok_cur_tag = new_end_tag(is_upper ? ch.toLowerCase() : ch)
                temporary_buffer += ch
                tok_state = tok_state_rcdata_end_tag_name
                return null
        }
        // Anything else
        tok_state = tok_state_rcdata
        cur -= 1 // reconsume the input character
        return new_character_token("</") // fixfull separate these
}
4131
4132         // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
// Returns true when t is an end tag whose name equals the name of the
// element at the front of open_els.
is_appropriate_end_tag = function (t) {
        if (t.type !== TYPE_END_TAG) {
                return false
        }
        // fixfull: this assumes that open_els[0].name is "the tag name of the last
        // start tag to have been emitted from this tokenizer"
        return t.name === open_els[0].name
}
4138
4139         // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
// Tokenizer "RCDATA end tag name" state: consumes one character.
// Whitespace, '/' and '>' finish the tag name, but only when tok_cur_tag is
// an appropriate end tag; '>' then returns the tag token, the other two
// return undefined after switching state. Letters extend the tag name
// (returns null). In every other case the character is put back and
// "</" plus temporary_buffer is emitted as a character token in the RCDATA
// state.
tok_state_rcdata_end_tag_name = function () {
        var after_name, ch, is_upper
        ch = txt.charAt(cur)
        cur += 1
        // state to enter if this character ends an appropriate end tag
        after_name = null
        if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
                after_name = tok_state_before_attribute_name
        } else if (ch === '/') {
                after_name = tok_state_self_closing_start_tag // FIXME spec typo?
        } else if (ch === '>') {
                after_name = tok_state_data
        }
        if (after_name !== null && is_appropriate_end_tag(tok_cur_tag)) {
                tok_state = after_name
                if (ch === '>') {
                        return tok_cur_tag
                }
                return
        }
        // a delimiter after an inappropriate end tag is not a letter, so it
        // drops through to "Anything else" below
        is_upper = is_uc_alpha(ch)
        if (is_upper || is_lc_alpha(ch)) {
                tok_cur_tag.name += is_upper ? ch.toLowerCase() : ch
                temporary_buffer += ch
                return null
        }
        // Anything else
        tok_state = tok_state_rcdata
        cur -= 1 // reconsume the input character
        return new_character_token('</' + temporary_buffer) // fixfull separate these
}
4179
// 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
//
// Tokenizer state entered after a '<' inside RAWTEXT. A following '/'
// clears temporary_buffer and moves on to tok_state_rawtext_end_tag_open
// (returning null); any other character is un-read and the '<' is
// returned as a character token while the tokenizer goes back to
// tok_state_rawtext.
tok_state_rawtext_less_than_sign = function () {
        if (txt.charAt(cur++) !== '/') {
                // Anything else
                tok_state = tok_state_rawtext
                cur -= 1 // reconsume the input character
                return new_character_token('<')
        }
        temporary_buffer = ''
        tok_state = tok_state_rawtext_end_tag_open
        return null
}
4194
// 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
//
// Tokenizer state entered after "</" inside RAWTEXT. A letter starts a new
// end tag token (tok_cur_tag, name lowercased), is recorded as written in
// temporary_buffer, and moves the tokenizer to
// tok_state_rawtext_end_tag_name (returning null). Any other character is
// un-read and "</" is returned as a character token while the tokenizer
// goes back to tok_state_rawtext.
tok_state_rawtext_end_tag_open = function () {
        // declared locally so the character does not leak into (or clobber)
        // a variable of the enclosing scope
        var c
        c = txt.charAt(cur++)
        if (is_uc_alpha(c)) {
                tok_cur_tag = new_end_tag(c.toLowerCase())
                temporary_buffer += c
                tok_state = tok_state_rawtext_end_tag_name
                return null
        }
        if (is_lc_alpha(c)) {
                tok_cur_tag = new_end_tag(c)
                temporary_buffer += c
                tok_state = tok_state_rawtext_end_tag_name
                return null
        }
        // Anything else
        tok_state = tok_state_rawtext
        cur -= 1 // reconsume the input character
        return new_character_token("</") // fixfull separate these
}
4215
// 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
//
// Tokenizer state: consumes one character of a candidate RAWTEXT end tag
// name. Whitespace, '/' and '>' only terminate the name when
// is_appropriate_end_tag(tok_cur_tag) holds ('>' then returns
// tok_cur_tag). Letters extend tok_cur_tag.name (lowercased) and
// temporary_buffer (as written). Everything else is un-read and the text
// consumed so far ("</" + temporary_buffer) is returned as a character
// token while the tokenizer goes back to tok_state_rawtext.
tok_state_rawtext_end_tag_name = function () {
        var ch = txt.charAt(cur++)
        var ends_name = ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ' || ch === '/' || ch === '>'
        var uppercase
        if (ends_name && is_appropriate_end_tag(tok_cur_tag)) {
                if (ch === '>') {
                        tok_state = tok_state_data
                        return tok_cur_tag
                }
                if (ch === '/') {
                        tok_state = tok_state_self_closing_start_tag
                } else {
                        tok_state = tok_state_before_attribute_name
                }
                return
        }
        // a terminator that does not close the right element ends up in
        // "Anything else" below, since it is not a letter
        uppercase = is_uc_alpha(ch)
        if (uppercase || is_lc_alpha(ch)) {
                tok_cur_tag.name += uppercase ? ch.toLowerCase() : ch
                temporary_buffer += ch
                return null
        }
        // Anything else
        tok_state = tok_state_rawtext
        cur -= 1 // reconsume the input character
        return new_character_token('</' + temporary_buffer) // fixfull separate these
}
4256
// 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
//
// Tokenizer state entered after a '<' inside script data.
//   '/'  clears temporary_buffer and continues with
//        tok_state_script_data_end_tag_open (nothing returned)
//   '!'  continues with tok_state_script_data_escape_start and returns
//        "<!" as a character token
//   else the character is un-read, the tokenizer goes back to
//        tok_state_script_data and '<' is returned as a character token
tok_state_script_data_less_than_sign = function () {
        switch (txt.charAt(cur++)) {
                case '/':
                        temporary_buffer = ''
                        tok_state = tok_state_script_data_end_tag_open
                        return
                case '!':
                        tok_state = tok_state_script_data_escape_start
                        return new_character_token('<!') // fixfull split
                default:
                        // Anything else
                        tok_state = tok_state_script_data
                        cur -= 1 // reconsume
                        return new_character_token('<')
        }
}
4275
// 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
//
// Tokenizer state entered after "</" inside script data. A letter starts
// a new end tag token in tok_cur_tag (name lowercased), is appended as
// written to temporary_buffer, and hands over to
// tok_state_script_data_end_tag_name without returning a token. Any
// other character is un-read and "</" is returned as a character token
// while the tokenizer goes back to tok_state_script_data.
tok_state_script_data_end_tag_open = function () {
        var ch = txt.charAt(cur++)
        var uppercase = is_uc_alpha(ch)
        if (uppercase || is_lc_alpha(ch)) {
                tok_cur_tag = new_end_tag(uppercase ? ch.toLowerCase() : ch)
                temporary_buffer += ch
                tok_state = tok_state_script_data_end_tag_name
                return
        }
        // Anything else
        tok_state = tok_state_script_data
        cur -= 1 // reconsume
        return new_character_token('</')
}
4297
// 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
//
// Tokenizer state: consumes one character of a candidate script data end
// tag name. Whitespace, '/' and '>' terminate the name only when
// is_appropriate_end_tag(tok_cur_tag) holds; '>' then returns
// tok_cur_tag. Letters extend tok_cur_tag.name (lowercased) and
// temporary_buffer (as written). Everything else is un-read and
// "</" + temporary_buffer is returned as a character token while the
// tokenizer goes back to tok_state_script_data.
tok_state_script_data_end_tag_name = function () {
        var ch = txt.charAt(cur++)
        var is_terminator = true
        var after_name
        switch (ch) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        after_name = tok_state_before_attribute_name
                break
                case '/':
                        after_name = tok_state_self_closing_start_tag
                break
                case '>':
                        after_name = tok_state_data
                break
                default:
                        is_terminator = false
        }
        if (is_terminator && is_appropriate_end_tag(tok_cur_tag)) {
                tok_state = after_name
                if (ch === '>') {
                        return tok_cur_tag
                }
                return
        }
        // terminators for the wrong element fall through to "Anything else"
        if (is_uc_alpha(ch)) {
                tok_cur_tag.name += ch.toLowerCase()
                temporary_buffer += ch
                return
        }
        if (is_lc_alpha(ch)) {
                tok_cur_tag.name += ch
                temporary_buffer += ch
                return
        }
        // Anything else
        tok_state = tok_state_script_data
        cur -= 1 // reconsume
        return new_character_token("</" + temporary_buffer) // fixfull split
}
4338
// 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
//
// Tokenizer state entered after "<!" inside script data. A '-' is
// returned as a character token and the tokenizer continues with
// tok_state_script_data_escape_start_dash; any other character is un-read
// and the tokenizer goes back to tok_state_script_data without returning
// a token.
tok_state_script_data_escape_start = function () {
        if (txt.charAt(cur++) !== '-') {
                // Anything else
                tok_state = tok_state_script_data
                cur -= 1 // reconsume
                return
        }
        tok_state = tok_state_script_data_escape_start_dash
        return new_character_token('-')
}
4351
// 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
//
// Tokenizer state entered after "<!-" inside script data. A second '-' is
// returned as a character token and the tokenizer continues with
// tok_state_script_data_escaped_dash_dash; any other character is un-read
// and the tokenizer goes back to tok_state_script_data without returning
// a token.
tok_state_script_data_escape_start_dash = function () {
        var is_dash = txt.charAt(cur++) === '-'
        if (is_dash) {
                tok_state = tok_state_script_data_escaped_dash_dash
                return new_character_token('-')
        }
        // Anything else
        cur -= 1 // reconsume
        tok_state = tok_state_script_data
}
4364
// 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
//
// Tokenizer state for the body of an escaped script section. Consumes one
// character:
//   '-'     returned as a character token; next state is
//           tok_state_script_data_escaped_dash
//   '<'     next state is tok_state_script_data_escaped_less_than_sign
//   U+0000  parse error; U+FFFD is returned instead
//   EOF     (charAt() gave '') parse error; back to tok_state_data with
//           cur stepped back by one
//   else    returned unchanged as a character token
tok_state_script_data_escaped = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_escaped_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case "\u0000":
                        parse_error()
                        return new_character_token("\ufffd")
                case '': // EOF
                        tok_state = tok_state_data
                        parse_error()
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        return new_character_token(ch)
        }
}
4390
// 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
//
// Tokenizer state after a single '-' inside an escaped script section.
// Consumes one character:
//   '-'     returned as a character token; next state is
//           tok_state_script_data_escaped_dash_dash
//   '<'     next state is tok_state_script_data_escaped_less_than_sign
//   U+0000  parse error; U+FFFD is returned, back to
//           tok_state_script_data_escaped
//   EOF     parse error; back to tok_state_data with cur stepped back
//   else    returned as a character token, back to
//           tok_state_script_data_escaped
tok_state_script_data_escaped_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_escaped_dash_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        tok_state = tok_state_data
                        parse_error()
                        cur -= 1 // reconsume
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        return new_character_token(ch)
}
4418
// 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
//
// Tokenizer state after "--" inside an escaped script section. Consumes
// one character:
//   '-'     returned as a character token; state unchanged
//   '<'     next state is tok_state_script_data_escaped_less_than_sign
//   '>'     returned as a character token; back to tok_state_script_data
//   U+0000  parse error; U+FFFD is returned, back to
//           tok_state_script_data_escaped
//   EOF     parse error; back to tok_state_data with cur stepped back
//   else    returned as a character token, back to
//           tok_state_script_data_escaped
tok_state_script_data_escaped_dash_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case '>':
                        tok_state = tok_state_script_data
                        return new_character_token('>')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        return new_character_token(ch)
}
4449
// 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
//
// Tokenizer state after a '<' inside an escaped script section.
//   '/'     clears temporary_buffer; next state is
//           tok_state_script_data_escaped_end_tag_open
//   letter  temporary_buffer is reset to the lowercased letter, "<" plus
//           the letter as written is returned as a character token, and
//           the next state is tok_state_script_data_double_escape_start
//   else    the character is un-read, '<' is returned as a character
//           token, back to tok_state_script_data_escaped
tok_state_script_data_escaped_less_than_sign = function () {
        var ch = txt.charAt(cur++)
        var uppercase
        if (ch === '/') {
                temporary_buffer = ''
                tok_state = tok_state_script_data_escaped_end_tag_open
                return
        }
        uppercase = is_uc_alpha(ch)
        if (uppercase || is_lc_alpha(ch)) {
                // the buffer is always lowercase (yes, really); the emitted
                // text keeps the original case
                temporary_buffer = uppercase ? ch.toLowerCase() : ch
                tok_state = tok_state_script_data_double_escape_start
                return new_character_token("<" + ch) // fixfull split
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        cur -= 1 // reconsume
        return new_character_token('<')
}
4474
// 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
//
// Tokenizer state after "</" inside an escaped script section. A letter
// starts a new end tag token in tok_cur_tag (name lowercased), is
// appended as written to temporary_buffer, and hands over to
// tok_state_script_data_escaped_end_tag_name without returning a token.
// Any other character is un-read and "</" is returned as a character
// token while the tokenizer goes back to tok_state_script_data_escaped.
tok_state_script_data_escaped_end_tag_open = function () {
        var ch = txt.charAt(cur++)
        var uppercase = is_uc_alpha(ch)
        if (!uppercase && !is_lc_alpha(ch)) {
                // Anything else
                tok_state = tok_state_script_data_escaped
                cur -= 1 // reconsume
                return new_character_token('</') // fixfull split
        }
        tok_cur_tag = new_end_tag(uppercase ? ch.toLowerCase() : ch)
        temporary_buffer += ch
        tok_state = tok_state_script_data_escaped_end_tag_name
        return
}
4496
// 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
//
// Tokenizer state: consumes one character of a candidate end tag name
// inside an escaped script section. Whitespace, '/' and '>' terminate the
// name only when is_appropriate_end_tag(tok_cur_tag) holds; '>' then
// returns tok_cur_tag. Letters extend tok_cur_tag.name (lowercased) and
// temporary_buffer. Everything else is un-read and
// "</" + temporary_buffer is returned as a character token while the
// tokenizer goes back to tok_state_script_data_escaped.
tok_state_script_data_escaped_end_tag_name = function () {
        var c
        c = txt.charAt(cur++)
        if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
                if (is_appropriate_end_tag(tok_cur_tag)) {
                        tok_state = tok_state_before_attribute_name
                        return
                }
                // fall through
        }
        if (c === '/') {
                if (is_appropriate_end_tag(tok_cur_tag)) {
                        tok_state = tok_state_self_closing_start_tag
                        return
                }
                // fall through
        }
        if (c === '>') {
                if (is_appropriate_end_tag(tok_cur_tag)) {
                        tok_state = tok_state_data
                        return tok_cur_tag
                }
                // fall through
        }
        if (is_uc_alpha(c)) {
                tok_cur_tag.name += c.toLowerCase()
                // The buffer must hold the character exactly as written: it is
                // re-emitted as script text by "Anything else" below, so
                // lowercasing it here would change the case of that text.
                temporary_buffer += c
                return
        }
        if (is_lc_alpha(c)) {
                tok_cur_tag.name += c
                temporary_buffer += c
                return
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        cur -= 1 // reconsume
        return new_character_token("</" + temporary_buffer) // fixfull split
}
4537
// 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
//
// Tokenizer state that collects, in temporary_buffer, the lowercased name
// following a '<' inside an escaped script section.
//   whitespace, '/' or '>'  the name is complete: continue with
//           tok_state_script_data_double_escaped when temporary_buffer is
//           'script', otherwise with tok_state_script_data_escaped; the
//           character is returned as a character token
//   letter  appended (lowercased) to temporary_buffer and returned, as
//           written, as a character token
//   else    un-read; back to tok_state_script_data_escaped, nothing
//           returned
tok_state_script_data_double_escape_start = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                case '/':
                case '>':
                        tok_state = (temporary_buffer === 'script')
                                ? tok_state_script_data_double_escaped
                                : tok_state_script_data_escaped
                        return new_character_token(ch)
        }
        if (is_uc_alpha(ch)) {
                temporary_buffer += ch.toLowerCase() // yes, really lowercase
                return new_character_token(ch)
        }
        if (is_lc_alpha(ch)) {
                temporary_buffer += ch
                return new_character_token(ch)
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        cur -= 1 // reconsume
}
4562
// 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
//
// Tokenizer state for the body of a double-escaped script section.
// Consumes one character:
//   '-'     returned as a character token; next state is
//           tok_state_script_data_double_escaped_dash
//   '<'     returned as a character token; next state is
//           tok_state_script_data_double_escaped_less_than_sign
//   U+0000  parse error; U+FFFD is returned instead
//   EOF     parse error; back to tok_state_data with cur stepped back
//   else    returned unchanged as a character token
tok_state_script_data_double_escaped = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_double_escaped_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case "\u0000":
                        parse_error()
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
                default:
                        // Anything else
                        return new_character_token(ch)
        }
}
4588
// 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
//
// Tokenizer state after a single '-' inside a double-escaped script
// section. Consumes one character:
//   '-'     returned as a character token; next state is
//           tok_state_script_data_double_escaped_dash_dash
//   '<'     returned as a character token; next state is
//           tok_state_script_data_double_escaped_less_than_sign
//   U+0000  parse error; U+FFFD is returned, back to
//           tok_state_script_data_double_escaped
//   EOF     parse error; back to tok_state_data with cur stepped back
//   else    returned as a character token, back to
//           tok_state_script_data_double_escaped
tok_state_script_data_double_escaped_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_double_escaped_dash_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_double_escaped
        return new_character_token(ch)
}
4616
// 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
//
// Tokenizer state after "--" inside a double-escaped script section.
// Consumes one character:
//   '-'     returned as a character token; state unchanged
//   '<'     returned as a character token; next state is
//           tok_state_script_data_double_escaped_less_than_sign
//   '>'     returned as a character token; back to tok_state_script_data
//   U+0000  parse error; U+FFFD is returned, back to
//           tok_state_script_data_double_escaped
//   EOF     parse error; back to tok_state_data with cur stepped back
//   else    returned as a character token, back to
//           tok_state_script_data_double_escaped
tok_state_script_data_double_escaped_dash_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case '>':
                        tok_state = tok_state_script_data
                        return new_character_token('>')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_double_escaped
        return new_character_token(ch)
}
4647
// 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
//
// Tokenizer state after a '<' inside a double-escaped script section. A
// '/' clears temporary_buffer, is returned as a character token, and the
// tokenizer continues with tok_state_script_data_double_escape_end. Any
// other character is un-read and the tokenizer goes back to
// tok_state_script_data_double_escaped without returning a token.
tok_state_script_data_double_escaped_less_than_sign = function () {
        if (txt.charAt(cur++) !== '/') {
                // Anything else
                tok_state = tok_state_script_data_double_escaped
                cur -= 1 // reconsume
                return
        }
        temporary_buffer = ''
        tok_state = tok_state_script_data_double_escape_end
        return new_character_token('/')
}
4661
// 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
//
// Tokenizer state that collects, in temporary_buffer, the lowercased name
// following "</" inside a double-escaped script section.
//   whitespace, '/' or '>'  the name is complete: continue with
//           tok_state_script_data_escaped when temporary_buffer is
//           'script', otherwise stay double-escaped
//           (tok_state_script_data_double_escaped); the character is
//           returned as a character token
//   letter  appended (lowercased) to temporary_buffer and returned, as
//           written, as a character token
//   else    un-read; back to tok_state_script_data_double_escaped,
//           nothing returned
tok_state_script_data_double_escape_end = function () {
        var ch = txt.charAt(cur++)
        // charAt() yields '' at EOF, which indexOf() would "find" at 0, so
        // the empty string is excluded explicitly
        var ends_name = ch !== '' && "\t\u000a\u000c />".indexOf(ch) !== -1
        if (ends_name) {
                if (temporary_buffer === 'script') {
                        tok_state = tok_state_script_data_escaped
                } else {
                        tok_state = tok_state_script_data_double_escaped
                }
                return new_character_token(ch)
        }
        if (is_uc_alpha(ch)) {
                temporary_buffer += ch.toLowerCase() // yes, really lowercase
                return new_character_token(ch)
        }
        if (is_lc_alpha(ch)) {
                temporary_buffer += ch
                return new_character_token(ch)
        }
        // Anything else
        tok_state = tok_state_script_data_double_escaped
        cur -= 1 // reconsume
}
4686
// 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
//
// Tokenizer state between attributes of tok_cur_tag. Consumes one
// character:
//   whitespace  skipped
//   '/'         next state is tok_state_self_closing_start_tag
//   '>'         tag is finished: tok_cur_tag is cleared and the tag token
//               is returned; next state is tok_state_data
//   EOF         parse error; next state is tok_state_data
//   else        starts a new attribute: a [name, ''] pair is put at the
//               FRONT of tok_cur_tag.attrs_a and the tokenizer continues
//               with tok_state_attribute_name. U+0000 becomes U+FFFD,
//               uppercase letters are lowercased, and U+0000, '"', "'",
//               '<' and '=' are parse errors.
// Returns the tag token on '>' and null in every other case.
// NOTE(review): unlike some sibling states, the EOF branch does not step
// cur back -- confirm that is intended.
tok_state_before_attribute_name = function () {
        var ch = txt.charAt(cur++)
        var finished_tag, first_char
        if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
                return null
        }
        if (ch === '/') {
                tok_state = tok_state_self_closing_start_tag
                return null
        }
        if (ch === '>') {
                tok_state = tok_state_data
                finished_tag = tok_cur_tag
                tok_cur_tag = null
                return finished_tag
        }
        if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                return null
        }
        // everything below begins a new attribute name
        if (ch === "\u0000") {
                parse_error()
                first_char = "\ufffd"
        } else if (ch === '"' || ch === "'" || ch === '<' || ch === '=') {
                parse_error()
                first_char = ch
        } else if (is_uc_alpha(ch)) {
                first_char = ch.toLowerCase()
        } else {
                first_char = ch
        }
        tok_cur_tag.attrs_a.unshift([first_char, ''])
        tok_state = tok_state_attribute_name
        return null
}
4736
// 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
//
// Tokenizer state inside an attribute name. The attribute being built is
// the first entry of tok_cur_tag.attrs_a; its name is element [0] of that
// entry. Consumes one character:
//   whitespace  next state is tok_state_after_attribute_name
//   '/'         next state is tok_state_self_closing_start_tag
//   '='         next state is tok_state_before_attribute_value
//   '>'         tag is finished: tok_cur_tag is cleared and the tag token
//               is returned; next state is tok_state_data
//   EOF         parse error; next state is tok_state_data
//   else        appended to the name. U+0000 becomes U+FFFD, uppercase
//               letters are lowercased, and U+0000, '"', "'" and '<' are
//               parse errors.
// Returns the tag token on '>' and null in every other case.
tok_state_attribute_name = function () {
        var ch = txt.charAt(cur++)
        var finished_tag
        if (ch === '>') {
                tok_state = tok_state_data
                finished_tag = tok_cur_tag
                tok_cur_tag = null
                return finished_tag
        }
        if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
                tok_state = tok_state_after_attribute_name
        } else if (ch === '/') {
                tok_state = tok_state_self_closing_start_tag
        } else if (ch === '=') {
                tok_state = tok_state_before_attribute_value
        } else if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
        } else if (ch === "\u0000") {
                parse_error()
                tok_cur_tag.attrs_a[0][0] += "\ufffd"
        } else if (ch === '"' || ch === "'" || ch === '<') {
                parse_error()
                tok_cur_tag.attrs_a[0][0] += ch
        } else if (is_uc_alpha(ch)) {
                tok_cur_tag.attrs_a[0][0] += ch.toLowerCase()
        } else {
                tok_cur_tag.attrs_a[0][0] += ch
        }
        return null
}
4782
// 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
//
// Tokenizer state after an attribute name that was followed by
// whitespace. Consumes one character:
//   whitespace  skipped
//   '/'         next state is tok_state_self_closing_start_tag
//   '='         next state is tok_state_before_attribute_value
//   '>'         next state is tok_state_data; tok_cur_tag is returned
//   EOF         parse error; next state is tok_state_data, cur is stepped
//               back
//   else        starts another attribute: a [name, ''] pair is put at the
//               FRONT of tok_cur_tag.attrs_a and the tokenizer continues
//               with tok_state_attribute_name. Uppercase letters are
//               lowercased, U+0000 becomes U+FFFD, and U+0000, '"', "'"
//               and '<' are parse errors.
// Returns tok_cur_tag on '>' and undefined in every other case.
tok_state_after_attribute_name = function () {
        var ch = txt.charAt(cur++)
        var first_char
        switch (ch) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        return
                case '/':
                        tok_state = tok_state_self_closing_start_tag
                        return
                case '=':
                        tok_state = tok_state_before_attribute_value
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
        }
        // the letter test deliberately stays ahead of the NUL / EOF tests
        if (is_uc_alpha(ch)) {
                first_char = ch.toLowerCase()
        } else if (ch === "\u0000") {
                parse_error()
                first_char = "\ufffd"
        } else if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                cur -= 1 // reconsume
                return
        } else {
                if (ch === '"' || ch === "'" || ch === '<') {
                        parse_error()
                }
                // Anything else
                first_char = ch
        }
        tok_cur_tag.attrs_a.unshift([first_char, ''])
        tok_state = tok_state_attribute_name
}
4827
// 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
//
// Tokenizer state after the '=' of an attribute. The attribute being built
// is the first entry of tok_cur_tag.attrs_a; its value is element [1] of
// that entry. Consumes one character:
//   whitespace  skipped
//   '"'         next state is tok_state_attribute_value_double_quoted
//   "'"         next state is tok_state_attribute_value_single_quoted
//   '&'         un-read; next state is tok_state_attribute_value_unquoted
//   U+0000      parse error; U+FFFD is appended to the value, next state
//               is tok_state_attribute_value_unquoted
//   '>'         parse error; tag is finished: tok_cur_tag is cleared and
//               the tag token is returned; next state is tok_state_data
//   EOF         parse error; next state is tok_state_data
//   '<' '=' '`' parse error, then handled like any other character
//   else        appended to the value; next state is
//               tok_state_attribute_value_unquoted
// Returns the tag token on '>' and null in every other case.
tok_state_before_attribute_value = function () {
        var c, tmp
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        return null
                case '"':
                        tok_state = tok_state_attribute_value_double_quoted
                break
                case '&':
                        tok_state = tok_state_attribute_value_unquoted
                        cur -= 1 // reconsume: the unquoted state handles the '&'
                break
                case "'":
                        tok_state = tok_state_attribute_value_single_quoted
                break
                case "\u0000":
                        // the spec makes this a parse error; report it like the
                        // other states do instead of only noting it in a comment
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                        tok_state = tok_state_attribute_value_unquoted
                break
                case '>':
                        // missing attribute value is a parse error as well
                        parse_error()
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                break
                case '<':
                case '=':
                case '`':
                        parse_error()
                        // fall through to "Anything else"
                default:
                        tok_cur_tag.attrs_a[0][1] += c
                        tok_state = tok_state_attribute_value_unquoted
        }
        return null
}
4870
// 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
//
// Consumes one character of a double-quoted attribute value and appends it
// (or its replacement) to tok_cur_tag.attrs_a[0][1]. The closing quote moves
// on to the after-attribute-value state. Always returns null.
tok_state_attribute_value_double_quoted = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case '"':
                        tok_state = tok_state_after_attribute_value_quoted
                        break
                case '&':
                        // '"' is the additional allowed character inside this value
                        tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
                        break
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                        break
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        break
                default:
                        tok_cur_tag.attrs_a[0][1] += c
        }
        return null
}
4894
// 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
//
// Consumes one character of a single-quoted attribute value and appends it
// (or its replacement) to tok_cur_tag.attrs_a[0][1]. The closing quote moves
// on to the after-attribute-value state. Always returns null.
tok_state_attribute_value_single_quoted = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case "'":
                        tok_state = tok_state_after_attribute_value_quoted
                        break
                case '&':
                        // "'" is the additional allowed character inside this value
                        tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
                        break
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                        break
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        break
                default:
                        tok_cur_tag.attrs_a[0][1] += c
        }
        return null
}
4918
// 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
//
// Consumes one character of an unquoted attribute value. Whitespace ends the
// value, ">" ends the whole tag, everything else is appended to
// tok_cur_tag.attrs_a[0][1].
//
// Returns the tag token when ">" closes the tag, otherwise null.
tok_state_attribute_value_unquoted = function () {
        var c, tmp
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_attribute_name
                        break
                case '&':
                        // '>' is the additional allowed character inside this value
                        tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
                        break
                case '>':
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                        break
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        break
                case '"':
                case "'":
                case '<':
                case '=':
                case '`':
                        // these are kept as part of the value, but are an error
                        parse_error()
                        // fall through to Anything else
                default:
                        tok_cur_tag.attrs_a[0][1] += c
        }
        return null
}
4951
// 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
//
// Consumes the character right after a quoted attribute value's closing
// quote and decides whether another attribute follows, the tag is
// self-closing, or the tag ends.
//
// Returns the tag token when ">" closes the tag, otherwise null.
tok_state_after_attribute_value_quoted = function () {
        var tmp
        switch (txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_attribute_name
                        break
                case '/':
                        tok_state = tok_state_self_closing_start_tag
                        break
                case '>':
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        break
                default:
                        // missing whitespace between attributes
                        parse_error()
                        tok_state = tok_state_before_attribute_name
                        cur -= 1 // reconsume
        }
        return null
}
4982
// 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
//
// Handles the character after a "/" inside a tag. Only ">" is valid: it sets
// the self-closing flag and returns the tag token. Anything else (EOF
// included) is a parse error, is left unconsumed, and nothing is returned.
tok_state_self_closing_start_tag = function () {
        var ch = txt.charAt(cur)
        cur += 1
        if (ch === '>') {
                tok_cur_tag.flag('self-closing', true)
                tok_state = tok_state_data
                return tok_cur_tag
        }
        parse_error()
        // EOF goes back to the data state, any other character starts an attribute
        tok_state = (ch === '') ? tok_state_data : tok_state_before_attribute_name
        cur -= 1 // reconsume
}
5003
// 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
// WARNING: put a comment token in tok_cur_tag before setting this state
//
// Swallows everything up to and including the next ">" (or up to the end of
// the input when there is none), appends it to the comment's text with NULL
// characters replaced by U+FFFD, and returns the comment token.
tok_state_bogus_comment = function () {
        var stop, chunk
        stop = txt.indexOf('>', cur)
        if (stop === -1) {
                // no ">" left: the comment runs to the end of the input
                chunk = txt.slice(cur)
                cur = txt.length
        } else {
                chunk = txt.slice(cur, stop)
                cur = stop + 1 // skip over the ">"
        }
        tok_cur_tag.text += chunk.split("\u0000").join("\ufffd")
        tok_state = tok_state_data
        return tok_cur_tag
}
5021
// 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
//
// Looks at what follows "<!" and dispatches to the comment, DOCTYPE or CDATA
// states. "doctype" is matched case-insensitively; "[CDATA[" is matched
// exactly and only when the adjusted current node is outside the HTML
// namespace. Anything else is a parse error and becomes a bogus comment.
// Never returns a token.
tok_state_markup_declaration_open = function () {
        var ahead, node
        ahead = txt.slice(cur, cur + 7)
        if (ahead.slice(0, 2) === '--') {
                cur += 2
                tok_cur_tag = new_comment_token('')
                tok_state = tok_state_comment_start
        } else if (ahead.toLowerCase() === 'doctype') {
                cur += 7
                tok_state = tok_state_doctype
        } else {
                node = adjusted_current_node()
                if (node && node.namespace !== NS_HTML && ahead === '[CDATA[') {
                        cur += 7
                        tok_state = tok_state_cdata_section
                } else {
                        // Otherwise
                        parse_error()
                        tok_cur_tag = new_comment_token('')
                        tok_state = tok_state_bogus_comment
                }
        }
}
5047
// 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
//
// Consumes the first character after "<!--". The comment token being built
// is in tok_cur_tag and its data is accumulated in tok_cur_tag.text.
//
// Returns the comment token when the comment ends here (">" or EOF),
// otherwise null.
tok_state_comment_start = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case '-':
                        tok_state = tok_state_comment_start_dash
                        break
                case "\u0000":
                        parse_error()
                        // the replacement character is comment data, it must
                        // not be emitted as a separate character token
                        tok_cur_tag.text += "\ufffd"
                        tok_state = tok_state_comment
                        break
                case '>':
                        // "<!-->" is an (erroneous) empty comment
                        parse_error()
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.text += c
                        tok_state = tok_state_comment
        }
        return null
}
5077
// 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
//
// Handles the character after "<!---". A second "-" may be the start of the
// comment's end; ">" and EOF finish the comment with a parse error and
// return the token; any other character is added to the comment text after
// the pending "-". Returns null unless the comment token is emitted.
tok_state_comment_start_dash = function () {
        var ch = txt.charAt(cur++)
        if (ch === '-') {
                tok_state = tok_state_comment_end
                return null
        }
        if (ch === '>' || ch === '') {
                parse_error()
                tok_state = tok_state_data
                if (ch === '') {
                        cur -= 1 // reconsume EOF
                }
                return tok_cur_tag
        }
        if (ch === "\u0000") {
                parse_error()
                ch = "\ufffd"
        }
        tok_cur_tag.text += "-" + ch
        tok_state = tok_state_comment
        return null
}
5107
// 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
//
// Consumes one character of comment data. "-" might begin the closing "-->",
// EOF ends the comment with a parse error and returns the token, NULL is
// stored as U+FFFD, and everything else is appended to the comment text.
// Returns null unless the comment token is emitted.
tok_state_comment = function () {
        var ch = txt.charAt(cur++)
        if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        if (ch === '-') {
                tok_state = tok_state_comment_end_dash
                return null
        }
        if (ch === "\u0000") {
                parse_error()
                ch = "\ufffd"
        }
        tok_cur_tag.text += ch
        return null
}
5130
// 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
//
// Handles the character after a single "-" inside a comment. A second "-"
// moves to the comment-end state; EOF ends the comment with a parse error
// and returns the token; anything else means the "-" was ordinary data, so
// it is added to the comment text together with the new character.
// Returns null unless the comment token is emitted.
tok_state_comment_end_dash = function () {
        var ch = txt.charAt(cur++)
        if (ch === '-') {
                tok_state = tok_state_comment_end
                return null
        }
        if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        if (ch === "\u0000") {
                parse_error()
                ch = "\ufffd"
        }
        tok_cur_tag.text += "-" + ch
        tok_state = tok_state_comment
        return null
}
5155
// 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
//
// Handles the character after "--" inside a comment. ">" finishes the
// comment and returns the token. Every other input is a parse error: EOF
// still returns the token, "!" moves to the comment-end-bang state, a further
// "-" is added to the text, and anything else puts the "--" back into the
// text and resumes the comment state.
tok_state_comment_end = function () {
        var ch = txt.charAt(cur++)
        if (ch === '>') {
                tok_state = tok_state_data
                return tok_cur_tag
        }
        // all remaining branches report a parse error
        parse_error()
        if (ch === '') { // EOF
                tok_state = tok_state_data
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        if (ch === '!') {
                tok_state = tok_state_comment_end_bang
        } else if (ch === '-') {
                tok_cur_tag.text += '-'
        } else {
                tok_cur_tag.text += "--" + (ch === "\u0000" ? "\ufffd" : ch)
                tok_state = tok_state_comment
        }
        return null
}
5190
// 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
//
// Handles the character after "--!" inside a comment. ">" closes the comment
// anyway; otherwise the "--!" turns out to be comment data and is appended
// to tok_cur_tag.text.
//
// Returns the comment token when it is finished (">" or EOF), otherwise null.
tok_state_comment_end_bang = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case '-':
                        // Only "--!" is appended here. The "-" just consumed
                        // is tracked by the comment-end-dash state, which
                        // adds it to the text itself if it turns out to be
                        // data.
                        tok_cur_tag.text += "--!"
                        tok_state = tok_state_comment_end_dash
                        break
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case "\u0000":
                        parse_error()
                        tok_cur_tag.text += "--!\ufffd"
                        tok_state = tok_state_comment
                        break
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.text += "--!" + c
                        tok_state = tok_state_comment
        }
        return null
}
5220
// 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
//
// Handles the character right after "<!DOCTYPE". Whitespace is the expected
// separator. EOF is a parse error that returns a fresh, empty doctype token
// flagged force-quirks. Any other character is a parse error and is left for
// the before-doctype-name state. Returns null unless a token is emitted.
tok_state_doctype = function () {
        var ch, token
        ch = txt.charAt(cur++)
        if (ch === "\t" || ch === "\u000a" || ch === "\u000c" || ch === ' ') {
                tok_state = tok_state_before_doctype_name
                return null
        }
        parse_error()
        if (ch === '') { // EOF
                tok_state = tok_state_data
                token = new_doctype_token('')
                token.flag('force-quirks', true)
                cur -= 1 // reconsume
                return token
        }
        // missing whitespace before the name
        tok_state = tok_state_before_doctype_name
        cur -= 1 // reconsume
        return null
}
5246
// 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
//
// Skips whitespace before the doctype name and creates the doctype token
// from the first character of the name (uppercase ASCII letters are
// lowercased, NULL becomes U+FFFD). ">" and EOF are parse errors that return
// an empty doctype token flagged force-quirks.
tok_state_before_doctype_name = function () {
        var ch, first, token
        ch = txt.charAt(cur++)
        if (ch === "\t" || ch === "\u000a" || ch === "\u000c" || ch === ' ') {
                return
        }
        if (is_uc_alpha(ch)) {
                first = ch.toLowerCase()
        } else if (ch === "\u0000") {
                parse_error()
                first = "\ufffd"
        } else if (ch === '>' || ch === '') {
                // no name at all
                parse_error()
                token = new_doctype_token('')
                token.flag('force-quirks', true)
                tok_state = tok_state_data
                if (ch === '') {
                        cur -= 1 // reconsume EOF
                }
                return token
        } else {
                // Anything else
                tok_cur_tag = new_doctype_token(ch)
                tok_state = tok_state_doctype_name
                return null
        }
        tok_cur_tag = new_doctype_token(first)
        tok_state = tok_state_doctype_name
}
5285
// 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
//
// Consumes one character of the doctype name and appends it to
// tok_cur_tag.name (uppercase ASCII letters lowercased, NULL replaced by
// U+FFFD). Whitespace ends the name, ">" returns the doctype token, and EOF
// returns it flagged force-quirks.
tok_state_doctype_name = function () {
        var ch = txt.charAt(cur++)
        if (ch === "\t" || ch === "\u000a" || ch === "\u000c" || ch === ' ') {
                tok_state = tok_state_after_doctype_name
                return
        }
        if (ch === '>') {
                tok_state = tok_state_data
                return tok_cur_tag
        }
        if (is_uc_alpha(ch)) {
                tok_cur_tag.name += ch.toLowerCase()
                return
        }
        switch (ch) {
                case "\u0000":
                        parse_error()
                        tok_cur_tag.name += "\ufffd"
                        return
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        tok_cur_tag.name += ch
                        return null
        }
}
5318
// 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
//
// Runs after the doctype name and its trailing whitespace. Skips further
// whitespace, returns the token on ">" (or on EOF, flagged force-quirks),
// and recognizes the case-insensitive keywords "public" and "system".
// Anything else is a parse error that flags the doctype force-quirks and
// switches to the bogus-doctype state.
tok_state_after_doctype_name = function () {
        var keyword
        switch (txt.charAt(cur++)) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
        }
        // Anything else: six characters starting at the one just consumed
        keyword = txt.slice(cur - 1, cur + 5).toLowerCase()
        if (keyword === 'public' || keyword === 'system') {
                cur += 5 // the first character of the keyword is already consumed
                if (keyword === 'public') {
                        tok_state = tok_state_after_doctype_public_keyword
                } else {
                        tok_state = tok_state_after_doctype_system_keyword
                }
                return
        }
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5353
// 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
//
// Handles the character right after the PUBLIC keyword. Whitespace is the
// expected separator. A quote starts the public identifier directly, which
// is a parse error because the whitespace is missing. ">" and EOF return the
// token flagged force-quirks; anything else leads to the bogus-doctype state.
tok_state_after_doctype_public_keyword = function () {
        var ch
        switch (ch = txt.charAt(cur++)) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_doctype_public_identifier
                        return
                case '"':
                case "'":
                        parse_error()
                        tok_cur_tag.public_identifier = ''
                        if (ch === '"') {
                                tok_state = tok_state_doctype_public_identifier_double_quoted
                        } else {
                                tok_state = tok_state_doctype_public_identifier_single_quoted
                        }
                        return
                case '>':
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
        }
        // Anything else
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5393
// 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
//
// Runs after "PUBLIC" plus at least one whitespace character. Skips further
// whitespace and expects the opening quote of the public identifier. Unlike
// the after-doctype-public-keyword state, a quote here is not a parse error
// because the required whitespace was present.
//
// Returns the doctype token (flagged force-quirks) on ">" or EOF, null when
// switching to the bogus-doctype state, and nothing otherwise.
tok_state_before_doctype_public_identifier = function () {
        var c
        c = txt.charAt(cur++)
        if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
                return
        }
        if (c === '"') {
                tok_cur_tag.public_identifier = ''
                tok_state = tok_state_doctype_public_identifier_double_quoted
                return
        }
        if (c === "'") {
                tok_cur_tag.public_identifier = ''
                tok_state = tok_state_doctype_public_identifier_single_quoted
                return
        }
        if (c === '>') {
                parse_error()
                tok_cur_tag.flag('force-quirks', true)
                tok_state = tok_state_data
                return tok_cur_tag
        }
        if (c === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                tok_cur_tag.flag('force-quirks', true)
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        // Anything else
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5432
5433
// 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
//
// Consumes one character of a double-quoted public identifier and appends
// it to tok_cur_tag.public_identifier (NULL is stored as U+FFFD). The
// closing quote ends the identifier. ">" and EOF are parse errors that
// return the doctype token flagged force-quirks.
tok_state_doctype_public_identifier_double_quoted = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '"':
                        tok_state = tok_state_after_doctype_public_identifier
                        return
                case "\u0000":
                        parse_error()
                        tok_cur_tag.public_identifier += "\ufffd"
                        return
                case '>':
                        // the identifier was never closed
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        tok_cur_tag.public_identifier += ch
                        return null
        }
}
5464
// 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
//
// Consumes one character of a single-quoted public identifier and appends
// it to tok_cur_tag.public_identifier (NULL is stored as U+FFFD). The
// closing quote ends the identifier. ">" and EOF are parse errors that
// return the doctype token flagged force-quirks.
tok_state_doctype_public_identifier_single_quoted = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "'":
                        tok_state = tok_state_after_doctype_public_identifier
                        return
                case "\u0000":
                        parse_error()
                        tok_cur_tag.public_identifier += "\ufffd"
                        return
                case '>':
                        // the identifier was never closed
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        tok_cur_tag.public_identifier += ch
                        return null
        }
}
5495
// 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
//
// Handles the character right after the public identifier's closing quote.
// Whitespace may be followed by a system identifier, ">" returns the
// doctype token, and a quote starts the system identifier directly (a parse
// error because the whitespace is missing). EOF returns the token flagged
// force-quirks; anything else leads to the bogus-doctype state.
tok_state_after_doctype_public_identifier = function () {
        var ch
        switch (ch = txt.charAt(cur++)) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_between_doctype_public_and_system_identifiers
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '"':
                case "'":
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        if (ch === '"') {
                                tok_state = tok_state_doctype_system_identifier_double_quoted
                        } else {
                                tok_state = tok_state_doctype_system_identifier_single_quoted
                        }
                        return
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
        }
        // Anything else
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5533
5534         // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-&&-system-identifiers-state
5535         tok_state_between_doctype_public_and_system_identifiers = function () {
5536                 var c
5537                 c = txt.charAt(cur++)
5538                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5539                         return
5540                 }
5541                 if (c === '>') {
5542                         tok_state = tok_state_data
5543                         return tok_cur_tag
5544                 }
5545                 if (c === '"') {
5546                         parse_error()
5547                         tok_cur_tag.system_identifier = ''
5548                         tok_state = tok_state_doctype_system_identifier_double_quoted
5549                         return
5550                 }
5551                 if (c === "'") {
5552                         parse_error()
5553                         tok_cur_tag.system_identifier = ''
5554                         tok_state = tok_state_doctype_system_identifier_single_quoted
5555                         return
5556                 }
5557                 if (c === '') { // EOF
5558                         parse_error()
5559                         tok_state = tok_state_data
5560                         tok_cur_tag.flag('force-quirks', true)
5561                         cur -= 1 // reconsume
5562                         return tok_cur_tag
5563                 }
5564                 // Anything else
5565                 parse_error()
5566                 tok_cur_tag.flag('force-quirks', true)
5567                 tok_state = tok_state_bogus_doctype
5568                 return null
5569         }
5570
5571         // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
5572         tok_state_after_doctype_system_keyword = function () {
5573                 var c
5574                 c = txt.charAt(cur++)
5575                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5576                         tok_state = tok_state_before_doctype_system_identifier
5577                         return
5578                 }
5579                 if (c === '"') {
5580                         parse_error()
5581                         tok_cur_tag.system_identifier = ''
5582                         tok_state = tok_state_doctype_system_identifier_double_quoted
5583                         return
5584                 }
5585                 if (c === "'") {
5586                         parse_error()
5587                         tok_cur_tag.system_identifier = ''
5588                         tok_state = tok_state_doctype_system_identifier_single_quoted
5589                         return
5590                 }
5591                 if (c === '>') {
5592                         parse_error()
5593                         tok_cur_tag.flag('force-quirks', true)
5594                         tok_state = tok_state_data
5595                         return tok_cur_tag
5596                 }
5597                 if (c === '') { // EOF
5598                         parse_error()
5599                         tok_state = tok_state_data
5600                         tok_cur_tag.flag('force-quirks', true)
5601                         cur -= 1 // reconsume
5602                         return tok_cur_tag
5603                 }
5604                 // Anything else
5605                 parse_error()
5606                 tok_cur_tag.flag('force-quirks', true)
5607                 tok_state = tok_state_bogus_doctype
5608                 return null
5609         }
5610
5611         // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
5612         tok_state_before_doctype_system_identifier = function () {
5613                 var c
5614                 c = txt.charAt(cur++)
5615                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5616                         return
5617                 }
5618                 if (c === '"') {
5619                         tok_cur_tag.system_identifier = ''
5620                         tok_state = tok_state_doctype_system_identifier_double_quoted
5621                         return
5622                 }
5623                 if (c === "'") {
5624                         tok_cur_tag.system_identifier = ''
5625                         tok_state = tok_state_doctype_system_identifier_single_quoted
5626                         return
5627                 }
5628                 if (c === '>') {
5629                         parse_error()
5630                         tok_cur_tag.flag('force-quirks', true)
5631                         tok_state = tok_state_data
5632                         return tok_cur_tag
5633                 }
5634                 if (c === '') { // EOF
5635                         parse_error()
5636                         tok_state = tok_state_data
5637                         tok_cur_tag.flag('force-quirks', true)
5638                         cur -= 1 // reconsume
5639                         return tok_cur_tag
5640                 }
5641                 // Anything else
5642                 parse_error()
5643                 tok_cur_tag.flag('force-quirks', true)
5644                 tok_state = tok_state_bogus_doctype
5645                 return null
5646         }
5647
5648         // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
5649         tok_state_doctype_system_identifier_double_quoted = function () {
5650                 var c
5651                 c = txt.charAt(cur++)
5652                 if (c === '"') {
5653                         tok_state = tok_state_after_doctype_system_identifier
5654                         return
5655                 }
5656                 if (c === "\u0000") {
5657                         parse_error()
5658                         tok_cur_tag.system_identifier += "\ufffd"
5659                         return
5660                 }
5661                 if (c === '>') {
5662                         parse_error()
5663                         tok_cur_tag.flag('force-quirks', true)
5664                         tok_state = tok_state_data
5665                         return tok_cur_tag
5666                 }
5667                 if (c === '') { // EOF
5668                         parse_error()
5669                         tok_state = tok_state_data
5670                         tok_cur_tag.flag('force-quirks', true)
5671                         cur -= 1 // reconsume
5672                         return tok_cur_tag
5673                 }
5674                 // Anything else
5675                 tok_cur_tag.system_identifier += c
5676                 return null
5677         }
5678
5679         // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
5680         tok_state_doctype_system_identifier_single_quoted = function () {
5681                 var c
5682                 c = txt.charAt(cur++)
5683                 if (c === "'") {
5684                         tok_state = tok_state_after_doctype_system_identifier
5685                         return
5686                 }
5687                 if (c === "\u0000") {
5688                         parse_error()
5689                         tok_cur_tag.system_identifier += "\ufffd"
5690                         return
5691                 }
5692                 if (c === '>') {
5693                         parse_error()
5694                         tok_cur_tag.flag('force-quirks', true)
5695                         tok_state = tok_state_data
5696                         return tok_cur_tag
5697                 }
5698                 if (c === '') { // EOF
5699                         parse_error()
5700                         tok_state = tok_state_data
5701                         tok_cur_tag.flag('force-quirks', true)
5702                         cur -= 1 // reconsume
5703                         return tok_cur_tag
5704                 }
5705                 // Anything else
5706                 tok_cur_tag.system_identifier += c
5707                 return null
5708         }
5709
5710         // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
5711         tok_state_after_doctype_system_identifier = function () {
5712                 var c
5713                 c = txt.charAt(cur++)
5714                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5715                         return
5716                 }
5717                 if (c === '>') {
5718                         tok_state = tok_state_data
5719                         return tok_cur_tag
5720                 }
5721                 if (c === '') { // EOF
5722                         parse_error()
5723                         tok_state = tok_state_data
5724                         tok_cur_tag.flag('force-quirks', true)
5725                         cur -= 1 // reconsume
5726                         return tok_cur_tag
5727                 }
5728                 // Anything else
5729                 parse_error()
5730                 // do _not_ tok_cur_tag.flag 'force-quirks', true
5731                 tok_state = tok_state_bogus_doctype
5732                 return null
5733         }
5734
5735         // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
5736         tok_state_bogus_doctype = function () {
5737                 var c
5738                 c = txt.charAt(cur++)
5739                 if (c === '>') {
5740                         tok_state = tok_state_data
5741                         return tok_cur_tag
5742                 }
5743                 if (c === '') { // EOF
5744                         tok_state = tok_state_data
5745                         cur -= 1 // reconsume
5746                         return tok_cur_tag
5747                 }
5748                 // Anything else
5749                 return null
5750         }
5751
5752         // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
5753         tok_state_cdata_section = function () {
5754                 var next_gt, val
5755                 tok_state = tok_state_data
5756                 next_gt = txt.indexOf(']]>', cur)
5757                 if (next_gt === -1) {
5758                         val = txt.substr(cur)
5759                         cur = txt.length
5760                 } else {
5761                         val = txt.substr(cur, next_gt - cur)
5762                         cur = next_gt + 3
5763                 }
5764                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5765                 if (val.length > 0) {
5766                         return new_character_token(val) // fixfull split
5767                 }
5768                 return null
5769         }
5770
5771         // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5772         // Don't set this as a state, just call it
5773         // returns a string (NOT a text node)
5774         parse_character_reference = function (allowed_char, in_attr) {
5775                 var base, c, charset, code_point, decoded, i, max, start
5776                 if (allowed_char == null) {
5777                         allowed_char = null
5778                 }
5779                 if (in_attr == null) {
5780                         in_attr = false
5781                 }
5782                 if (cur >= txt.length) {
5783                         return '&'
5784                 }
5785                 switch (c = txt.charAt(cur)) {
5786                         case "\t":
5787                         case "\n":
5788                         case "\u000c":
5789                         case ' ':
5790                         case '<':
5791                         case '&':
5792                         case '':
5793                         case allowed_char:
5794                                 // explicitly not a parse error
5795                                 return '&'
5796                         break
5797                         case ';':
5798                                 // there has to be "one or more" alnums between & and ; to be a parse error
5799                                 return '&'
5800                         break
5801                         case '#':
5802                                 if (cur + 1 >= txt.length) {
5803                                         return '&'
5804                                 }
5805                                 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
5806                                         base = 16
5807                                         charset = hex_chars
5808                                         start = cur + 2
5809                                 } else {
5810                                         charset = digits
5811                                         start = cur + 1
5812                                         base = 10
5813                                 }
5814                                 i = 0
5815                                 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5816                                         i += 1
5817                                 }
5818                                 if (i === 0) {
5819                                         return '&'
5820                                 }
5821                                 cur = start + i
5822                                 if (txt.charAt(start + i) === ';') {
5823                                         cur += 1
5824                                 } else {
5825                                         parse_error()
5826                                 }
5827                                 code_point = txt.substr(start, i)
5828                                 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5829                                         code_point = code_point.substr(1)
5830                                 }
5831                                 code_point = parseInt(code_point, base)
5832                                 if (unicode_fixes[code_point] != null) {
5833                                         parse_error()
5834                                         return unicode_fixes[code_point]
5835                                 } else {
5836                                         if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
5837                                                 parse_error()
5838                                                 return "\ufffd"
5839                                         } else {
5840                                                 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5841                                                         parse_error()
5842                                                 }
5843                                                 return from_code_point(code_point)
5844                                         }
5845                                 }
5846                                 return
5847                         break
5848                         default:
5849                                 for (i = 0; i < 31; ++i) {
5850                                         if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5851                                                 break
5852                                         }
5853                                 }
5854                                 if (i === 0) {
5855                                         // exit early, because parse_error() below needs at least one alnum
5856                                         return '&'
5857                                 }
5858                                 if (txt.charAt(cur + i) === ';') {
5859                                         decoded = decode_named_char_ref(txt.substr(cur, i))
5860                                         i += 1 // scan past the ';' (after, so we dno't pass it to decode)
5861                                         if (decoded != null) {
5862                                                 cur += i
5863                                                 return decoded
5864                                         }
5865                                         // else FALL THROUGH (check for match without last char(s) or ";")
5866                                 }
5867                                 // no ';' terminator (only legacy char refs)
5868                                 max = i
5869                                 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5870                                         c = legacy_char_refs[txt.substr(cur, i)]
5871                                         if (c != null) {
5872                                                 if (in_attr) {
5873                                                         if (txt.charAt(cur + i) === '=') {
5874                                                                 // "because some legacy user agents will
5875                                                                 // misinterpret the markup in those cases"
5876                                                                 parse_error()
5877                                                                 return '&'
5878                                                         }
5879                                                         if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5880                                                                 // this makes attributes forgiving about url args
5881                                                                 return '&'
5882                                                         }
5883                                                 }
5884                                                 // ok, and besides the weird exceptions for attributes...
5885                                                 // return the matching char
5886                                                 cur += i // consume entity chars
5887                                                 parse_error() // because no terminating ";"
5888                                                 return c
5889                                         }
5890                                 }
5891                                 parse_error()
5892                                 return '&'
5893                 }
5894                 // never reached
5895         }
5896
5897         eat_next_token_if_newline = function () {
5898                 var old_cur, t
5899                 old_cur = cur
5900                 t = null
5901                 while (t == null) {
5902                         t = tok_state()
5903                 }
5904                 if (t.type === TYPE_TEXT) {
5905                         // definition of a newline depends on whether it was a character ref or not
5906                         if (cur - old_cur === 1) {
5907                                 // not a character reference
5908                                 if (t.text === "\u000d" || t.text === "\u000a") {
5909                                         return
5910                                 }
5911                         } else {
5912                                 if (t.text === "\u000a") {
5913                                         return
5914                                 }
5915                         }
5916                 }
5917                 // not a "newline"
5918                 cur = old_cur
5919         }
5920
5921         // tree constructor initialization
5922         // see comments on TYPE_TAG/etc for the structure of this data
5923         txt = args_html
5924         cur = 0
5925         doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5926         doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5927         fragment_root = null // fragment parsing algorithm returns children of this
5928         open_els = []
5929         afe = [] // active formatting elements
5930         template_ins_modes = []
5931         ins_mode = ins_mode_initial
5932         original_ins_mode = ins_mode // TODO check spec
5933         flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5934         flag_frameset_ok = true
5935         flag_parsing = true
5936         flag_foster_parenting = false
5937         form_element_pointer = null
5938         temporary_buffer = null
5939         pending_table_character_tokens = []
5940         head_element_pointer = null
5941         flag_fragment_parsing = false
5942         context_element = null
5943         prev_node_id = 0 // just for debugging
5944
5945         // tokenizer initialization
5946         tok_state = tok_state_data
5947
5948         parse_init = function () {
5949                 var el, f, ns, old_doc, t
5950                 // fragment parsing (text arg)
5951                 if (args.fragment != null) {
5952                         // this handles the fragment from the tests in the format described here:
5953                         // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
5954                         f = args.fragment
5955                         ns = NS_HTML
5956                         if (f.substr(0, 5) === 'math ') {
5957                                 f = f.substr(5)
5958                                 ns = NS_MATHML
5959                         } else if (f.substr(0, 4) === 'svg ') {
5960                                 f = f.substr(4)
5961                                 ns = NS_SVG
5962                         }
5963                         t = new_open_tag(f)
5964                         context_element = token_to_element(t, ns)
5965                         context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5966                         context_element.document.flag('quirks mode', QUIRKS_NO)
5967                 }
5968                 // fragment parsing (Node arg)
5969                 if (args.context != null) {
5970                         context_element = args.context
5971                 }
5972
5973                 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5974                 // fragment parsing algorithm
5975                 if (context_element != null) {
5976                         flag_fragment_parsing = true
5977                         doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5978                         // search up the tree from context, to try to find it's document,
5979                         // because this file only puts a "document" property on the root
5980                         // element.
5981                         old_doc = null
5982                         el = context_element
5983                         while (true) {
5984                                 if (el.document != null) {
5985                                         old_doc = el.document
5986                                         break
5987                                 }
5988                                 if (el.parent) {
5989                                         el = el.parent
5990                                 } else {
5991                                         break
5992                                 }
5993                         }
5994                         if (old_doc) {
5995                                 doc.flag('quirks mode', old_doc.flag('quirks mode'))
5996                         }
5997                         // set tok_state
5998                         if (context_element.namespace === NS_HTML) {
5999                                 switch (context_element.name) {
6000                                         case 'title':
6001                                         case 'textarea':
6002                                                 tok_state = tok_state_rcdata
6003                                         break
6004                                         case 'style':
6005                                         case 'xmp':
6006                                         case 'iframe':
6007                                         case 'noembed':
6008                                         case 'noframes':
6009                                                 tok_state = tok_state_rawtext
6010                                         break
6011                                         case 'script':
6012                                                 tok_state = tok_state_script_data
6013                                         break
6014                                         case 'noscript':
6015                                                 if (flag_scripting) {
6016                                                         tok_state = tok_state_rawtext
6017                                                 }
6018                                         break
6019                                         case 'plaintext':
6020                                                 tok_state = tok_state_plaintext
6021                                 }
6022                         }
6023                         fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
6024                         doc.children.push(fragment_root)
6025                         fragment_root.document = doc
6026                         open_els = [fragment_root]
6027                         if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
6028                                 template_ins_modes.unshift(ins_mode_in_template)
6029                         }
6030                         // fixfull create token for context (it should have it's original one already)
6031                         reset_ins_mode()
6032                         // set form_element pointer... in the foreign doc?!
6033                         el = context_element
6034                         while (true) {
6035                                 if (el.name === 'form' && el.namespace === NS_HTML) {
6036                                         form_element_pointer = el
6037                                         break
6038                                 }
6039                                 if (el.parent) {
6040                                         el = el.parent
6041                                 } else {
6042                                         break
6043                                 }
6044                         }
6045                 }
6046
6047                 // text pre-processing
6048                 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
6049                 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6050                 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6051         }
6052
6053         // http://www.w3.org/TR/html5/syntax.html#tree-construction
6054         parse_main_loop = function () {
6055                 var t
6056                 while (flag_parsing) {
6057                         t = tok_state()
6058                         if (t != null) {
6059                                 process_token(t)
6060                                 // fixfull parse error if has self-closing flag, but it wasn't acknolwedged
6061                         }
6062                 }
6063         }
6064         parse_init()
6065         parse_main_loop()
6066
6067         if (flag_fragment_parsing) {
6068                 return fragment_root.children
6069         }
6070         return doc.children
6071 }
6072
// public interface of this file: the parse function and the Node constructor
var this_module = {}
this_module.parse = parse_html
this_module.Node = Node

// publish as a module export when context is 'module', otherwise as
// window.peach_parser
if (context !== 'module') {
        window.peach_parser = this_module
} else {
        module.exports = this_module
}
6083
6084 }).call(this)