JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
preserve <script> tags and whitespace in plaintext elements
[peach-html5-editor.git] / parser.js
1 // todo remove unused variables
2 // todo remove debug log, or make a way to access it
3
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
6 //
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
10 // later version.
11 //
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
15 // details.
16 //
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20
21 // This file implements a thorough parser for html5, meant to be used by a
22 // WYSIWYG editor.
23
24 // The implementation is a pretty direct implementation of the parsing algorithm
25 // described here:
26 //
27 //     http://www.w3.org/TR/html5/syntax.html
28 //
29 // except for some places marked "WHATWG" that are implemented as described here:
30 //
31 //     https://html.spec.whatwg.org/multipage/syntax.html
32 //
33 // This code passes all of the tests in the .dat files at:
34 //
35 //     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
36
37
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
41 //
42 // See README.md for how to run this file in the browser or in node.js.
43 //
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
46 //
47 // Call it like this:
48 //
49 //     peach_parser.parse("<p><b>hi</p>")
50 //
51 // Or, if you don't want <html><head><body>/etc, do this:
52 //
53 //     peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
54 //
55 // return value is an array of Nodes, A Node contains:
56 //     type: one of: "tag", "text", "comment", "doctype"
57 //     text: contents for text/comment nodes
58 //     attrs: object of attributes, eg {href: "#main"}
59 //     children: array of Nodes
60 //     namespace: one of: "html", "mathml", "svg"
61 //     parent: another Node or null
62
63 // This code is a work in progress, eg try search this file for "fixfull",
64 // "TODO" and "FIXME"
65
66
67 // Notes:  stacks/lists
68 //
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
71 // his head straight:
72 //
73 // stacks grow downward (current element is index=0)
74 //
75 // example: open_els = [a, b, c, d, e, f, g]
76 //
77 // "grows downwards" means it's visualized like this: (index: el "names")
78 //
79 //   6: g "start of the list", "topmost", "first"
80 //   5: f
81 //   4: e "previous" (to d), "above", "before"
82 //   3: d   (previous/next are relative to this element)
83 //   2: c "next", "after", "lower", "below"
84 //   1: b
85 //   0: a "end of the list", "current node", "bottommost", "last"
86 (function () {
87
88 var NS_HTML, NS_MATHML, NS_SVG, QUIRKS_LIMITED, QUIRKS_NO, QUIRKS_YES, TYPE_AAA_BOOKMARK, TYPE_AFE_MARKER, TYPE_COMMENT, TYPE_DOCTYPE, TYPE_END_TAG, TYPE_EOF, TYPE_START_TAG, TYPE_TAG, TYPE_TEXT, _decode_named_char_ref, adjust_foreign_attributes, adjust_mathml_attributes, adjust_svg_attributes, adp_els, alnum, context, debug_log, debug_log_each, debug_log_reset, decode_named_char_ref, decode_named_char_ref_cache, decode_named_char_ref_el, digits, el_is_special, el_is_special_not_adp, end_tag_implied, exports, foreign_attr_fixes, formatting_elements, foster_parenting_targets, from_code_point, g_debug_log, h_tags, hex_chars, is_html_integration, is_input_hidden_tok, is_lc_alpha, is_mathml_text_integration_point, is_space, is_space_tok, is_uc_alpha, lc_alpha, legacy_char_refs, mathml_elements, mathml_text_integration, new_aaa_bookmark, new_afe_marker, new_character_token, new_comment_token, new_doctype_token, new_element, new_end_tag, new_eof_token, new_open_tag, new_text_node, parse_html, prev_node_id, quirks_yes_pi_prefixes, space_chars, special_elements, svg_attribute_fixes, svg_elements, svg_name_fixes, tag_name_chars, uc_alpha, unicode_fixes, whitespace_chars
89
// Work out where we are running. A defined `module` with a non-null
// `module.exports` means a module loader; anything else is treated as a
// browser, where an empty window.peach_parser object is created.
if ((typeof module) === 'undefined' || module.exports == null) {
        context = 'browser'
        window.peach_parser = {}
} else {
        context = 'module'
}
96
// Convert a code point (number) into a string.
// The native String.fromCodePoint is used when the runtime provides it.
// Otherwise the string is built by hand: values up to 0xffff map to a single
// code unit, anything above is encoded as a surrogate pair.
from_code_point = function (x) {
        var offset
        if (String.fromCodePoint != null) {
                return String.fromCodePoint(x)
        }
        if (x <= 0xffff) {
                return String.fromCharCode(x)
        }
        offset = x - 0x10000
        return String.fromCharCode(0xd800 + (offset >> 10), 0xdc00 + (offset % 0x400))
}
108
// Every node is an object of the Node class. Its "type" is one of these.
//
// Types that can appear in the tree:
TYPE_TAG = 'tag' // carries: name, {attributes}, [children]
TYPE_TEXT = 'text' // carries: "text"
TYPE_COMMENT = 'comment'
TYPE_DOCTYPE = 'doctype'
// Types emitted by the tokenizer that shouldn't end up in the tree:
TYPE_START_TAG = 4 // carries: name, [attributes ([key,value]...) in reverse order], [children]
TYPE_END_TAG = 5 // carries: name
TYPE_EOF = 6
TYPE_AFE_MARKER = 7 // see http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
TYPE_AAA_BOOKMARK = 8 // see http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm

// Namespaces (values of Node.namespace)
NS_HTML = 'html'
NS_MATHML = 'mathml'
NS_SVG = 'svg'

// Quirks modes
QUIRKS_NO = 'no'
QUIRKS_LIMITED = 'limited'
QUIRKS_YES = 'yes'
130
// Debug messages are buffered instead of printed, so that a caller can decide
// later whether to show them (eg only for tests that fail).
g_debug_log = []
// throw away everything buffered so far
debug_log_reset = function () {
        g_debug_log = []
}
// append one message to the buffer
debug_log = function (str) {
        g_debug_log.push(str)
}
// call cb once for each buffered message, oldest first
debug_log_each = function (cb) {
        var idx = 0
        while (idx < g_debug_log.length) {
                cb(g_debug_log[idx])
                idx += 1
        }
}
145
// counter behind auto-assigned node ids (incremented before each use)
prev_node_id = 0

// Node: the object used for tree nodes and for tokenizer tokens.
//
// type: one of the TYPE_* constants
// args: optional object supplying initial values for any of:
//     name, text, attrs, children, namespace, parent, token, flags
//     attrs_a: attributes in progress (TYPE_START_TAG only); the older
//              spelling "attr_k" is still accepted
//     id: when given, the new node's id is this value with "+" appended;
//         otherwise the next number from prev_node_id is used
function Node (type, args) {
        if (args == null) {
                args = {}
        }
        this.type = type // one of the TYPE_* constants above
        this.name = args.name != null ? args.name : '' // tag name
        this.text = args.text != null ? args.text : '' // contents for text/comment nodes
        this.attrs = args.attrs != null ? args.attrs : {}
        this.children = args.children != null ? args.children : []
        this.namespace = args.namespace != null ? args.namespace : NS_HTML
        this.parent = args.parent != null ? args.parent : null
        // private:
        // attrs in progress, TYPE_START_TAG only. This used to be read from
        // args.attr_k alone, so a value passed under the property's own name
        // (attrs_a) was silently dropped. Both spellings are honoured now.
        if (args.attrs_a != null) {
                this.attrs_a = args.attrs_a
        } else if (args.attr_k != null) {
                this.attrs_a = args.attr_k
        } else {
                this.attrs_a = []
        }
        this.token = args.token != null ? args.token : null
        this.flags = args.flags != null ? args.flags : {}
        if (args.id != null) {
                this.id = args.id + "+"
        } else {
                this.id = "" + (++prev_node_id)
        }
}

// Record that the self-closing flag was acknowledged. The flag is stored on
// the node's token when it has one, otherwise on the node itself.
Node.prototype.acknowledge_self_closing = function () {
        if (this.token != null) {
                this.token.flag('did_self_close', true)
        } else {
                this.flag('did_self_close', true)
        }
}

// Combined getter/setter for flags:
//     flag(key, value) stores value (value must not be null/undefined)
//     flag(key) returns the stored value (undefined if never set)
Node.prototype.flag = function (key, value) {
        if (value != null) {
                this.flags[key] = value
        } else {
                return this.flags[key]
        }
}
184
// Shorthand constructors. Each one takes only the value the parser normally
// has in hand at the moment it creates that kind of node/token.

// start tag token
new_open_tag = function (name) {
        var args = {name: name}
        return new Node(TYPE_START_TAG, args)
}
// end tag token
new_end_tag = function (name) {
        var args = {name: name}
        return new Node(TYPE_END_TAG, args)
}
// element node
new_element = function (name) {
        var args = {name: name}
        return new Node(TYPE_TAG, args)
}
// text node; character tokens are the same thing
new_text_node = function (txt) {
        var args = {text: txt}
        return new Node(TYPE_TEXT, args)
}
new_character_token = new_text_node
// comment
new_comment_token = function (txt) {
        var args = {text: txt}
        return new Node(TYPE_COMMENT, args)
}
// doctype
new_doctype_token = function (name) {
        var args = {name: name}
        return new Node(TYPE_DOCTYPE, args)
}
// the remaining kinds carry no data at all
new_eof_token = function () {
        return new Node(TYPE_EOF)
}
new_afe_marker = function () {
        return new Node(TYPE_AFE_MARKER)
}
new_aaa_bookmark = function () {
        return new Node(TYPE_AAA_BOOKMARK)
}
214
// character classes, as strings to be searched with indexOf
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = lc_alpha + uc_alpha + digits
hex_chars = digits + "abcdefABCDEF"

// true only for a one-character string holding an upper case ASCII letter
is_uc_alpha = function (str) {
        if (str.length !== 1) {
                return false
        }
        return uc_alpha.indexOf(str) !== -1
}
// true only for a one-character string holding a lower case ASCII letter
is_lc_alpha = function (str) {
        if (str.length !== 1) {
                return false
        }
        return lc_alpha.indexOf(str) !== -1
}

// tag names may contain dashes (some SVG elements have them)
tag_name_chars = alnum + "-"

// "space characters" as defined by
// http://www.w3.org/TR/html5/infrastructure.html#space-character
space_chars = "\u0009\u000a\u000c\u000d\u0020"
// true only for a one-character string holding a space character
is_space = function (txt) {
        if (txt.length !== 1) {
                return false
        }
        return space_chars.indexOf(txt) !== -1
}
// true only for a text token whose text is exactly one space character
is_space_tok = function (t) {
        if (t.type !== TYPE_TEXT) {
                return false
        }
        if (t.text.length !== 1) {
                return false
        }
        return space_chars.indexOf(t.text) !== -1
}
239
// Does start tag token t have type="hidden" (compared case-insensitively)?
// Only the first "type" entry found in t.attrs_a is looked at. The tag name
// itself is not checked here. Anything that isn't a start tag token gives
// false.
is_input_hidden_tok = function (t) {
        var idx, attr
        if (t.type === TYPE_START_TAG) {
                for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                        attr = t.attrs_a[idx]
                        if (attr[0] === 'type') {
                                return attr[1].toLowerCase() === 'hidden'
                        }
                }
        }
        return false
}
256
257 // https://en.wikipedia.org/wiki/Whitespace_character#Unicode
258 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
259
// Replacement characters, keyed by numeric code point.
// NOTE(review): presumably applied when decoding numeric character
// references; the code that reads this table is outside this section.
unicode_fixes = {
        0x00: "\uFFFD",
        0x80: "\u20AC",
        0x82: "\u201A",
        0x83: "\u0192",
        0x84: "\u201E",
        0x85: "\u2026",
        0x86: "\u2020",
        0x87: "\u2021",
        0x88: "\u02C6",
        0x89: "\u2030",
        0x8A: "\u0160",
        0x8B: "\u2039",
        0x8C: "\u0152",
        0x8E: "\u017D",
        0x91: "\u2018",
        0x92: "\u2019",
        0x93: "\u201C",
        0x94: "\u201D",
        0x95: "\u2022",
        0x96: "\u2013",
        0x97: "\u2014",
        0x98: "\u02DC",
        0x99: "\u2122",
        0x9A: "\u0161",
        0x9B: "\u203A",
        0x9C: "\u0153",
        0x9E: "\u017E",
        0x9F: "\u0178"
}
289
290 quirks_yes_pi_prefixes = [
291         "+//silmaril//dtd html pro v0r11 19970101//",
292         "-//as//dtd html 3.0 aswedit + extensions//",
293         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
294         "-//ietf//dtd html 2.0 level 1//",
295         "-//ietf//dtd html 2.0 level 2//",
296         "-//ietf//dtd html 2.0 strict level 1//",
297         "-//ietf//dtd html 2.0 strict level 2//",
298         "-//ietf//dtd html 2.0 strict//",
299         "-//ietf//dtd html 2.0//",
300         "-//ietf//dtd html 2.1e//",
301         "-//ietf//dtd html 3.0//",
302         "-//ietf//dtd html 3.2 final//",
303         "-//ietf//dtd html 3.2//",
304         "-//ietf//dtd html 3//",
305         "-//ietf//dtd html level 0//",
306         "-//ietf//dtd html level 1//",
307         "-//ietf//dtd html level 2//",
308         "-//ietf//dtd html level 3//",
309         "-//ietf//dtd html strict level 0//",
310         "-//ietf//dtd html strict level 1//",
311         "-//ietf//dtd html strict level 2//",
312         "-//ietf//dtd html strict level 3//",
313         "-//ietf//dtd html strict//",
314         "-//ietf//dtd html//",
315         "-//metrius//dtd metrius presentational//",
316         "-//microsoft//dtd internet explorer 2.0 html strict//",
317         "-//microsoft//dtd internet explorer 2.0 html//",
318         "-//microsoft//dtd internet explorer 2.0 tables//",
319         "-//microsoft//dtd internet explorer 3.0 html strict//",
320         "-//microsoft//dtd internet explorer 3.0 html//",
321         "-//microsoft//dtd internet explorer 3.0 tables//",
322         "-//netscape comm. corp.//dtd html//",
323         "-//netscape comm. corp.//dtd strict html//",
324         "-//o'reilly and associates//dtd html 2.0//",
325         "-//o'reilly and associates//dtd html extended 1.0//",
326         "-//o'reilly and associates//dtd html extended relaxed 1.0//",
327         "-//sq//dtd html 2.0 hotmetal + extensions//",
328         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
329         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
330         "-//spyglass//dtd html 2.0 extended//",
331         "-//sun microsystems corp.//dtd hotjava html//",
332         "-//sun microsystems corp.//dtd hotjava strict html//",
333         "-//w3c//dtd html 3 1995-03-24//",
334         "-//w3c//dtd html 3.2 draft//",
335         "-//w3c//dtd html 3.2 final//",
336         "-//w3c//dtd html 3.2//",
337         "-//w3c//dtd html 3.2s draft//",
338         "-//w3c//dtd html 4.0 frameset//",
339         "-//w3c//dtd html 4.0 transitional//",
340         "-//w3c//dtd html experimental 19960712//",
341         "-//w3c//dtd html experimental 970421//",
342         "-//w3c//dtd w3 html//",
343         "-//w3o//dtd w3 html 3.0//",
344         "-//webtechs//dtd mozilla html 2.0//",
345         "-//webtechs//dtd mozilla html//",
346 ]
347
// These are the character references that don't need a terminating semicolon
// min length: 2, max: 6, none are a prefix of any other.
//
// Keys are case-sensitive entity names (without "&" or ";"), values are the
// single character each one stands for. The two invisible characters (nbsp
// and shy, the soft hyphen) are written as escapes so they can't be lost or
// mistaken for an empty string when the file is edited or copied.
legacy_char_refs = {
        Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
        aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
        aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
        Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
        curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
        ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
        euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
        Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
        igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
        lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
        Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
        Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
        Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
        pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
        shy: "\u00ad", sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
        times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
        ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
        yen: '¥', yuml: 'ÿ'
}
370
371 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
372 //raw_text_elements = ['script', 'style']
373 //escapable_raw_text_elements = ['textarea', 'title']
374 // http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
375 svg_elements = [
376         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
377         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
378         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
379         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
380         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
381         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
382         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
383         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
384         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
385         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
386         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
387         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
388         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
389         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
390         'view', 'vkern'
391 ]
392
// MathML element names, from
// http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
// Each name appears exactly once ('mi' used to be listed twice).
mathml_elements = [
        'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
        'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
        'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
        'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
        'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
        'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
        'determinant', 'diff', 'divergence', 'divide', 'domain',
        'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
        'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
        'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
        'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
        'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
        'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
        'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
        'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
        'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
        'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
        'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
        'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
        'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
        'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
        'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
        'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
        'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
        'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
        'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
        'vectorproduct', 'xor'
]
424 // foreign_elements = [svg_elements..., mathml_elements...]
425 //normal_elements = All other allowed HTML elements are normal elements.
426
427 special_elements = {
428         // HTML:
429         address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
430         aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
431         blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
432         caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
433         details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
434         embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
435         footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
436         h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
437         header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
438         img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
439         listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,
440
441         menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these
442
443         meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
444         noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
445         plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
446         select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
447         table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
448         textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
449         tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,
450
451         // MathML: 
452         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
453         'annotation-xml': NS_MATHML,
454
455         // SVG: 
456         foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
457 }
458
459 formatting_elements = {
460         a: true, b: true, big: true, code: true, em: true, font: true, i: true,
461         nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
462         u: true
463 }
464
// tag name -> namespace for the elements that count as MathML text
// integration points
mathml_text_integration = {
        mi: NS_MATHML,
        mo: NS_MATHML,
        mn: NS_MATHML,
        ms: NS_MATHML,
        mtext: NS_MATHML
}
// true when el's name is in the table above with el's namespace
is_mathml_text_integration_point = function (el) {
        var wanted_namespace = mathml_text_integration[el.name]
        return wanted_namespace === el.namespace
}
// Is element el an HTML integration point?
//   - MathML: only annotation-xml, and only when its "encoding" attribute is
//     "text/html" or "application/xhtml+xml" (compared case-insensitively)
//   - SVG: foreignObject, desc and title
// Reads el.attrs, which is why this must be given an element, not a token.
is_html_integration = function (el) { // DON'T PASS A TOKEN
        var encoding
        if (el.namespace === NS_MATHML) {
                if (el.name !== 'annotation-xml' || el.attrs.encoding == null) {
                        return false
                }
                encoding = el.attrs.encoding.toLowerCase()
                return encoding === 'text/html' || encoding === 'application/xhtml+xml'
        }
        if (el.namespace !== NS_SVG) {
                return false
        }
        return el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title'
}
492
493 h_tags = {
494         h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
495 }
496
497 foster_parenting_targets = {
498         table: NS_HTML,
499         tbody: NS_HTML,
500         tfoot: NS_HTML,
501         thead: NS_HTML,
502         tr: NS_HTML
503 }
504
505 end_tag_implied = {
506         dd: NS_HTML,
507         dt: NS_HTML,
508         li: NS_HTML,
509         option: NS_HTML,
510         optgroup: NS_HTML,
511         p: NS_HTML,
512         rb: NS_HTML,
513         rp: NS_HTML,
514         rt: NS_HTML,
515         rtc: NS_HTML
516 }
517
// Is element e in the "special" category?
//
// special_elements maps each tag name to a single namespace, but "title" is
// listed there twice (once as HTML, once as SVG). In an object literal the
// later entry wins, so the table only knows the SVG one. The HTML <title>
// element is therefore checked for explicitly.
el_is_special = function (e) {
        if (special_elements[e.name] === e.namespace) {
                return true
        }
        return e.name === 'title' && e.namespace === NS_HTML
}

// the three special elements that el_is_special_not_adp excludes
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
// Special (as decided by el_is_special), but not address, div or p.
el_is_special_not_adp = function (el) {
        return el_is_special(el) && adp_els[el.name] !== el.namespace
}
526
527 svg_name_fixes = {
528         altglyph: 'altGlyph',
529         altglyphdef: 'altGlyphDef',
530         altglyphitem: 'altGlyphItem',
531         animatecolor: 'animateColor',
532         animatemotion: 'animateMotion',
533         animatetransform: 'animateTransform',
534         clippath: 'clipPath',
535         feblend: 'feBlend',
536         fecolormatrix: 'feColorMatrix',
537         fecomponenttransfer: 'feComponentTransfer',
538         fecomposite: 'feComposite',
539         feconvolvematrix: 'feConvolveMatrix',
540         fediffuselighting: 'feDiffuseLighting',
541         fedisplacementmap: 'feDisplacementMap',
542         fedistantlight: 'feDistantLight',
543         fedropshadow: 'feDropShadow',
544         feflood: 'feFlood',
545         fefunca: 'feFuncA',
546         fefuncb: 'feFuncB',
547         fefuncg: 'feFuncG',
548         fefuncr: 'feFuncR',
549         fegaussianblur: 'feGaussianBlur',
550         feimage: 'feImage',
551         femerge: 'feMerge',
552         femergenode: 'feMergeNode',
553         femorphology: 'feMorphology',
554         feoffset: 'feOffset',
555         fepointlight: 'fePointLight',
556         fespecularlighting: 'feSpecularLighting',
557         fespotlight: 'feSpotLight',
558         fetile: 'feTile',
559         feturbulence: 'feTurbulence',
560         foreignobject: 'foreignObject',
561         glyphref: 'glyphRef',
562         lineargradient: 'linearGradient',
563         radialgradient: 'radialGradient',
564         textpath: 'textPath'
565 }
566 svg_attribute_fixes = {
567         attributename: 'attributeName',
568         attributetype: 'attributeType',
569         basefrequency: 'baseFrequency',
570         baseprofile: 'baseProfile',
571         calcmode: 'calcMode',
572         clippathunits: 'clipPathUnits',
573         contentscripttype: 'contentScriptType',
574         contentstyletype: 'contentStyleType',
575         diffuseconstant: 'diffuseConstant',
576         edgemode: 'edgeMode',
577         externalresourcesrequired: 'externalResourcesRequired',
578         // WHATWG removes this: filterres: 'filterRes',
579         filterunits: 'filterUnits',
580         glyphref: 'glyphRef',
581         gradienttransform: 'gradientTransform',
582         gradientunits: 'gradientUnits',
583         kernelmatrix: 'kernelMatrix',
584         kernelunitlength: 'kernelUnitLength',
585         keypoints: 'keyPoints',
586         keysplines: 'keySplines',
587         keytimes: 'keyTimes',
588         lengthadjust: 'lengthAdjust',
589         limitingconeangle: 'limitingConeAngle',
590         markerheight: 'markerHeight',
591         markerunits: 'markerUnits',
592         markerwidth: 'markerWidth',
593         maskcontentunits: 'maskContentUnits',
594         maskunits: 'maskUnits',
595         numoctaves: 'numOctaves',
596         pathlength: 'pathLength',
597         patterncontentunits: 'patternContentUnits',
598         patterntransform: 'patternTransform',
599         patternunits: 'patternUnits',
600         pointsatx: 'pointsAtX',
601         pointsaty: 'pointsAtY',
602         pointsatz: 'pointsAtZ',
603         preservealpha: 'preserveAlpha',
604         preserveaspectratio: 'preserveAspectRatio',
605         primitiveunits: 'primitiveUnits',
606         refx: 'refX',
607         refy: 'refY',
608         repeatcount: 'repeatCount',
609         repeatdur: 'repeatDur',
610         requiredextensions: 'requiredExtensions',
611         requiredfeatures: 'requiredFeatures',
612         specularconstant: 'specularConstant',
613         specularexponent: 'specularExponent',
614         spreadmethod: 'spreadMethod',
615         startoffset: 'startOffset',
616         stddeviation: 'stdDeviation',
617         stitchtiles: 'stitchTiles',
618         surfacescale: 'surfaceScale',
619         systemlanguage: 'systemLanguage',
620         tablevalues: 'tableValues',
621         targetx: 'targetX',
622         targety: 'targetY',
623         textlength: 'textLength',
624         viewbox: 'viewBox',
625         viewtarget: 'viewTarget',
626         xchannelselector: 'xChannelSelector',
627         ychannelselector: 'yChannelSelector',
628         zoomandpan: 'zoomAndPan'
629 }
630 foreign_attr_fixes = {
631         'xlink:actuate': 'xlink actuate',
632         'xlink:arcrole': 'xlink arcrole',
633         'xlink:href': 'xlink href',
634         'xlink:role': 'xlink role',
635         'xlink:show': 'xlink show',
636         'xlink:title': 'xlink title',
637         'xlink:type': 'xlink type',
638         'xml:base': 'xml base',
639         'xml:lang': 'xml lang',
640         'xml:space': 'xml space',
641         'xmlns': 'xmlns',
642         'xmlns:xlink': 'xmlns xlink'
643 }
// Fix the case of MathML attribute names on token t, in place:
// "definitionurl" becomes "definitionURL". Each entry of t.attrs_a is a
// [name, value] pair; only the name is touched.
adjust_mathml_attributes = function (t) {
        var idx, attr
        for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                attr = t.attrs_a[idx]
                if (attr[0] !== 'definitionurl') {
                        continue
                }
                attr[0] = 'definitionURL'
        }
}
// Fix the case of SVG attribute names on token t, in place, using the
// svg_attribute_fixes table (lower case name -> correctly cased name).
// Each entry of t.attrs_a is a [name, value] pair; only the name is touched.
adjust_svg_attributes = function (t) {
        var i, a
        for (i = 0; i < t.attrs_a.length; ++i) {
                a = t.attrs_a[i]
                // Own-property check: a plain table[name] lookup also finds members
                // inherited from Object.prototype, so an attribute called
                // "constructor" would have its name replaced by a function.
                if (Object.prototype.hasOwnProperty.call(svg_attribute_fixes, a[0])) {
                        a[0] = svg_attribute_fixes[a[0]]
                }
        }
}
// Rename namespaced attributes on token t, in place, using the
// foreign_attr_fixes table (eg "xlink:href" becomes "xlink href").
// Each entry of t.attrs_a is a [name, value] pair; only the name is touched.
adjust_foreign_attributes = function (t) {
        // fixfull
        var i, a
        for (i = 0; i < t.attrs_a.length; ++i) {
                a = t.attrs_a[i]
                // Own-property check: a plain table[name] lookup also finds members
                // inherited from Object.prototype, so an attribute called
                // "constructor" would have its name replaced by a function.
                if (Object.prototype.hasOwnProperty.call(foreign_attr_fixes, a[0])) {
                        a[0] = foreign_attr_fixes[a[0]]
                }
        }
}
672
// _decode_named_char_ref()
//
// The table of named character references is _huge_. To avoid shipping it to
// browsers, the browser build gets the browser itself to do the decoding;
// only the module build loads the table (from a separate helper file).
if (context !== 'module') {
        // scratch element, reused for every lookup
        decode_named_char_ref_el = document.createElement('textarea')
        _decode_named_char_ref = function (txt) {
                var entity, result
                entity = "&" + txt + ";"
                decode_named_char_ref_el.innerHTML = entity
                result = decode_named_char_ref_el.value
                // text that comes back unchanged was not decoded
                return result === entity ? null : result
        }
} else {
        _decode_named_char_ref = require('./parser_no_browser_helper.js')
}
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Returns whatever _decode_named_char_ref returns for that name. Results,
// including failed lookups, are remembered in decode_named_char_ref_cache so
// each name is only decoded once.
decode_named_char_ref_cache = {}
decode_named_char_ref = function (txt) {
        var decoded
        // Own-property check: a plain cache[txt] lookup also finds members
        // inherited from Object.prototype, so a name such as "constructor"
        // would be "decoded" to a function instead of being looked up.
        if (Object.prototype.hasOwnProperty.call(decode_named_char_ref_cache, txt)) {
                return decode_named_char_ref_cache[txt]
        }
        decoded = _decode_named_char_ref(txt)
        decode_named_char_ref_cache[txt] = decoded
        return decoded
}
706
707 parse_html = function (args_html, args) {
708         var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
709         if (args == null) {
710                 args = {}
711         }
712         txt = null
713         cur = null // index of next char in txt to be parsed
714         // declare doc and tokenizer variables so they're in scope below
715         doc = null
716         open_els = null // stack of open elements
717         afe = null // active formatting elements
718         template_ins_modes = null
719         ins_mode = null
720         original_ins_mode = null
721         tok_state = null
722         tok_cur_tag = null // partially parsed tag
723         flag_scripting = null
724         flag_frameset_ok = null
725         flag_parsing = null
726         flag_foster_parenting = null
727         form_element_pointer = null
728         temporary_buffer = null
729         pending_table_character_tokens = null
730         head_element_pointer = null
731         flag_fragment_parsing = null
732         context_element = null
733
734         stop_parsing = function () {
735                 flag_parsing = false
736         }
737
738         parse_error = function () {
739                 if (args.error_cb != null) {
740                         args.error_cb(cur)
741                 }
742         }
743
744         // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
745         // "Noah's Ark clause" but with three
746         afe_push = function (new_el) {
747                 var attrs_match, el, i, j, k, matches, v
748                 matches = 0
749                 for (i = 0; i < afe.length; ++i) {
750                         el = afe[i]
751                         if (el.type === TYPE_AFE_MARKER) {
752                                 break
753                         }
754                         if (el.name === new_el.name && el.namespace === new_el.namespace) {
755                                 attrs_match = true
756                                 for (k in el.attrs) {
757                                         v = el.attrs[k]
758                                         if (new_el.attrs[k] !== v) {
759                                                 attrs_match = false
760                                                 break
761                                         }
762                                 }
763                                 if (attrs_match) {
764                                         for (k in new_el.attrs) {
765                                                 v = new_el.attrs[k]
766                                                 if (el.attrs[k] !== v) {
767                                                         attrs_match = false
768                                                         break
769                                                 }
770                                         }
771                                 }
772                                 if (attrs_match) {
773                                         matches += 1
774                                         if (matches === 3) {
775                                                 afe.splice(i, 1)
776                                                 break
777                                         }
778                                 }
779                         }
780                 }
781                 afe.unshift(new_el)
782         }
783
784         afe_push_marker = function () {
785                 afe.unshift(new_afe_marker())
786         }
787
        // the functions below implement the Tree Construction algorithm
789         // http://www.w3.org/TR/html5/syntax.html#tree-construction
790
791         // But first... the helpers
792         template_tag_is_open = function () {
793                 var i, el
794                 for (i = 0; i < open_els.length; ++i) {
795                         el = open_els[i]
796                         if (el.name === 'template' && el.namespace === NS_HTML) {
797                                 return true
798                         }
799                 }
800                 return false
801         }
802         is_in_scope_x = function (tag_name, scope, namespace) {
803                 var i, el
804                 for (i = 0; i < open_els.length; ++i) {
805                         el = open_els[i]
806                         if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
807                                 return true
808                         }
809                         if (scope[el.name] === el.namespace) {
810                                 return false
811                         }
812                 }
813                 return false
814         }
815         is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
816                 var i, el
817                 for (i = 0; i < open_els.length; ++i) {
818                         el = open_els[i]
819                         if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
820                                 return true
821                         }
822                         if (scope[el.name] === el.namespace) {
823                                 return false
824                         }
825                         if (scope2[el.name] === el.namespace) {
826                                 return false
827                         }
828                 }
829                 return false
830         }
831         standard_scopers = {
832                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
833                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
834                 template: NS_HTML,
835
836                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
837                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
838
839                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
840         }
841         button_scopers = { button: NS_HTML }
842         li_scopers = { ol: NS_HTML, ul: NS_HTML }
843         table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
844         is_in_scope = function (tag_name, namespace) {
845                 if (namespace == null) {
846                         namespace = null
847                 }
848                 return is_in_scope_x(tag_name, standard_scopers, namespace)
849         }
850         is_in_button_scope = function (tag_name, namespace) {
851                 if (namespace == null) {
852                         namespace = null
853                 }
854                 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
855         }
856         is_in_table_scope = function (tag_name, namespace) {
857                 if (namespace == null) {
858                         namespace = null
859                 }
860                 return is_in_scope_x(tag_name, table_scopers, namespace)
861         }
862         // aka is_in_list_item_scope
863         is_in_li_scope = function (tag_name, namespace) {
864                 if (namespace == null) {
865                         namespace = null
866                 }
867                 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
868         }
869         is_in_select_scope = function (tag_name, namespace) {
870                 var i, t
871                 if (namespace == null) {
872                         namespace = null
873                 }
874                 for (i = 0; i < open_els.length; ++i) {
875                         t = open_els[i]
876                         if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
877                                 return true
878                         }
879                         if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
880                                 return false
881                         }
882                 }
883                 return false
884         }
885         // this checks for a particular element, not by name
886         // this requires a namespace match
887         el_is_in_scope = function (needle) {
888                 var i
889                 for (i = 0; i < open_els.length; ++i) {
890                         el = open_els[i]
891                         if (el === needle) {
892                                 return true
893                         }
894                         if (standard_scopers[el.name] === el.namespace) {
895                                 return false
896                         }
897                 }
898                 return false
899         }
900
901         clear_to_table_stopers = {
902                 'table': true,
903                 'template': true,
904                 'html': true
905         }
906         clear_stack_to_table_context = function () {
907                 while (true) {
908                         if (clear_to_table_stopers[open_els[0].name] != null) {
909                                 break
910                         }
911                         open_els.shift()
912                 }
913         }
914         clear_to_table_body_stopers = {
915                 tbody: NS_HTML,
916                 tfoot: NS_HTML,
917                 thead: NS_HTML,
918                 template: NS_HTML,
919                 html: NS_HTML
920         }
921         clear_stack_to_table_body_context = function () {
922                 while (true) {
923                         if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
924                                 break
925                         }
926                         open_els.shift()
927                 }
928         }
929         clear_to_table_row_stopers = {
930                 'tr': true,
931                 'template': true,
932                 'html': true
933         }
934         clear_stack_to_table_row_context = function () {
935                 while (true) {
936                         if (clear_to_table_row_stopers[open_els[0].name] != null) {
937                                 break
938                         }
939                         open_els.shift()
940                 }
941         }
942         clear_afe_to_marker = function () {
943                 var el
944                 while (true) {
945                         if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
946                                 return
947                         }
948                         el = afe.shift()
949                         if (el.type === TYPE_AFE_MARKER) {
950                                 return
951                         }
952                 }
953         }
954
955         // 8.2.3.1 ...
956         // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
957         reset_ins_mode = function () {
958                 var ancestor, ancestor_i, last, node, node_i
959                 // 1. Let last be false.
960                 last = false
961                 // 2. Let node be the last node in the stack of open elements.
962                 node_i = 0
963                 node = open_els[node_i]
964                 // 3. Loop: If node is the first node in the stack of open elements,
965                 // then set last to true, and, if the parser was originally created as
966                 // part of the HTML fragment parsing algorithm (fragment case) set node
967                 // to the context element.
968                 while (true) {
969                         if (node_i === open_els.length - 1) {
970                                 last = true
971                                 if (flag_fragment_parsing) {
972                                         node = context_element
973                                 }
974                         }
975                         // 4. If node is a select element, run these substeps:
976                         if (node.name === 'select' && node.namespace === NS_HTML) {
977                                 // 1. If last is true, jump to the step below labeled done.
978                                 if (!last) {
979                                         // 2. Let ancestor be node.
980                                         ancestor_i = node_i
981                                         ancestor = node
982                                         // 3. Loop: If ancestor is the first node in the stack of
983                                         // open elements, jump to the step below labeled done.
984                                         while (true) {
985                                                 if (ancestor_i === open_els.length - 1) {
986                                                         break
987                                                 }
988                                                 // 4. Let ancestor be the node before ancestor in the stack
989                                                 // of open elements.
990                                                 ancestor_i += 1
991                                                 ancestor = open_els[ancestor_i]
992                                                 // 5. If ancestor is a template node, jump to the step below
993                                                 // labeled done.
994                                                 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
995                                                         break
996                                                 }
997                                                 // 6. If ancestor is a table node, switch the insertion mode
998                                                 // to "in select in table" and abort these steps.
999                                                 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
1000                                                         ins_mode = ins_mode_in_select_in_table
1001                                                         return
1002                                                 }
1003                                                 // 7. Jump back to the step labeled loop.
1004                                         }
1005                                 }
1006                                 // 8. Done: Switch the insertion mode to "in select" and abort
1007                                 // these steps.
1008                                 ins_mode = ins_mode_in_select
1009                                 return
1010                         }
1011                         // 5. If node is a td or th element and last is false, then switch
1012                         // the insertion mode to "in cell" and abort these steps.
1013                         if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1014                                 ins_mode = ins_mode_in_cell
1015                                 return
1016                         }
1017                         // 6. If node is a tr element, then switch the insertion mode to "in
1018                         // row" and abort these steps.
1019                         if (node.name === 'tr' && node.namespace === NS_HTML) {
1020                                 ins_mode = ins_mode_in_row
1021                                 return
1022                         }
1023                         // 7. If node is a tbody, thead, or tfoot element, then switch the
1024                         // insertion mode to "in table body" and abort these steps.
1025                         if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1026                                 ins_mode = ins_mode_in_table_body
1027                                 return
1028                         }
1029                         // 8. If node is a caption element, then switch the insertion mode
1030                         // to "in caption" and abort these steps.
1031                         if (node.name === 'caption' && node.namespace === NS_HTML) {
1032                                 ins_mode = ins_mode_in_caption
1033                                 return
1034                         }
1035                         // 9. If node is a colgroup element, then switch the insertion mode
1036                         // to "in column group" and abort these steps.
1037                         if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1038                                 ins_mode = ins_mode_in_column_group
1039                                 return
1040                         }
1041                         // 10. If node is a table element, then switch the insertion mode to
1042                         // "in table" and abort these steps.
1043                         if (node.name === 'table' && node.namespace === NS_HTML) {
1044                                 ins_mode = ins_mode_in_table
1045                                 return
1046                         }
1047                         // 11. If node is a template element, then switch the insertion mode
1048                         // to the current template insertion mode and abort these steps.
1049                         if (node.name === 'template' && node.namespace === NS_HTML) {
1050                                 ins_mode = template_ins_modes[0]
1051                                 return
1052                         }
1053                         // 12. If node is a head element and last is true, then switch the
1054                         // insertion mode to "in body" ("in body"! not "in head"!) and abort
1055                         // these steps. (fragment case)
1056                         if (node.name === 'head' && node.namespace === NS_HTML && last) {
1057                                 ins_mode = ins_mode_in_body
1058                                 return
1059                         }
1060                         // 13. If node is a head element and last is false, then switch the
1061                         // insertion mode to "in head" and abort these steps.
1062                         if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1063                                 ins_mode = ins_mode_in_head
1064                                 return
1065                         }
1066                         // 14. If node is a body element, then switch the insertion mode to
1067                         // "in body" and abort these steps.
1068                         if (node.name === 'body' && node.namespace === NS_HTML) {
1069                                 ins_mode = ins_mode_in_body
1070                                 return
1071                         }
1072                         // 15. If node is a frameset element, then switch the insertion mode
1073                         // to "in frameset" and abort these steps. (fragment case)
1074                         if (node.name === 'frameset' && node.namespace === NS_HTML) {
1075                                 ins_mode = ins_mode_in_frameset
1076                                 return
1077                         }
1078                         // 16. If node is an html element, run these substeps:
1079                         if (node.name === 'html' && node.namespace === NS_HTML) {
1080                                 // 1. If the head element pointer is null, switch the insertion
1081                                 // mode to "before head" and abort these steps. (fragment case)
1082                                 if (head_element_pointer === null) {
1083                                         ins_mode = ins_mode_before_head
1084                                 } else {
1085                                         // 2. Otherwise, the head element pointer is not null,
1086                                         // switch the insertion mode to "after head" and abort these
1087                                         // steps.
1088                                         ins_mode = ins_mode_after_head
1089                                 }
1090                                 return
1091                         }
1092                         // 17. If last is true, then switch the insertion mode to "in body"
1093                         // and abort these steps. (fragment case)
1094                         if (last) {
1095                                 ins_mode = ins_mode_in_body
1096                                 return
1097                         }
1098                         // 18. Let node now be the node before node in the stack of open
1099                         // elements.
1100                         node_i += 1
1101                         node = open_els[node_i]
1102                         // 19. Return to the step labeled loop.
1103                 }
1104         }
1105
1106         // 8.2.3.2
1107
1108         // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
1109         adjusted_current_node = function () {
1110                 if (open_els.length === 1 && flag_fragment_parsing) {
1111                         return context_element
1112                 }
1113                 return open_els[0]
1114         }
1115
1116         // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1117         // this implementation is structured (mostly) as described at the link above.
1118         // capitalized comments are the "labels" described at the link above.
1119         reconstruct_afe = function () {
1120                 var el, i
1121                 if (afe.length === 0) {
1122                         return
1123                 }
1124                 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
1125                         return
1126                 }
1127                 // Rewind
1128                 i = 0
1129                 while (true) {
1130                         if (i === afe.length - 1) {
1131                                 break
1132                         }
1133                         i += 1
1134                         if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
1135                                 i -= 1 // Advance
1136                                 break
1137                         }
1138                 }
1139                 // Create
1140                 while (true) {
1141                         el = insert_html_element(afe[i].token)
1142                         afe[i] = el
1143                         if (i === 0) {
1144                                 break
1145                         }
1146                         i -= 1 // Advance
1147                 }
1148         }
1149
1150         // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1151         // adoption agency algorithm
1152         // overview here:
1153         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1154         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1155         //   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1156         adoption_agency = function (subject) {
1157                 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, s, t, u, w, y, z
1158 // this block implements tha W3C spec
1159 //              # 1. If the current node is an HTML element whose tag name is subject,
1160 //              # then run these substeps:
1161 //              #
1162 //              # 1. Let element be the current node.
1163 //              #
1164 //              # 2. Pop element off the stack of open elements.
1165 //              #
1166 //              # 3. If element is also in the list of active formatting elements,
1167 //              # remove the element from the list.
1168 //              #
1169 //              # 4. Abort the adoption agency algorithm.
1170 //              if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1171 //                      el = open_els.shift()
1172 //                      # remove it from the list of active formatting elements (if found)
1173 //                      for t, i in afe
1174 //                              if t is el
1175 //                                      afe.splice i, 1
1176 //                                      break
1177 //                      return
1178 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1179                 // If the current node is an HTML element whose tag name is subject, and
1180                 // the current node is not in the list of active formatting elements,
1181                 // then pop the current node off the stack of open elements, and abort
1182                 // these steps.
1183                 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1184                         // remove it from the list of active formatting elements (if found)
1185                         in_afe = false
1186                         for (i = 0; i < afe.length; ++i) {
1187                                 el = afe[i]
1188                                 if (el === open_els[0]) {
1189                                         in_afe = true
1190                                         break
1191                                 }
1192                         }
1193                         if (!in_afe) {
1194                                 open_els.shift()
1195                                 return
1196                         }
1197                         // fall through
1198                 }
1199 // END WHATWG
1200                 outer = 0
1201                 while (true) {
1202                         if (outer >= 8) {
1203                                 return
1204                         }
1205                         outer += 1
1206                         // 5. Let formatting element be the last element in the list of
1207                         // active formatting elements that: is between the end of the list
1208                         // and the last scope marker in the list, if any, or the start of
1209                         // the list otherwise, and  has the tag name subject.
1210                         fe = null
1211                         for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1212                                 t = afe[fe_of_afe]
1213                                 if (t.type === TYPE_AFE_MARKER) {
1214                                         break
1215                                 }
1216                                 if (t.name === subject) {
1217                                         fe = t
1218                                         break
1219                                 }
1220                         }
1221                         // If there is no such element, then abort these steps and instead
1222                         // act as described in the "any other end tag" entry above.
1223                         if (fe === null) {
1224                                 in_body_any_other_end_tag(subject)
1225                                 return
1226                         }
1227                         // 6. If formatting element is not in the stack of open elements,
1228                         // then this is a parse error; remove the element from the list, and
1229                         // abort these steps.
1230                         in_open_els = false
1231                         for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1232                                 t = open_els[fe_of_open_els]
1233                                 if (t === fe) {
1234                                         in_open_els = true
1235                                         break
1236                                 }
1237                         }
1238                         if (!in_open_els) {
1239                                 parse_error()
1240                                 // "remove it from the list" must mean afe, since it's not in open_els
1241                                 afe.splice(fe_of_afe, 1)
1242                                 return
1243                         }
1244                         // 7. If formatting element is in the stack of open elements, but
1245                         // the element is not in scope, then this is a parse error; abort
1246                         // these steps.
1247                         if (!el_is_in_scope(fe)) {
1248                                 parse_error()
1249                                 return
1250                         }
1251                         // 8. If formatting element is not the current node, this is a parse
1252                         // error. (But do not abort these steps.)
1253                         if (open_els[0] !== fe) {
1254                                 parse_error()
1255                                 // continue
1256                         }
1257                         // 9. Let furthest block be the topmost node in the stack of open
1258                         // elements that is lower in the stack than formatting element, and
1259                         // is an element in the special category. There might not be one.
1260                         fb = null
1261                         fb_of_open_els = null
1262                         for (i = 0; i < open_els.length; ++i) {
1263                                 t = open_els[i]
1264                                 if (t === fe) {
1265                                         break
1266                                 }
1267                                 if (el_is_special(t)) {
1268                                         fb = t
1269                                         fb_of_open_els = i
1270                                         // and continue, to see if there's one that's more "topmost"
1271                                 }
1272                         }
1273                         // 10. If there is no furthest block, then the UA must first pop all
1274                         // the nodes from the bottom of the stack of open elements, from the
1275                         // current node up to and including formatting element, then remove
1276                         // formatting element from the list of active formatting elements,
1277                         // and finally abort these steps.
1278                         if (fb === null) {
1279                                 while (true) {
1280                                         t = open_els.shift()
1281                                         if (t === fe) {
1282                                                 afe.splice(fe_of_afe, 1)
1283                                                 return
1284                                         }
1285                                 }
1286                         }
1287                         // 11. Let common ancestor be the element immediately above
1288                         // formatting element in the stack of open elements.
1289                         ca = open_els[fe_of_open_els + 1] // common ancestor
1290
1291                         node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1292                         // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1293                         bookmark = new_aaa_bookmark()
1294                         for (i = 0; i < afe.length; ++i) {
1295                                 t = afe[i]
1296                                 if (t === fe) {
1297                                         afe.splice(i, 0, bookmark)
1298                                         break
1299                                 }
1300                         }
1301                         node = last_node = fb
1302                         inner = 0
1303                         while (true) {
1304                                 inner += 1
1305                                 // 3. Let node be the element immediately above node in the
1306                                 // stack of open elements, or if node is no longer in the stack
1307                                 // of open elements (e.g. because it got removed by this
1308                                 // algorithm), the element that was immediately above node in
1309                                 // the stack of open elements before node was removed.
1310                                 node_next = null
1311                                 for (i = 0; i < open_els.length; ++i) {
1312                                         t = open_els[i]
1313                                         if (t === node) {
1314                                                 node_next = open_els[i + 1]
1315                                                 break
1316                                         }
1317                                 }
1318                                 node = node_next != null ? node_next : node_above
1319                                 // TODO make sure node_above gets re-set if/when node is removed from open_els
1320
1321                                 // 4. If node is formatting element, then go to the next step in
1322                                 // the overall algorithm.
1323                                 if (node === fe) {
1324                                         break
1325                                 }
1326                                 // 5. If inner loop counter is greater than three and node is in
1327                                 // the list of active formatting elements, then remove node from
1328                                 // the list of active formatting elements.
1329                                 node_in_afe = false
1330                                 if ((i = afe.indexOf(node)) !== -1) {
1331                                         if (inner > 3) {
1332                                                 afe.splice(i, 1)
1333                                         } else {
1334                                                 node_in_afe = true
1335                                         }
1336                                 }
1337                                 // 6. If node is not in the list of active formatting elements,
1338                                 // then remove node from the stack of open elements and then go
1339                                 // back to the step labeled inner loop.
1340                                 if (!node_in_afe) {
1341                                         if ((i = open_els.indexOf(node)) !== -1) {
1342                                                 node_above = open_els[i + 1]
1343                                                 open_els.splice(i, 1)
1344                                         }
1345                                         continue
1346                                 }
1347                                 // 7. create an element for the token for which the element node
1348                                 // was created, in the HTML namespace, with common ancestor as
1349                                 // the intended parent; replace the entry for node in the list
1350                                 // of active formatting elements with an entry for the new
1351                                 // element, replace the entry for node in the stack of open
1352                                 // elements with an entry for the new element, and let node be
1353                                 // the new element.
1354                                 new_node = token_to_element(node.token, NS_HTML, ca)
1355                                 if ((i = afe.indexOf(node)) !== -1) {
1356                                         afe[i] = new_node
1357                                 }
1358                                 if ((i = open_els.indexOf(node)) !== -1) {
1359                                         node_above = open_els[i + 1]
1360                                         open_els[i] = new_node
1361                                 }
1362                                 node = new_node
1363                                 // 8. If last node is furthest block, then move the
1364                                 // aforementioned bookmark to be immediately after the new node
1365                                 // in the list of active formatting elements.
1366                                 if (last_node === fb) {
1367                                         if ((i = afe.indexOf(bookmark)) !== -1) {
1368                                                 afe.splice(i, 1)
1369                                         }
1370                                         if ((i = afe.indexOf(node)) !== -1) {
1371                                                 // "after" means lower
1372                                                 afe.splice(i, 0, bookmark) // "after as <-
1373                                         }
1374                                 }
1375                                 // 9. Insert last node into node, first removing it from its
1376                                 // previous parent node if any.
1377                                 if (last_node.parent != null) {
1378                                         if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1379                                                 last_node.parent.children.splice(i, 1)
1380                                         }
1381                                 }
1382                                 node.children.push(last_node)
1383                                 last_node.parent = node
1384                                 // 10. Let last node be node.
1385                                 last_node = node
1386                                 // 11. Return to the step labeled inner loop.
1387                         }
1388                         // 14. Insert whatever last node ended up being in the previous step
1389                         // at the appropriate place for inserting a node, but using common
1390                         // ancestor as the override target.
1391
1392                         // In the case where fe is immediately followed by fb:
1393                         //   * inner loop exits out early (node==fe)
1394                         //   * last_node is fb
1395                         //   * last_node is still in the tree (not a duplicate)
1396                         if (last_node.parent != null) {
1397                                 if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1398                                         last_node.parent.children.splice(i, 1)
1399                                 }
1400                         }
1401                         // can't use standard insert token thing, because it's already in
1402                         // open_els and must stay at it's current position in open_els
1403                         dest = adjusted_insertion_location(ca)
1404                         dest[0].children.splice(dest[1], 0, last_node)
1405                         last_node.parent = dest[0]
1406                         // 15. Create an element for the token for which formatting element
1407                         // was created, in the HTML namespace, with furthest block as the
1408                         // intended parent.
1409                         new_element = token_to_element(fe.token, NS_HTML, fb)
1410                         // 16. Take all of the child nodes of furthest block and append them
1411                         // to the element created in the last step.
1412                         while (fb.children.length) {
1413                                 t = fb.children.shift()
1414                                 t.parent = new_element
1415                                 new_element.children.push(t)
1416                         }
1417                         // 17. Append that new element to furthest block.
1418                         new_element.parent = fb
1419                         fb.children.push(new_element)
1420                         // 18. Remove formatting element from the list of active formatting
1421                         // elements, and insert the new element into the list of active
1422                         // formatting elements at the position of the aforementioned
1423                         // bookmark.
1424                         if ((i = afe.indexOf(fe)) !== -1) {
1425                                 afe.splice(i, 1)
1426                         }
1427                         if ((i = afe.indexOf(bookmark)) !== -1) {
1428                                 afe[i] = new_element
1429                         }
1430                         // 19. Remove formatting element from the stack of open elements,
1431                         // and insert the new element into the stack of open elements
1432                         // immediately below the position of furthest block in that stack.
1433                         if ((i = open_els.indexOf(fe)) !== -1) {
1434                                 open_els.splice(i, 1)
1435                         }
1436                         if ((i = open_els.indexOf(fb)) !== -1) {
1437                                 open_els.splice(i, 0, new_element)
1438                         }
1439                         // 20. Jump back to the step labeled outer loop.
1440                 }
1441         }
1442
1443         // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
// Close a p element: generate implied end tags (except for p itself), report
// a parse error if the current node (open_els[0]) is not an HTML p element,
// then pop the stack of open elements up to and including the nearest HTML p
// element.
//
// Takes no arguments and returns nothing; it only mutates open_els. The last
// remaining entry of the stack is never popped, even if no p element is found.
close_p_element = function () {
        // kept local so that popping here never writes to a variable of the
        // enclosing scope
        var popped
        generate_implied_end_tags('p') // arg is exception
        if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
                parse_error()
        }
        while (open_els.length > 1) { // just in case
                popped = open_els.shift()
                if (popped.name === 'p' && popped.namespace === NS_HTML) {
                        return
                }
        }
}
// Close a p element, but only when an HTML p element is currently in button
// scope; otherwise do nothing.
close_p_if_in_button_scope = function () {
        var p_in_scope = is_in_button_scope('p', NS_HTML)
        if (!p_in_scope) {
                return
        }
        close_p_element()
}
1461
1462         // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1463         // aka insert_a_character = function (t) {
// Insert the character token t at the adjusted insertion location.
//
// When the node just before the insertion point is a text node, t.text is
// appended to that node and t itself is not inserted. Otherwise t becomes a
// child of the destination node.
insert_character = function (t) {
        var place, parent, index, before
        place = adjusted_insertion_location()
        parent = place[0]
        index = place[1]
        // fixfull check for Document node
        before = index > 0 ? parent.children[index - 1] : null
        if (before !== null && before.type === TYPE_TEXT) {
                // merge into the preceding text node
                before.text += t.text
                return
        }
        parent.children.splice(index, 0, t)
        t.parent = parent
}
1478
1479         // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
// Tree construction dispatcher: hand the token t either to the current
// insertion mode (ins_mode) or to the foreign content rules
// (in_foreign_content), depending on the adjusted current node.
process_token = function (t) {
        var node, use_ins_mode
        node = adjusted_current_node()
        // The clauses below are tried in order and the first match wins:
        //   * there is no adjusted current node
        //   * it is in the HTML namespace
        //   * it is a MathML text integration point and t is a start tag
        //     other than mglyph/malignmark, or t is text
        //   * it is a MathML annotation-xml element and t is an svg start tag
        //   * it is an HTML integration point and t is a start tag or text
        //   * t is end-of-file
        use_ins_mode =
                node == null ||
                node.namespace === NS_HTML ||
                (is_mathml_text_integration_point(node) && (
                        (t.type === TYPE_START_TAG && t.name !== 'mglyph' && t.name !== 'malignmark') ||
                        t.type === TYPE_TEXT
                )) ||
                (node.namespace === NS_MATHML && node.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') ||
                (is_html_integration(node) && (t.type === TYPE_START_TAG || t.type === TYPE_TEXT)) ||
                t.type === TYPE_EOF
        if (use_ins_mode) {
                ins_mode(t)
        } else {
                in_foreign_content(t)
        }
}
1517
1518         // 8.2.5.1
1519         // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1520         // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
// Compute the "appropriate place for inserting a node".
//
// override_target: optional node to use as the target instead of the current
// node (open_els[0]).
//
// Returns a two-element array [parent, index]: the node being inserted belongs
// in parent.children at position index.
adjusted_insertion_location = function (override_target) {
        var el, i, last_table, last_table_i, last_template, last_template_i, target
        // 1. If there was an override target specified, then let target be the
        // override target. Otherwise, let target be the current node.
        if (override_target != null) {
                target = override_target
        } else {
                target = open_els[0]
        }
        // 2. Determine the adjusted insertion location using the first matching
        // steps from the following list:
        //
        // Unless foster parenting is enabled and target is a table, tbody,
        // tfoot, thead, or tr element: inside target, after its last child
        // (if any).
        if (!(flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace)) {
                // fixfull (template): step 3 of the spec redirects into a
                // template element's template contents
                return [target, target.children.length]
        }
        // Foster parenting happens when content is misnested in tables.
        //
        // 1. Let last template be the last template element in the stack of
        // open elements, if any. (open_els[0] is the most recently added
        // element, so the first hit is the "last" one.)
        last_template = null
        last_template_i = null
        for (i = 0; i < open_els.length; ++i) {
                el = open_els[i]
                if (el.name === 'template' && el.namespace === NS_HTML) {
                        last_template = el
                        last_template_i = i
                        break
                }
        }
        // 2. Let last table be the last table element in the stack of open
        // elements, if any.
        last_table = null
        last_table_i = null
        for (i = 0; i < open_els.length; ++i) {
                el = open_els[i]
                if (el.name === 'table' && el.namespace === NS_HTML) {
                        last_table = el
                        last_table_i = i
                        break
                }
        }
        // 3. If there is a last template and either there is no last table, or
        // there is one, but last template is lower (more recently added) than
        // last table in the stack of open elements, then: let adjusted
        // insertion location be inside last template's template contents,
        // after its last child (if any).
        if (last_template !== null && (last_table === null || last_template_i < last_table_i)) {
                // fixfull should be it's contents
                return [last_template, last_template.children.length]
        }
        // 4. If there is no last table, then let adjusted insertion location
        // be inside the first element in the stack of open elements (the html
        // element), after its last child (if any). (fragment case)
        if (last_table === null) {
                target = open_els[open_els.length - 1]
                return [target, target.children.length]
        }
        // 5. If last table has a parent element, then let adjusted insertion
        // location be inside last table's parent element, immediately before
        // last table.
        if (last_table.parent != null) {
                target = last_table.parent
                i = target.children.indexOf(last_table)
                // If the parent does not actually list the table among its
                // children there is no "immediately before"; append instead so
                // callers always receive a usable index.
                return [target, i !== -1 ? i : target.children.length]
        }
        // 6. Let previous element be the element immediately above last table
        // in the stack of open elements.
        // 7. Let adjusted insertion location be inside previous element, after
        // its last child (if any).
        //
        // Note: These steps are involved in part because it's possible for
        // elements, the table element in this case in particular, to have been
        // moved by a script around in the DOM, or indeed removed from the DOM
        // entirely, after the element was inserted by the parser.
        target = open_els[last_table_i + 1]
        return [target, target.children.length]
}
1627
1628         // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1629         // aka create_an_element_for_token
// Create an element node for the tag token t.
//
// t:               tag token; t.name and t.attrs_a (an array of [name, value]
//                  pairs) are read
// namespace:       namespace to give the new element
// intended_parent: currently unused
//
// Returns a new Node of type TYPE_TAG carrying the name, namespace, an
// attribute hash and a reference back to the token.
token_to_element = function (t, namespace, intended_parent) {
        var attrs, i, pair
        // convert attributes into a hash
        attrs = {}
        for (i = 0; i < t.attrs_a.length; ++i) {
                pair = t.attrs_a[i]
                // When a name occurs more than once the first occurrence wins:
                // the HTML tokenizer rules drop later duplicates of an
                // attribute, so never let a later one overwrite an earlier one.
                if (!Object.prototype.hasOwnProperty.call(attrs, pair[0])) {
                        attrs[pair[0]] = pair[1]
                }
        }

        // TODO 2. If the newly created element has an xmlns attribute in the
        // XMLNS namespace whose value is not exactly the same as the element's
        // namespace, that is a parse error. Similarly, if the newly created
        // element has an xmlns:xlink attribute in the XMLNS namespace whose
        // value is not the XLink Namespace, that is a parse error.

        // fixfull: the spec says stuff about form pointers and ownerDocument

        return new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
}
1650
1651         // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
// Create an element for token in the given namespace, insert it at the
// adjusted insertion location, and push it onto the stack of open elements
// (making it the new open_els[0]).
//
// Returns the newly created element.
insert_foreign_element = function (token, namespace) {
        var place, parent, created
        place = adjusted_insertion_location()
        parent = place[0]
        created = token_to_element(token, namespace, parent)
        // TODO skip this next step if it's broken (eg parent is document with child already)
        created.parent = parent
        parent.children.splice(place[1], 0, created)
        open_els.unshift(created)
        return created
}
1664         // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
// Same as insert_foreign_element, with the namespace fixed to NS_HTML.
// Returns the newly created element.
insert_html_element = function (token) {
        var inserted = insert_foreign_element(token, NS_HTML)
        return inserted
}
1668
1669         // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1670         // position should be [node, index_within_children]
// Insert the comment token t into the tree.
//
// position: optional [node, index_within_children]; when omitted, the
// adjusted insertion location is used.
insert_comment = function (t, position) {
        var where, parent
        where = position != null ? position : adjusted_insertion_location()
        parent = where[0]
        parent.children.splice(where[1], 0, t)
        t.parent = parent
}
1679
1680         // 8.2.5.2
1681         // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Generic raw text element parsing: insert an HTML element for the token t,
// then switch the tokenizer to the RAWTEXT state and the tree builder to the
// text insertion mode.
parse_generic_raw_text = function (t) {
        insert_html_element(t)
        // remember the mode we came from before handing over to the text mode
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rawtext
}
// Generic RCDATA element parsing: insert an HTML element for the token t,
// then switch the tokenizer to the RCDATA state and the tree builder to the
// text insertion mode.
parse_generic_rcdata_text = function (t) {
        insert_html_element(t)
        // remember the mode we came from before handing over to the text mode
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rcdata
}
1694
1695         // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1696         // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
// Generate implied end tags: pop elements off the stack of open elements for
// as long as the current node (open_els[0]) is listed in end_tag_implied with
// a matching namespace.
//
// except: optional tag name; popping stops when the current node has this
// name.
//
// Stops quietly when the stack runs empty instead of dereferencing a missing
// current node.
generate_implied_end_tags = function (except) {
        var current, keep
        keep = except == null ? null : except
        while (open_els.length > 0) {
                current = open_els[0]
                if (end_tag_implied[current.name] !== current.namespace || current.name === keep) {
                        break
                }
                open_els.shift()
        }
}
1705
1706         // 8.2.5.4 The rules for parsing tokens in HTML content
1707         // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1708
1709         // 8.2.5.4.1 The "initial" insertion mode
1710         // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
// Decide whether the DOCTYPE token t puts the document into (full) quirks
// mode.
//
// t: DOCTYPE token; t.flag('force-quirks'), t.name, t.public_identifier and
//    t.system_identifier are read. Identifiers are compared case-insensitively.
//
// Returns true or false.
// NOTE(review): the entries of quirks_yes_pi_prefixes are compared against the
// lowercased public identifier, so they are presumably all lowercase -- confirm.
is_quirks_yes_doctype = function (t) {
        var i, pi, starts_with
        // prefix test that takes its length from the prefix itself, so the
        // literals below need no hand-counted lengths
        starts_with = function (str, prefix) {
                return str.slice(0, prefix.length) === prefix
        }
        if (t.flag('force-quirks')) {
                return true
        }
        if (t.name !== 'html') {
                return true
        }
        pi = t.public_identifier != null ? t.public_identifier.toLowerCase() : null
        if (pi !== null) {
                for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
                        if (starts_with(pi, quirks_yes_pi_prefixes[i])) {
                                return true
                        }
                }
                if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
                        return true
                }
        }
        if (t.system_identifier != null) {
                if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
                        return true
                }
        } else if (pi !== null) {
                // these two only count when the system identifier is missing
                if (starts_with(pi, '-//w3c//dtd html 4.01 frameset//') || starts_with(pi, '-//w3c//dtd html 4.01 transitional//')) {
                        return true
                }
        }
        return false
}
// Decide whether the DOCTYPE token t puts the document into limited quirks
// mode.
//
// t: DOCTYPE token; t.public_identifier and t.system_identifier are read. The
//    public identifier is compared case-insensitively.
//
// Returns true or false.
is_quirks_limited_doctype = function (t) {
        var pi, starts_with
        if (t.public_identifier == null) {
                return false
        }
        // prefix test that takes its length from the prefix itself, so the
        // literals below need no hand-counted lengths
        starts_with = function (str, prefix) {
                return str.slice(0, prefix.length) === prefix
        }
        pi = t.public_identifier.toLowerCase()
        if (starts_with(pi, '-//w3c//dtd xhtml 1.0 frameset//') || starts_with(pi, '-//w3c//dtd xhtml 1.0 transitional//')) {
                return true
        }
        // the HTML 4.01 identifiers only count when a system identifier is
        // present
        if (t.system_identifier != null) {
                if (starts_with(pi, '-//w3c//dtd html 4.01 frameset//') || starts_with(pi, '-//w3c//dtd html 4.01 transitional//')) {
                        return true
                }
        }
        return false
}
// The "initial" insertion mode.
//
// Whitespace tokens are ignored. Comments are appended to the document. A
// DOCTYPE sets the document's quirks mode flag where applicable, is appended
// to the document and moves on to "before html". Any other token is a parse
// error: the document is put into quirks mode and the token is reprocessed in
// "before html".
ins_mode_initial = function (t) {
        if (is_space_tok(t)) {
                return
        }
        if (t.type === TYPE_COMMENT) {
                // ?fixfull
                doc.children.push(t)
        } else if (t.type === TYPE_DOCTYPE) {
                // fixfull syntax error from first paragraph and following bullets
                // fixfull set doc.doctype
                // fixfull is the "not an iframe srcdoc" thing relevant?
                if (is_quirks_yes_doctype(t)) {
                        doc.flag('quirks mode', QUIRKS_YES)
                } else if (is_quirks_limited_doctype(t)) {
                        doc.flag('quirks mode', QUIRKS_LIMITED)
                }
                doc.children.push(t)
                ins_mode = ins_mode_before_html
        } else {
                // Anything else
                // fixfull not iframe srcdoc?
                parse_error()
                doc.flag('quirks mode', QUIRKS_YES)
                ins_mode = ins_mode_before_html
                process_token(t)
        }
}
1787
1788         // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
// The "before html" insertion mode.
//
// DOCTYPE tokens are a parse error and ignored; comments are appended to the
// document; whitespace is ignored. An html start tag creates the root element
// from that token. End tags other than head, body, html and br are a parse
// error and ignored. Every other token creates an implied html element and is
// then reprocessed. Creating the root element switches to "before head".
ins_mode_before_html = function (t) {
        // el is local so that creating the root element never writes to a
        // variable of the enclosing scope
        var el, explicit_html
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_COMMENT) {
                doc.children.push(t)
                return
        }
        if (is_space_tok(t)) {
                return
        }
        explicit_html = t.type === TYPE_START_TAG && t.name === 'html'
        if (!explicit_html && t.type === TYPE_END_TAG) {
                if (!(t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br')) {
                        parse_error()
                        return
                }
                // head, body, html and br end tags are handled like "anything else"
        }
        // Create the root element: from the token itself for an explicit
        // <html> start tag, from a made-up html start tag for anything else.
        el = token_to_element(explicit_html ? t : new_open_tag('html'), NS_HTML, doc)
        doc.children.push(el)
        el.document = doc
        open_els.unshift(el)
        // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
        // ?fixfull browsing context
        ins_mode = ins_mode_before_head
        if (!explicit_html) {
                // the token that implied the html element still needs handling
                process_token(t)
        }
}
1827
1828         // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
// The "before head" insertion mode: makes sure a <head> element exists.
//
// t: the token to process.
//
// Whitespace is dropped, comments are inserted, doctypes and most end tags
// are parse errors. <html> is delegated to ins_mode_in_body, <head> is
// inserted and remembered in head_element_pointer. Everything else implies a
// <head> start tag, after which the token is handed back to process_token().
ins_mode_before_head = function (t) {
        if (is_space_tok(t)) {
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG) {
                if (t.name === 'html') {
                        ins_mode_in_body(t)
                        return
                }
                if (t.name === 'head') {
                        head_element_pointer = insert_html_element(t)
                        ins_mode = ins_mode_in_head
                        return
                }
        }
        if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                case 'head':
                case 'body':
                case 'html':
                case 'br':
                        // these end tags are treated as "anything else" below
                        break
                default:
                        parse_error()
                        return
                }
        }
        // Anything else: imply a <head> start tag, then reprocess the token
        head_element_pointer = insert_html_element(new_open_tag('head'))
        ins_mode = ins_mode_in_head
        process_token(t)
}
1866
1867         // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
// "Anything else" branch of the "in head" insertion mode, kept as its own
// function so that ins_mode_in_head can follow the spec's flow control.
//
// t: the token that did not match any "in head" rule.
ins_mode_in_head_else = function (t) {
        // pop the current node (the spec says this will be the 'head' element)
        open_els.shift()
        // continue in "after head" and let process_token() handle the token
        ins_mode = ins_mode_after_head
        process_token(t)
}
// The "in head" insertion mode.
//
// t: the token to process.
//
// Handles the elements that live in <head> (base, link, meta, title, style,
// script, noscript, template, ...). Tokens that do not belong in <head> are
// passed to ins_mode_in_head_else(), which pops the head element and moves on
// to "after head". Other insertion modes in this file also call this function
// directly to reuse these rules.
ins_mode_in_head = function (t) {
        var ail, el
        // NOTE(review): "\r" is not in this list; presumably carriage returns
        // are normalized before tokens get here -- confirm.
        if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
                // insert, then immediately pop it off the stack of open elements
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'meta') {
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                // fixfull encoding stuff
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'title') {
                parse_generic_rcdata_text(t)
                return
        }
        if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
                parse_generic_raw_text(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
                insert_html_element(t)
                ins_mode = ins_mode_in_head_noscript
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'script') {
                // ail is [parent, index]: where the new element gets spliced in
                ail = adjusted_insertion_location()
                el = token_to_element(t, NS_HTML, ail)
                el.flag('parser-inserted', true)
                // fixfull fragment case
                ail[0].children.splice(ail[1], 0, el)
                el.parent = ail[0]
                open_els.unshift(el)
                tok_state = tok_state_script_data
                original_ins_mode = ins_mode // make sure orig... is defined
                ins_mode = ins_mode_text
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'head') {
                open_els.shift() // will be a head element... spec says so
                ins_mode = ins_mode_after_head
                return
        }
        if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
                ins_mode_in_head_else(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'template') {
                insert_html_element(t)
                afe_push_marker()
                flag_frameset_ok = false
                ins_mode = ins_mode_in_template
                template_ins_modes.unshift(ins_mode_in_template)
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'template') {
                if (template_tag_is_open()) {
                        // This has to be a call: a bare reference to the function is a
                        // no-op, which skips the implied end tags and reports a
                        // spurious parse error below.
                        generate_implied_end_tags()
                        if (open_els[0].name !== 'template') {
                                parse_error()
                        }
                        // pop up to and including the nearest HTML template element
                        while (true) {
                                el = open_els.shift()
                                if (el.name === 'template' && el.namespace === NS_HTML) {
                                        break
                                }
                        }
                        clear_afe_to_marker()
                        template_ins_modes.shift()
                        reset_ins_mode()
                } else {
                        parse_error()
                }
                return
        }
        if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
                // nested <head> or any other end tag: parse error, ignore the token
                parse_error()
                return
        }
        // Anything else
        ins_mode_in_head_else(t)
}
1973
1974         // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
// "Anything else" branch of the "in head noscript" insertion mode.
//
// t: the token that is not allowed inside <noscript> in <head>.
ins_mode_in_head_noscript_else = function (t) {
        // the token does not belong here: report it...
        parse_error()
        // ...pop the current node, go back to "in head"...
        open_els.shift()
        ins_mode = ins_mode_in_head
        // ...and hand the token back to process_token()
        process_token(t)
}
// The "in head noscript" insertion mode (entered from ins_mode_in_head for a
// <noscript> start tag when flag_scripting is false).
//
// t: the token to process.
ins_mode_in_head_noscript = function (t) {
        var is_start, is_end, in_head_names
        is_start = t.type === TYPE_START_TAG
        is_end = t.type === TYPE_END_TAG
        // start tags that are handled with the "in head" rules
        in_head_names = ['basefont', 'bgsound', 'link', 'meta', 'noframes', 'style']

        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (is_start && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (is_end && t.name === 'noscript') {
                // close the noscript element and go back to "in head"
                open_els.shift()
                ins_mode = ins_mode_in_head
                return
        }
        if (is_space_tok(t) || t.type === TYPE_COMMENT || (is_start && in_head_names.indexOf(t.name) !== -1)) {
                ins_mode_in_head(t)
                return
        }
        // </br> is treated as "anything else"; every other end tag, and a
        // nested <head> or <noscript> start tag, is a parse error and ignored
        if ((is_end && t.name !== 'br') || (is_start && (t.name === 'head' || t.name === 'noscript'))) {
                parse_error()
                return
        }
        // Anything else
        ins_mode_in_head_noscript_else(t)
}
2010
2011         // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
// "Anything else" branch of the "after head" insertion mode.
//
// t: the token that implies the start of <body>.
ins_mode_after_head_else = function (t) {
        // imply a <body> start tag with no attributes
        insert_html_element(new_open_tag('body'))
        // continue in "in body" and hand the token back to process_token()
        ins_mode = ins_mode_in_body
        process_token(t)
}
// The "after head" insertion mode: between </head> and the start of <body>
// (or <frameset>).
//
// t: the token to process.
ins_mode_after_head = function (t) {
        var head_idx
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                case 'html':
                        ins_mode_in_body(t)
                        return
                case 'body':
                        insert_html_element(t)
                        flag_frameset_ok = false
                        ins_mode = ins_mode_in_body
                        return
                case 'frameset':
                        insert_html_element(t)
                        ins_mode = ins_mode_in_frameset
                        return
                case 'base':
                case 'basefont':
                case 'bgsound':
                case 'link':
                case 'meta':
                case 'noframes':
                case 'script':
                case 'style':
                case 'template':
                case 'title':
                        // head-only element after </head>: temporarily put the head
                        // element back on the stack, apply the "in head" rules, then
                        // take the head element off again (it may no longer be the
                        // current node by then)
                        parse_error()
                        open_els.unshift(head_element_pointer)
                        ins_mode_in_head(t)
                        head_idx = open_els.indexOf(head_element_pointer)
                        if (head_idx !== -1) {
                                open_els.splice(head_idx, 1)
                        }
                        return
                case 'head':
                        parse_error()
                        return
                }
        }
        if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                case 'template':
                        ins_mode_in_head(t)
                        return
                case 'body':
                case 'html':
                case 'br':
                        ins_mode_after_head_else(t)
                        return
                default:
                        // any other end tag is a parse error and ignored
                        parse_error()
                        return
                }
        }
        // Anything else
        ins_mode_after_head_else(t)
}
2076
2077         // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
// "Any other end tag" steps of the "in body" insertion mode. Factored out
// because the adoption agency algorithm calls it too.
//
// name: tag name of the end tag being processed.
//
// Walks the stack of open elements from the current node (index 0) towards
// the root. If an HTML element called `name` is found, implied end tags are
// generated (except for `name`) and everything up to and including that
// element is popped. If a special element is reached first, the end tag is a
// parse error and is ignored.
//
// If the stack is empty or runs out before either case is hit, the end tag
// is ignored and nothing is thrown.
in_body_any_other_end_tag = function (name) {
        var i, node
        for (i = 0; i < open_els.length; ++i) {
                node = open_els[i]
                if (node.name === name && node.namespace === NS_HTML) {
                        generate_implied_end_tags(name) // arg is exception
                        if (node !== open_els[0]) {
                                parse_error()
                        }
                        // pop up to and including node; bounded by the stack size so
                        // a missing node cannot spin forever
                        while (open_els.length > 0) {
                                if (open_els.shift() === node) {
                                        return
                                }
                        }
                        return
                }
                if (special_elements[node.name] === node.namespace) {
                        parse_error()
                        return
                }
        }
}
2107         ins_mode_in_body = function (t) {
2108                 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, root_attrs, s, second, second_i, u, w, y, z
2109                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2110                         parse_error()
2111                         return
2112                 }
2113                 if (is_space_tok(t)) {
2114                         reconstruct_afe()
2115                         insert_character(t)
2116                         return
2117                 }
2118                 if (t.type === TYPE_TEXT) {
2119                         reconstruct_afe()
2120                         insert_character(t)
2121                         flag_frameset_ok = false
2122                         return
2123                 }
2124                 if (t.type === TYPE_COMMENT) {
2125                         insert_comment(t)
2126                         return
2127                 }
2128                 if (t.type === TYPE_DOCTYPE) {
2129                         parse_error()
2130                         return
2131                 }
2132                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2133                         parse_error()
2134                         if (template_tag_is_open()) {
2135                                 return
2136                         }
2137                         root_attrs = open_els[open_els.length - 1].attrs
2138                         for (i = 0; i < t.attrs_a.length; ++i) {
2139                                 a = t.attrs_a[i]
2140                                 if (root_attrs[a[0]] == null) {
2141                                         root_attrs[a[0]] = a[1]
2142                                 }
2143                         }
2144                         return
2145                 }
2146
2147                 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2148                         ins_mode_in_head(t)
2149                         return
2150                 }
2151                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2152                         parse_error()
2153                         if (open_els.length < 2) {
2154                                 return
2155                         }
2156                         second = open_els[open_els.length - 2]
2157                         if (second.namespace !== NS_HTML) {
2158                                 return
2159                         }
2160                         if (second.name !== 'body') {
2161                                 return
2162                         }
2163                         if (template_tag_is_open()) {
2164                                 return
2165                         }
2166                         flag_frameset_ok = false
2167                         for (i = 0; i < t.attrs_a.length; ++i) {
2168                                 a = t.attrs_a[i]
2169                                 if (second.attrs[a[0]] == null) {
2170                                         second.attrs[a[0]] = a[1]
2171                                 }
2172                         }
2173                         return
2174                 }
2175                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2176                         parse_error()
2177                         if (open_els.length < 2) {
2178                                 return
2179                         }
2180                         second_i = open_els.length - 2
2181                         second = open_els[second_i]
2182                         if (second.namespace !== NS_HTML) {
2183                                 return
2184                         }
2185                         if (second.name !== 'body') {
2186                                 return
2187                         }
2188                         if (flag_frameset_ok === false) {
2189                                 return
2190                         }
2191                         if (second.parent != null) {
2192                                 for (i = 0; i < second.parent.children.length; ++i) {
2193                                         el = second.parent.children[i]
2194                                         if (el === second) {
2195                                                 second.parent.children.splice(i, 1)
2196                                                 break
2197                                         }
2198                                 }
2199                         }
2200                         open_els.splice(second_i, 1)
2201                         // pop everything except the "root html element"
2202                         while (open_els.length > 1) {
2203                                 open_els.shift()
2204                         }
2205                         insert_html_element(t)
2206                         ins_mode = ins_mode_in_frameset
2207                         return
2208                 }
2209                 if (t.type === TYPE_EOF) {
2210                         ok_tags = {
2211                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2212                                 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2213                                 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2214                         }
2215                         for (i = 0; i < open_els.length; ++i) {
2216                                 el = open_els[i]
2217                                 if (ok_tags[t.name] !== el.namespace) {
2218                                         parse_error()
2219                                         break
2220                                 }
2221                         }
2222                         if (template_ins_modes.length > 0) {
2223                                 ins_mode_in_template(t)
2224                         } else {
2225                                 stop_parsing()
2226                         }
2227                         return
2228                 }
2229                 if (t.type === TYPE_END_TAG && t.name === 'body') {
2230                         if (!is_in_scope('body', NS_HTML)) {
2231                                 parse_error()
2232                                 return
2233                         }
2234                         ok_tags = {
2235                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2236                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2237                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2238                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2239                                 html: NS_HTML
2240                         }
2241                         for (i = 0; i < open_els.length; ++i) {
2242                                 el = open_els[i]
2243                                 if (ok_tags[t.name] !== el.namespace) {
2244                                         parse_error()
2245                                         break
2246                                 }
2247                         }
2248                         ins_mode = ins_mode_after_body
2249                         return
2250                 }
2251                 if (t.type === TYPE_END_TAG && t.name === 'html') {
2252                         if (!is_in_scope('body', NS_HTML)) {
2253                                 parse_error()
2254                                 return
2255                         }
2256                         ok_tags = {
2257                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2258                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2259                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2260                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2261                                 html: NS_HTML
2262                         }
2263                         for (i = 0; i < open_els.length; ++i) {
2264                                 el = open_els[i]
2265                                 if (ok_tags[t.name] !== el.namespace) {
2266                                         parse_error()
2267                                         break
2268                                 }
2269                         }
2270                         ins_mode = ins_mode_after_body
2271                         process_token(t)
2272                         return
2273                 }
2274                 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2275                         close_p_if_in_button_scope()
2276                         insert_html_element(t)
2277                         return
2278                 }
2279                 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2280                         close_p_if_in_button_scope()
2281                         if (h_tags[open_els[0].name] === open_els[0].namespace) {
2282                                 parse_error()
2283                                 open_els.shift()
2284                         }
2285                         insert_html_element(t)
2286                         return
2287                 }
2288                 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2289                         close_p_if_in_button_scope()
2290                         insert_html_element(t)
2291                         eat_next_token_if_newline()
2292                         flag_frameset_ok = false
2293                         return
2294                 }
2295                 if (t.type === TYPE_START_TAG && t.name === 'form') {
2296                         if (!(form_element_pointer === null || template_tag_is_open())) {
2297                                 parse_error()
2298                                 return
2299                         }
2300                         close_p_if_in_button_scope()
2301                         el = insert_html_element(t)
2302                         if (!template_tag_is_open()) {
2303                                 form_element_pointer = el
2304                         }
2305                         return
2306                 }
2307                 if (t.type === TYPE_START_TAG && t.name === 'li') {
2308                         flag_frameset_ok = false
2309                         for (i = 0; i < open_els.length; ++i) {
2310                                 node = open_els[i]
2311                                 if (node.name === 'li' && node.namespace === NS_HTML) {
2312                                         generate_implied_end_tags('li') // arg is exception
2313                                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2314                                                 parse_error()
2315                                         }
2316                                         while (true) {
2317                                                 el = open_els.shift()
2318                                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2319                                                         break
2320                                                 }
2321                                         }
2322                                         break
2323                                 }
2324                                 if (el_is_special_not_adp(node)) {
2325                                         break
2326                                 }
2327                         }
2328                         close_p_if_in_button_scope()
2329                         insert_html_element(t)
2330                         return
2331                 }
2332                 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2333                         flag_frameset_ok = false
2334                         for (i = 0; i < open_els.length; ++i) {
2335                                 node = open_els[i]
2336                                 if (node.name === 'dd' && node.namespace === NS_HTML) {
2337                                         generate_implied_end_tags('dd') // arg is exception
2338                                         if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2339                                                 parse_error()
2340                                         }
2341                                         while (true) {
2342                                                 el = open_els.shift()
2343                                                 if (el.name === 'dd' && el.namespace === NS_HTML) {
2344                                                         break
2345                                                 }
2346                                         }
2347                                         break
2348                                 }
2349                                 if (node.name === 'dt' && node.namespace === NS_HTML) {
2350                                         generate_implied_end_tags('dt') // arg is exception
2351                                         if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2352                                                 parse_error()
2353                                         }
2354                                         while (true) {
2355                                                 el = open_els.shift()
2356                                                 if (el.name === 'dt' && el.namespace === NS_HTML) {
2357                                                         break
2358                                                 }
2359                                         }
2360                                         break
2361                                 }
2362                                 if (el_is_special_not_adp(node)) {
2363                                         break
2364                                 }
2365                         }
2366                         close_p_if_in_button_scope()
2367                         insert_html_element(t)
2368                         return
2369                 }
2370                 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2371                         close_p_if_in_button_scope()
2372                         insert_html_element(t)
2373                         tok_state = tok_state_plaintext
2374                         return
2375                 }
2376                 if (t.type === TYPE_START_TAG && t.name === 'button') {
2377                         if (is_in_scope('button', NS_HTML)) {
2378                                 parse_error()
2379                                 generate_implied_end_tags()
2380                                 while (true) {
2381                                         el = open_els.shift()
2382                                         if (el.name === 'button' && el.namespace === NS_HTML) {
2383                                                 break
2384                                         }
2385                                 }
2386                         }
2387                         reconstruct_afe()
2388                         insert_html_element(t)
2389                         flag_frameset_ok = false
2390                         return
2391                 }
2392                 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2393                         if (!is_in_scope(t.name, NS_HTML)) {
2394                                 parse_error()
2395                                 return
2396                         }
2397                         generate_implied_end_tags()
2398                         if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2399                                 parse_error()
2400                         }
2401                         while (true) {
2402                                 el = open_els.shift()
2403                                 if (el.name === t.name && el.namespace === NS_HTML) {
2404                                         return
2405                                 }
2406                         }
2407                         return
2408                 }
2409                 if (t.type === TYPE_END_TAG && t.name === 'form') {
2410                         if (!template_tag_is_open()) {
2411                                 node = form_element_pointer
2412                                 form_element_pointer = null
2413                                 if (node === null || !el_is_in_scope(node)) {
2414                                         parse_error()
2415                                         return
2416                                 }
2417                                 generate_implied_end_tags()
2418                                 if (open_els[0] !== node) {
2419                                         parse_error()
2420                                 }
2421                                 for (i = 0; i < open_els.length; ++i) {
2422                                         el = open_els[i]
2423                                         if (el === node) {
2424                                                 open_els.splice(i, 1)
2425                                                 break
2426                                         }
2427                                 }
2428                         } else {
2429                                 if (!is_in_scope('form', NS_HTML)) {
2430                                         parse_error()
2431                                         return
2432                                 }
2433                                 generate_implied_end_tags()
2434                                 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2435                                         parse_error()
2436                                 }
2437                                 while (true) {
2438                                         el = open_els.shift()
2439                                         if (el.name === 'form' && el.namespace === NS_HTML) {
2440                                                 break
2441                                         }
2442                                 }
2443                         }
2444                         return
2445                 }
2446                 if (t.type === TYPE_END_TAG && t.name === 'p') {
2447                         if (!is_in_button_scope('p', NS_HTML)) {
2448                                 parse_error()
2449                                 insert_html_element(new_open_tag('p'))
2450                         }
2451                         close_p_element()
2452                         return
2453                 }
2454                 if (t.type === TYPE_END_TAG && t.name === 'li') {
2455                         if (!is_in_li_scope('li', NS_HTML)) {
2456                                 parse_error()
2457                                 return
2458                         }
2459                         generate_implied_end_tags('li') // arg is exception
2460                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2461                                 parse_error()
2462                         }
2463                         while (true) {
2464                                 el = open_els.shift()
2465                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2466                                         break
2467                                 }
2468                         }
2469                         return
2470                 }
2471                 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2472                         if (!is_in_scope(t.name, NS_HTML)) {
2473                                 parse_error()
2474                                 return
2475                         }
2476                         generate_implied_end_tags(t.name) // arg is exception
2477                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2478                                 parse_error()
2479                         }
2480                         while (true) {
2481                                 el = open_els.shift()
2482                                 if (el.name === t.name && el.namespace === NS_HTML) {
2483                                         break
2484                                 }
2485                         }
2486                         return
2487                 }
2488                 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2489                         h_in_scope = false
2490                         for (i = 0; i < open_els.length; ++i) {
2491                                 el = open_els[i]
2492                                 if (h_tags[el.name] === el.namespace) {
2493                                         h_in_scope = true
2494                                         break
2495                                 }
2496                                 if (standard_scopers[el.name] === el.namespace) {
2497                                         break
2498                                 }
2499                         }
2500                         if (!h_in_scope) {
2501                                 parse_error()
2502                                 return
2503                         }
2504                         generate_implied_end_tags()
2505                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2506                                 parse_error()
2507                         }
2508                         while (true) {
2509                                 el = open_els.shift()
2510                                 if (h_tags[el.name] === el.namespace) {
2511                                         break
2512                                 }
2513                         }
2514                         return
2515                 }
2516                 // deep breath!
2517                 if (t.type === TYPE_START_TAG && t.name === 'a') {
2518                         // If the list of active formatting elements contains an a element
2519                         // between the end of the list and the last marker on the list (or
2520                         // the start of the list if there is no marker on the list), then
2521                         // this is a parse error; run the adoption agency algorithm for the
2522                         // tag name "a", then remove that element from the list of active
2523                         // formatting elements and the stack of open elements if the
2524                         // adoption agency algorithm didn't already remove it (it might not
2525                         // have if the element is not in table scope).
2526                         found = false
2527                         for (i = 0; i < afe.length; ++i) {
2528                                 el = afe[i]
2529                                 if (el.type === TYPE_AFE_MARKER) {
2530                                         break
2531                                 }
2532                                 if (el.name === 'a' && el.namespace === NS_HTML) {
2533                                         found = el
2534                                 }
2535                         }
2536                         if (found != null) {
2537                                 parse_error()
2538                                 adoption_agency('a')
2539                                 for (i = 0; i < afe.length; ++i) {
2540                                         el = afe[i]
2541                                         if (el === found) {
2542                                                 afe.splice(i, 1)
2543                                         }
2544                                 }
2545                                 for (i = 0; i < open_els.length; ++i) {
2546                                         el = open_els[i]
2547                                         if (el === found) {
2548                                                 open_els.splice(i, 1)
2549                                         }
2550                                 }
2551                         }
2552                         reconstruct_afe()
2553                         el = insert_html_element(t)
2554                         afe_push(el)
2555                         return
2556                 }
2557                 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2558                         reconstruct_afe()
2559                         el = insert_html_element(t)
2560                         afe_push(el)
2561                         return
2562                 }
2563                 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2564                         reconstruct_afe()
2565                         if (is_in_scope('nobr', NS_HTML)) {
2566                                 parse_error()
2567                                 adoption_agency('nobr')
2568                                 reconstruct_afe()
2569                         }
2570                         el = insert_html_element(t)
2571                         afe_push(el)
2572                         return
2573                 }
2574                 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2575                         adoption_agency(t.name)
2576                         return
2577                 }
2578                 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2579                         reconstruct_afe()
2580                         insert_html_element(t)
2581                         afe_push_marker()
2582                         flag_frameset_ok = false
2583                         return
2584                 }
2585                 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2586                         if (!is_in_scope(t.name, NS_HTML)) {
2587                                 parse_error()
2588                                 return
2589                         }
2590                         generate_implied_end_tags()
2591                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2592                                 parse_error()
2593                         }
2594                         while (true) {
2595                                 el = open_els.shift()
2596                                 if (el.name === t.name && el.namespace === NS_HTML) {
2597                                         break
2598                                 }
2599                         }
2600                         clear_afe_to_marker()
2601                         return
2602                 }
2603                 if (t.type === TYPE_START_TAG && t.name === 'table') {
2604                         if (doc.flag('quirks mode') !== QUIRKS_YES) {
2605                                 close_p_if_in_button_scope() // test
2606                         }
2607                         insert_html_element(t)
2608                         flag_frameset_ok = false
2609                         ins_mode = ins_mode_in_table
2610                         return
2611                 }
2612                 if (t.type === TYPE_END_TAG && t.name === 'br') {
2613                         parse_error()
2614                         // W3C: t.type = TYPE_START_TAG
2615                         t = new_open_tag('br') // WHATWG
2616                         // fall through
2617                 }
2618                 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2619                         reconstruct_afe()
2620                         insert_html_element(t)
2621                         open_els.shift()
2622                         t.acknowledge_self_closing()
2623                         flag_frameset_ok = false
2624                         return
2625                 }
2626                 if (t.type === TYPE_START_TAG && t.name === 'input') {
2627                         reconstruct_afe()
2628                         insert_html_element(t)
2629                         open_els.shift()
2630                         t.acknowledge_self_closing()
2631                         if (!is_input_hidden_tok(t)) {
2632                                 flag_frameset_ok = false
2633                         }
2634                         return
2635                 }
2636                 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2637                         // WHATWG adds 'menuitem' for this block
2638                         insert_html_element(t)
2639                         open_els.shift()
2640                         t.acknowledge_self_closing()
2641                         return
2642                 }
2643                 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2644                         close_p_if_in_button_scope()
2645                         insert_html_element(t)
2646                         open_els.shift()
2647                         t.acknowledge_self_closing()
2648                         flag_frameset_ok = false
2649                         return
2650                 }
2651                 if (t.type === TYPE_START_TAG && t.name === 'image') {
2652                         parse_error()
2653                         t.name = 'img'
2654                         process_token(t)
2655                         return
2656                 }
2657                 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2658                         parse_error()
2659                         if (template_tag_is_open() === false && form_element_pointer !== null) {
2660                                 return
2661                         }
2662                         t.acknowledge_self_closing()
2663                         flag_frameset_ok = false
2664                         close_p_if_in_button_scope()
2665                         el = insert_html_element(new_open_tag('form'))
2666                         if (!template_tag_is_open()) {
2667                                 form_element_pointer = el
2668                         }
2669                         for (i = 0; i < t.attrs_a.length; ++i) {
2670                                 a = t.attrs_a[i]
2671                                 if (a[0] === 'action') {
2672                                         el.attrs['action'] = a[1]
2673                                         break
2674                                 }
2675                         }
2676                         insert_html_element(new_open_tag('hr'))
2677                         open_els.shift()
2678                         reconstruct_afe()
2679                         insert_html_element(new_open_tag('label'))
2680                         // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2681                         input_el = new_open_tag('input')
2682                         prompt = null
2683                         for (i = 0; i < t.attrs_a.length; ++i) {
2684                                 a = t.attrs_a[i]
2685                                 if (a[0] === 'prompt') {
2686                                         prompt = a[1]
2687                                 }
2688                                 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2689                                         input_el.attrs_a.push([a[0], a[1]])
2690                                 }
2691                         }
2692                         input_el.attrs_a.push(['name', 'isindex'])
2693                         // fixfull this next bit is in english... internationalize?
2694                         if (prompt == null) {
2695                                 prompt = "This is a searchable index. Enter search keywords: "
2696                         }
2697                         insert_character(new_character_token(prompt)) // fixfull split
2698                         // TODO submit typo "balue" in spec
2699                         insert_html_element(input_el)
2700                         open_els.shift()
2701                         // insert_character('') // you can put chars here if prompt attr missing
2702                         open_els.shift()
2703                         insert_html_element(new_open_tag('hr'))
2704                         open_els.shift()
2705                         open_els.shift()
2706                         if (!template_tag_is_open()) {
2707                                 form_element_pointer = null
2708                         }
2709                         return
2710                 }
2711                 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2712                         insert_html_element(t)
2713                         eat_next_token_if_newline()
2714                         tok_state = tok_state_rcdata
2715                         original_ins_mode = ins_mode
2716                         flag_frameset_ok = false
2717                         ins_mode = ins_mode_text
2718                         return
2719                 }
2720                 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2721                         close_p_if_in_button_scope()
2722                         reconstruct_afe()
2723                         flag_frameset_ok = false
2724                         parse_generic_raw_text(t)
2725                         return
2726                 }
2727                 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2728                         flag_frameset_ok = false
2729                         parse_generic_raw_text(t)
2730                         return
2731                 }
2732                 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2733                         parse_generic_raw_text(t)
2734                         return
2735                 }
2736                 if (t.type === TYPE_START_TAG && t.name === 'select') {
2737                         reconstruct_afe()
2738                         insert_html_element(t)
2739                         flag_frameset_ok = false
2740                         if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2741                                 ins_mode = ins_mode_in_select_in_table
2742                         } else {
2743                                 ins_mode = ins_mode_in_select
2744                         }
2745                         return
2746                 }
2747                 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2748                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2749                                 open_els.shift()
2750                         }
2751                         reconstruct_afe()
2752                         insert_html_element(t)
2753                         return
2754                 }
2755 // this comment block implements the W3C spec
2756 //              if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2757 //                      if is_in_scope 'ruby', NS_HTML
2758 //                              generate_implied_end_tags()
2759 //                              unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2760 //                                      parse_error()
2761 //                      insert_html_element t
2762 //                      return
2763 //              if t.type === TYPE_START_TAG && t.name === 'rt'
2764 //                      if is_in_scope 'ruby', NS_HTML
2765 //                              generate_implied_end_tags 'rtc' // arg === exception
2766 //                              unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2767 //                                      parse_error()
2768 //                      insert_html_element t
2769 //                      return
2770 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2771                 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2772                         if (is_in_scope('ruby', NS_HTML)) {
2773                                 generate_implied_end_tags()
2774                                 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2775                                         parse_error()
2776                                 }
2777                         }
2778                         insert_html_element(t)
2779                         return
2780                 }
2781                 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2782                         if (is_in_scope('ruby', NS_HTML)) {
2783                                 generate_implied_end_tags('rtc')
2784                                 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2785                                         parse_error()
2786                                 }
2787                         }
2788                         insert_html_element(t)
2789                         return
2790                 }
2791 // end WHATWG chunk
2792                 if (t.type === TYPE_START_TAG && t.name === 'math') {
2793                         reconstruct_afe()
2794                         adjust_mathml_attributes(t)
2795                         adjust_foreign_attributes(t)
2796                         insert_foreign_element(t, NS_MATHML)
2797                         if (t.flag('self-closing')) {
2798                                 open_els.shift()
2799                                 t.acknowledge_self_closing()
2800                         }
2801                         return
2802                 }
2803                 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2804                         reconstruct_afe()
2805                         adjust_svg_attributes(t)
2806                         adjust_foreign_attributes(t)
2807                         insert_foreign_element(t, NS_SVG)
2808                         if (t.flag('self-closing')) {
2809                                 open_els.shift()
2810                                 t.acknowledge_self_closing()
2811                         }
2812                         return
2813                 }
2814                 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2815                         parse_error()
2816                         return
2817                 }
2818                 if (t.type === TYPE_START_TAG) { // any other start tag
2819                         reconstruct_afe()
2820                         insert_html_element(t)
2821                         return
2822                 }
2823                 if (t.type === TYPE_END_TAG) { // any other end tag
2824                         in_body_any_other_end_tag(t.name)
2825                         return
2826                 }
2827         }
2828
2829         // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2830         ins_mode_text = function (t) {
2831                 if (t.type === TYPE_TEXT) {
2832                         insert_character(t)
2833                         return
2834                 }
2835                 if (t.type === TYPE_EOF) {
2836                         parse_error()
2837                         if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
2838                                 open_els[0].flag('already started', true)
2839                         }
2840                         open_els.shift()
2841                         ins_mode = original_ins_mode
2842                         process_token(t)
2843                         return
2844                 }
2845                 if (t.type === TYPE_END_TAG && t.name === 'script') {
2846                         open_els.shift()
2847                         ins_mode = original_ins_mode
2848                         // fixfull the spec seems to assume that I'm going to run the script
2849                         // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2850                         return
2851                 }
2852                 if (t.type === TYPE_END_TAG) {
2853                         open_els.shift()
2854                         ins_mode = original_ins_mode
2855                         return
2856                 }
2857         }
2858
        // the functions below implement the tokenizer states described here:
        // http://www.w3.org/TR/html5/syntax.html#tokenization
2861
2862         // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2863         ins_mode_in_table_else = function (t) {
2864                 parse_error()
2865                 flag_foster_parenting = true
2866                 ins_mode_in_body(t)
2867                 flag_foster_parenting = false
2868         }
2869         ins_mode_in_table = function (t) {
2870                 var el
2871                 switch (t.type) {
2872                         case TYPE_TEXT:
2873                                 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
2874                                         pending_table_character_tokens = []
2875                                         original_ins_mode = ins_mode
2876                                         ins_mode = ins_mode_in_table_text
2877                                         process_token(t)
2878                                 } else {
2879                                         ins_mode_in_table_else(t)
2880                                 }
2881                         break
2882                         case TYPE_COMMENT:
2883                                 insert_comment(t)
2884                         break
2885                         case TYPE_DOCTYPE:
2886                                 parse_error()
2887                         break
2888                         case TYPE_START_TAG:
2889                                 switch (t.name) {
2890                                         case 'caption':
2891                                                 clear_stack_to_table_context()
2892                                                 afe_push_marker()
2893                                                 insert_html_element(t)
2894                                                 ins_mode = ins_mode_in_caption
2895                                         break
2896                                         case 'colgroup':
2897                                                 clear_stack_to_table_context()
2898                                                 insert_html_element(t)
2899                                                 ins_mode = ins_mode_in_column_group
2900                                         break
2901                                         case 'col':
2902                                                 clear_stack_to_table_context()
2903                                                 insert_html_element(new_open_tag('colgroup'))
2904                                                 ins_mode = ins_mode_in_column_group
2905                                                 process_token(t)
2906                                         break
2907                                         case 'tbody':
2908                                         case 'tfoot':
2909                                         case 'thead':
2910                                                 clear_stack_to_table_context()
2911                                                 insert_html_element(t)
2912                                                 ins_mode = ins_mode_in_table_body
2913                                         break
2914                                         case 'td':
2915                                         case 'th':
2916                                         case 'tr':
2917                                                 clear_stack_to_table_context()
2918                                                 insert_html_element(new_open_tag('tbody'))
2919                                                 ins_mode = ins_mode_in_table_body
2920                                                 process_token(t)
2921                                         break
2922                                         case 'table':
2923                                                 parse_error()
2924                                                 if (is_in_table_scope('table', NS_HTML)) {
2925                                                         while (true) {
2926                                                                 el = open_els.shift()
2927                                                                 if (el.name === 'table' && el.namespace === NS_HTML) {
2928                                                                         break
2929                                                                 }
2930                                                         }
2931                                                         reset_ins_mode()
2932                                                         process_token(t)
2933                                                 }
2934                                         break
2935                                         case 'style':
2936                                         case 'script':
2937                                         case 'template':
2938                                                 ins_mode_in_head(t)
2939                                         break
2940                                         case 'input':
2941                                                 if (!is_input_hidden_tok(t)) {
2942                                                         ins_mode_in_table_else(t)
2943                                                 } else {
2944                                                         parse_error()
2945                                                         el = insert_html_element(t)
2946                                                         open_els.shift()
2947                                                         t.acknowledge_self_closing()
2948                                                 }
2949                                         break
2950                                         case 'form':
2951                                                 parse_error()
2952                                                 if (form_element_pointer != null) {
2953                                                         return
2954                                                 }
2955                                                 if (template_tag_is_open()) {
2956                                                         return
2957                                                 }
2958                                                 form_element_pointer = insert_html_element(t)
2959                                                 open_els.shift()
2960                                         break
2961                                         default:
2962                                                 ins_mode_in_table_else(t)
2963                                 }
2964                         break
2965                         case TYPE_END_TAG:
2966                                 switch (t.name) {
2967                                         case 'table':
2968                                                 if (is_in_table_scope('table', NS_HTML)) {
2969                                                         while (true) {
2970                                                                 el = open_els.shift()
2971                                                                 if (el.name === 'table' && el.namespace === NS_HTML) {
2972                                                                         break
2973                                                                 }
2974                                                         }
2975                                                         reset_ins_mode()
2976                                                 } else {
2977                                                         parse_error()
2978                                                 }
2979                                         break
2980                                         case 'body':
2981                                         case 'caption':
2982                                         case 'col':
2983                                         case 'colgroup':
2984                                         case 'html':
2985                                         case 'tbody':
2986                                         case 'td':
2987                                         case 'tfoot':
2988                                         case 'th':
2989                                         case 'thead':
2990                                         case 'tr':
2991                                                 parse_error()
2992                                         break
2993                                         case 'template':
2994                                                 ins_mode_in_head(t)
2995                                         break
2996                                         default:
2997                                                 ins_mode_in_table_else(t)
2998                                 }
2999                         break
3000                         case TYPE_EOF:
3001                                 ins_mode_in_body(t)
3002                         break
3003                         default:
3004                                 ins_mode_in_table_else(t)
3005                 }
3006         }
3007
// 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
//
// "In table text" insertion mode. Text tokens are collected in
// pending_table_character_tokens; the first non-text token flushes that
// list, restores original_ins_mode and is then reprocessed.
ins_mode_in_table_text = function (t) {
        var flush, idx, only_space
        if (t.type === TYPE_TEXT) {
                if (t.text === "\u0000") {
                        // a NULL character token is reported and dropped
                        parse_error()
                } else {
                        pending_table_character_tokens.push(t)
                }
                return
        }
        // Anything else
        only_space = true
        for (idx = 0; only_space && idx < pending_table_character_tokens.length; ++idx) {
                if (!is_space_tok(pending_table_character_tokens[idx])) {
                        only_space = false
                }
        }
        // A whitespace-only run is inserted directly; as soon as one token is
        // not whitespace, every pending token goes through
        // ins_mode_in_table_else instead.
        flush = only_space ? insert_character : ins_mode_in_table_else
        for (idx = 0; idx < pending_table_character_tokens.length; ++idx) {
                flush(pending_table_character_tokens[idx])
        }
        pending_table_character_tokens = []
        ins_mode = original_ins_mode
        process_token(t)
}
3044
// 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
//
// "In caption" insertion mode. t is the token being processed.
ins_mode_in_caption = function (t) {
        var closes_caption, is_end, is_start, leave_caption

        // Pops open elements up to and including the html <caption>, then
        // calls clear_afe_to_marker() and switches to ins_mode_in_table.
        leave_caption = function () {
                var popped
                do {
                        popped = open_els.shift()
                } while (popped.name !== 'caption' || popped.namespace !== NS_HTML)
                clear_afe_to_marker()
                ins_mode = ins_mode_in_table
        }

        is_start = t.type === TYPE_START_TAG
        is_end = t.type === TYPE_END_TAG

        // </caption>
        if (is_end && t.name === 'caption') {
                if (!is_in_table_scope('caption', NS_HTML)) {
                        // fragment case
                        parse_error()
                        return
                }
                generate_implied_end_tags()
                if (open_els[0].name !== 'caption') {
                        parse_error()
                }
                leave_caption()
                return
        }

        // Table-structure start tags and </table> close the caption
        // implicitly; the token is then reprocessed in the new mode.
        closes_caption = false
        if (is_start) {
                switch (t.name) {
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
                        case 'th':
                        case 'thead':
                        case 'tr':
                                closes_caption = true
                }
        } else if (is_end) {
                closes_caption = t.name === 'table'
        }
        if (closes_caption) {
                parse_error()
                if (is_in_table_scope('caption', NS_HTML)) {
                        leave_caption()
                        process_token(t)
                }
                // else fragment case: the token is ignored
                return
        }

        // End tags that are simply ignored (with a parse error) in a caption
        if (is_end) {
                switch (t.name) {
                        case 'body':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
                        case 'th':
                        case 'thead':
                        case 'tr':
                                parse_error()
                                return
                }
        }

        // Anything else
        ins_mode_in_body(t)
}
3091
// 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
//
// "In column group" insertion mode. t is the token being processed.
ins_mode_in_column_group = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'col') {
                // <col> is inserted and immediately popped again
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
                // The namespace must be read from the current node
                // (open_els[0]), not from the open_els array itself.
                if (open_els[0].name === 'colgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                        ins_mode = ins_mode_in_table
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'col') {
                parse_error()
                return
        }
        if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
                ins_mode_in_head(t)
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        // Anything else: leave the colgroup (if it is the current node) and
        // reprocess the token in the "in table" mode
        if (open_els[0].name !== 'colgroup') {
                parse_error()
                return
        }
        open_els.shift()
        ins_mode = ins_mode_in_table
        process_token(t)
}
3147
// 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
//
// "In table body" insertion mode. t is the token being processed.
ins_mode_in_table_body = function (t) {
        var leave_section, reprocess_outside_section, section_in_table_scope

        // True when an html tbody/tfoot/thead is found on the stack of open
        // elements before the first entry listed in table_scopers.
        section_in_table_scope = function () {
                var idx, node
                for (idx = 0; idx < open_els.length; ++idx) {
                        node = open_els[idx]
                        if (node.namespace === NS_HTML) {
                                switch (node.name) {
                                        case 'tbody':
                                        case 'tfoot':
                                        case 'thead':
                                                return true
                                }
                        }
                        if (table_scopers[node.name] === node.namespace) {
                                return false
                        }
                }
                return false
        }

        // Pops the current table section and goes back to "in table".
        leave_section = function () {
                clear_stack_to_table_body_context()
                open_els.shift()
                ins_mode = ins_mode_in_table
        }

        // Shared by the tokens that end the section implicitly: close it and
        // reprocess the token, or ignore the token when no section is open.
        reprocess_outside_section = function () {
                if (!section_in_table_scope()) {
                        parse_error()
                        return
                }
                leave_section()
                process_token(t)
        }

        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'tr':
                                clear_stack_to_table_body_context()
                                insert_html_element(t)
                                ins_mode = ins_mode_in_row
                                return
                        case 'th':
                        case 'td':
                                // a cell without a row: open a <tr> first, then reprocess
                                parse_error()
                                clear_stack_to_table_body_context()
                                insert_html_element(new_open_tag('tr'))
                                ins_mode = ins_mode_in_row
                                process_token(t)
                                return
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                reprocess_outside_section()
                                return
                }
        } else if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                if (is_in_table_scope(t.name, NS_HTML)) {
                                        leave_section()
                                } else {
                                        parse_error()
                                }
                                return
                        case 'table':
                                reprocess_outside_section()
                                return
                        case 'body':
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                        case 'td':
                        case 'th':
                        case 'tr':
                                parse_error()
                                return
                }
        }

        // Anything else
        ins_mode_in_table(t)
}
3204
// 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
//
// "In row" insertion mode. t is the token being processed.
ins_mode_in_row = function (t) {
        var close_row, close_row_and_reprocess

        // Pops the current row and goes back to "in table body".
        close_row = function () {
                clear_stack_to_table_row_context()
                open_els.shift()
                ins_mode = ins_mode_in_table_body
        }

        // For tokens that end the row implicitly: close the row and
        // reprocess the token, or report a parse error when no <tr> is in
        // table scope.
        close_row_and_reprocess = function () {
                if (is_in_table_scope('tr', NS_HTML)) {
                        close_row()
                        process_token(t)
                } else {
                        parse_error()
                }
        }

        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'th':
                        case 'td':
                                clear_stack_to_table_row_context()
                                insert_html_element(t)
                                ins_mode = ins_mode_in_cell
                                afe_push_marker()
                                return
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                        case 'tr':
                                close_row_and_reprocess()
                                return
                }
        } else if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'tr':
                                if (is_in_table_scope('tr', NS_HTML)) {
                                        close_row()
                                } else {
                                        parse_error()
                                }
                                return
                        case 'table':
                                close_row_and_reprocess()
                                return
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                                if (!is_in_table_scope(t.name, NS_HTML)) {
                                        parse_error()
                                        return
                                }
                                // the section is open; without a <tr> in table
                                // scope the token is dropped without a parse error
                                if (is_in_table_scope('tr', NS_HTML)) {
                                        close_row()
                                        process_token(t)
                                }
                                return
                        case 'body':
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                        case 'td':
                        case 'th':
                                parse_error()
                                return
                }
        }

        // Anything else
        ins_mode_in_table(t)
}
3255
// http://www.w3.org/TR/html5/syntax.html#close-the-cell
//
// Closes the currently open table cell: calls generate_implied_end_tags(),
// reports a parse error unless the current node is then an html <td> or
// <th>, pops open elements up to and including the first html td/th, calls
// clear_afe_to_marker() and switches to ins_mode_in_row.
close_the_cell = function () {
        var current, popped
        generate_implied_end_tags()
        current = open_els[0]
        // compare the element's name (not the element object itself) for both
        // cell types
        if (!((current.name === 'td' || current.name === 'th') && current.namespace === NS_HTML)) {
                parse_error()
        }
        while (true) {
                popped = open_els.shift()
                if (popped.namespace === NS_HTML && (popped.name === 'td' || popped.name === 'th')) {
                        break
                }
        }
        clear_afe_to_marker()
        ins_mode = ins_mode_in_row
}
3272
// 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
//
// "In cell" insertion mode. t is the token being processed.
ins_mode_in_cell = function (t) {
        var cell_open, idx, node, popped
        if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'td':
                        case 'th':
                                if (!is_in_table_scope(t.name, NS_HTML)) {
                                        parse_error()
                                        return
                                }
                                generate_implied_end_tags()
                                if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
                                        parse_error()
                                }
                                // pop up to and including the matching html cell
                                do {
                                        popped = open_els.shift()
                                } while (popped.name !== t.name || popped.namespace !== NS_HTML)
                                clear_afe_to_marker()
                                ins_mode = ins_mode_in_row
                                return
                        case 'body':
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'html':
                                parse_error()
                                return
                        case 'table':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                        case 'tr':
                                if (is_in_table_scope(t.name, NS_HTML)) {
                                        close_the_cell()
                                        process_token(t)
                                } else {
                                        parse_error()
                                }
                                return
                }
        } else if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'caption':
                        case 'col':
                        case 'colgroup':
                        case 'tbody':
                        case 'td':
                        case 'tfoot':
                        case 'th':
                        case 'thead':
                        case 'tr':
                                // look for an html td/th before the first entry
                                // listed in table_scopers
                                cell_open = false
                                for (idx = 0; idx < open_els.length; ++idx) {
                                        node = open_els[idx]
                                        if (node.namespace === NS_HTML && (node.name === 'td' || node.name === 'th')) {
                                                cell_open = true
                                                break
                                        }
                                        if (table_scopers[node.name] === node.namespace) {
                                                break
                                        }
                                }
                                if (cell_open) {
                                        close_the_cell()
                                        process_token(t)
                                } else {
                                        parse_error()
                                }
                                return
                }
        }

        // Anything Else
        ins_mode_in_body(t)
}
3331
// 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
//
// "In select" insertion mode. t is the token being processed. Tokens not
// handled by one of the cases below are reported with parse_error() and
// dropped.
ins_mode_in_select = function (t) {
        var el
        if (t.type === TYPE_TEXT && t.text === "\u0000") {
                parse_error()
                return
        }
        if (t.type === TYPE_TEXT) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'option') {
                // a new <option> implicitly closes the previous one
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                insert_html_element(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
                // a new <optgroup> implicitly closes an open <option> and <optgroup>
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                }
                insert_html_element(t)
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        // Only drop the <option> when the node immediately below
                        // it is an html <optgroup>; name and namespace are both
                        // read from that node (open_els[1]).
                        if (open_els.length > 1 && open_els[1].name === 'optgroup' && open_els[1].namespace === NS_HTML) {
                                open_els.shift()
                        }
                }
                if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'option') {
                if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'select') {
                if (is_in_select_scope('select', NS_HTML)) {
                        while (true) {
                                el = open_els.shift()
                                if (el.name === 'select' && el.namespace === NS_HTML) {
                                        break
                                }
                        }
                        reset_ins_mode()
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'select') {
                parse_error()
                // WHATWG: ignore the token when no <select> is in select scope
                // (fragment case). Without this check the loop below would run
                // off the end of the stack of open elements.
                if (!is_in_select_scope('select', NS_HTML)) {
                        return
                }
                // otherwise it is treated like </select>
                while (true) {
                        el = open_els.shift()
                        if (el.name === 'select' && el.namespace === NS_HTML) {
                                break
                        }
                }
                reset_ins_mode()
                return
        }
        if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
                parse_error()
                if (!is_in_select_scope('select', NS_HTML)) {
                        // fragment case
                        return
                }
                // close the <select>, then reprocess the token in the new mode
                while (true) {
                        el = open_els.shift()
                        if (el.name === 'select' && el.namespace === NS_HTML) {
                                break
                        }
                }
                reset_ins_mode()
                process_token(t)
                return
        }
        // <script>, <template> and </template> use the "in head" rules
        if ((t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
                ins_mode_in_head(t)
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        // Anything else
        parse_error()
}
3446
// 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
//
// "In select in table" insertion mode. Table-structure start and end tags
// close the open <select> and are reprocessed; every other token is handled
// by ins_mode_in_select.
ins_mode_in_select_in_table = function (t) {
        var is_start, is_table_tag, popped
        is_start = t.type === TYPE_START_TAG
        is_table_tag = false
        if (is_start || t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'caption':
                        case 'table':
                        case 'tbody':
                        case 'tfoot':
                        case 'thead':
                        case 'tr':
                        case 'td':
                        case 'th':
                                is_table_tag = true
                }
        }
        if (!is_table_tag) {
                // Anything else
                ins_mode_in_select(t)
                return
        }
        parse_error()
        // an end tag only counts when its element is in table scope
        if (!is_start && !is_in_table_scope(t.name, NS_HTML)) {
                return
        }
        // pop up to and including the html <select>
        do {
                popped = open_els.shift()
        } while (popped.name !== 'select' || popped.namespace !== NS_HTML)
        reset_ins_mode()
        process_token(t)
}
3480
// 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
//
// "In template" insertion mode. Most start tags replace the first entry of
// template_ins_modes with the mode that fits the tag, switch to that mode
// and reprocess the token there.
ins_mode_in_template = function (t) {
        var next_mode, popped
        switch (t.type) {
                case TYPE_TEXT:
                case TYPE_COMMENT:
                case TYPE_DOCTYPE:
                        ins_mode_in_body(t)
                        return
                case TYPE_START_TAG:
                        switch (t.name) {
                                case 'base':
                                case 'basefont':
                                case 'bgsound':
                                case 'link':
                                case 'meta':
                                case 'noframes':
                                case 'script':
                                case 'style':
                                case 'template':
                                case 'title':
                                        ins_mode_in_head(t)
                                        return
                                case 'caption':
                                case 'colgroup':
                                case 'tbody':
                                case 'tfoot':
                                case 'thead':
                                        next_mode = ins_mode_in_table
                                        break
                                case 'col':
                                        next_mode = ins_mode_in_column_group
                                        break
                                case 'tr':
                                        next_mode = ins_mode_in_table_body
                                        break
                                case 'td':
                                case 'th':
                                        next_mode = ins_mode_in_row
                                        break
                                default:
                                        next_mode = ins_mode_in_body
                        }
                        template_ins_modes.shift()
                        template_ins_modes.unshift(next_mode)
                        ins_mode = next_mode
                        process_token(t)
                        return
                case TYPE_END_TAG:
                        if (t.name === 'template') {
                                ins_mode_in_head(t)
                        } else {
                                parse_error()
                        }
                        return
                case TYPE_EOF:
                        if (!template_tag_is_open()) {
                                stop_parsing()
                                return
                        }
                        parse_error()
                        // pop up to and including the html <template>
                        do {
                                popped = open_els.shift()
                        } while (popped.name !== 'template' || popped.namespace !== NS_HTML)
                        clear_afe_to_marker()
                        template_ins_modes.shift()
                        reset_ins_mode()
                        process_token(t)
                        return
        }
}
3549
3550         // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
3551         ins_mode_after_body = function (t) {
3552                 var first
3553                 if (is_space_tok(t)) {
3554                         ins_mode_in_body(t)
3555                         return
3556                 }
3557                 if (t.type === TYPE_COMMENT) {
3558                         first = open_els[open_els.length - 1]
3559                         insert_comment(t, [first, first.children.length])
3560                         return
3561                 }
3562                 if (t.type === TYPE_DOCTYPE) {
3563                         parse_error()
3564                         return
3565                 }
3566                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3567                         ins_mode_in_body(t)
3568                         return
3569                 }
3570                 if (t.type === TYPE_END_TAG && t.name === 'html') {
3571                         if (flag_fragment_parsing) {
3572                                 parse_error()
3573                                 return
3574                         }
3575                         ins_mode = ins_mode_after_after_body
3576                         return
3577                 }
3578                 if (t.type === TYPE_EOF) {
3579                         stop_parsing()
3580                         return
3581                 }
3582                 // Anything ELse
3583                 parse_error()
3584                 ins_mode = ins_mode_in_body
3585                 process_token(t)
3586         }
3587
3588         // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
3589         ins_mode_in_frameset = function (t) {
3590                 if (is_space_tok(t)) {
3591                         insert_character(t)
3592                         return
3593                 }
3594                 if (t.type === TYPE_COMMENT) {
3595                         insert_comment(t)
3596                         return
3597                 }
3598                 if (t.type === TYPE_DOCTYPE) {
3599                         parse_error()
3600                         return
3601                 }
3602                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3603                         ins_mode_in_body(t)
3604                         return
3605                 }
3606                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
3607                         insert_html_element(t)
3608                         return
3609                 }
3610                 if (t.type === TYPE_END_TAG && t.name === 'frameset') {
3611                         if (open_els.length === 1) {
3612                                 parse_error()
3613                                 return // fragment case
3614                         }
3615                         open_els.shift()
3616                         if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
3617                                 ins_mode = ins_mode_after_frameset
3618                         }
3619                         return
3620                 }
3621                 if (t.type === TYPE_START_TAG && t.name === 'frame') {
3622                         insert_html_element(t)
3623                         open_els.shift()
3624                         t.acknowledge_self_closing()
3625                         return
3626                 }
3627                 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3628                         ins_mode_in_head(t)
3629                         return
3630                 }
3631                 if (t.type === TYPE_EOF) {
3632                         if (open_els.length !== 1) {
3633                                 parse_error()
3634                         }
3635                         stop_parsing()
3636                         return
3637                 }
3638                 // Anything else
3639                 parse_error()
3640         }
3641
3642         // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
3643         ins_mode_after_frameset = function (t) {
3644                 if (is_space_tok(t)) {
3645                         insert_character(t)
3646                         return
3647                 }
3648                 if (t.type === TYPE_COMMENT) {
3649                         insert_comment(t)
3650                         return
3651                 }
3652                 if (t.type === TYPE_DOCTYPE) {
3653                         parse_error()
3654                         return
3655                 }
3656                 if (t.type === TYPE_START_TAG && t.name === 'html') {
3657                         ins_mode_in_body(t)
3658                         return
3659                 }
3660                 if (t.type === TYPE_END_TAG && t.name === 'html') {
3661                         ins_mode = ins_mode_after_after_frameset
3662                         return
3663                 }
3664                 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3665                         ins_mode_in_head(t)
3666                         return
3667                 }
3668                 if (t.type === TYPE_EOF) {
3669                         stop_parsing()
3670                         return
3671                 }
3672                 // Anything else
3673                 parse_error()
3674         }
3675
3676         // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
3677         ins_mode_after_after_body = function (t) {
3678                 if (t.type === TYPE_COMMENT) {
3679                         insert_comment(t, [doc, doc.children.length])
3680                         return
3681                 }
3682                 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3683                         ins_mode_in_body(t)
3684                         return
3685                 }
3686                 if (t.type === TYPE_EOF) {
3687                         stop_parsing()
3688                         return
3689                 }
3690                 // Anything else
3691                 parse_error()
3692                 ins_mode = ins_mode_in_body
3693                 process_token(t)
3694         }
3695
3696         // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
3697         ins_mode_after_after_frameset = function (t) {
3698                 if (t.type === TYPE_COMMENT) {
3699                         insert_comment(t, [doc, doc.children.length])
3700                         return
3701                 }
3702                 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3703                         ins_mode_in_body(t)
3704                         return
3705                 }
3706                 if (t.type === TYPE_EOF) {
3707                         stop_parsing()
3708                         return
3709                 }
3710                 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3711                         ins_mode_in_head(t)
3712                         return
3713                 }
3714                 // Anything else
3715                 parse_error()
3716                 return
3717         }
3718
3719         // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
3720         has_color_face_or_size = function (t) {
3721                 var a, i
3722                 for (i = 0; i < t.attrs_a.length; ++i) {
3723                         a = t.attrs_a[i]
3724                         if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
3725                                 return true
3726                         }
3727                 }
3728                 return false
3729         }
3730         in_foreign_content_end_script = function () {
3731                 open_els.shift()
3732                 // fixfull
3733         }
3734         in_foreign_content_other_start = function (t) {
3735                 var acn
3736                 acn = adjusted_current_node()
3737                 if (acn.namespace === NS_MATHML) {
3738                         adjust_mathml_attributes(t)
3739                 }
3740                 if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra perens because Coffeescript did
3741                         t.name = svg_name_fixes[t.name]
3742                 }
3743                 if (acn.namespace === NS_SVG) {
3744                         adjust_svg_attributes(t)
3745                 }
3746                 adjust_foreign_attributes(t)
3747                 insert_foreign_element(t, acn.namespace)
3748                 if (t.flag('self-closing')) {
3749                         if (t.name === 'script') {
3750                                 t.acknowledge_self_closing()
3751                                 in_foreign_content_end_script()
3752                                 // fixfull
3753                         } else {
3754                                 open_els.shift()
3755                                 t.acknowledge_self_closing()
3756                         }
3757                 }
3758         }
3759         in_foreign_content = function (t) {
3760                 var el, i, node
3761                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3762                         parse_error()
3763                         insert_character(new_character_token("\ufffd"))
3764                         return
3765                 }
3766                 if (is_space_tok(t)) {
3767                         insert_character(t)
3768                         return
3769                 }
3770                 if (t.type === TYPE_TEXT) {
3771                         flag_frameset_ok = false
3772                         insert_character(t)
3773                         return
3774                 }
3775                 if (t.type === TYPE_COMMENT) {
3776                         insert_comment(t)
3777                         return
3778                 }
3779                 if (t.type === TYPE_DOCTYPE) {
3780                         parse_error()
3781                         return
3782                 }
3783                 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
3784                         parse_error()
3785                         if (flag_fragment_parsing) {
3786                                 in_foreign_content_other_start(t)
3787                                 return
3788                         }
3789                         while (true) { // is this safe?
3790                                 open_els.shift()
3791                                 if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
3792                                         break
3793                                 }
3794                         }
3795                         process_token(t)
3796                         return
3797                 }
3798                 if (t.type === TYPE_START_TAG) {
3799                         in_foreign_content_other_start(t)
3800                         return
3801                 }
3802                 if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
3803                         in_foreign_content_end_script()
3804                         return
3805                 }
3806                 if (t.type === TYPE_END_TAG) {
3807                         i = 0
3808                         node = open_els[i]
3809                         if (node.name.toLowerCase() !== t.name) {
3810                                 parse_error()
3811                         }
3812                         while (true) {
3813                                 if (node === open_els[open_els.length - 1]) {
3814                                         return
3815                                 }
3816                                 if (node.name.toLowerCase() === t.name) {
3817                                         while (true) {
3818                                                 el = open_els.shift()
3819                                                 if (el === node) {
3820                                                         return
3821                                                 }
3822                                         }
3823                                 }
3824                                 i += 1
3825                                 node = open_els[i]
3826                                 if (node.namespace === NS_HTML) {
3827                                         break
3828                                 }
3829                         }
3830                         ins_mode(t) // explicitly call HTML insertion mode
3831                 }
3832         }
3833
3834
3835         // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
3836         tok_state_data = function () {
3837                 var c
3838                 switch (c = txt.charAt(cur++)) {
3839                         case '&':
3840                                 return new_text_node(parse_character_reference())
3841                         break
3842                         case '<':
3843                                 tok_state = tok_state_tag_open
3844                         break
3845                         case "\u0000":
3846                                 parse_error()
3847                                 return new_text_node(c)
3848                         break
3849                         case '': // EOF
3850                                 return new_eof_token()
3851                         break
3852                         default:
3853                                 return new_text_node(c)
3854                 }
3855                 return null
3856         }
3857
3858         // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3859         // not needed: tok_state_character_reference_in_data = function () {
3860         // just call parse_character_reference()
3861
3862         // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
3863         tok_state_rcdata = function () {
3864                 var c
3865                 switch (c = txt.charAt(cur++)) {
3866                         case '&':
3867                                 return new_text_node(parse_character_reference())
3868                         break
3869                         case '<':
3870                                 tok_state = tok_state_rcdata_less_than_sign
3871                         break
3872                         case "\u0000":
3873                                 parse_error()
3874                                 return new_character_token("\ufffd")
3875                         break
3876                         case '': // EOF
3877                                 return new_eof_token()
3878                         break
3879                         default:
3880                                 return new_character_token(c)
3881                 }
3882                 return null
3883         }
3884
3885         // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3886         // not needed: tok_state_character_reference_in_rcdata = function () {
3887         // just call parse_character_reference()
3888
3889         // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3890         tok_state_rawtext = function () {
3891                 var c
3892                 switch (c = txt.charAt(cur++)) {
3893                         case '<':
3894                                 tok_state = tok_state_rawtext_less_than_sign
3895                         break
3896                         case "\u0000":
3897                                 parse_error()
3898                                 return new_character_token("\ufffd")
3899                         break
3900                         case '': // EOF
3901                                 return new_eof_token()
3902                         break
3903                         default:
3904                                 return new_character_token(c)
3905                 }
3906                 return null
3907         }
3908
3909         // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3910         tok_state_script_data = function () {
3911                 var c
3912                 switch (c = txt.charAt(cur++)) {
3913                         case '<':
3914                                 tok_state = tok_state_script_data_less_than_sign
3915                         break
3916                         case "\u0000":
3917                                 parse_error()
3918                                 return new_character_token("\ufffd")
3919                         break
3920                         case '': // EOF
3921                                 return new_eof_token()
3922                         break
3923                         default:
3924                                 return new_character_token(c)
3925                 }
3926                 return null
3927         }
3928
3929         // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3930         tok_state_plaintext = function () {
3931                 var c
3932                 switch (c = txt.charAt(cur++)) {
3933                         case "\u0000":
3934                                 parse_error()
3935                                 return new_character_token("\ufffd")
3936                         break
3937                         case '': // EOF
3938                                 return new_eof_token()
3939                         break
3940                         default:
3941                                 return new_character_token(c)
3942                 }
3943                 return null
3944         }
3945
3946         // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3947         tok_state_tag_open = function () {
3948                 var c
3949                 c = txt.charAt(cur++)
3950                 if (c === '!') {
3951                         tok_state = tok_state_markup_declaration_open
3952                         return
3953                 }
3954                 if (c === '/') {
3955                         tok_state = tok_state_end_tag_open
3956                         return
3957                 }
3958                 if (is_uc_alpha(c)) {
3959                         tok_cur_tag = new_open_tag(c.toLowerCase())
3960                         tok_state = tok_state_tag_name
3961                         return
3962                 }
3963                 if (is_lc_alpha(c)) {
3964                         tok_cur_tag = new_open_tag(c)
3965                         tok_state = tok_state_tag_name
3966                         return
3967                 }
3968                 if (c === '?') {
3969                         parse_error()
3970                         tok_cur_tag = new_comment_token('?') // FIXME right?
3971                         tok_state = tok_state_bogus_comment
3972                         return
3973                 }
3974                 // Anything else
3975                 parse_error()
3976                 tok_state = tok_state_data
3977                 cur -= 1 // we didn't parse/handle the char after <
3978                 return new_text_node('<')
3979         }
3980
3981         // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3982         tok_state_end_tag_open = function () {
3983                 var c
3984                 c = txt.charAt(cur++)
3985                 if (is_uc_alpha(c)) {
3986                         tok_cur_tag = new_end_tag(c.toLowerCase())
3987                         tok_state = tok_state_tag_name
3988                         return
3989                 }
3990                 if (is_lc_alpha(c)) {
3991                         tok_cur_tag = new_end_tag(c)
3992                         tok_state = tok_state_tag_name
3993                         return
3994                 }
3995                 if (c === '>') {
3996                         parse_error()
3997                         tok_state = tok_state_data
3998                         return
3999                 }
4000                 if (c === '') { // EOF
4001                         parse_error()
4002                         tok_state = tok_state_data
4003                         return new_text_node('</')
4004                 }
4005                 // Anything else
4006                 parse_error()
4007                 tok_cur_tag = new_comment_token(c)
4008                 tok_state = tok_state_bogus_comment
4009                 return null
4010         }
4011
4012         // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
4013         tok_state_tag_name = function () {
4014                 var c, tmp
4015                 switch (c = txt.charAt(cur++)) {
4016                         case "\t":
4017                         case "\n":
4018                         case "\u000c":
4019                         case ' ':
4020                                 tok_state = tok_state_before_attribute_name
4021                         break
4022                         case '/':
4023                                 tok_state = tok_state_self_closing_start_tag
4024                         break
4025                         case '>':
4026                                 tok_state = tok_state_data
4027                                 tmp = tok_cur_tag
4028                                 tok_cur_tag = null
4029                                 return tmp
4030                         break
4031                         case "\u0000":
4032                                 parse_error()
4033                                 tok_cur_tag.name += "\ufffd"
4034                         break
4035                         case '': // EOF
4036                                 parse_error()
4037                                 tok_state = tok_state_data
4038                         break
4039                         default:
4040                                 if (is_uc_alpha(c)) {
4041                                         tok_cur_tag.name += c.toLowerCase()
4042                                 } else {
4043                                         tok_cur_tag.name += c
4044                                 }
4045                 }
4046                 return null
4047         }
4048
4049         // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
4050         tok_state_rcdata_less_than_sign = function () {
4051                 var c
4052                 c = txt.charAt(cur++)
4053                 if (c === '/') {
4054                         temporary_buffer = ''
4055                         tok_state = tok_state_rcdata_end_tag_open
4056                         return null
4057                 }
4058                 // Anything else
4059                 tok_state = tok_state_rcdata
4060                 cur -= 1 // reconsume the input character
4061                 return new_character_token('<')
4062         }
4063
4064         // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
4065         tok_state_rcdata_end_tag_open = function () {
4066                 var c
4067                 c = txt.charAt(cur++)
4068                 if (is_uc_alpha(c)) {
4069                         tok_cur_tag = new_end_tag(c.toLowerCase())
4070                         temporary_buffer += c
4071                         tok_state = tok_state_rcdata_end_tag_name
4072                         return null
4073                 }
4074                 if (is_lc_alpha(c)) {
4075                         tok_cur_tag = new_end_tag(c)
4076                         temporary_buffer += c
4077                         tok_state = tok_state_rcdata_end_tag_name
4078                         return null
4079                 }
4080                 // Anything else
4081                 tok_state = tok_state_rcdata
4082                 cur -= 1 // reconsume the input character
4083                 return new_character_token("</") // fixfull separate these
4084         }
4085
4086         // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
4087         is_appropriate_end_tag = function (t) {
4088                 // fixfull: this assumes that open_els[0].name is "the tag name of the last
4089                 // start tag to have been emitted from this tokenizer"
4090                 return t.type === TYPE_END_TAG && t.name === open_els[0].name
4091         }
4092
4093         // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
4094         tok_state_rcdata_end_tag_name = function () {
4095                 var c
4096                 c = txt.charAt(cur++)
4097                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4098                         if (is_appropriate_end_tag(tok_cur_tag)) {
4099                                 tok_state = tok_state_before_attribute_name
4100                                 return
4101                         }
4102                         // else fall through to "Anything else"
4103                 }
4104                 if (c === '/') {
4105                         if (is_appropriate_end_tag(tok_cur_tag)) {
4106                                 tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
4107                                 return
4108                         }
4109                         // else fall through to "Anything else"
4110                 }
4111                 if (c === '>') {
4112                         if (is_appropriate_end_tag(tok_cur_tag)) {
4113                                 tok_state = tok_state_data
4114                                 return tok_cur_tag
4115                         }
4116                         // else fall through to "Anything else"
4117                 }
4118                 if (is_uc_alpha(c)) {
4119                         tok_cur_tag.name += c.toLowerCase()
4120                         temporary_buffer += c
4121                         return null
4122                 }
4123                 if (is_lc_alpha(c)) {
4124                         tok_cur_tag.name += c
4125                         temporary_buffer += c
4126                         return null
4127                 }
4128                 // Anything else
4129                 tok_state = tok_state_rcdata
4130                 cur -= 1 // reconsume the input character
4131                 return new_character_token('</' + temporary_buffer) // fixfull separate these
4132         }
4133
4134         // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
4135         tok_state_rawtext_less_than_sign = function () {
4136                 var c
4137                 c = txt.charAt(cur++)
4138                 if (c === '/') {
4139                         temporary_buffer = ''
4140                         tok_state = tok_state_rawtext_end_tag_open
4141                         return null
4142                 }
4143                 // Anything else
4144                 tok_state = tok_state_rawtext
4145                 cur -= 1 // reconsume the input character
4146                 return new_character_token('<')
4147         }
4148
4149         // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
4150         tok_state_rawtext_end_tag_open = function () {
4151                 c = txt.charAt(cur++)
4152                 if (is_uc_alpha(c)) {
4153                         tok_cur_tag = new_end_tag(c.toLowerCase())
4154                         temporary_buffer += c
4155                         tok_state = tok_state_rawtext_end_tag_name
4156                         return null
4157                 }
4158                 if (is_lc_alpha(c)) {
4159                         tok_cur_tag = new_end_tag(c)
4160                         temporary_buffer += c
4161                         tok_state = tok_state_rawtext_end_tag_name
4162                         return null
4163                 }
4164                 // Anything else
4165                 tok_state = tok_state_rawtext
4166                 cur -= 1 // reconsume the input character
4167                 return new_character_token("</") // fixfull separate these
4168         }
4169
4170         // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
4171         tok_state_rawtext_end_tag_name = function () {
4172                 var c
4173                 c = txt.charAt(cur++)
4174                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4175                         if (is_appropriate_end_tag(tok_cur_tag)) {
4176                                 tok_state = tok_state_before_attribute_name
4177                                 return
4178                         }
4179                         // else fall through to "Anything else"
4180                 }
4181                 if (c === '/') {
4182                         if (is_appropriate_end_tag(tok_cur_tag)) {
4183                                 tok_state = tok_state_self_closing_start_tag
4184                                 return
4185                         }
4186                         // else fall through to "Anything else"
4187                 }
4188                 if (c === '>') {
4189                         if (is_appropriate_end_tag(tok_cur_tag)) {
4190                                 tok_state = tok_state_data
4191                                 return tok_cur_tag
4192                         }
4193                         // else fall through to "Anything else"
4194                 }
4195                 if (is_uc_alpha(c)) {
4196                         tok_cur_tag.name += c.toLowerCase()
4197                         temporary_buffer += c
4198                         return null
4199                 }
4200                 if (is_lc_alpha(c)) {
4201                         tok_cur_tag.name += c
4202                         temporary_buffer += c
4203                         return null
4204                 }
4205                 // Anything else
4206                 tok_state = tok_state_rawtext
4207                 cur -= 1 // reconsume the input character
4208                 return new_character_token('</' + temporary_buffer) // fixfull separate these
4209         }
4210
4211         // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
4212         tok_state_script_data_less_than_sign = function () {
4213                 var c
4214                 c = txt.charAt(cur++)
4215                 if (c === '/') {
4216                         temporary_buffer = ''
4217                         tok_state = tok_state_script_data_end_tag_open
4218                         return
4219                 }
4220                 if (c === '!') {
4221                         tok_state = tok_state_script_data_escape_start
4222                         return new_character_token('<!') // fixfull split
4223                 }
4224                 // Anything else
4225                 tok_state = tok_state_script_data
4226                 cur -= 1 // reconsume
4227                 return new_character_token('<')
4228         }
4229
4230         // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
4231         tok_state_script_data_end_tag_open = function () {
4232                 var c
4233                 c = txt.charAt(cur++)
4234                 if (is_uc_alpha(c)) {
4235                         tok_cur_tag = new_end_tag(c.toLowerCase())
4236                         temporary_buffer += c
4237                         tok_state = tok_state_script_data_end_tag_name
4238                         return
4239                 }
4240                 if (is_lc_alpha(c)) {
4241                         tok_cur_tag = new_end_tag(c)
4242                         temporary_buffer += c
4243                         tok_state = tok_state_script_data_end_tag_name
4244                         return
4245                 }
4246                 // Anything else
4247                 tok_state = tok_state_script_data
4248                 cur -= 1 // reconsume
4249                 return new_character_token('</')
4250         }
4251
4252         // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
4253         tok_state_script_data_end_tag_name = function () {
4254                 var c
4255                 c = txt.charAt(cur++)
4256                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4257                         if (is_appropriate_end_tag(tok_cur_tag)) {
4258                                 tok_state = tok_state_before_attribute_name
4259                                 return
4260                         }
4261                         // fall through
4262                 }
4263                 if (c === '/') {
4264                         if (is_appropriate_end_tag(tok_cur_tag)) {
4265                                 tok_state = tok_state_self_closing_start_tag
4266                                 return
4267                         }
4268                         // fall through
4269                 }
4270                 if (c === '>') {
4271                         if (is_appropriate_end_tag(tok_cur_tag)) {
4272                                 tok_state = tok_state_data
4273                                 return tok_cur_tag
4274                         }
4275                         // fall through
4276                 }
4277                 if (is_uc_alpha(c)) {
4278                         tok_cur_tag.name += c.toLowerCase()
4279                         temporary_buffer += c
4280                         return
4281                 }
4282                 if (is_lc_alpha(c)) {
4283                         tok_cur_tag.name += c
4284                         temporary_buffer += c
4285                         return
4286                 }
4287                 // Anything else
4288                 tok_state = tok_state_script_data
4289                 cur -= 1 // reconsume
4290                 return new_character_token("</" + temporary_buffer) // fixfull split
4291         }
4292
4293         // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
4294         tok_state_script_data_escape_start = function () {
4295                 var c
4296                 c = txt.charAt(cur++)
4297                 if (c === '-') {
4298                         tok_state = tok_state_script_data_escape_start_dash
4299                         return new_character_token('-')
4300                 }
4301                 // Anything else
4302                 tok_state = tok_state_script_data
4303                 cur -= 1 // reconsume
4304         }
4305
4306         // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
4307         tok_state_script_data_escape_start_dash = function () {
4308                 var c
4309                 c = txt.charAt(cur++)
4310                 if (c === '-') {
4311                         tok_state = tok_state_script_data_escaped_dash_dash
4312                         return new_character_token('-')
4313                 }
4314                 // Anything else
4315                 tok_state = tok_state_script_data
4316                 cur -= 1 // reconsume
4317         }
4318
4319         // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
4320         tok_state_script_data_escaped = function () {
4321                 var c
4322                 c = txt.charAt(cur++)
4323                 if (c === '-') {
4324                         tok_state = tok_state_script_data_escaped_dash
4325                         return new_character_token('-')
4326                 }
4327                 if (c === '<') {
4328                         tok_state = tok_state_script_data_escaped_less_than_sign
4329                         return
4330                 }
4331                 if (c === "\u0000") {
4332                         parse_error()
4333                         return new_character_token("\ufffd")
4334                 }
4335                 if (c === '') { // EOF
4336                         tok_state = tok_state_data
4337                         parse_error()
4338                         cur -= 1 // reconsume
4339                         return
4340                 }
4341                 // Anything else
4342                 return new_character_token(c)
4343         }
4344
4345         // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
4346         tok_state_script_data_escaped_dash = function () {
4347                 var c
4348                 c = txt.charAt(cur++)
4349                 if (c === '-') {
4350                         tok_state = tok_state_script_data_escaped_dash_dash
4351                         return new_character_token('-')
4352                 }
4353                 if (c === '<') {
4354                         tok_state = tok_state_script_data_escaped_less_than_sign
4355                         return
4356                 }
4357                 if (c === "\u0000") {
4358                         parse_error()
4359                         tok_state = tok_state_script_data_escaped
4360                         return new_character_token("\ufffd")
4361                 }
4362                 if (c === '') { // EOF
4363                         tok_state = tok_state_data
4364                         parse_error()
4365                         cur -= 1 // reconsume
4366                         return
4367                 }
4368                 // Anything else
4369                 tok_state = tok_state_script_data_escaped
4370                 return new_character_token(c)
4371         }
4372
4373         // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
4374         tok_state_script_data_escaped_dash_dash = function () {
4375                 var c
4376                 c = txt.charAt(cur++)
4377                 if (c === '-') {
4378                         return new_character_token('-')
4379                 }
4380                 if (c === '<') {
4381                         tok_state = tok_state_script_data_escaped_less_than_sign
4382                         return
4383                 }
4384                 if (c === '>') {
4385                         tok_state = tok_state_script_data
4386                         return new_character_token('>')
4387                 }
4388                 if (c === "\u0000") {
4389                         parse_error()
4390                         tok_state = tok_state_script_data_escaped
4391                         return new_character_token("\ufffd")
4392                 }
4393                 if (c === '') { // EOF
4394                         parse_error()
4395                         tok_state = tok_state_data
4396                         cur -= 1 // reconsume
4397                         return
4398                 }
4399                 // Anything else
4400                 tok_state = tok_state_script_data_escaped
4401                 return new_character_token(c)
4402         }
4403
4404         // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
4405         tok_state_script_data_escaped_less_than_sign = function () {
4406                 var c
4407                 c = txt.charAt(cur++)
4408                 if (c === '/') {
4409                         temporary_buffer = ''
4410                         tok_state = tok_state_script_data_escaped_end_tag_open
4411                         return
4412                 }
4413                 if (is_uc_alpha(c)) {
4414                         temporary_buffer = c.toLowerCase() // yes, really
4415                         tok_state = tok_state_script_data_double_escape_start
4416                         return new_character_token("<" + c) // fixfull split
4417                 }
4418                 if (is_lc_alpha(c)) {
4419                         temporary_buffer = c
4420                         tok_state = tok_state_script_data_double_escape_start
4421                         return new_character_token("<" + c) // fixfull split
4422                 }
4423                 // Anything else
4424                 tok_state = tok_state_script_data_escaped
4425                 cur -= 1 // reconsume
4426                 return new_character_token('<')
4427         }
4428
4429         // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
4430         tok_state_script_data_escaped_end_tag_open = function () {
4431                 var c
4432                 c = txt.charAt(cur++)
4433                 if (is_uc_alpha(c)) {
4434                         tok_cur_tag = new_end_tag(c.toLowerCase())
4435                         temporary_buffer += c
4436                         tok_state = tok_state_script_data_escaped_end_tag_name
4437                         return
4438                 }
4439                 if (is_lc_alpha(c)) {
4440                         tok_cur_tag = new_end_tag(c)
4441                         temporary_buffer += c
4442                         tok_state = tok_state_script_data_escaped_end_tag_name
4443                         return
4444                 }
4445                 // Anything else
4446                 tok_state = tok_state_script_data_escaped
4447                 cur -= 1 // reconsume
4448                 return new_character_token('</') // fixfull split
4449         }
4450
4451         // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
4452         tok_state_script_data_escaped_end_tag_name = function () {
4453                 var c
4454                 c = txt.charAt(cur++)
4455                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
4456                         if (is_appropriate_end_tag(tok_cur_tag)) {
4457                                 tok_state = tok_state_before_attribute_name
4458                                 return
4459                         }
4460                         // fall through
4461                 }
4462                 if (c === '/') {
4463                         if (is_appropriate_end_tag(tok_cur_tag)) {
4464                                 tok_state = tok_state_self_closing_start_tag
4465                                 return
4466                         }
4467                         // fall through
4468                 }
4469                 if (c === '>') {
4470                         if (is_appropriate_end_tag(tok_cur_tag)) {
4471                                 tok_state = tok_state_data
4472                                 return tok_cur_tag
4473                         }
4474                         // fall through
4475                 }
4476                 if (is_uc_alpha(c)) {
4477                         tok_cur_tag.name += c.toLowerCase()
4478                         temporary_buffer += c.toLowerCase()
4479                         return
4480                 }
4481                 if (is_lc_alpha(c)) {
4482                         tok_cur_tag.name += c
4483                         temporary_buffer += c.toLowerCase()
4484                         return
4485                 }
4486                 // Anything else
4487                 tok_state = tok_state_script_data_escaped
4488                 cur -= 1 // reconsume
4489                 return new_character_token("</" + temporary_buffer) // fixfull split
4490         }
4491
4492         // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
4493         tok_state_script_data_double_escape_start = function () {
4494                 var c
4495                 c = txt.charAt(cur++)
4496                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4497                         if (temporary_buffer === 'script') {
4498                                 tok_state = tok_state_script_data_double_escaped
4499                         } else {
4500                                 tok_state = tok_state_script_data_escaped
4501                         }
4502                         return new_character_token(c)
4503                 }
4504                 if (is_uc_alpha(c)) {
4505                         temporary_buffer += c.toLowerCase() // yes, really lowercase
4506                         return new_character_token(c)
4507                 }
4508                 if (is_lc_alpha(c)) {
4509                         temporary_buffer += c
4510                         return new_character_token(c)
4511                 }
4512                 // Anything else
4513                 tok_state = tok_state_script_data_escaped
4514                 cur -= 1 // reconsume
4515         }
4516
4517         // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
4518         tok_state_script_data_double_escaped = function () {
4519                 var c
4520                 c = txt.charAt(cur++)
4521                 if (c === '-') {
4522                         tok_state = tok_state_script_data_double_escaped_dash
4523                         return new_character_token('-')
4524                 }
4525                 if (c === '<') {
4526                         tok_state = tok_state_script_data_double_escaped_less_than_sign
4527                         return new_character_token('<')
4528                 }
4529                 if (c === "\u0000") {
4530                         parse_error()
4531                         return new_character_token("\ufffd")
4532                 }
4533                 if (c === '') { // EOF
4534                         parse_error()
4535                         tok_state = tok_state_data
4536                         cur -= 1 // reconsume
4537                         return
4538                 }
4539                 // Anything else
4540                 return new_character_token(c)
4541         }
4542
4543         // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
4544         tok_state_script_data_double_escaped_dash = function () {
4545                 var c
4546                 c = txt.charAt(cur++)
4547                 if (c === '-') {
4548                         tok_state = tok_state_script_data_double_escaped_dash_dash
4549                         return new_character_token('-')
4550                 }
4551                 if (c === '<') {
4552                         tok_state = tok_state_script_data_double_escaped_less_than_sign
4553                         return new_character_token('<')
4554                 }
4555                 if (c === "\u0000") {
4556                         parse_error()
4557                         tok_state = tok_state_script_data_double_escaped
4558                         return new_character_token("\ufffd")
4559                 }
4560                 if (c === '') { // EOF
4561                         parse_error()
4562                         tok_state = tok_state_data
4563                         cur -= 1 // reconsume
4564                         return
4565                 }
4566                 // Anything else
4567                 tok_state = tok_state_script_data_double_escaped
4568                 return new_character_token(c)
4569         }
4570
4571         // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
4572         tok_state_script_data_double_escaped_dash_dash = function () {
4573                 var c
4574                 c = txt.charAt(cur++)
4575                 if (c === '-') {
4576                         return new_character_token('-')
4577                 }
4578                 if (c === '<') {
4579                         tok_state = tok_state_script_data_double_escaped_less_than_sign
4580                         return new_character_token('<')
4581                 }
4582                 if (c === '>') {
4583                         tok_state = tok_state_script_data
4584                         return new_character_token('>')
4585                 }
4586                 if (c === "\u0000") {
4587                         parse_error()
4588                         tok_state = tok_state_script_data_double_escaped
4589                         return new_character_token("\ufffd")
4590                 }
4591                 if (c === '') { // EOF
4592                         parse_error()
4593                         tok_state = tok_state_data
4594                         cur -= 1 // reconsume
4595                         return
4596                 }
4597                 // Anything else
4598                 tok_state = tok_state_script_data_double_escaped
4599                 return new_character_token(c)
4600         }
4601
4602         // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
4603         tok_state_script_data_double_escaped_less_than_sign = function () {
4604                 var c
4605                 c = txt.charAt(cur++)
4606                 if (c === '/') {
4607                         temporary_buffer = ''
4608                         tok_state = tok_state_script_data_double_escape_end
4609                         return new_character_token('/')
4610                 }
4611                 // Anything else
4612                 tok_state = tok_state_script_data_double_escaped
4613                 cur -= 1 // reconsume
4614         }
4615
4616         // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
4617         tok_state_script_data_double_escape_end = function () {
4618                 var c
4619                 c = txt.charAt(cur++)
4620                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4621                         if (temporary_buffer === 'script') {
4622                                 tok_state = tok_state_script_data_escaped
4623                         } else {
4624                                 tok_state = tok_state_script_data_double_escaped
4625                         }
4626                         return new_character_token(c)
4627                 }
4628                 if (is_uc_alpha(c)) {
4629                         temporary_buffer += c.toLowerCase() // yes, really lowercase
4630                         return new_character_token(c)
4631                 }
4632                 if (is_lc_alpha(c)) {
4633                         temporary_buffer += c
4634                         return new_character_token(c)
4635                 }
4636                 // Anything else
4637                 tok_state = tok_state_script_data_double_escaped
4638                 cur -= 1 // reconsume
4639         }
4640
4641         // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
4642         tok_state_before_attribute_name = function () {
4643                 var attr_name, c, tmp
4644                 attr_name = null
4645                 switch (c = txt.charAt(cur++)) {
4646                         case "\t":
4647                         case "\n":
4648                         case "\u000c":
4649                         case ' ':
4650                                 return null
4651                         break
4652                         case '/':
4653                                 tok_state = tok_state_self_closing_start_tag
4654                                 return null
4655                         break
4656                         case '>':
4657                                 tok_state = tok_state_data
4658                                 tmp = tok_cur_tag
4659                                 tok_cur_tag = null
4660                                 return tmp
4661                         break
4662                         case "\u0000":
4663                                 parse_error()
4664                                 attr_name = "\ufffd"
4665                         break
4666                         case '"':
4667                         case "'":
4668                         case '<':
4669                         case '=':
4670                                 parse_error()
4671                                 attr_name = c
4672                         break
4673                         case '': // EOF
4674                                 parse_error()
4675                                 tok_state = tok_state_data
4676                         break
4677                         default:
4678                                 if (is_uc_alpha(c)) {
4679                                         attr_name = c.toLowerCase()
4680                                 } else {
4681                                         attr_name = c
4682                                 }
4683                 }
4684                 if (attr_name != null) {
4685                         tok_cur_tag.attrs_a.unshift([attr_name, ''])
4686                         tok_state = tok_state_attribute_name
4687                 }
4688                 return null
4689         }
4690
4691         // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
4692         tok_state_attribute_name = function () {
4693                 var c, tmp
4694                 switch (c = txt.charAt(cur++)) {
4695                         case "\t":
4696                         case "\n":
4697                         case "\u000c":
4698                         case ' ':
4699                                 tok_state = tok_state_after_attribute_name
4700                         break
4701                         case '/':
4702                                 tok_state = tok_state_self_closing_start_tag
4703                         break
4704                         case '=':
4705                                 tok_state = tok_state_before_attribute_value
4706                         break
4707                         case '>':
4708                                 tok_state = tok_state_data
4709                                 tmp = tok_cur_tag
4710                                 tok_cur_tag = null
4711                                 return tmp
4712                         break
4713                         case "\u0000":
4714                                 parse_error()
4715                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
4716                         break
4717                         case '"':
4718                         case "'":
4719                         case '<':
4720                                 parse_error()
4721                                 tok_cur_tag.attrs_a[0][0] += c
4722                         break
4723                         case '': // EOF
4724                                 parse_error()
4725                                 tok_state = tok_state_data
4726                         break
4727                         default:
4728                                 if (is_uc_alpha(c)) {
4729                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
4730                                 } else {
4731                                         tok_cur_tag.attrs_a[0][0] += c
4732                                 }
4733                 }
4734                 return null
4735         }
4736
4737         // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
4738         tok_state_after_attribute_name = function () {
4739                 var c
4740                 c = txt.charAt(cur++)
4741                 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4742                         return
4743                 }
4744                 if (c === '/') {
4745                         tok_state = tok_state_self_closing_start_tag
4746                         return
4747                 }
4748                 if (c === '=') {
4749                         tok_state = tok_state_before_attribute_value
4750                         return
4751                 }
4752                 if (c === '>') {
4753                         tok_state = tok_state_data
4754                         return tok_cur_tag
4755                 }
4756                 if (is_uc_alpha(c)) {
4757                         tok_cur_tag.attrs_a.unshift([c.toLowerCase(), ''])
4758                         tok_state = tok_state_attribute_name
4759                         return
4760                 }
4761                 if (c === "\u0000") {
4762                         parse_error()
4763                         tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
4764                         tok_state = tok_state_attribute_name
4765                         return
4766                 }
4767                 if (c === '') { // EOF
4768                         parse_error()
4769                         tok_state = tok_state_data
4770                         cur -= 1 // reconsume
4771                         return
4772                 }
4773                 if (c === '"' || c === "'" || c === '<') {
4774                         parse_error()
4775                         // fall through to Anything else
4776                 }
4777                 // Anything else
4778                 tok_cur_tag.attrs_a.unshift([c, ''])
4779                 tok_state = tok_state_attribute_name
4780         }
4781
4782         // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
4783         tok_state_before_attribute_value = function () {
4784                 var c, tmp
4785                 switch (c = txt.charAt(cur++)) {
4786                         case "\t":
4787                         case "\n":
4788                         case "\u000c":
4789                         case ' ':
4790                                 return null
4791                         break
4792                         case '"':
4793                                 tok_state = tok_state_attribute_value_double_quoted
4794                         break
4795                         case '&':
4796                                 tok_state = tok_state_attribute_value_unquoted
4797                                 cur -= 1
4798                         break
4799                         case "'":
4800                                 tok_state = tok_state_attribute_value_single_quoted
4801                         break
4802                         case "\u0000":
4803                                 // Parse error
4804                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4805                                 tok_state = tok_state_attribute_value_unquoted
4806                         break
4807                         case '>':
4808                                 // Parse error
4809                                 tok_state = tok_state_data
4810                                 tmp = tok_cur_tag
4811                                 tok_cur_tag = null
4812                                 return tmp
4813                         break
4814                         case '': // EOF
4815                                 parse_error()
4816                                 tok_state = tok_state_data
4817                         break
4818                         default:
4819                                 tok_cur_tag.attrs_a[0][1] += c
4820                                 tok_state = tok_state_attribute_value_unquoted
4821                 }
4822                 return null
4823         }
4824
4825         // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
4826         tok_state_attribute_value_double_quoted = function () {
4827                 var c
4828                 switch (c = txt.charAt(cur++)) {
4829                         case '"':
4830                                 tok_state = tok_state_after_attribute_value_quoted
4831                         break
4832                         case '&':
4833                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
4834                         break
4835                         case "\u0000":
4836                                 // Parse error
4837                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4838                         break
4839                         case '': // EOF
4840                                 parse_error()
4841                                 tok_state = tok_state_data
4842                         break
4843                         default:
4844                                 tok_cur_tag.attrs_a[0][1] += c
4845                 }
4846                 return null
4847         }
4848
4849         // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Tokenizer state for the inside of a single-quoted attribute value. Reads
// one character from txt at cur and appends it (or a replacement) to
// tok_cur_tag.attrs_a[0][1] -- presumably the value of the attribute that is
// currently being built. Always returns null.
tok_state_attribute_value_single_quoted = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case "'":
                        // closing quote
                        tok_state = tok_state_after_attribute_value_quoted
                break
                case '&':
                        // append whatever parse_character_reference() returns
                        // ("'" is presumably the "additional allowed character")
                        tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
                break
                case "\u0000":
                        // the spec makes NUL a parse error in this state
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                break
                case '': // EOF (charAt() returns '' past the end of txt)
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume, so cur does not run past the end of txt
                break
                default:
                        tok_cur_tag.attrs_a[0][1] += c
        }
        return null
}
4872
4873         // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// Tokenizer state for an unquoted attribute value. Reads one character from
// txt at cur. Whitespace ends the value, '>' ends the tag, and other
// characters are appended to tok_cur_tag.attrs_a[0][1] -- presumably the
// value of the attribute that is currently being built.
//
// Returns the finished tag token (and clears tok_cur_tag) when '>' is read,
// otherwise null.
tok_state_attribute_value_unquoted = function () {
        var c, tmp
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_attribute_name
                break
                case '&':
                        // append whatever parse_character_reference() returns
                        // ('>' is presumably the "additional allowed character")
                        tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
                break
                case '>':
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case "\u0000":
                        // the spec makes NUL a parse error in this state
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                break
                case '': // EOF (charAt() returns '' past the end of txt)
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume, so cur does not run past the end of txt
                break
                case '"':
                case "'":
                case '<':
                case '=':
                case '`':
                        // the spec flags these as a parse error, but they are
                        // still appended like any other character
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += c
                break
                default:
                        tok_cur_tag.attrs_a[0][1] += c
        }
        return null
}
4905
4906         // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
// Tokenizer state entered right after the closing quote of an attribute
// value. Reads one character from txt at cur and decides what follows the
// attribute.
//
// Returns the finished tag token (and clears tok_cur_tag) when '>' is read,
// otherwise null.
tok_state_after_attribute_value_quoted = function () {
        var c, tmp
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_attribute_name
                break
                case '/':
                        tok_state = tok_state_self_closing_start_tag
                break
                case '>':
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case '': // EOF (charAt() returns '' past the end of txt)
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume, so cur does not run past the end of txt
                break
                default:
                        // anything else directly after a quoted value is a
                        // parse error per the spec
                        parse_error()
                        tok_state = tok_state_before_attribute_name
                        cur -= 1 // we didn't handle that char
        }
        return null
}
4936
4937         // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
// Tokenizer state reached after a '/' inside a tag. Reads one character from
// txt at cur:
//   '>'  sets the 'self-closing' flag on tok_cur_tag and returns it
//   EOF  parse error, back to the data state (EOF is reconsumed)
//   else parse error, back to the before-attribute-name state (reconsumed)
// Only the '>' case returns a value; the other paths return undefined.
tok_state_self_closing_start_tag = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '>':
                        tok_cur_tag.flag('self-closing', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return
                default:
                        parse_error()
                        tok_state = tok_state_before_attribute_name
                        cur -= 1 // reconsume
        }
}
4957
4958         // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
4959         // WARNING: put a comment token in tok_cur_tag before setting this state
// Consumes everything from cur up to and including the next '>' (or up to
// the end of txt when there is none), replaces NUL characters with U+FFFD,
// appends the result to tok_cur_tag.text, switches to the data state and
// returns tok_cur_tag.
tok_state_bogus_comment = function () {
        var close_at, chunk
        close_at = txt.indexOf('>', cur)
        if (close_at !== -1) {
                chunk = txt.slice(cur, close_at)
                cur = close_at + 1 // also step over the '>'
        } else {
                // no '>' left: the rest of the input belongs to the comment
                chunk = txt.slice(cur)
                cur = txt.length
        }
        tok_cur_tag.text += chunk.replace(new RegExp("\u0000", 'g'), "\ufffd")
        tok_state = tok_state_data
        return tok_cur_tag
}
4975
4976         // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
// Tokenizer state reached after "<!". Looks ahead in txt from cur (without
// consuming a character first) and picks the next state:
//   "--"                        new comment token, comment-start state
//   "doctype" (any case)        doctype state
//   "[CDATA[" (exact case)      CDATA-section state, but only when
//                               adjusted_current_node() returns a node whose
//                               namespace is not NS_HTML
//   anything else               parse error, new comment token, bogus-comment
//                               state (nothing is consumed)
// Returns undefined on every path.
tok_state_markup_declaration_open = function () {
        var node
        if (txt.substr(cur, 2) === '--') {
                cur += 2
                tok_cur_tag = new_comment_token('')
                tok_state = tok_state_comment_start
        } else if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
                cur += 7
                tok_state = tok_state_doctype
        } else {
                node = adjusted_current_node()
                if (node && node.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
                        cur += 7
                        tok_state = tok_state_cdata_section
                } else {
                        parse_error()
                        tok_cur_tag = new_comment_token('')
                        tok_state = tok_state_bogus_comment
                }
        }
}
5001
5002         // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
// Tokenizer state right after "<!--". Reads one character from txt at cur
// and updates the comment token in tok_cur_tag.
//
// Returns tok_cur_tag when the comment ends here ('>' or EOF, both parse
// errors), otherwise null.
tok_state_comment_start = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case '-':
                        tok_state = tok_state_comment_start_dash
                break
                case "\u0000":
                        parse_error()
                        // NUL becomes U+FFFD inside the comment's own text, as
                        // in the other comment states; it is not returned as a
                        // separate character token
                        tok_cur_tag.text += "\ufffd"
                        tok_state = tok_state_comment
                break
                case '>':
                        // "<!-->": the comment is closed before it has any text
                        parse_error()
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF (charAt() returns '' past the end of txt)
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.text += c
                        tok_state = tok_state_comment
        }
        return null
}
5031
5032         // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
// Tokenizer state after "<!--" followed by a single '-'. Reads one character
// from txt at cur:
//   '-'        go to the comment-end state
//   '>' / EOF  parse error; back to the data state and return tok_cur_tag
//              (EOF is reconsumed)
//   otherwise  append the pending '-' plus the character (U+FFFD instead of
//              NUL, which is also a parse error) and go to the comment state
// Returns tok_cur_tag for '>' and EOF, otherwise null.
tok_state_comment_start_dash = function () {
        var ch = txt.charAt(cur++)
        if (ch === '-') {
                tok_state = tok_state_comment_end
                return null
        }
        if (ch === '>' || ch === '') {
                parse_error()
                tok_state = tok_state_data
                if (ch === '') {
                        cur -= 1 // reconsume EOF
                }
                return tok_cur_tag
        }
        if (ch === "\u0000") {
                parse_error()
                tok_cur_tag.text += "-\ufffd"
        } else {
                tok_cur_tag.text += "-" + ch
        }
        tok_state = tok_state_comment
        return null
}
5061
5062         // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
// Tokenizer state for the body of a comment. Reads one character from txt at
// cur: '-' moves to the comment-end-dash state, NUL is a parse error and is
// stored as U+FFFD, and any other character is appended to tok_cur_tag.text.
// At EOF (a parse error) the state goes back to data, EOF is reconsumed and
// tok_cur_tag is returned; every other path returns null.
tok_state_comment = function () {
        var ch = txt.charAt(cur++)
        if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        if (ch === '-') {
                tok_state = tok_state_comment_end_dash
        } else if (ch === "\u0000") {
                parse_error()
                tok_cur_tag.text += "\ufffd"
        } else {
                tok_cur_tag.text += ch
        }
        return null
}
5084
5085         // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
// Tokenizer state after a single '-' inside a comment. Reads one character
// from txt at cur:
//   '-'        go to the comment-end state
//   EOF        parse error; back to the data state, reconsume, return
//              tok_cur_tag
//   otherwise  append the pending '-' plus the character (U+FFFD instead of
//              NUL, which is also a parse error) and return to the comment
//              state
// Returns tok_cur_tag at EOF, otherwise null.
tok_state_comment_end_dash = function () {
        var ch = txt.charAt(cur++)
        if (ch === '-') {
                tok_state = tok_state_comment_end
                return null
        }
        if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        if (ch === "\u0000") {
                parse_error()
                tok_cur_tag.text += "-\ufffd"
        } else {
                tok_cur_tag.text += "-" + ch
        }
        tok_state = tok_state_comment
        return null
}
5109
5110         // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
// Tokenizer state after "--" inside a comment. Reads one character from txt
// at cur. '>' finishes the comment; every other input is a parse error:
//   '!'   go to the comment-end-bang state
//   '-'   append one '-' to tok_cur_tag.text and stay in this state
//   EOF   back to the data state, reconsume, return tok_cur_tag
//   else  append the pending "--" plus the character (U+FFFD instead of NUL)
//         and return to the comment state
// Returns tok_cur_tag for '>' and EOF, otherwise null.
tok_state_comment_end = function () {
        var ch = txt.charAt(cur++)
        if (ch === '>') {
                tok_state = tok_state_data
                return tok_cur_tag
        }
        // everything that is not '>' is a parse error in this state
        parse_error()
        if (ch === '') { // EOF
                tok_state = tok_state_data
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        if (ch === '!') {
                tok_state = tok_state_comment_end_bang
        } else if (ch === '-') {
                tok_cur_tag.text += '-'
        } else {
                if (ch === "\u0000") {
                        tok_cur_tag.text += "--\ufffd"
                } else {
                        tok_cur_tag.text += "--" + ch
                }
                tok_state = tok_state_comment
        }
        return null
}
5144
5145         // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
// Tokenizer state after "--!" inside a comment. Reads one character from txt
// at cur and updates the comment token in tok_cur_tag.
//
// Returns tok_cur_tag when the comment ends here ('>' or EOF), otherwise
// null.
tok_state_comment_end_bang = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case '-':
                        // Only the pending "--!" is appended. The '-' just read
                        // is remembered by the comment-end-dash state, which
                        // appends it itself if the comment continues.
                        tok_cur_tag.text += "--!"
                        tok_state = tok_state_comment_end_dash
                break
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case "\u0000":
                        parse_error()
                        tok_cur_tag.text += "--!\ufffd"
                        tok_state = tok_state_comment
                break
                case '': // EOF (charAt() returns '' past the end of txt)
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.text += "--!" + c
                        tok_state = tok_state_comment
        }
        return null
}
5174
5175         // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
// Tokenizer state right after "<!doctype". Reads one character from txt at
// cur. Whitespace moves on to the before-doctype-name state. Anything else
// is a parse error: at EOF a fresh doctype token with the 'force-quirks'
// flag set is returned (state goes back to data, EOF is reconsumed);
// otherwise the character is reconsumed in the before-doctype-name state.
// Returns the new token at EOF, otherwise null.
tok_state_doctype = function () {
        var ch, token
        ch = txt.charAt(cur++)
        if (ch === "\t" || ch === "\u000a" || ch === "\u000c" || ch === ' ') {
                tok_state = tok_state_before_doctype_name
                return null
        }
        parse_error()
        if (ch === '') { // EOF
                tok_state = tok_state_data
                token = new_doctype_token('')
                token.flag('force-quirks', true)
                cur -= 1 // reconsume
                return token
        }
        tok_state = tok_state_before_doctype_name
        cur -= 1 // reconsume
        return null
}
5200
5201         // 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
// Tokenizer state that waits for the first character of the doctype name.
// Reads one character from txt at cur:
//   whitespace      skipped
//   is_uc_alpha(c)  new doctype token named with the lowercased character
//   NUL             parse error; new doctype token named U+FFFD
//   '>'             parse error; returns a new empty-named doctype token with
//                   'force-quirks' set, state back to data
//   EOF             same as '>', and EOF is reconsumed
//   anything else   new doctype token named with the character
// The new token is stored in tok_cur_tag and the state becomes doctype-name,
// except for '>' and EOF where the token is returned directly. The other
// early exits return undefined; the final path returns null.
tok_state_before_doctype_name = function () {
        var ch, token
        ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        return // stay in this state
        }
        if (is_uc_alpha(ch)) {
                tok_cur_tag = new_doctype_token(ch.toLowerCase())
                tok_state = tok_state_doctype_name
                return
        }
        switch (ch) {
                case "\u0000":
                        parse_error()
                        tok_cur_tag = new_doctype_token("\ufffd")
                        tok_state = tok_state_doctype_name
                        return
                case '>':
                        parse_error()
                        token = new_doctype_token('')
                        token.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return token
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        token = new_doctype_token('')
                        token.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return token
        }
        // Anything else
        tok_cur_tag = new_doctype_token(ch)
        tok_state = tok_state_doctype_name
        return null
}
5239
5240         // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
// Tokenizer state that collects the doctype name into tok_cur_tag.name.
// Reads one character from txt at cur:
//   whitespace      go to the after-doctype-name state
//   '>'             back to the data state, return tok_cur_tag
//   is_uc_alpha(c)  append the lowercased character
//   NUL             parse error; append U+FFFD
//   EOF             parse error; set 'force-quirks', back to the data state,
//                   reconsume, return tok_cur_tag
//   anything else   append the character
// Returns tok_cur_tag for '>' and EOF, null for "anything else", and
// undefined on the remaining paths.
tok_state_doctype_name = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_after_doctype_name
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
        }
        if (is_uc_alpha(ch)) {
                tok_cur_tag.name += ch.toLowerCase()
                return
        }
        switch (ch) {
                case "\u0000":
                        parse_error()
                        tok_cur_tag.name += "\ufffd"
                        return
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
        }
        // Anything else
        tok_cur_tag.name += ch
        return null
}
5272
5273         // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
// Tokenizer state after the doctype name. Reads one character from txt at
// cur:
//   whitespace  skipped
//   '>'         back to the data state, return tok_cur_tag
//   EOF         parse error; set 'force-quirks', back to the data state,
//               reconsume, return tok_cur_tag
// Otherwise the six characters starting at the one just read are compared,
// case-insensitively, with "public" and "system". On a match the remaining
// five characters are consumed and the matching after-...-keyword state is
// entered. Anything else is a parse error that sets 'force-quirks' and
// enters the bogus-doctype state.
// Returns tok_cur_tag for '>' and EOF, null on the final error path, and
// undefined on the remaining paths.
tok_state_after_doctype_name = function () {
        var ch, keyword
        ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        return // stay in this state
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
        }
        // Anything else: cur - 1 is the index of the character just read
        keyword = txt.substr(cur - 1, 6).toLowerCase()
        if (keyword === 'public') {
                cur += 5
                tok_state = tok_state_after_doctype_public_keyword
                return
        }
        if (keyword === 'system') {
                cur += 5
                tok_state = tok_state_after_doctype_system_keyword
                return
        }
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5307
5308         // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
// Tokenizer state right after the PUBLIC keyword of a doctype. Reads one
// character from txt at cur:
//   whitespace  go to the before-doctype-public-identifier state
//   '"' or "'"  parse error; public_identifier becomes '' and the matching
//               quoted public-identifier state is entered
//   '>'         parse error; set 'force-quirks', back to the data state,
//               return tok_cur_tag
//   EOF         as '>', and EOF is reconsumed
//   else        parse error; set 'force-quirks', bogus-doctype state
// Returns tok_cur_tag for '>' and EOF, null for "else", and undefined on
// the remaining paths.
tok_state_after_doctype_public_keyword = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_doctype_public_identifier
                        return
                case '"':
                case "'":
                        parse_error()
                        tok_cur_tag.public_identifier = ''
                        if (ch === '"') {
                                tok_state = tok_state_doctype_public_identifier_double_quoted
                        } else {
                                tok_state = tok_state_doctype_public_identifier_single_quoted
                        }
                        return
                case '>':
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5347
5348         // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
// Tokenizer state between the PUBLIC keyword (plus whitespace) and the
// public identifier of a doctype. Reads one character from txt at cur.
//
// Returns tok_cur_tag when the doctype ends here ('>' or EOF, both parse
// errors that set 'force-quirks'), null for any other unexpected character,
// and undefined on the remaining paths.
tok_state_before_doctype_public_identifier = function () {
        var c
        c = txt.charAt(cur++)
        if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
                return
        }
        if (c === '"') {
                // A quote after whitespace is the well-formed case, so no
                // parse error is raised here (unlike a quote directly after
                // the PUBLIC keyword).
                tok_cur_tag.public_identifier = ''
                tok_state = tok_state_doctype_public_identifier_double_quoted
                return
        }
        if (c === "'") {
                tok_cur_tag.public_identifier = ''
                tok_state = tok_state_doctype_public_identifier_single_quoted
                return
        }
        if (c === '>') {
                parse_error()
                tok_cur_tag.flag('force-quirks', true)
                tok_state = tok_state_data
                return tok_cur_tag
        }
        if (c === '') { // EOF (charAt() returns '' past the end of txt)
                parse_error()
                tok_state = tok_state_data
                tok_cur_tag.flag('force-quirks', true)
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        // Anything else
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5386
5387
5388         // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
// Tokenizer state inside a double-quoted doctype public identifier. Reads
// one character from txt at cur:
//   '"'   closing quote; go to the after-doctype-public-identifier state
//   NUL   parse error; append U+FFFD to tok_cur_tag.public_identifier
//   '>'   parse error; set 'force-quirks', back to the data state, return
//         tok_cur_tag
//   EOF   as '>', and EOF is reconsumed
//   else  append the character to tok_cur_tag.public_identifier
// Returns tok_cur_tag for '>' and EOF, null for "else", and undefined on
// the remaining paths.
tok_state_doctype_public_identifier_double_quoted = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '"':
                        tok_state = tok_state_after_doctype_public_identifier
                        return
                case "\u0000":
                        parse_error()
                        tok_cur_tag.public_identifier += "\ufffd"
                        return
                case '>':
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.public_identifier += ch
                        return null
        }
}
5418
5419         // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
// Tokenizer state inside a single-quoted doctype public identifier. Reads
// one character from txt at cur:
//   "'"   closing quote; go to the after-doctype-public-identifier state
//   NUL   parse error; append U+FFFD to tok_cur_tag.public_identifier
//   '>'   parse error; set 'force-quirks', back to the data state, return
//         tok_cur_tag
//   EOF   as '>', and EOF is reconsumed
//   else  append the character to tok_cur_tag.public_identifier
// Returns tok_cur_tag for '>' and EOF, null for "else", and undefined on
// the remaining paths.
tok_state_doctype_public_identifier_single_quoted = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "'":
                        tok_state = tok_state_after_doctype_public_identifier
                        return
                case "\u0000":
                        parse_error()
                        tok_cur_tag.public_identifier += "\ufffd"
                        return
                case '>':
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        tok_cur_tag.public_identifier += ch
                        return null
        }
}
5449
5450         // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
// Tokenizer state right after the closing quote of a doctype public
// identifier. Reads one character from txt at cur:
//   whitespace  go to the between-public-and-system-identifiers state
//   '>'         back to the data state, return tok_cur_tag
//   '"' or "'"  parse error; system_identifier becomes '' and the matching
//               quoted system-identifier state is entered
//   EOF         parse error; set 'force-quirks', back to the data state,
//               reconsume, return tok_cur_tag
//   else        parse error; set 'force-quirks', bogus-doctype state
// Returns tok_cur_tag for '>' and EOF, null for "else", and undefined on
// the remaining paths.
tok_state_after_doctype_public_identifier = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_between_doctype_public_and_system_identifiers
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '"':
                case "'":
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        if (ch === '"') {
                                tok_state = tok_state_doctype_system_identifier_double_quoted
                        } else {
                                tok_state = tok_state_doctype_system_identifier_single_quoted
                        }
                        return
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5487
5488         // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
// Tokenizer state between a doctype's public identifier (plus whitespace)
// and its optional system identifier. Reads one character from txt at cur.
//
// Returns tok_cur_tag when the doctype ends here ('>' or EOF), null for any
// other unexpected character, and undefined on the remaining paths.
tok_state_between_doctype_public_and_system_identifiers = function () {
        var c
        c = txt.charAt(cur++)
        if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
                return
        }
        if (c === '>') {
                tok_state = tok_state_data
                return tok_cur_tag
        }
        if (c === '"') {
                // A quote after whitespace is the well-formed case, so no
                // parse error is raised here (unlike a quote directly after
                // the public identifier).
                tok_cur_tag.system_identifier = ''
                tok_state = tok_state_doctype_system_identifier_double_quoted
                return
        }
        if (c === "'") {
                tok_cur_tag.system_identifier = ''
                tok_state = tok_state_doctype_system_identifier_single_quoted
                return
        }
        if (c === '') { // EOF (charAt() returns '' past the end of txt)
                parse_error()
                tok_state = tok_state_data
                tok_cur_tag.flag('force-quirks', true)
                cur -= 1 // reconsume
                return tok_cur_tag
        }
        // Anything else
        parse_error()
        tok_cur_tag.flag('force-quirks', true)
        tok_state = tok_state_bogus_doctype
        return null
}
5524
5525         // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
// Tokenizer state right after the SYSTEM keyword of a doctype. Reads one
// character from txt at cur:
//   whitespace  go to the before-doctype-system-identifier state
//   '"' or "'"  parse error; system_identifier becomes '' and the matching
//               quoted system-identifier state is entered
//   '>'         parse error; set 'force-quirks', back to the data state,
//               return tok_cur_tag
//   EOF         as '>', and EOF is reconsumed
//   else        parse error; set 'force-quirks', bogus-doctype state
// Returns tok_cur_tag for '>' and EOF, null for "else", and undefined on
// the remaining paths.
tok_state_after_doctype_system_keyword = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_doctype_system_identifier
                        return
                case '"':
                case "'":
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        if (ch === '"') {
                                tok_state = tok_state_doctype_system_identifier_double_quoted
                        } else {
                                tok_state = tok_state_doctype_system_identifier_single_quoted
                        }
                        return
                case '>':
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5564
5565         // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
5566         tok_state_before_doctype_system_identifier = function () {
5567                 var c
5568                 c = txt.charAt(cur++)
5569                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5570                         return
5571                 }
5572                 if (c === '"') {
5573                         tok_cur_tag.system_identifier = ''
5574                         tok_state = tok_state_doctype_system_identifier_double_quoted
5575                         return
5576                 }
5577                 if (c === "'") {
5578                         tok_cur_tag.system_identifier = ''
5579                         tok_state = tok_state_doctype_system_identifier_single_quoted
5580                         return
5581                 }
5582                 if (c === '>') {
5583                         parse_error()
5584                         tok_cur_tag.flag('force-quirks', true)
5585                         tok_state = tok_state_data
5586                         return tok_cur_tag
5587                 }
5588                 if (c === '') { // EOF
5589                         parse_error()
5590                         tok_state = tok_state_data
5591                         tok_cur_tag.flag('force-quirks', true)
5592                         cur -= 1 // reconsume
5593                         return tok_cur_tag
5594                 }
5595                 // Anything else
5596                 parse_error()
5597                 tok_cur_tag.flag('force-quirks', true)
5598                 tok_state = tok_state_bogus_doctype
5599                 return null
5600         }
5601
5602         // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
5603         tok_state_doctype_system_identifier_double_quoted = function () {
5604                 var c
5605                 c = txt.charAt(cur++)
5606                 if (c === '"') {
5607                         tok_state = tok_state_after_doctype_system_identifier
5608                         return
5609                 }
5610                 if (c === "\u0000") {
5611                         parse_error()
5612                         tok_cur_tag.system_identifier += "\ufffd"
5613                         return
5614                 }
5615                 if (c === '>') {
5616                         parse_error()
5617                         tok_cur_tag.flag('force-quirks', true)
5618                         tok_state = tok_state_data
5619                         return tok_cur_tag
5620                 }
5621                 if (c === '') { // EOF
5622                         parse_error()
5623                         tok_state = tok_state_data
5624                         tok_cur_tag.flag('force-quirks', true)
5625                         cur -= 1 // reconsume
5626                         return tok_cur_tag
5627                 }
5628                 // Anything else
5629                 tok_cur_tag.system_identifier += c
5630                 return null
5631         }
5632
5633         // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
5634         tok_state_doctype_system_identifier_single_quoted = function () {
5635                 var c
5636                 c = txt.charAt(cur++)
5637                 if (c === "'") {
5638                         tok_state = tok_state_after_doctype_system_identifier
5639                         return
5640                 }
5641                 if (c === "\u0000") {
5642                         parse_error()
5643                         tok_cur_tag.system_identifier += "\ufffd"
5644                         return
5645                 }
5646                 if (c === '>') {
5647                         parse_error()
5648                         tok_cur_tag.flag('force-quirks', true)
5649                         tok_state = tok_state_data
5650                         return tok_cur_tag
5651                 }
5652                 if (c === '') { // EOF
5653                         parse_error()
5654                         tok_state = tok_state_data
5655                         tok_cur_tag.flag('force-quirks', true)
5656                         cur -= 1 // reconsume
5657                         return tok_cur_tag
5658                 }
5659                 // Anything else
5660                 tok_cur_tag.system_identifier += c
5661                 return null
5662         }
5663
5664         // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
5665         tok_state_after_doctype_system_identifier = function () {
5666                 var c
5667                 c = txt.charAt(cur++)
5668                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5669                         return
5670                 }
5671                 if (c === '>') {
5672                         tok_state = tok_state_data
5673                         return tok_cur_tag
5674                 }
5675                 if (c === '') { // EOF
5676                         parse_error()
5677                         tok_state = tok_state_data
5678                         tok_cur_tag.flag('force-quirks', true)
5679                         cur -= 1 // reconsume
5680                         return tok_cur_tag
5681                 }
5682                 // Anything else
5683                 parse_error()
5684                 // do _not_ tok_cur_tag.flag 'force-quirks', true
5685                 tok_state = tok_state_bogus_doctype
5686                 return null
5687         }
5688
5689         // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
5690         tok_state_bogus_doctype = function () {
5691                 var c
5692                 c = txt.charAt(cur++)
5693                 if (c === '>') {
5694                         tok_state = tok_state_data
5695                         return tok_cur_tag
5696                 }
5697                 if (c === '') { // EOF
5698                         tok_state = tok_state_data
5699                         cur -= 1 // reconsume
5700                         return tok_cur_tag
5701                 }
5702                 // Anything else
5703                 return null
5704         }
5705
5706         // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
5707         tok_state_cdata_section = function () {
5708                 var next_gt, val
5709                 tok_state = tok_state_data
5710                 next_gt = txt.indexOf(']]>', cur)
5711                 if (next_gt === -1) {
5712                         val = txt.substr(cur)
5713                         cur = txt.length
5714                 } else {
5715                         val = txt.substr(cur, next_gt - cur)
5716                         cur = next_gt + 3
5717                 }
5718                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5719                 if (val.length > 0) {
5720                         return new_character_token(val) // fixfull split
5721                 }
5722                 return null
5723         }
5724
5725         // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5726         // Don't set this as a state, just call it
5727         // returns a string (NOT a text node)
5728         parse_character_reference = function (allowed_char, in_attr) {
5729                 var base, c, charset, code_point, decoded, i, max, start
5730                 if (allowed_char == null) {
5731                         allowed_char = null
5732                 }
5733                 if (in_attr == null) {
5734                         in_attr = false
5735                 }
5736                 if (cur >= txt.length) {
5737                         return '&'
5738                 }
5739                 switch (c = txt.charAt(cur)) {
5740                         case "\t":
5741                         case "\n":
5742                         case "\u000c":
5743                         case ' ':
5744                         case '<':
5745                         case '&':
5746                         case '':
5747                         case allowed_char:
5748                                 // explicitly not a parse error
5749                                 return '&'
5750                         break
5751                         case ';':
5752                                 // there has to be "one or more" alnums between & and ; to be a parse error
5753                                 return '&'
5754                         break
5755                         case '#':
5756                                 if (cur + 1 >= txt.length) {
5757                                         return '&'
5758                                 }
5759                                 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
5760                                         base = 16
5761                                         charset = hex_chars
5762                                         start = cur + 2
5763                                 } else {
5764                                         charset = digits
5765                                         start = cur + 1
5766                                         base = 10
5767                                 }
5768                                 i = 0
5769                                 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5770                                         i += 1
5771                                 }
5772                                 if (i === 0) {
5773                                         return '&'
5774                                 }
5775                                 cur = start + i
5776                                 if (txt.charAt(start + i) === ';') {
5777                                         cur += 1
5778                                 } else {
5779                                         parse_error()
5780                                 }
5781                                 code_point = txt.substr(start, i)
5782                                 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5783                                         code_point = code_point.substr(1)
5784                                 }
5785                                 code_point = parseInt(code_point, base)
5786                                 if (unicode_fixes[code_point] != null) {
5787                                         parse_error()
5788                                         return unicode_fixes[code_point]
5789                                 } else {
5790                                         if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
5791                                                 parse_error()
5792                                                 return "\ufffd"
5793                                         } else {
5794                                                 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5795                                                         parse_error()
5796                                                 }
5797                                                 return from_code_point(code_point)
5798                                         }
5799                                 }
5800                                 return
5801                         break
5802                         default:
5803                                 for (i = 0; i < 31; ++i) {
5804                                         if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5805                                                 break
5806                                         }
5807                                 }
5808                                 if (i === 0) {
5809                                         // exit early, because parse_error() below needs at least one alnum
5810                                         return '&'
5811                                 }
5812                                 if (txt.charAt(cur + i) === ';') {
5813                                         decoded = decode_named_char_ref(txt.substr(cur, i))
5814                                         i += 1 // scan past the ';' (after, so we dno't pass it to decode)
5815                                         if (decoded != null) {
5816                                                 cur += i
5817                                                 return decoded
5818                                         }
5819                                         // else FALL THROUGH (check for match without last char(s) or ";")
5820                                 }
5821                                 // no ';' terminator (only legacy char refs)
5822                                 max = i
5823                                 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5824                                         c = legacy_char_refs[txt.substr(cur, i)]
5825                                         if (c != null) {
5826                                                 if (in_attr) {
5827                                                         if (txt.charAt(cur + i) === '=') {
5828                                                                 // "because some legacy user agents will
5829                                                                 // misinterpret the markup in those cases"
5830                                                                 parse_error()
5831                                                                 return '&'
5832                                                         }
5833                                                         if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5834                                                                 // this makes attributes forgiving about url args
5835                                                                 return '&'
5836                                                         }
5837                                                 }
5838                                                 // ok, and besides the weird exceptions for attributes...
5839                                                 // return the matching char
5840                                                 cur += i // consume entity chars
5841                                                 parse_error() // because no terminating ";"
5842                                                 return c
5843                                         }
5844                                 }
5845                                 parse_error()
5846                                 return '&'
5847                 }
5848                 // never reached
5849         }
5850
5851         eat_next_token_if_newline = function () {
5852                 var old_cur, t
5853                 old_cur = cur
5854                 t = null
5855                 while (t == null) {
5856                         t = tok_state()
5857                 }
5858                 if (t.type === TYPE_TEXT) {
5859                         // definition of a newline depends on whether it was a character ref or not
5860                         if (cur - old_cur === 1) {
5861                                 // not a character reference
5862                                 if (t.text === "\u000d" || t.text === "\u000a") {
5863                                         return
5864                                 }
5865                         } else {
5866                                 if (t.text === "\u000a") {
5867                                         return
5868                                 }
5869                         }
5870                 }
5871                 // not a "newline"
5872                 cur = old_cur
5873         }
5874
5875         // tree constructor initialization
5876         // see comments on TYPE_TAG/etc for the structure of this data
5877         txt = args_html
5878         cur = 0
5879         doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5880         doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5881         fragment_root = null // fragment parsing algorithm returns children of this
5882         open_els = []
5883         afe = [] // active formatting elements
5884         template_ins_modes = []
5885         ins_mode = ins_mode_initial
5886         original_ins_mode = ins_mode // TODO check spec
5887         flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5888         flag_frameset_ok = true
5889         flag_parsing = true
5890         flag_foster_parenting = false
5891         form_element_pointer = null
5892         temporary_buffer = null
5893         pending_table_character_tokens = []
5894         head_element_pointer = null
5895         flag_fragment_parsing = false
5896         context_element = null
5897         prev_node_id = 0 // just for debugging
5898
5899         // tokenizer initialization
5900         tok_state = tok_state_data
5901
5902         parse_init = function () {
5903                 var el, f, ns, old_doc, t
5904                 // fragment parsing (text arg)
5905                 if (args.fragment != null) {
5906                         // this handles the fragment from the tests in the format described here:
5907                         // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
5908                         f = args.fragment
5909                         ns = NS_HTML
5910                         if (f.substr(0, 5) === 'math ') {
5911                                 f = f.substr(5)
5912                                 ns = NS_MATHML
5913                         } else if (f.substr(0, 4) === 'svg ') {
5914                                 f = f.substr(4)
5915                                 ns = NS_SVG
5916                         }
5917                         t = new_open_tag(f)
5918                         context_element = token_to_element(t, ns)
5919                         context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5920                         context_element.document.flag('quirks mode', QUIRKS_NO)
5921                 }
5922                 // fragment parsing (Node arg)
5923                 if (args.context != null) {
5924                         context_element = args.context
5925                 }
5926
5927                 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5928                 // fragment parsing algorithm
5929                 if (context_element != null) {
5930                         flag_fragment_parsing = true
5931                         doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5932                         // search up the tree from context, to try to find it's document,
5933                         // because this file only puts a "document" property on the root
5934                         // element.
5935                         old_doc = null
5936                         el = context_element
5937                         while (true) {
5938                                 if (el.document != null) {
5939                                         old_doc = el.document
5940                                         break
5941                                 }
5942                                 if (el.parent) {
5943                                         el = el.parent
5944                                 } else {
5945                                         break
5946                                 }
5947                         }
5948                         if (old_doc) {
5949                                 doc.flag('quirks mode', old_doc.flag('quirks mode'))
5950                         }
5951                         // set tok_state
5952                         if (context_element.namespace === NS_HTML) {
5953                                 switch (context_element.name) {
5954                                         case 'title':
5955                                         case 'textarea':
5956                                                 tok_state = tok_state_rcdata
5957                                         break
5958                                         case 'style':
5959                                         case 'xmp':
5960                                         case 'iframe':
5961                                         case 'noembed':
5962                                         case 'noframes':
5963                                                 tok_state = tok_state_rawtext
5964                                         break
5965                                         case 'script':
5966                                                 tok_state = tok_state_script_data
5967                                         break
5968                                         case 'noscript':
5969                                                 if (flag_scripting) {
5970                                                         tok_state = tok_state_rawtext
5971                                                 }
5972                                         break
5973                                         case 'plaintext':
5974                                                 tok_state = tok_state_plaintext
5975                                 }
5976                         }
5977                         fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5978                         doc.children.push(fragment_root)
5979                         fragment_root.document = doc
5980                         open_els = [fragment_root]
5981                         if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
5982                                 template_ins_modes.unshift(ins_mode_in_template)
5983                         }
5984                         // fixfull create token for context (it should have it's original one already)
5985                         reset_ins_mode()
5986                         // set form_element pointer... in the foreign doc?!
5987                         el = context_element
5988                         while (true) {
5989                                 if (el.name === 'form' && el.namespace === NS_HTML) {
5990                                         form_element_pointer = el
5991                                         break
5992                                 }
5993                                 if (el.parent) {
5994                                         el = el.parent
5995                                 } else {
5996                                         break
5997                                 }
5998                         }
5999                 }
6000
6001                 // text pre-processing
6002                 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
6003                 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6004                 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6005         }
6006
6007         // http://www.w3.org/TR/html5/syntax.html#tree-construction
6008         parse_main_loop = function () {
6009                 var t
6010                 while (flag_parsing) {
6011                         t = tok_state()
6012                         if (t != null) {
6013                                 process_token(t)
6014                                 // fixfull parse error if has self-closing flag, but it wasn't acknolwedged
6015                         }
6016                 }
6017         }
6018         parse_init()
6019         parse_main_loop()
6020
6021         if (flag_fragment_parsing) {
6022                 return fragment_root.children
6023         }
6024         return doc.children
6025 }
6026
// what this file makes available to the outside: the parse function and the
// Node constructor
var this_module = {}
this_module.parse = parse_html
this_module.Node = Node

// export via module.exports when context is 'module', otherwise as a
// "peach_parser" property on window
if (context !== 'module') {
        window.peach_parser = this_module
} else {
        module.exports = this_module
}
6037
6038 }).call(this)