JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
code cleanup (no IE8 support)
[peach-html5-editor.git] / parser.js
1 // todo remove unused variables
2 // todo remove debug log, or make a way to access it
3
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
6 //
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
10 // later version.
11 //
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
15 // details.
16 //
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
19
20
21 // This file implements a thorough parser for html5, meant to be used by a
22 // WYSIWYG editor.
23
24 // The implementation is a pretty direct implementation of the parsing algorithm
25 // described here:
26 //
27 //     http://www.w3.org/TR/html5/syntax.html
28 //
29 // except for some places marked "WHATWG" that are implemented as described here:
30 //
31 //     https://html.spec.whatwg.org/multipage/syntax.html
32 //
33 // This code passes all of the tests in the .dat files at:
34 //
35 //     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
36
37
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
41 //
42 // See README.md for how to run this file in the browser or in node.js.
43 //
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
46 //
47 // Call it like this:
48 //
49 //     peach_parser.parse("<p><b>hi</p>")
50 //
51 // Or, if you don't want <html><head><body>/etc, do this:
52 //
53 //     peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
54 //
55 // return value is an array of Nodes, A Node contains:
56 //     type: one of: "tag", "text", "comment", "doctype"
57 //     text: contents for text/comment nodes
58 //     attrs: object of attributes, eg {href: "#main"}
59 //     children: array of Nodes
60 //     namespace: one of: "html", "mathml", "svg"
61 //     parent: another Node or null
62
63 // This code is a work in progress, eg try search this file for "fixfull",
64 // "TODO" and "FIXME"
65
66
67 // Notes:  stacks/lists
68 //
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
71 // his head straight:
72 //
73 // stacks grow downward (current element is index=0)
74 //
75 // example: open_els = [a, b, c, d, e, f, g]
76 //
77 // "grows downwards" means it's visualized like this: (index: el "names")
78 //
79 //   6: g "start of the list", "topmost", "first"
80 //   5: f
81 //   4: e "previous" (to d), "above", "before"
82 //   3: d   (previous/next are relative to this element)
83 //   2: c "next", "after", "lower", "below"
84 //   1: b
85 //   0: a "end of the list", "current node", "bottommost", "last"
86 (function () {
87
88 var NS_HTML, NS_MATHML, NS_SVG, QUIRKS_LIMITED, QUIRKS_NO, QUIRKS_YES, TYPE_AAA_BOOKMARK, TYPE_AFE_MARKER, TYPE_COMMENT, TYPE_DOCTYPE, TYPE_END_TAG, TYPE_EOF, TYPE_START_TAG, TYPE_TAG, TYPE_TEXT, _decode_named_char_ref, adjust_foreign_attributes, adjust_mathml_attributes, adjust_svg_attributes, adp_els, alnum, context, debug_log, debug_log_each, debug_log_reset, decode_named_char_ref, decode_named_char_ref_cache, decode_named_char_ref_el, digits, el_is_special, el_is_special_not_adp, end_tag_implied, exports, foreign_attr_fixes, formatting_elements, foster_parenting_targets, from_code_point, g_debug_log, h_tags, hex_chars, is_html_integration, is_input_hidden_tok, is_lc_alpha, is_mathml_text_integration_point, is_space, is_space_tok, is_uc_alpha, lc_alpha, legacy_char_refs, mathml_elements, mathml_text_integration, new_aaa_bookmark, new_afe_marker, new_character_token, new_comment_token, new_doctype_token, new_element, new_end_tag, new_eof_token, new_open_tag, new_text_node, parse_html, prev_node_id, quirks_yes_pi_prefixes, space_chars, special_elements, svg_attribute_fixes, svg_elements, svg_name_fixes, tag_name_chars, uc_alpha, unicode_fixes, whitespace_chars
89
// Detect the runtime. When a `module` object with a non-null `exports` is
// present we record 'module'; otherwise we record 'browser'.
if ((typeof module) !== 'undefined' && (module.exports != null)) {
        context = 'module'
} else {
        context = 'browser'
        // in the browser branch, create the global object `window.peach_parser`
        window.peach_parser = {}
}
96
// Convert a Unicode code point (number) into a string.
//
// Uses String.fromCodePoint when the runtime has it. Otherwise code points up
// to 0xffff are passed straight to String.fromCharCode, and larger ones are
// split into a UTF-16 surrogate pair by hand.
from_code_point = function (x) {
        var offset, high, low
        if (String.fromCodePoint != null) {
                return String.fromCodePoint(x)
        }
        if (x <= 0xffff) {
                return String.fromCharCode(x)
        }
        offset = x - 0x10000
        high = (offset >> 10) + 0xd800 // lead surrogate
        low = (offset % 0x400) + 0xdc00 // trail surrogate
        return String.fromCharCode(high, low)
}
108
// Each node is an object of the Node class. Here are the Node types:
TYPE_TAG = 'tag' // name, {attributes}, [children]
TYPE_TEXT = 'text' // "text"
TYPE_COMMENT = 'comment'
TYPE_DOCTYPE = 'doctype'
// The following types are emitted by the tokenizer, but shouldn't end up in
// the tree. They are numbers, while the tree node types above are strings.
TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
TYPE_END_TAG = 5 // name
TYPE_EOF = 6
TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm

// namespace constants (values of Node.namespace)
NS_HTML = 'html'
NS_MATHML = 'mathml'
NS_SVG = 'svg'

// quirks mode constants
QUIRKS_NO = 'no'
QUIRKS_LIMITED = 'limited'
QUIRKS_YES = 'yes'
130
// Debug messages are buffered in g_debug_log instead of being printed, so
// that eg they can be shown only for tests that fail.
g_debug_log = []

// Throw away everything logged so far (starts a fresh buffer).
debug_log_reset = function () {
        g_debug_log = []
}

// Append one message to the buffer.
debug_log = function (str) {
        g_debug_log[g_debug_log.length] = str
}

// Call cb(message) for every buffered message, oldest first. The length is
// re-read on every step, so messages logged by cb itself are visited too.
debug_log_each = function (cb) {
        var idx = 0
        while (idx < g_debug_log.length) {
                cb(g_debug_log[idx])
                idx += 1
        }
}
145
146 prev_node_id = 0
// Node: used both for nodes of the resulting tree and for tokenizer tokens.
//
// type: one of the TYPE_* constants
// args: optional object; every key is optional:
//     name, text, attrs, children, namespace, parent: copied to the
//         properties of the same name (defaults: '', '', {}, [], NS_HTML,
//         null)
//     attrs_a: initial value for this.attrs_a. The older spelling `attr_k` is
//         still accepted, because that is the only key this constructor used
//         to read.
//     token, flags: initial values for the private fields of the same name
//     id: when given, the new node's id is this value with "+" appended;
//         otherwise the next value of the prev_node_id counter is used
function Node (type, args) {
        if (args == null) {
                args = {}
        }
        this.type = type // one of the TYPE_* constants above
        this.name = args.name != null ? args.name : '' // tag name
        this.text = args.text != null ? args.text : '' // contents for text/comment nodes
        this.attrs = args.attrs != null ? args.attrs : {}
        this.children = args.children != null ? args.children : []
        this.namespace = args.namespace != null ? args.namespace : NS_HTML
        this.parent = args.parent != null ? args.parent : null
        // private:
        // attrs in progress, TYPE_START_TAG only
        if (args.attrs_a != null) {
                this.attrs_a = args.attrs_a
        } else if (args.attr_k != null) {
                this.attrs_a = args.attr_k // legacy key name
        } else {
                this.attrs_a = []
        }
        this.token = args.token != null ? args.token : null
        this.flags = args.flags != null ? args.flags : {}
        if (args.id != null) {
                this.id = args.id + "+"
        } else {
                this.id = "" + (++prev_node_id)
        }
}
168
// Set the 'did_self_close' flag to true. When this node has a token attached
// (this.token), the flag is set on that token; otherwise on the node itself.
Node.prototype.acknowledge_self_closing = function () {
        var target = this.token != null ? this.token : this
        target.flag('did_self_close', true)
}
176
// Combined getter/setter for this.flags.
//
// flag(key)        returns the stored value (undefined if never set)
// flag(key, value) stores value and returns nothing
//
// A null/undefined value always selects the getter, so a flag cannot be set
// to null through this method.
Node.prototype.flag = function (key, value) {
        if (value == null) {
                return this.flags[key]
        }
        this.flags[key] = value
}
184
// Factory helpers. Each one builds a Node of a fixed type and takes only the
// argument that is normally known when the parser creates that kind of node.

// start tag token with the given tag name
new_open_tag = function (name) {
        var props = {name: name}
        return new Node(TYPE_START_TAG, props)
}
// end tag token with the given tag name
new_end_tag = function (name) {
        var props = {name: name}
        return new Node(TYPE_END_TAG, props)
}
// element node with the given tag name
new_element = function (name) {
        var props = {name: name}
        return new Node(TYPE_TAG, props)
}
// text node holding txt
new_text_node = function (txt) {
        var props = {text: txt}
        return new Node(TYPE_TEXT, props)
}
// character tokens are the very same thing as text nodes
new_character_token = new_text_node
// comment holding txt
new_comment_token = function (txt) {
        var props = {text: txt}
        return new Node(TYPE_COMMENT, props)
}
// doctype with the given name
new_doctype_token = function (name) {
        var props = {name: name}
        return new Node(TYPE_DOCTYPE, props)
}
// the three below carry no data, only their type
new_eof_token = function () {
        return new Node(TYPE_EOF)
}
new_afe_marker = function () {
        return new Node(TYPE_AFE_MARKER)
}
new_aaa_bookmark = function () {
        return new Node(TYPE_AAA_BOOKMARK)
}
214
// Character classes, stored as plain strings and searched with indexOf.
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = [lc_alpha, uc_alpha, digits].join("")
hex_chars = digits + "abcdefABCDEF"

// true only for a one-character string that is an ASCII uppercase letter
is_uc_alpha = function (str) {
        if (str.length !== 1) {
                return false
        }
        return uc_alpha.indexOf(str) !== -1
}
// true only for a one-character string that is an ASCII lowercase letter
is_lc_alpha = function (str) {
        if (str.length !== 1) {
                return false
        }
        return lc_alpha.indexOf(str) !== -1
}

// some SVG elements have dashes in them
tag_name_chars = alnum + "-"
230
// The "space characters": tab, line feed, form feed, carriage return, space
// http://www.w3.org/TR/html5/infrastructure.html#space-character
space_chars = "\u0009\u000a\u000c\u000d\u0020"

// true only for a one-character string that is one of space_chars
is_space = function (txt) {
        if (txt.length !== 1) {
                return false
        }
        return space_chars.indexOf(txt) !== -1
}

// true only for a TYPE_TEXT token whose text is exactly one space character
is_space_tok = function (t) {
        if (t.type !== TYPE_TEXT) {
                return false
        }
        return is_space(t.text)
}
239
// Does start tag token t carry a `type` attribute whose value is "hidden"
// (compared case-insensitively)?
//
// Only the first `type` entry found in t.attrs_a is examined. Anything that
// is not a TYPE_START_TAG token yields false. The tag name is not checked.
is_input_hidden_tok = function (t) {
        var idx, pair
        if (t.type === TYPE_START_TAG) {
                for (idx = 0; idx < t.attrs_a.length; idx += 1) {
                        pair = t.attrs_a[idx] // [name, value]
                        if (pair[0] === 'type') {
                                return pair[1].toLowerCase() === 'hidden'
                        }
                }
        }
        return false
}
256
// A wider set than space_chars: besides tab, LF, FF, CR and space it also
// contains vertical tab (U+000B), NEL (U+0085), no-break space (U+00A0) and
// further Unicode space and line/paragraph separator characters.
// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
259
// Replacement characters, keyed by code point number: 0x00 maps to U+FFFD,
// and most of 0x80..0x9F map to the characters listed below
// (0x81, 0x8D, 0x8F, 0x90 and 0x9D have no entry).
unicode_fixes = {
        0x00: "\uFFFD",
        0x80: "\u20AC",
        0x82: "\u201A",
        0x83: "\u0192",
        0x84: "\u201E",
        0x85: "\u2026",
        0x86: "\u2020",
        0x87: "\u2021",
        0x88: "\u02C6",
        0x89: "\u2030",
        0x8A: "\u0160",
        0x8B: "\u2039",
        0x8C: "\u0152",
        0x8E: "\u017D",
        0x91: "\u2018",
        0x92: "\u2019",
        0x93: "\u201C",
        0x94: "\u201D",
        0x95: "\u2022",
        0x96: "\u2013",
        0x97: "\u2014",
        0x98: "\u02DC",
        0x99: "\u2122",
        0x9A: "\u0161",
        0x9B: "\u203A",
        0x9C: "\u0153",
        0x9E: "\u017E",
        0x9F: "\u0178"
}
289
// List of doctype identifier prefixes, all written in lowercase and each
// ending in "//".
// NOTE(review): going by the name, these are the public identifier ("pi")
// prefixes that select QUIRKS_YES, and callers presumably lowercase the
// identifier before comparing -- confirm against is_quirks_yes_doctype.
quirks_yes_pi_prefixes = [
        "+//silmaril//dtd html pro v0r11 19970101//",
        "-//as//dtd html 3.0 aswedit + extensions//",
        "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
        "-//ietf//dtd html 2.0 level 1//",
        "-//ietf//dtd html 2.0 level 2//",
        "-//ietf//dtd html 2.0 strict level 1//",
        "-//ietf//dtd html 2.0 strict level 2//",
        "-//ietf//dtd html 2.0 strict//",
        "-//ietf//dtd html 2.0//",
        "-//ietf//dtd html 2.1e//",
        "-//ietf//dtd html 3.0//",
        "-//ietf//dtd html 3.2 final//",
        "-//ietf//dtd html 3.2//",
        "-//ietf//dtd html 3//",
        "-//ietf//dtd html level 0//",
        "-//ietf//dtd html level 1//",
        "-//ietf//dtd html level 2//",
        "-//ietf//dtd html level 3//",
        "-//ietf//dtd html strict level 0//",
        "-//ietf//dtd html strict level 1//",
        "-//ietf//dtd html strict level 2//",
        "-//ietf//dtd html strict level 3//",
        "-//ietf//dtd html strict//",
        "-//ietf//dtd html//",
        "-//metrius//dtd metrius presentational//",
        "-//microsoft//dtd internet explorer 2.0 html strict//",
        "-//microsoft//dtd internet explorer 2.0 html//",
        "-//microsoft//dtd internet explorer 2.0 tables//",
        "-//microsoft//dtd internet explorer 3.0 html strict//",
        "-//microsoft//dtd internet explorer 3.0 html//",
        "-//microsoft//dtd internet explorer 3.0 tables//",
        "-//netscape comm. corp.//dtd html//",
        "-//netscape comm. corp.//dtd strict html//",
        "-//o'reilly and associates//dtd html 2.0//",
        "-//o'reilly and associates//dtd html extended 1.0//",
        "-//o'reilly and associates//dtd html extended relaxed 1.0//",
        "-//sq//dtd html 2.0 hotmetal + extensions//",
        "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
        "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
        "-//spyglass//dtd html 2.0 extended//",
        "-//sun microsystems corp.//dtd hotjava html//",
        "-//sun microsystems corp.//dtd hotjava strict html//",
        "-//w3c//dtd html 3 1995-03-24//",
        "-//w3c//dtd html 3.2 draft//",
        "-//w3c//dtd html 3.2 final//",
        "-//w3c//dtd html 3.2//",
        "-//w3c//dtd html 3.2s draft//",
        "-//w3c//dtd html 4.0 frameset//",
        "-//w3c//dtd html 4.0 transitional//",
        "-//w3c//dtd html experimental 19960712//",
        "-//w3c//dtd html experimental 970421//",
        "-//w3c//dtd w3 html//",
        "-//w3o//dtd w3 html 3.0//",
        "-//webtechs//dtd mozilla html 2.0//",
        "-//webtechs//dtd mozilla html//",
]
347
// These are the character references that don't need a terminating semicolon
// min length: 2, max: 6, none are a prefix of any other.
//
// Maps the reference name (case-sensitive, without "&" or ";") to the decoded
// character. Note that the value for `shy` is an invisible soft hyphen
// (U+00AD) and the value for `nbsp` is written as an escape ("\u00a0").
legacy_char_refs = {
        Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
        aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
        aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
        Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
        curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
        ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
        euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
        Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
        igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
        lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
        Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
        Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
        Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
        pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
        shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
        times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
        ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
        yen: '¥', yuml: 'ÿ'
}
370
371 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
372 //raw_text_elements = ['script', 'style']
373 //escapable_raw_text_elements = ['textarea', 'title']
// SVG element names, in their mixed-case spelling, in alphabetical order.
// Source of the list: http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
svg_elements = [
        'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
        'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
        'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
        'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
        'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
        'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
        'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
        'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
        'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
        'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
        'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
        'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
        'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
        'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
        'view', 'vkern'
]
392
// MathML element names, in alphabetical order, each listed exactly once.
// Source of the list: http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
mathml_elements = [
        'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
        'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
        'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
        'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
        'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
        'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
        'determinant', 'diff', 'divergence', 'divide', 'domain',
        'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
        'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
        'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
        'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
        'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
        'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
        'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
        'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
        'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
        'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
        'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
        'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
        'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
        'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
        'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
        'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
        'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
        'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
        'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
        'vectorproduct', 'xor'
]
424 // foreign_elements = [svg_elements..., mathml_elements...]
425 //normal_elements = All other allowed HTML elements are normal elements.
426
// Lookup table: element name -> the namespace in which that name is "special".
// It is consulted as `special_elements[name] === namespace`.
//
// NOTE(review): the key `title` appears twice in this literal (once in the
// HTML group, once in the SVG group). In an object literal the later entry
// wins, so special_elements.title is NS_SVG and a lookup of the form
// `special_elements[name] === namespace` does not match an HTML <title>.
special_elements = {
        // HTML:
        address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
        aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
        blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
        caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
        details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
        embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
        footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
        h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
        header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
        img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
        listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,

        menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these

        meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
        noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
        plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
        select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
        table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
        textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
        tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,

        // MathML:
        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
        'annotation-xml': NS_MATHML,

        // SVG: (this `title` overwrites the HTML `title` entry above)
        foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
}
458
// Set of formatting element names (used as name -> true lookup).
formatting_elements = {
        a: true,
        b: true,
        big: true,
        code: true,
        em: true,
        font: true,
        i: true,
        nobr: true,
        s: true,
        small: true,
        strike: true,
        strong: true,
        tt: true,
        u: true
}
464
// The MathML elements mi, mo, mn, ms and mtext, as a name -> namespace table.
mathml_text_integration = {
        mi: NS_MATHML,
        mo: NS_MATHML,
        mn: NS_MATHML,
        ms: NS_MATHML,
        mtext: NS_MATHML
}
// true when el's name is in the table above and el's namespace is the one
// recorded there
is_mathml_text_integration_point = function (el) {
        var expected_ns = mathml_text_integration[el.name]
        return expected_ns === el.namespace
}
// Is element el an HTML integration point? True for:
//   - a MathML annotation-xml element whose `encoding` attribute is
//     "text/html" or "application/xhtml+xml" (compared case-insensitively)
//   - an SVG foreignObject, desc or title element
// This reads el.attrs, which is why a token must not be passed in.
is_html_integration = function (el) { // DON'T PASS A TOKEN
        var encoding
        switch (el.namespace) {
        case NS_MATHML:
                if (el.name !== 'annotation-xml' || el.attrs.encoding == null) {
                        return false
                }
                encoding = el.attrs.encoding.toLowerCase()
                return encoding === 'text/html' || encoding === 'application/xhtml+xml'
        case NS_SVG:
                return el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title'
        default:
                return false
        }
}
492
// Small name -> namespace lookup tables (all entries are HTML elements).

// the heading elements h1..h6
h_tags = {
        h1: NS_HTML,
        h2: NS_HTML,
        h3: NS_HTML,
        h4: NS_HTML,
        h5: NS_HTML,
        h6: NS_HTML
}

// table, tbody, tfoot, thead and tr
foster_parenting_targets = {
        table: NS_HTML, tbody: NS_HTML, tfoot: NS_HTML, thead: NS_HTML, tr: NS_HTML
}

// dd, dt, li, option, optgroup, p, rb, rp, rt and rtc
end_tag_implied = {
        dd: NS_HTML, dt: NS_HTML, li: NS_HTML, option: NS_HTML, optgroup: NS_HTML,
        p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML, rtc: NS_HTML
}
517
// Is element e in the "special" category?
//
// special_elements maps a name to a single namespace, but `title` is special
// in two namespaces (HTML and SVG) and the table can only hold one of them.
// `title` is therefore checked explicitly here, so that both the HTML and the
// SVG <title> are reported as special.
el_is_special = function (e) {
        if (e.name === 'title') {
                return e.namespace === NS_HTML || e.namespace === NS_SVG
        }
        return special_elements[e.name] === e.namespace
}

// the three HTML elements excluded by el_is_special_not_adp()
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }

// Is el special, and not one of HTML address, div or p?
el_is_special_not_adp = function (el) {
        return el_is_special(el) && adp_els[el.name] !== el.namespace
}
526
// SVG tag names: all-lowercase spelling -> mixed-case spelling.
// Only names whose proper spelling contains capital letters are listed.
svg_name_fixes = {
        altglyph: 'altGlyph',
        altglyphdef: 'altGlyphDef',
        altglyphitem: 'altGlyphItem',
        animatecolor: 'animateColor',
        animatemotion: 'animateMotion',
        animatetransform: 'animateTransform',
        clippath: 'clipPath',
        feblend: 'feBlend',
        fecolormatrix: 'feColorMatrix',
        fecomponenttransfer: 'feComponentTransfer',
        fecomposite: 'feComposite',
        feconvolvematrix: 'feConvolveMatrix',
        fediffuselighting: 'feDiffuseLighting',
        fedisplacementmap: 'feDisplacementMap',
        fedistantlight: 'feDistantLight',
        fedropshadow: 'feDropShadow',
        feflood: 'feFlood',
        fefunca: 'feFuncA',
        fefuncb: 'feFuncB',
        fefuncg: 'feFuncG',
        fefuncr: 'feFuncR',
        fegaussianblur: 'feGaussianBlur',
        feimage: 'feImage',
        femerge: 'feMerge',
        femergenode: 'feMergeNode',
        femorphology: 'feMorphology',
        feoffset: 'feOffset',
        fepointlight: 'fePointLight',
        fespecularlighting: 'feSpecularLighting',
        fespotlight: 'feSpotLight',
        fetile: 'feTile',
        feturbulence: 'feTurbulence',
        foreignobject: 'foreignObject',
        glyphref: 'glyphRef',
        lineargradient: 'linearGradient',
        radialgradient: 'radialGradient',
        textpath: 'textPath'
}
// SVG attribute names: all-lowercase spelling -> mixed-case spelling.
// Applied to start tag tokens by adjust_svg_attributes().
svg_attribute_fixes = {
        attributename: 'attributeName',
        attributetype: 'attributeType',
        basefrequency: 'baseFrequency',
        baseprofile: 'baseProfile',
        calcmode: 'calcMode',
        clippathunits: 'clipPathUnits',
        contentscripttype: 'contentScriptType',
        contentstyletype: 'contentStyleType',
        diffuseconstant: 'diffuseConstant',
        edgemode: 'edgeMode',
        externalresourcesrequired: 'externalResourcesRequired',
        // WHATWG removes this: filterres: 'filterRes',
        filterunits: 'filterUnits',
        glyphref: 'glyphRef',
        gradienttransform: 'gradientTransform',
        gradientunits: 'gradientUnits',
        kernelmatrix: 'kernelMatrix',
        kernelunitlength: 'kernelUnitLength',
        keypoints: 'keyPoints',
        keysplines: 'keySplines',
        keytimes: 'keyTimes',
        lengthadjust: 'lengthAdjust',
        limitingconeangle: 'limitingConeAngle',
        markerheight: 'markerHeight',
        markerunits: 'markerUnits',
        markerwidth: 'markerWidth',
        maskcontentunits: 'maskContentUnits',
        maskunits: 'maskUnits',
        numoctaves: 'numOctaves',
        pathlength: 'pathLength',
        patterncontentunits: 'patternContentUnits',
        patterntransform: 'patternTransform',
        patternunits: 'patternUnits',
        pointsatx: 'pointsAtX',
        pointsaty: 'pointsAtY',
        pointsatz: 'pointsAtZ',
        preservealpha: 'preserveAlpha',
        preserveaspectratio: 'preserveAspectRatio',
        primitiveunits: 'primitiveUnits',
        refx: 'refX',
        refy: 'refY',
        repeatcount: 'repeatCount',
        repeatdur: 'repeatDur',
        requiredextensions: 'requiredExtensions',
        requiredfeatures: 'requiredFeatures',
        specularconstant: 'specularConstant',
        specularexponent: 'specularExponent',
        spreadmethod: 'spreadMethod',
        startoffset: 'startOffset',
        stddeviation: 'stdDeviation',
        stitchtiles: 'stitchTiles',
        surfacescale: 'surfaceScale',
        systemlanguage: 'systemLanguage',
        tablevalues: 'tableValues',
        targetx: 'targetX',
        targety: 'targetY',
        textlength: 'textLength',
        viewbox: 'viewBox',
        viewtarget: 'viewTarget',
        xchannelselector: 'xChannelSelector',
        ychannelselector: 'yChannelSelector',
        zoomandpan: 'zoomAndPan'
}
// Attribute names on foreign elements: "prefix:local" -> "prefix local"
// (the colon becomes a space; plain `xmlns` stays as it is).
// Applied to start tag tokens by adjust_foreign_attributes().
foreign_attr_fixes = {
        'xlink:actuate': 'xlink actuate',
        'xlink:arcrole': 'xlink arcrole',
        'xlink:href': 'xlink href',
        'xlink:role': 'xlink role',
        'xlink:show': 'xlink show',
        'xlink:title': 'xlink title',
        'xlink:type': 'xlink type',
        'xml:base': 'xml base',
        'xml:lang': 'xml lang',
        'xml:space': 'xml space',
        'xmlns': 'xmlns',
        'xmlns:xlink': 'xmlns xlink'
}
// Rename the attribute `definitionurl` to `definitionURL` on token t.
// t.attrs_a is a list of [name, value] pairs and is modified in place.
adjust_mathml_attributes = function (t) {
        var attrs = t.attrs_a
        var idx = 0
        while (idx < attrs.length) {
                if (attrs[idx][0] === 'definitionurl') {
                        attrs[idx][0] = 'definitionURL'
                }
                idx += 1
        }
}
// Restore the mixed-case spelling of SVG attribute names on token t, using
// svg_attribute_fixes. t.attrs_a is a list of [name, value] pairs and is
// modified in place; names without an entry in the table are left alone.
adjust_svg_attributes = function (t) {
        var i, a
        for (i = 0; i < t.attrs_a.length; ++i) {
                a = t.attrs_a[i]
                // Own-property check: attribute names come from the document,
                // and a name such as "constructor" or "__proto__" would
                // otherwise pick up a member inherited from Object.prototype
                // and be replaced by a non-string.
                if (Object.prototype.hasOwnProperty.call(svg_attribute_fixes, a[0])) {
                        a[0] = svg_attribute_fixes[a[0]]
                }
        }
}
// Rewrite namespaced attribute names (xlink:*, xml:*, xmlns*) on token t,
// using foreign_attr_fixes. t.attrs_a is a list of [name, value] pairs and
// is modified in place; names without an entry in the table are left alone.
adjust_foreign_attributes = function (t) {
        // fixfull
        var i, a
        for (i = 0; i < t.attrs_a.length; ++i) {
                a = t.attrs_a[i]
                // Own-property check: attribute names come from the document,
                // and a name such as "constructor" or "__proto__" would
                // otherwise pick up a member inherited from Object.prototype
                // and be replaced by a non-string.
                if (Object.prototype.hasOwnProperty.call(foreign_attr_fixes, a[0])) {
                        a[0] = foreign_attr_fixes[a[0]]
                }
        }
}
672
// decode_named_char_ref()
//
// The list of named character references is _huge_ so if we're running in a
// browser, we get the browser to decode them, rather than increasing the code
// size to include the table.
//
// Either way, _decode_named_char_ref ends up holding the low-level decoder;
// callers should go through decode_named_char_ref() instead.
if (context === 'module') {
        // NOTE(review): presumably this helper exports a function with the same
        // contract as the browser version below (name in, decoded text or null
        // out) -- confirm in parser_no_browser_helper.js
        _decode_named_char_ref = require('./parser_no_browser_helper.js')
} else {
        // a single textarea element, created once and reused for every lookup
        decode_named_char_ref_el = document.createElement('textarea')
        // txt: reference name without "&" and ";"
        // returns the decoded text, or null when the browser left the
        // reference unchanged
        _decode_named_char_ref = function (txt) {
                var decoded
                txt = "&" + txt + ";"
                // let the browser parse the reference, then read the result back
                decode_named_char_ref_el.innerHTML = txt
                decoded = decode_named_char_ref_el.value
                if (decoded === txt) {
                        return null
                }
                return decoded
        }
}
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Returns whatever _decode_named_char_ref() returns for that name. Every
// result is remembered, including "no such entity" results, so the
// low-level decoder runs at most once per name.

// The cache has no prototype, so that names like "constructor" or
// "__proto__" (which can come straight from the document) cannot be mistaken
// for cached results.
decode_named_char_ref_cache = Object.create(null)
decode_named_char_ref = function (txt) {
        var decoded
        if (txt in decode_named_char_ref_cache) {
                return decode_named_char_ref_cache[txt]
        }
        decoded = _decode_named_char_ref(txt)
        decode_named_char_ref_cache[txt] = decoded
        return decoded
}
706
707 parse_html = function (args_html, args) {
708         var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
709         if (args == null) {
710                 args = {}
711         }
712         txt = null
713         cur = null // index of next char in txt to be parsed
714         // declare doc and tokenizer variables so they're in scope below
715         doc = null
716         open_els = null // stack of open elements
717         afe = null // active formatting elements
718         template_ins_modes = null
719         ins_mode = null
720         original_ins_mode = null
721         tok_state = null
722         tok_cur_tag = null // partially parsed tag
723         flag_scripting = null
724         flag_frameset_ok = null
725         flag_parsing = null
726         flag_foster_parenting = null
727         form_element_pointer = null
728         temporary_buffer = null
729         pending_table_character_tokens = null
730         head_element_pointer = null
731         flag_fragment_parsing = null
732         context_element = null
733
734         stop_parsing = function () {
735                 flag_parsing = false
736         }
737
738         parse_error = function () {
739                 if (args.error_cb != null) {
740                         args.error_cb(cur)
741                 }
742         }
743
744         // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
745         // "Noah's Ark clause" but with three
746         afe_push = function (new_el) {
747                 var attrs_match, el, i, j, k, matches, v
748                 matches = 0
749                 for (i = 0; i < afe.length; ++i) {
750                         el = afe[i]
751                         if (el.type === TYPE_AFE_MARKER) {
752                                 break
753                         }
754                         if (el.name === new_el.name && el.namespace === new_el.namespace) {
755                                 attrs_match = true
756                                 for (k in el.attrs) {
757                                         v = el.attrs[k]
758                                         if (new_el.attrs[k] !== v) {
759                                                 attrs_match = false
760                                                 break
761                                         }
762                                 }
763                                 if (attrs_match) {
764                                         for (k in new_el.attrs) {
765                                                 v = new_el.attrs[k]
766                                                 if (el.attrs[k] !== v) {
767                                                         attrs_match = false
768                                                         break
769                                                 }
770                                         }
771                                 }
772                                 if (attrs_match) {
773                                         matches += 1
774                                         if (matches === 3) {
775                                                 afe.splice(i, 1)
776                                                 break
777                                         }
778                                 }
779                         }
780                 }
781                 afe.unshift(new_el)
782         }
783
784         afe_push_marker = function () {
785                 afe.unshift(new_afe_marker())
786         }
787
        // the functions below implement the Tree Construction algorithm
789         // http://www.w3.org/TR/html5/syntax.html#tree-construction
790
791         // But first... the helpers
792         template_tag_is_open = function () {
793                 var i, el
794                 for (i = 0; i < open_els.length; ++i) {
795                         el = open_els[i]
796                         if (el.name === 'template' && el.namespace === NS_HTML) {
797                                 return true
798                         }
799                 }
800                 return false
801         }
802         is_in_scope_x = function (tag_name, scope, namespace) {
803                 var i, el
804                 for (i = 0; i < open_els.length; ++i) {
805                         el = open_els[i]
806                         if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
807                                 return true
808                         }
809                         if (scope[el.name] === el.namespace) {
810                                 return false
811                         }
812                 }
813                 return false
814         }
815         is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
816                 var i, el
817                 for (i = 0; i < open_els.length; ++i) {
818                         el = open_els[i]
819                         if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
820                                 return true
821                         }
822                         if (scope[el.name] === el.namespace) {
823                                 return false
824                         }
825                         if (scope2[el.name] === el.namespace) {
826                                 return false
827                         }
828                 }
829                 return false
830         }
831         standard_scopers = {
832                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
833                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
834                 template: NS_HTML,
835
836                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
837                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
838
839                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
840         }
841         button_scopers = { button: NS_HTML }
842         li_scopers = { ol: NS_HTML, ul: NS_HTML }
843         table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
844         is_in_scope = function (tag_name, namespace) {
845                 if (namespace == null) {
846                         namespace = null
847                 }
848                 return is_in_scope_x(tag_name, standard_scopers, namespace)
849         }
850         is_in_button_scope = function (tag_name, namespace) {
851                 if (namespace == null) {
852                         namespace = null
853                 }
854                 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
855         }
856         is_in_table_scope = function (tag_name, namespace) {
857                 if (namespace == null) {
858                         namespace = null
859                 }
860                 return is_in_scope_x(tag_name, table_scopers, namespace)
861         }
862         // aka is_in_list_item_scope
863         is_in_li_scope = function (tag_name, namespace) {
864                 if (namespace == null) {
865                         namespace = null
866                 }
867                 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
868         }
869         is_in_select_scope = function (tag_name, namespace) {
870                 var i, t
871                 if (namespace == null) {
872                         namespace = null
873                 }
874                 for (i = 0; i < open_els.length; ++i) {
875                         t = open_els[i]
876                         if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
877                                 return true
878                         }
879                         if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
880                                 return false
881                         }
882                 }
883                 return false
884         }
885         // this checks for a particular element, not by name
886         // this requires a namespace match
887         el_is_in_scope = function (needle) {
888                 var i
889                 for (i = 0; i < open_els.length; ++i) {
890                         el = open_els[i]
891                         if (el === needle) {
892                                 return true
893                         }
894                         if (standard_scopers[el.name] === el.namespace) {
895                                 return false
896                         }
897                 }
898                 return false
899         }
900
901         clear_to_table_stopers = {
902                 'table': true,
903                 'template': true,
904                 'html': true
905         }
906         clear_stack_to_table_context = function () {
907                 while (true) {
908                         if (clear_to_table_stopers[open_els[0].name] != null) {
909                                 break
910                         }
911                         open_els.shift()
912                 }
913         }
914         clear_to_table_body_stopers = {
915                 tbody: NS_HTML,
916                 tfoot: NS_HTML,
917                 thead: NS_HTML,
918                 template: NS_HTML,
919                 html: NS_HTML
920         }
921         clear_stack_to_table_body_context = function () {
922                 while (true) {
923                         if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
924                                 break
925                         }
926                         open_els.shift()
927                 }
928         }
929         clear_to_table_row_stopers = {
930                 'tr': true,
931                 'template': true,
932                 'html': true
933         }
934         clear_stack_to_table_row_context = function () {
935                 while (true) {
936                         if (clear_to_table_row_stopers[open_els[0].name] != null) {
937                                 break
938                         }
939                         open_els.shift()
940                 }
941         }
942         clear_afe_to_marker = function () {
943                 var el
944                 while (true) {
945                         if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
946                                 return
947                         }
948                         el = afe.shift()
949                         if (el.type === TYPE_AFE_MARKER) {
950                                 return
951                         }
952                 }
953         }
954
955         // 8.2.3.1 ...
956         // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
957         reset_ins_mode = function () {
958                 var ancestor, ancestor_i, last, node, node_i
959                 // 1. Let last be false.
960                 last = false
961                 // 2. Let node be the last node in the stack of open elements.
962                 node_i = 0
963                 node = open_els[node_i]
964                 // 3. Loop: If node is the first node in the stack of open elements,
965                 // then set last to true, and, if the parser was originally created as
966                 // part of the HTML fragment parsing algorithm (fragment case) set node
967                 // to the context element.
968                 while (true) {
969                         if (node_i === open_els.length - 1) {
970                                 last = true
971                                 if (flag_fragment_parsing) {
972                                         node = context_element
973                                 }
974                         }
975                         // 4. If node is a select element, run these substeps:
976                         if (node.name === 'select' && node.namespace === NS_HTML) {
977                                 // 1. If last is true, jump to the step below labeled done.
978                                 if (!last) {
979                                         // 2. Let ancestor be node.
980                                         ancestor_i = node_i
981                                         ancestor = node
982                                         // 3. Loop: If ancestor is the first node in the stack of
983                                         // open elements, jump to the step below labeled done.
984                                         while (true) {
985                                                 if (ancestor_i === open_els.length - 1) {
986                                                         break
987                                                 }
988                                                 // 4. Let ancestor be the node before ancestor in the stack
989                                                 // of open elements.
990                                                 ancestor_i += 1
991                                                 ancestor = open_els[ancestor_i]
992                                                 // 5. If ancestor is a template node, jump to the step below
993                                                 // labeled done.
994                                                 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
995                                                         break
996                                                 }
997                                                 // 6. If ancestor is a table node, switch the insertion mode
998                                                 // to "in select in table" and abort these steps.
999                                                 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
1000                                                         ins_mode = ins_mode_in_select_in_table
1001                                                         return
1002                                                 }
1003                                                 // 7. Jump back to the step labeled loop.
1004                                         }
1005                                 }
1006                                 // 8. Done: Switch the insertion mode to "in select" and abort
1007                                 // these steps.
1008                                 ins_mode = ins_mode_in_select
1009                                 return
1010                         }
1011                         // 5. If node is a td or th element and last is false, then switch
1012                         // the insertion mode to "in cell" and abort these steps.
1013                         if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1014                                 ins_mode = ins_mode_in_cell
1015                                 return
1016                         }
1017                         // 6. If node is a tr element, then switch the insertion mode to "in
1018                         // row" and abort these steps.
1019                         if (node.name === 'tr' && node.namespace === NS_HTML) {
1020                                 ins_mode = ins_mode_in_row
1021                                 return
1022                         }
1023                         // 7. If node is a tbody, thead, or tfoot element, then switch the
1024                         // insertion mode to "in table body" and abort these steps.
1025                         if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1026                                 ins_mode = ins_mode_in_table_body
1027                                 return
1028                         }
1029                         // 8. If node is a caption element, then switch the insertion mode
1030                         // to "in caption" and abort these steps.
1031                         if (node.name === 'caption' && node.namespace === NS_HTML) {
1032                                 ins_mode = ins_mode_in_caption
1033                                 return
1034                         }
1035                         // 9. If node is a colgroup element, then switch the insertion mode
1036                         // to "in column group" and abort these steps.
1037                         if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1038                                 ins_mode = ins_mode_in_column_group
1039                                 return
1040                         }
1041                         // 10. If node is a table element, then switch the insertion mode to
1042                         // "in table" and abort these steps.
1043                         if (node.name === 'table' && node.namespace === NS_HTML) {
1044                                 ins_mode = ins_mode_in_table
1045                                 return
1046                         }
1047                         // 11. If node is a template element, then switch the insertion mode
1048                         // to the current template insertion mode and abort these steps.
1049                         if (node.name === 'template' && node.namespace === NS_HTML) {
1050                                 ins_mode = template_ins_modes[0]
1051                                 return
1052                         }
1053                         // 12. If node is a head element and last is true, then switch the
1054                         // insertion mode to "in body" ("in body"! not "in head"!) and abort
1055                         // these steps. (fragment case)
1056                         if (node.name === 'head' && node.namespace === NS_HTML && last) {
1057                                 ins_mode = ins_mode_in_body
1058                                 return
1059                         }
1060                         // 13. If node is a head element and last is false, then switch the
1061                         // insertion mode to "in head" and abort these steps.
1062                         if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1063                                 ins_mode = ins_mode_in_head
1064                                 return
1065                         }
1066                         // 14. If node is a body element, then switch the insertion mode to
1067                         // "in body" and abort these steps.
1068                         if (node.name === 'body' && node.namespace === NS_HTML) {
1069                                 ins_mode = ins_mode_in_body
1070                                 return
1071                         }
1072                         // 15. If node is a frameset element, then switch the insertion mode
1073                         // to "in frameset" and abort these steps. (fragment case)
1074                         if (node.name === 'frameset' && node.namespace === NS_HTML) {
1075                                 ins_mode = ins_mode_in_frameset
1076                                 return
1077                         }
1078                         // 16. If node is an html element, run these substeps:
1079                         if (node.name === 'html' && node.namespace === NS_HTML) {
1080                                 // 1. If the head element pointer is null, switch the insertion
1081                                 // mode to "before head" and abort these steps. (fragment case)
1082                                 if (head_element_pointer === null) {
1083                                         ins_mode = ins_mode_before_head
1084                                 } else {
1085                                         // 2. Otherwise, the head element pointer is not null,
1086                                         // switch the insertion mode to "after head" and abort these
1087                                         // steps.
1088                                         ins_mode = ins_mode_after_head
1089                                 }
1090                                 return
1091                         }
1092                         // 17. If last is true, then switch the insertion mode to "in body"
1093                         // and abort these steps. (fragment case)
1094                         if (last) {
1095                                 ins_mode = ins_mode_in_body
1096                                 return
1097                         }
1098                         // 18. Let node now be the node before node in the stack of open
1099                         // elements.
1100                         node_i += 1
1101                         node = open_els[node_i]
1102                         // 19. Return to the step labeled loop.
1103                 }
1104         }
1105
1106         // 8.2.3.2
1107
1108         // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
1109         adjusted_current_node = function () {
1110                 if (open_els.length === 1 && flag_fragment_parsing) {
1111                         return context_element
1112                 }
1113                 return open_els[0]
1114         }
1115
1116         // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1117         // this implementation is structured (mostly) as described at the link above.
1118         // capitalized comments are the "labels" described at the link above.
1119         reconstruct_afe = function () {
1120                 var el, i
1121                 if (afe.length === 0) {
1122                         return
1123                 }
1124                 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
1125                         return
1126                 }
1127                 // Rewind
1128                 i = 0
1129                 while (true) {
1130                         if (i === afe.length - 1) {
1131                                 break
1132                         }
1133                         i += 1
1134                         if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
1135                                 i -= 1 // Advance
1136                                 break
1137                         }
1138                 }
1139                 // Create
1140                 while (true) {
1141                         el = insert_html_element(afe[i].token)
1142                         afe[i] = el
1143                         if (i === 0) {
1144                                 break
1145                         }
1146                         i -= 1 // Advance
1147                 }
1148         }
1149
1150         // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1151         // adoption agency algorithm
1152         // overview here:
1153         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1154         //   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1155         //   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1156         adoption_agency = function (subject) {
1157                 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, s, t, u, w, y, z
// this block implements the W3C spec
1159 //              # 1. If the current node is an HTML element whose tag name is subject,
1160 //              # then run these substeps:
1161 //              #
1162 //              # 1. Let element be the current node.
1163 //              #
1164 //              # 2. Pop element off the stack of open elements.
1165 //              #
1166 //              # 3. If element is also in the list of active formatting elements,
1167 //              # remove the element from the list.
1168 //              #
1169 //              # 4. Abort the adoption agency algorithm.
1170 //              if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1171 //                      el = open_els.shift()
1172 //                      # remove it from the list of active formatting elements (if found)
1173 //                      for t, i in afe
1174 //                              if t is el
1175 //                                      afe.splice i, 1
1176 //                                      break
1177 //                      return
1178 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1179                 // If the current node is an HTML element whose tag name is subject, and
1180                 // the current node is not in the list of active formatting elements,
1181                 // then pop the current node off the stack of open elements, and abort
1182                 // these steps.
1183                 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1184                         // remove it from the list of active formatting elements (if found)
1185                         in_afe = false
1186                         for (i = 0; i < afe.length; ++i) {
1187                                 el = afe[i]
1188                                 if (el === open_els[0]) {
1189                                         in_afe = true
1190                                         break
1191                                 }
1192                         }
1193                         if (!in_afe) {
1194                                 open_els.shift()
1195                                 return
1196                         }
1197                         // fall through
1198                 }
1199 // END WHATWG
1200                 outer = 0
1201                 while (true) {
1202                         if (outer >= 8) {
1203                                 return
1204                         }
1205                         outer += 1
1206                         // 5. Let formatting element be the last element in the list of
1207                         // active formatting elements that: is between the end of the list
1208                         // and the last scope marker in the list, if any, or the start of
1209                         // the list otherwise, and  has the tag name subject.
1210                         fe = null
1211                         for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1212                                 t = afe[fe_of_afe]
1213                                 if (t.type === TYPE_AFE_MARKER) {
1214                                         break
1215                                 }
1216                                 if (t.name === subject) {
1217                                         fe = t
1218                                         break
1219                                 }
1220                         }
1221                         // If there is no such element, then abort these steps and instead
1222                         // act as described in the "any other end tag" entry above.
1223                         if (fe === null) {
1224                                 in_body_any_other_end_tag(subject)
1225                                 return
1226                         }
1227                         // 6. If formatting element is not in the stack of open elements,
1228                         // then this is a parse error; remove the element from the list, and
1229                         // abort these steps.
1230                         in_open_els = false
1231                         for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1232                                 t = open_els[fe_of_open_els]
1233                                 if (t === fe) {
1234                                         in_open_els = true
1235                                         break
1236                                 }
1237                         }
1238                         if (!in_open_els) {
1239                                 parse_error()
1240                                 // "remove it from the list" must mean afe, since it's not in open_els
1241                                 afe.splice(fe_of_afe, 1)
1242                                 return
1243                         }
1244                         // 7. If formatting element is in the stack of open elements, but
1245                         // the element is not in scope, then this is a parse error; abort
1246                         // these steps.
1247                         if (!el_is_in_scope(fe)) {
1248                                 parse_error()
1249                                 return
1250                         }
1251                         // 8. If formatting element is not the current node, this is a parse
1252                         // error. (But do not abort these steps.)
1253                         if (open_els[0] !== fe) {
1254                                 parse_error()
1255                                 // continue
1256                         }
1257                         // 9. Let furthest block be the topmost node in the stack of open
1258                         // elements that is lower in the stack than formatting element, and
1259                         // is an element in the special category. There might not be one.
1260                         fb = null
1261                         fb_of_open_els = null
1262                         for (i = 0; i < open_els.length; ++i) {
1263                                 t = open_els[i]
1264                                 if (t === fe) {
1265                                         break
1266                                 }
1267                                 if (el_is_special(t)) {
1268                                         fb = t
1269                                         fb_of_open_els = i
1270                                         // and continue, to see if there's one that's more "topmost"
1271                                 }
1272                         }
1273                         // 10. If there is no furthest block, then the UA must first pop all
1274                         // the nodes from the bottom of the stack of open elements, from the
1275                         // current node up to and including formatting element, then remove
1276                         // formatting element from the list of active formatting elements,
1277                         // and finally abort these steps.
1278                         if (fb === null) {
1279                                 while (true) {
1280                                         t = open_els.shift()
1281                                         if (t === fe) {
1282                                                 afe.splice(fe_of_afe, 1)
1283                                                 return
1284                                         }
1285                                 }
1286                         }
1287                         // 11. Let common ancestor be the element immediately above
1288                         // formatting element in the stack of open elements.
1289                         ca = open_els[fe_of_open_els + 1] // common ancestor
1290
1291                         node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1292                         // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1293                         bookmark = new_aaa_bookmark()
1294                         for (i = 0; i < afe.length; ++i) {
1295                                 t = afe[i]
1296                                 if (t === fe) {
1297                                         afe.splice(i, 0, bookmark)
1298                                         break
1299                                 }
1300                         }
1301                         node = last_node = fb
1302                         inner = 0
1303                         while (true) {
1304                                 inner += 1
1305                                 // 3. Let node be the element immediately above node in the
1306                                 // stack of open elements, or if node is no longer in the stack
1307                                 // of open elements (e.g. because it got removed by this
1308                                 // algorithm), the element that was immediately above node in
1309                                 // the stack of open elements before node was removed.
1310                                 node_next = null
1311                                 for (i = 0; i < open_els.length; ++i) {
1312                                         t = open_els[i]
1313                                         if (t === node) {
1314                                                 node_next = open_els[i + 1]
1315                                                 break
1316                                         }
1317                                 }
1318                                 node = node_next != null ? node_next : node_above
1319                                 // TODO make sure node_above gets re-set if/when node is removed from open_els
1320
1321                                 // 4. If node is formatting element, then go to the next step in
1322                                 // the overall algorithm.
1323                                 if (node === fe) {
1324                                         break
1325                                 }
1326                                 // 5. If inner loop counter is greater than three and node is in
1327                                 // the list of active formatting elements, then remove node from
1328                                 // the list of active formatting elements.
1329                                 node_in_afe = false
1330                                 if ((i = afe.indexOf(node)) !== -1) {
1331                                         if (inner > 3) {
1332                                                 afe.splice(i, 1)
1333                                         } else {
1334                                                 node_in_afe = true
1335                                         }
1336                                 }
1337                                 // 6. If node is not in the list of active formatting elements,
1338                                 // then remove node from the stack of open elements and then go
1339                                 // back to the step labeled inner loop.
1340                                 if (!node_in_afe) {
1341                                         if ((i = open_els.indexOf(node)) !== -1) {
1342                                                 node_above = open_els[i + 1]
1343                                                 open_els.splice(i, 1)
1344                                         }
1345                                         continue
1346                                 }
1347                                 // 7. create an element for the token for which the element node
1348                                 // was created, in the HTML namespace, with common ancestor as
1349                                 // the intended parent; replace the entry for node in the list
1350                                 // of active formatting elements with an entry for the new
1351                                 // element, replace the entry for node in the stack of open
1352                                 // elements with an entry for the new element, and let node be
1353                                 // the new element.
1354                                 new_node = token_to_element(node.token, NS_HTML, ca)
1355                                 if ((i = afe.indexOf(node)) !== -1) {
1356                                         afe[i] = new_node
1357                                 }
1358                                 if ((i = open_els.indexOf(node)) !== -1) {
1359                                         node_above = open_els[i + 1]
1360                                         open_els[i] = new_node
1361                                 }
1362                                 node = new_node
1363                                 // 8. If last node is furthest block, then move the
1364                                 // aforementioned bookmark to be immediately after the new node
1365                                 // in the list of active formatting elements.
1366                                 if (last_node === fb) {
1367                                         if ((i = afe.indexOf(bookmark)) !== -1) {
1368                                                 afe.splice(i, 1)
1369                                         }
1370                                         if ((i = afe.indexOf(node)) !== -1) {
1371                                                 // "after" means lower
1372                                                 afe.splice(i, 0, bookmark) // "after as <-
1373                                         }
1374                                 }
1375                                 // 9. Insert last node into node, first removing it from its
1376                                 // previous parent node if any.
1377                                 if (last_node.parent != null) {
1378                                         if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1379                                                 last_node.parent.children.splice(i, 1)
1380                                         }
1381                                 }
1382                                 node.children.push(last_node)
1383                                 last_node.parent = node
1384                                 // 10. Let last node be node.
1385                                 last_node = node
1386                                 // 11. Return to the step labeled inner loop.
1387                         }
1388                         // 14. Insert whatever last node ended up being in the previous step
1389                         // at the appropriate place for inserting a node, but using common
1390                         // ancestor as the override target.
1391
1392                         // In the case where fe is immediately followed by fb:
1393                         //   * inner loop exits out early (node==fe)
1394                         //   * last_node is fb
1395                         //   * last_node is still in the tree (not a duplicate)
1396                         if (last_node.parent != null) {
1397                                 if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1398                                         last_node.parent.children.splice(i, 1)
1399                                 }
1400                         }
1401                         // can't use standard insert token thing, because it's already in
1402                         // open_els and must stay at it's current position in open_els
1403                         dest = adjusted_insertion_location(ca)
1404                         dest[0].children.splice(dest[1], 0, last_node)
1405                         last_node.parent = dest[0]
1406                         // 15. Create an element for the token for which formatting element
1407                         // was created, in the HTML namespace, with furthest block as the
1408                         // intended parent.
1409                         new_element = token_to_element(fe.token, NS_HTML, fb)
1410                         // 16. Take all of the child nodes of furthest block and append them
1411                         // to the element created in the last step.
1412                         while (fb.children.length) {
1413                                 t = fb.children.shift()
1414                                 t.parent = new_element
1415                                 new_element.children.push(t)
1416                         }
1417                         // 17. Append that new element to furthest block.
1418                         new_element.parent = fb
1419                         fb.children.push(new_element)
1420                         // 18. Remove formatting element from the list of active formatting
1421                         // elements, and insert the new element into the list of active
1422                         // formatting elements at the position of the aforementioned
1423                         // bookmark.
1424                         if ((i = afe.indexOf(fe)) !== -1) {
1425                                 afe.splice(i, 1)
1426                         }
1427                         if ((i = afe.indexOf(bookmark)) !== -1) {
1428                                 afe[i] = new_element
1429                         }
1430                         // 19. Remove formatting element from the stack of open elements,
1431                         // and insert the new element into the stack of open elements
1432                         // immediately below the position of furthest block in that stack.
1433                         if ((i = open_els.indexOf(fe)) !== -1) {
1434                                 open_els.splice(i, 1)
1435                         }
1436                         if ((i = open_els.indexOf(fb)) !== -1) {
1437                                 open_els.splice(i, 0, new_element)
1438                         }
1439                         // 20. Jump back to the step labeled outer loop.
1440                 }
1441         }
1442
1443         // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
// Close a p element: generate implied end tags (except for p), report a parse
// error when the current node (open_els[0]) is not an HTML p element, then pop
// the stack of open elements until an HTML p element has been popped.
close_p_element = function () {
        // Local on purpose: the popped element used to be stored in `el`,
        // which is not declared in this function and therefore wrote to a
        // variable of an enclosing scope (or an implicit global).
        var popped
        generate_implied_end_tags('p') // arg is exception
        if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
                parse_error()
        }
        // Never pop the last remaining entry, just in case there is no p on
        // the stack at all.
        while (open_els.length > 1) {
                popped = open_els.shift()
                if (popped.name === 'p' && popped.namespace === NS_HTML) {
                        return
                }
        }
}
// Run close_p_element(), but only when an HTML p element is currently in
// button scope; otherwise do nothing.
close_p_if_in_button_scope = function () {
        var p_in_scope = is_in_button_scope('p', NS_HTML)
        if (!p_in_scope) {
                return
        }
        close_p_element()
}
1461
1462         // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1463         // aka insert_a_character = function (t) {
// Insert text token t at the adjusted insertion location. When the node just
// before that location is a text node, t's text is appended to it instead of
// inserting a new node.
insert_character = function (t) {
        var where, container, index, before
        where = adjusted_insertion_location()
        container = where[0]
        index = where[1]
        // fixfull check for Document node
        if (index > 0) {
                before = container.children[index - 1]
                if (before.type === TYPE_TEXT) {
                        // merge into the preceding text node
                        before.text += t.text
                        return
                }
        }
        container.children.splice(index, 0, t)
        t.parent = container
}
1478
1479         // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
// Tree construction dispatcher: hand token t either to the current insertion
// mode (ins_mode) or to in_foreign_content, depending on the adjusted current
// node and on the kind of token.
process_token = function (t) {
        var node, is_start_tag, is_text, use_ins_mode
        node = adjusted_current_node()
        is_start_tag = t.type === TYPE_START_TAG
        is_text = t.type === TYPE_TEXT
        // The alternatives are tried in order and evaluation stops at the
        // first one that holds.
        use_ins_mode =
                // no adjusted current node
                node == null ||
                // adjusted current node is in the HTML namespace
                node.namespace === NS_HTML ||
                // MathML text integration point: text, or any start tag other
                // than mglyph / malignmark
                (is_mathml_text_integration_point(node) &&
                        ((is_start_tag && t.name !== 'mglyph' && t.name !== 'malignmark') || is_text)) ||
                // <svg> start tag directly inside MathML annotation-xml
                (node.namespace === NS_MATHML && node.name === 'annotation-xml' &&
                        is_start_tag && t.name === 'svg') ||
                // HTML integration point: start tags and text
                (is_html_integration(node) && (is_start_tag || is_text)) ||
                // end of file
                t.type === TYPE_EOF
        if (use_ins_mode) {
                ins_mode(t)
        } else {
                in_foreign_content(t)
        }
}
1517
1518         // 8.2.5.1
1519         // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1520         // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
// Compute the "appropriate place for inserting a node".
//
// override_target: optional node to use as the target instead of the current
//                  node (open_els[0]).
//
// Returns a two element array: [parent_node, index_within_parent_children].
adjusted_insertion_location = function (override_target) {
        var el, i, last_table, last_table_i, last_template, last_template_i, target, target_i
        // 1. If there was an override target specified, then let target be the
        // override target. Otherwise, let target be the current node.
        target = override_target != null ? override_target : open_els[0]
        // 2. Determine the adjusted insertion location using the first matching
        // steps from the following list:
        //
        // If foster parenting is enabled and target is a table, tbody, tfoot,
        // thead, or tr element. Foster parenting happens when content is
        // misnested in tables.
        if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
                // 1. Let last template be the last template element in the
                // stack of open elements, if any. (open_els[0] is the current
                // node, so the first match is the most recently added one.)
                last_template = null
                last_template_i = null
                for (i = 0; i < open_els.length; ++i) {
                        el = open_els[i]
                        if (el.name === 'template' && el.namespace === NS_HTML) {
                                last_template = el
                                last_template_i = i
                                break
                        }
                }
                // 2. Let last table be the last table element in the stack of
                // open elements, if any.
                last_table = null
                last_table_i = null
                for (i = 0; i < open_els.length; ++i) {
                        el = open_els[i]
                        if (el.name === 'table' && el.namespace === NS_HTML) {
                                last_table = el
                                last_table_i = i
                                break
                        }
                }
                if (last_template && (last_table === null || last_template_i < last_table_i)) {
                        // 3. If there is a last template and either there is no
                        // last table, or there is one, but last template is
                        // lower (more recently added) than last table in the
                        // stack of open elements, then: let adjusted insertion
                        // location be inside last template's template contents,
                        // after its last child (if any).
                        target = last_template // fixfull should be its contents
                        target_i = target.children.length
                } else if (last_table === null) {
                        // 4. If there is no last table, then let adjusted
                        // insertion location be inside the first element in the
                        // stack of open elements (the html element), after its
                        // last child (if any). (fragment case)
                        target = open_els[open_els.length - 1]
                        target_i = target.children.length
                } else if (last_table.parent != null) {
                        // 5. If last table has a parent element, then let
                        // adjusted insertion location be inside last table's
                        // parent element, immediately before last table.
                        i = last_table.parent.children.indexOf(last_table)
                        if (i !== -1) {
                                target = last_table.parent
                                target_i = i
                        }
                        // When the table is not listed among its parent's
                        // children, target is left as it was and target_i stays
                        // undefined.
                } else {
                        // 6. Let previous element be the element immediately
                        // above last table in the stack of open elements.
                        // 7. Let adjusted insertion location be inside previous
                        // element, after its last child (if any).
                        //
                        // Note: These steps are involved in part because it's
                        // possible for elements, the table element in this case
                        // in particular, to have been moved by a script around
                        // in the DOM, or indeed removed from the DOM entirely,
                        // after the element was inserted by the parser.
                        target = open_els[last_table_i + 1]
                        target_i = target.children.length
                }
        } else {
                // Otherwise let adjusted insertion location be inside target,
                // after its last child (if any).
                target_i = target.children.length
        }

        // 3. If the adjusted insertion location is inside a template element,
        // let it instead be inside the template element's template contents,
        // after its last child (if any).
        // fixfull (template)

        // 4. Return the adjusted insertion location.
        return [target, target_i]
}
1627
1628         // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1629         // aka create_an_element_for_token
// Create an element node for tag token t in the given namespace.
//
// t:               tag token; its name and attrs_a (array of [name, value]
//                  pairs) are read, and the token itself is stored on the node
// namespace:       namespace for the new element
// intended_parent: accepted for parity with the spec, currently unused
//
// Returns the new Node (not inserted anywhere).
token_to_element = function (t, namespace, intended_parent) {
        var attr_map = {}
        // convert the attribute pairs into a hash; a later pair with the same
        // name overwrites an earlier one
        t.attrs_a.forEach(function (pair) {
                attr_map[pair[0]] = pair[1] // TODO check what to do with duplicate attrs
        })

        // TODO 2. If the newly created element has an xmlns attribute in the
        // XMLNS namespace whose value is not exactly the same as the element's
        // namespace, that is a parse error. Similarly, if the newly created
        // element has an xmlns:xlink attribute in the XMLNS namespace whose
        // value is not the XLink Namespace, that is a parse error.

        // fixfull: the spec says stuff about form pointers and ownerDocument

        return new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attr_map, token: t})
}
1650
1651         // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
// Create an element for token in the given namespace, insert it at the
// adjusted insertion location, make it the current node (front of open_els)
// and return it.
insert_foreign_element = function (token, namespace) {
        var where, container, node
        where = adjusted_insertion_location()
        container = where[0]
        node = token_to_element(token, namespace, container)
        // TODO skip this next step if it's broken (eg container is document with child already)
        node.parent = container
        container.children.splice(where[1], 0, node)
        open_els.unshift(node)
        return node
}
1664         // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
// Shorthand for insert_foreign_element() with the HTML namespace; returns the
// newly inserted element.
insert_html_element = function (token) {
        var inserted = insert_foreign_element(token, NS_HTML)
        return inserted
}
1668
1669         // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1670         // position should be [node, index_within_children]
// Insert comment token t at position ([node, index_within_children]); when no
// position is given, the adjusted insertion location is used.
insert_comment = function (t, position) {
        var where = position != null ? position : adjusted_insertion_location()
        where[0].children.splice(where[1], 0, t)
}
1678
1679         // 8.2.5.2
1680         // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Generic raw text element parsing: insert an HTML element for t, remember the
// current insertion mode in original_ins_mode, then switch the insertion mode
// to "text" and the tokenizer to the RAWTEXT state.
parse_generic_raw_text = function (t) {
        insert_html_element(t)
        // save the mode before replacing it
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rawtext
}
// Generic RCDATA element parsing: insert an HTML element for t, remember the
// current insertion mode in original_ins_mode, then switch the insertion mode
// to "text" and the tokenizer to the RCDATA state.
parse_generic_rcdata_text = function (t) {
        insert_html_element(t)
        // save the mode before replacing it
        original_ins_mode = ins_mode
        ins_mode = ins_mode_text
        tok_state = tok_state_rcdata
}
1693
1694         // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1695         // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
// Generate implied end tags: pop elements off the front of open_els (the
// current node) for as long as the current node is listed in end_tag_implied
// with a matching namespace.
//
// except: optional element name; popping stops when the current node has this
//         name.
//
// An empty open_els simply ends the loop (previously open_els[0] was
// dereferenced unconditionally, which throws once the stack is empty).
generate_implied_end_tags = function (except) {
        var skip_name, current
        // treat a missing argument as null, which no element name equals
        skip_name = except == null ? null : except
        while (open_els.length > 0) {
                current = open_els[0]
                if (end_tag_implied[current.name] !== current.namespace || current.name === skip_name) {
                        return
                }
                open_els.shift()
        }
}
1704
1705         // 8.2.5.4 The rules for parsing tokens in HTML content
1706         // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1707
1708         // 8.2.5.4.1 The "initial" insertion mode
1709         // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
// Returns true when doctype token t puts the document into (full) quirks
// mode, false otherwise.
is_quirks_yes_doctype = function (t) {
        var pub, begins
        // does the lowercased public identifier start with prefix?
        begins = function (prefix) {
                return pub.indexOf(prefix) === 0
        }
        if (t.flag('force-quirks') || t.name !== 'html') {
                return true
        }
        if (t.public_identifier != null) {
                pub = t.public_identifier.toLowerCase()
                if (quirks_yes_pi_prefixes.some(begins)) {
                        return true
                }
                // exact (case-insensitive) public identifier matches
                switch (pub) {
                case '-//w3o//dtd w3 html strict 3.0//en//':
                case '-/w3c/dtd html 4.0 transitional/en':
                case 'html':
                        return true
                }
        }
        if (t.system_identifier != null) {
                return t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
        }
        // system identifier is missing: the HTML 4.01 frameset/transitional
        // public identifiers mean quirks mode
        if (t.public_identifier != null) {
                return begins('-//w3c//dtd html 4.01 frameset//') || begins('-//w3c//dtd html 4.01 transitional//')
        }
        return false
}
// Returns true when doctype token t puts the document into limited quirks
// mode, false otherwise. Only the public identifier (compared lowercased) and
// the presence of a system identifier are considered.
is_quirks_limited_doctype = function (t) {
        var pub, begins
        if (t.public_identifier == null) {
                return false
        }
        pub = t.public_identifier.toLowerCase()
        begins = function (prefix) {
                return pub.indexOf(prefix) === 0
        }
        if (begins('-//w3c//dtd xhtml 1.0 frameset//') || begins('-//w3c//dtd xhtml 1.0 transitional//')) {
                return true
        }
        // the HTML 4.01 identifiers only count when a system identifier is present
        return t.system_identifier != null &&
                (begins('-//w3c//dtd html 4.01 frameset//') || begins('-//w3c//dtd html 4.01 transitional//'))
}
// The "initial" insertion mode: whitespace is ignored, comments and the
// doctype are appended to the document, and anything else switches to
// "before html" in quirks mode and is reprocessed there.
ins_mode_initial = function (t) {
        if (is_space_tok(t)) {
                return
        }
        switch (t.type) {
        case TYPE_COMMENT:
                // ?fixfull
                doc.children.push(t)
                return
        case TYPE_DOCTYPE:
                // fixfull syntax error from first paragraph and following bullets
                // fixfull set doc.doctype
                // fixfull is the "not an iframe srcdoc" thing relevant?
                if (is_quirks_yes_doctype(t)) {
                        doc.flag('quirks mode', QUIRKS_YES)
                } else if (is_quirks_limited_doctype(t)) {
                        doc.flag('quirks mode', QUIRKS_LIMITED)
                }
                doc.children.push(t)
                ins_mode = ins_mode_before_html
                return
        }
        // Anything else
        // fixfull not iframe srcdoc?
        parse_error()
        doc.flag('quirks mode', QUIRKS_YES)
        ins_mode = ins_mode_before_html
        process_token(t)
}
1786
1787         // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
// The "before html" insertion mode. Creates the root html element, either from
// an explicit <html> start tag or implicitly, and switches to "before head".
ins_mode_before_html = function (t) {
        var start_html_element
        // Create the html element for token, append it to the document, make
        // it the current node and switch to the "before head" insertion mode.
        // The element is kept in a local; it used to be stored in `el`, which
        // is not declared in this function and therefore wrote to a variable of
        // an enclosing scope (or an implicit global).
        start_html_element = function (token) {
                var root = token_to_element(token, NS_HTML, doc)
                doc.children.push(root)
                root.document = doc
                open_els.unshift(root)
                // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
                // ?fixfull browsing context
                ins_mode = ins_mode_before_head
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_COMMENT) {
                doc.children.push(t)
                return
        }
        if (is_space_tok(t)) {
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                start_html_element(t)
                return
        }
        if (t.type === TYPE_END_TAG) {
                // only </head>, </body>, </html> and </br> fall through to
                // "anything else"; every other end tag is ignored
                if (!(t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br')) {
                        parse_error()
                        return
                }
        }
        // Anything else: create an implied html element, then reprocess the
        // token.
        start_html_element(new_open_tag('html'))
        process_token(t)
}
1826
1827         // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1828         ins_mode_before_head = function (t) {
1829                 var el
1830                 if (is_space_tok(t)) {
1831                         return
1832                 }
1833                 if (t.type === TYPE_COMMENT) {
1834                         insert_comment(t)
1835                         return
1836                 }
1837                 if (t.type === TYPE_DOCTYPE) {
1838                         parse_error()
1839                         return
1840                 }
1841                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1842                         ins_mode_in_body(t)
1843                         return
1844                 }
1845                 if (t.type === TYPE_START_TAG && t.name === 'head') {
1846                         el = insert_html_element(t)
1847                         head_element_pointer = el
1848                         ins_mode = ins_mode_in_head
1849                         return
1850                 }
1851                 if (t.type === TYPE_END_TAG) {
1852                         if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1853                                 // fall through to Anything else below
1854                         } else {
1855                                 parse_error()
1856                                 return
1857                         }
1858                 }
1859                 // Anything else
1860                 el = insert_html_element(new_open_tag('head'))
1861                 head_element_pointer = el
1862                 ins_mode = ins_mode_in_head
1863                 process_token(t)
1864         }
1865
1866         // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1867         ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
1868                 open_els.shift() // spec says this will be a 'head' node
1869                 ins_mode = ins_mode_after_head
1870                 process_token(t)
1871         }
1872         ins_mode_in_head = function (t) {
1873                 var ail, el
1874                 if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
1875                         insert_character(t)
1876                         return
1877                 }
1878                 if (t.type === TYPE_COMMENT) {
1879                         insert_comment(t)
1880                         return
1881                 }
1882                 if (t.type === TYPE_DOCTYPE) {
1883                         parse_error()
1884                         return
1885                 }
1886                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1887                         ins_mode_in_body(t)
1888                         return
1889                 }
1890                 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
1891                         el = insert_html_element(t)
1892                         open_els.shift()
1893                         t.acknowledge_self_closing()
1894                         return
1895                 }
1896                 if (t.type === TYPE_START_TAG && t.name === 'meta') {
1897                         el = insert_html_element(t)
1898                         open_els.shift()
1899                         t.acknowledge_self_closing()
1900                         // fixfull encoding stuff
1901                         return
1902                 }
1903                 if (t.type === TYPE_START_TAG && t.name === 'title') {
1904                         parse_generic_rcdata_text(t)
1905                         return
1906                 }
1907                 if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
1908                         parse_generic_raw_text(t)
1909                         return
1910                 }
1911                 if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
1912                         insert_html_element(t)
1913                         ins_mode = ins_mode_in_head_noscript
1914                         return
1915                 }
1916                 if (t.type === TYPE_START_TAG && t.name === 'script') {
1917                         ail = adjusted_insertion_location()
1918                         el = token_to_element(t, NS_HTML, ail)
1919                         el.flag('parser-inserted', true)
1920                         // fixfull frament case
1921                         ail[0].children.splice(ail[1], 0, el)
1922                         open_els.unshift(el)
1923                         tok_state = tok_state_script_data
1924                         original_ins_mode = ins_mode // make sure orig... is defined
1925                         ins_mode = ins_mode_text
1926                         return
1927                 }
1928                 if (t.type === TYPE_END_TAG && t.name === 'head') {
1929                         open_els.shift() // will be a head element... spec says so
1930                         ins_mode = ins_mode_after_head
1931                         return
1932                 }
1933                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
1934                         ins_mode_in_head_else(t)
1935                         return
1936                 }
1937                 if (t.type === TYPE_START_TAG && t.name === 'template') {
1938                         insert_html_element(t)
1939                         afe_push_marker()
1940                         flag_frameset_ok = false
1941                         ins_mode = ins_mode_in_template
1942                         template_ins_modes.unshift(ins_mode_in_template)
1943                         return
1944                 }
1945                 if (t.type === TYPE_END_TAG && t.name === 'template') {
1946                         if (template_tag_is_open()) {
1947                                 generate_implied_end_tags
1948                                 if (open_els[0].name !== 'template') {
1949                                         parse_error()
1950                                 }
1951                                 while (true) {
1952                                         el = open_els.shift()
1953                                         if (el.name === 'template' && el.namespace === NS_HTML) {
1954                                                 break
1955                                         }
1956                                 }
1957                                 clear_afe_to_marker()
1958                                 template_ins_modes.shift()
1959                                 reset_ins_mode()
1960                         } else {
1961                                 parse_error()
1962                         }
1963                         return
1964                 }
1965                 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
1966                         parse_error()
1967                         return
1968                 }
1969                 ins_mode_in_head_else(t)
1970         }
1971
1972         // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1973         ins_mode_in_head_noscript_else = function (t) {
1974                 parse_error()
1975                 open_els.shift()
1976                 ins_mode = ins_mode_in_head
1977                 process_token(t)
1978         }
1979         ins_mode_in_head_noscript = function (t) {
1980                 if (t.type === TYPE_DOCTYPE) {
1981                         parse_error()
1982                         return
1983                 }
1984                 if (t.type === TYPE_START_TAG && t.name === 'html') {
1985                         ins_mode_in_body(t)
1986                         return
1987                 }
1988                 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
1989                         open_els.shift()
1990                         ins_mode = ins_mode_in_head
1991                         return
1992                 }
1993                 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
1994                         ins_mode_in_head(t)
1995                         return
1996                 }
1997                 if (t.type === TYPE_END_TAG && t.name === 'br') {
1998                         ins_mode_in_head_noscript_else(t)
1999                         return
2000                 }
2001                 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
2002                         parse_error()
2003                         return
2004                 }
2005                 // Anything else
2006                 ins_mode_in_head_noscript_else(t)
2007         }
2008
2009         // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
2010         ins_mode_after_head_else = function (t) {
2011                 var body_tok
2012                 body_tok = new_open_tag('body')
2013                 insert_html_element(body_tok)
2014                 ins_mode = ins_mode_in_body
2015                 process_token(t)
2016         }
2017         ins_mode_after_head = function (t) {
2018                 var el, i, j, len
2019                 if (is_space_tok(t)) {
2020                         insert_character(t)
2021                         return
2022                 }
2023                 if (t.type === TYPE_COMMENT) {
2024                         insert_comment(t)
2025                         return
2026                 }
2027                 if (t.type === TYPE_DOCTYPE) {
2028                         parse_error()
2029                         return
2030                 }
2031                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2032                         ins_mode_in_body(t)
2033                         return
2034                 }
2035                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2036                         insert_html_element(t)
2037                         flag_frameset_ok = false
2038                         ins_mode = ins_mode_in_body
2039                         return
2040                 }
2041                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2042                         insert_html_element(t)
2043                         ins_mode = ins_mode_in_frameset
2044                         return
2045                 }
2046                 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
2047                         parse_error()
2048                         open_els.unshift(head_element_pointer)
2049                         ins_mode_in_head(t)
2050                         for (i = 0; i < open_els.length; ++i) {
2051                                 el = open_els[i]
2052                                 if (el === head_element_pointer) {
2053                                         open_els.splice(i, 1)
2054                                         return
2055                                 }
2056                         }
2057                         return
2058                 }
2059                 if (t.type === TYPE_END_TAG && t.name === 'template') {
2060                         ins_mode_in_head(t)
2061                         return
2062                 }
2063                 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2064                         ins_mode_after_head_else(t)
2065                         return
2066                 }
2067                 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2068                         parse_error()
2069                         return
2070                 }
2071                 // Anything else
2072                 ins_mode_after_head_else(t)
2073         }
2074
2075         // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
2076         in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
2077                 var el, i, node
2078                 node = open_els[0]
2079                 while (true) {
2080                         if (node.name === name && node.namespace === NS_HTML) {
2081                                 generate_implied_end_tags(name) // arg is exception
2082                                 if (node !== open_els[0]) {
2083                                         parse_error()
2084                                 }
2085                                 while (true) {
2086                                         el = open_els.shift()
2087                                         if (el === node) {
2088                                                 return
2089                                         }
2090                                 }
2091                         }
2092                         if (special_elements[node.name] === node.namespace) {
2093                                 parse_error()
2094                                 return
2095                         }
2096                         for (i = 0; i < open_els.length; ++i) {
2097                                 el = open_els[i]
2098                                 if (node === el) {
2099                                         node = open_els[i + 1]
2100                                         break
2101                                 }
2102                         }
2103                 }
2104         }
2105         ins_mode_in_body = function (t) {
2106                 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, root_attrs, s, second, second_i, u, w, y, z
2107                 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2108                         parse_error()
2109                         return
2110                 }
2111                 if (is_space_tok(t)) {
2112                         reconstruct_afe()
2113                         insert_character(t)
2114                         return
2115                 }
2116                 if (t.type === TYPE_TEXT) {
2117                         reconstruct_afe()
2118                         insert_character(t)
2119                         flag_frameset_ok = false
2120                         return
2121                 }
2122                 if (t.type === TYPE_COMMENT) {
2123                         insert_comment(t)
2124                         return
2125                 }
2126                 if (t.type === TYPE_DOCTYPE) {
2127                         parse_error()
2128                         return
2129                 }
2130                 if (t.type === TYPE_START_TAG && t.name === 'html') {
2131                         parse_error()
2132                         if (template_tag_is_open()) {
2133                                 return
2134                         }
2135                         root_attrs = open_els[open_els.length - 1].attrs
2136                         for (i = 0; i < t.attrs_a.length; ++i) {
2137                                 a = t.attrs_a[i]
2138                                 if (root_attrs[a[0]] == null) {
2139                                         root_attrs[a[0]] = a[1]
2140                                 }
2141                         }
2142                         return
2143                 }
2144
2145                 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2146                         ins_mode_in_head(t)
2147                         return
2148                 }
2149                 if (t.type === TYPE_START_TAG && t.name === 'body') {
2150                         parse_error()
2151                         if (open_els.length < 2) {
2152                                 return
2153                         }
2154                         second = open_els[open_els.length - 2]
2155                         if (second.namespace !== NS_HTML) {
2156                                 return
2157                         }
2158                         if (second.name !== 'body') {
2159                                 return
2160                         }
2161                         if (template_tag_is_open()) {
2162                                 return
2163                         }
2164                         flag_frameset_ok = false
2165                         for (i = 0; i < t.attrs_a.length; ++i) {
2166                                 a = t.attrs_a[i]
2167                                 if (second.attrs[a[0]] == null) {
2168                                         second.attrs[a[0]] = a[1]
2169                                 }
2170                         }
2171                         return
2172                 }
2173                 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2174                         parse_error()
2175                         if (open_els.length < 2) {
2176                                 return
2177                         }
2178                         second_i = open_els.length - 2
2179                         second = open_els[second_i]
2180                         if (second.namespace !== NS_HTML) {
2181                                 return
2182                         }
2183                         if (second.name !== 'body') {
2184                                 return
2185                         }
2186                         if (flag_frameset_ok === false) {
2187                                 return
2188                         }
2189                         if (second.parent != null) {
2190                                 for (i = 0; i < second.parent.children.length; ++i) {
2191                                         el = second.parent.children[i]
2192                                         if (el === second) {
2193                                                 second.parent.children.splice(i, 1)
2194                                                 break
2195                                         }
2196                                 }
2197                         }
2198                         open_els.splice(second_i, 1)
2199                         // pop everything except the "root html element"
2200                         while (open_els.length > 1) {
2201                                 open_els.shift()
2202                         }
2203                         insert_html_element(t)
2204                         ins_mode = ins_mode_in_frameset
2205                         return
2206                 }
2207                 if (t.type === TYPE_EOF) {
2208                         ok_tags = {
2209                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2210                                 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2211                                 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2212                         }
2213                         for (i = 0; i < open_els.length; ++i) {
2214                                 el = open_els[i]
2215                                 if (ok_tags[t.name] !== el.namespace) {
2216                                         parse_error()
2217                                         break
2218                                 }
2219                         }
2220                         if (template_ins_modes.length > 0) {
2221                                 ins_mode_in_template(t)
2222                         } else {
2223                                 stop_parsing()
2224                         }
2225                         return
2226                 }
2227                 if (t.type === TYPE_END_TAG && t.name === 'body') {
2228                         if (!is_in_scope('body', NS_HTML)) {
2229                                 parse_error()
2230                                 return
2231                         }
2232                         ok_tags = {
2233                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2234                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2235                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2236                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2237                                 html: NS_HTML
2238                         }
2239                         for (i = 0; i < open_els.length; ++i) {
2240                                 el = open_els[i]
2241                                 if (ok_tags[t.name] !== el.namespace) {
2242                                         parse_error()
2243                                         break
2244                                 }
2245                         }
2246                         ins_mode = ins_mode_after_body
2247                         return
2248                 }
2249                 if (t.type === TYPE_END_TAG && t.name === 'html') {
2250                         if (!is_in_scope('body', NS_HTML)) {
2251                                 parse_error()
2252                                 return
2253                         }
2254                         ok_tags = {
2255                                 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2256                                 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2257                                 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2258                                 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2259                                 html: NS_HTML
2260                         }
2261                         for (i = 0; i < open_els.length; ++i) {
2262                                 el = open_els[i]
2263                                 if (ok_tags[t.name] !== el.namespace) {
2264                                         parse_error()
2265                                         break
2266                                 }
2267                         }
2268                         ins_mode = ins_mode_after_body
2269                         process_token(t)
2270                         return
2271                 }
2272                 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2273                         close_p_if_in_button_scope()
2274                         insert_html_element(t)
2275                         return
2276                 }
2277                 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2278                         close_p_if_in_button_scope()
2279                         if (h_tags[open_els[0].name] === open_els[0].namespace) {
2280                                 parse_error()
2281                                 open_els.shift()
2282                         }
2283                         insert_html_element(t)
2284                         return
2285                 }
2286                 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2287                         close_p_if_in_button_scope()
2288                         insert_html_element(t)
2289                         eat_next_token_if_newline()
2290                         flag_frameset_ok = false
2291                         return
2292                 }
2293                 if (t.type === TYPE_START_TAG && t.name === 'form') {
2294                         if (!(form_element_pointer === null || template_tag_is_open())) {
2295                                 parse_error()
2296                                 return
2297                         }
2298                         close_p_if_in_button_scope()
2299                         el = insert_html_element(t)
2300                         if (!template_tag_is_open()) {
2301                                 form_element_pointer = el
2302                         }
2303                         return
2304                 }
2305                 if (t.type === TYPE_START_TAG && t.name === 'li') {
2306                         flag_frameset_ok = false
2307                         for (i = 0; i < open_els.length; ++i) {
2308                                 node = open_els[i]
2309                                 if (node.name === 'li' && node.namespace === NS_HTML) {
2310                                         generate_implied_end_tags('li') // arg is exception
2311                                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2312                                                 parse_error()
2313                                         }
2314                                         while (true) {
2315                                                 el = open_els.shift()
2316                                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2317                                                         break
2318                                                 }
2319                                         }
2320                                         break
2321                                 }
2322                                 if (el_is_special_not_adp(node)) {
2323                                         break
2324                                 }
2325                         }
2326                         close_p_if_in_button_scope()
2327                         insert_html_element(t)
2328                         return
2329                 }
2330                 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2331                         flag_frameset_ok = false
2332                         for (i = 0; i < open_els.length; ++i) {
2333                                 node = open_els[i]
2334                                 if (node.name === 'dd' && node.namespace === NS_HTML) {
2335                                         generate_implied_end_tags('dd') // arg is exception
2336                                         if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2337                                                 parse_error()
2338                                         }
2339                                         while (true) {
2340                                                 el = open_els.shift()
2341                                                 if (el.name === 'dd' && el.namespace === NS_HTML) {
2342                                                         break
2343                                                 }
2344                                         }
2345                                         break
2346                                 }
2347                                 if (node.name === 'dt' && node.namespace === NS_HTML) {
2348                                         generate_implied_end_tags('dt') // arg is exception
2349                                         if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2350                                                 parse_error()
2351                                         }
2352                                         while (true) {
2353                                                 el = open_els.shift()
2354                                                 if (el.name === 'dt' && el.namespace === NS_HTML) {
2355                                                         break
2356                                                 }
2357                                         }
2358                                         break
2359                                 }
2360                                 if (el_is_special_not_adp(node)) {
2361                                         break
2362                                 }
2363                         }
2364                         close_p_if_in_button_scope()
2365                         insert_html_element(t)
2366                         return
2367                 }
2368                 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2369                         close_p_if_in_button_scope()
2370                         insert_html_element(t)
2371                         tok_state = tok_state_plaintext
2372                         return
2373                 }
2374                 if (t.type === TYPE_START_TAG && t.name === 'button') {
2375                         if (is_in_scope('button', NS_HTML)) {
2376                                 parse_error()
2377                                 generate_implied_end_tags()
2378                                 while (true) {
2379                                         el = open_els.shift()
2380                                         if (el.name === 'button' && el.namespace === NS_HTML) {
2381                                                 break
2382                                         }
2383                                 }
2384                         }
2385                         reconstruct_afe()
2386                         insert_html_element(t)
2387                         flag_frameset_ok = false
2388                         return
2389                 }
2390                 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2391                         if (!is_in_scope(t.name, NS_HTML)) {
2392                                 parse_error()
2393                                 return
2394                         }
2395                         generate_implied_end_tags()
2396                         if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2397                                 parse_error()
2398                         }
2399                         while (true) {
2400                                 el = open_els.shift()
2401                                 if (el.name === t.name && el.namespace === NS_HTML) {
2402                                         return
2403                                 }
2404                         }
2405                         return
2406                 }
2407                 if (t.type === TYPE_END_TAG && t.name === 'form') {
2408                         if (!template_tag_is_open()) {
2409                                 node = form_element_pointer
2410                                 form_element_pointer = null
2411                                 if (node === null || !el_is_in_scope(node)) {
2412                                         parse_error()
2413                                         return
2414                                 }
2415                                 generate_implied_end_tags()
2416                                 if (open_els[0] !== node) {
2417                                         parse_error()
2418                                 }
2419                                 for (i = 0; i < open_els.length; ++i) {
2420                                         el = open_els[i]
2421                                         if (el === node) {
2422                                                 open_els.splice(i, 1)
2423                                                 break
2424                                         }
2425                                 }
2426                         } else {
2427                                 if (!is_in_scope('form', NS_HTML)) {
2428                                         parse_error()
2429                                         return
2430                                 }
2431                                 generate_implied_end_tags()
2432                                 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2433                                         parse_error()
2434                                 }
2435                                 while (true) {
2436                                         el = open_els.shift()
2437                                         if (el.name === 'form' && el.namespace === NS_HTML) {
2438                                                 break
2439                                         }
2440                                 }
2441                         }
2442                         return
2443                 }
2444                 if (t.type === TYPE_END_TAG && t.name === 'p') {
2445                         if (!is_in_button_scope('p', NS_HTML)) {
2446                                 parse_error()
2447                                 insert_html_element(new_open_tag('p'))
2448                         }
2449                         close_p_element()
2450                         return
2451                 }
2452                 if (t.type === TYPE_END_TAG && t.name === 'li') {
2453                         if (!is_in_li_scope('li', NS_HTML)) {
2454                                 parse_error()
2455                                 return
2456                         }
2457                         generate_implied_end_tags('li') // arg is exception
2458                         if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2459                                 parse_error()
2460                         }
2461                         while (true) {
2462                                 el = open_els.shift()
2463                                 if (el.name === 'li' && el.namespace === NS_HTML) {
2464                                         break
2465                                 }
2466                         }
2467                         return
2468                 }
2469                 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2470                         if (!is_in_scope(t.name, NS_HTML)) {
2471                                 parse_error()
2472                                 return
2473                         }
2474                         generate_implied_end_tags(t.name) // arg is exception
2475                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2476                                 parse_error()
2477                         }
2478                         while (true) {
2479                                 el = open_els.shift()
2480                                 if (el.name === t.name && el.namespace === NS_HTML) {
2481                                         break
2482                                 }
2483                         }
2484                         return
2485                 }
2486                 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2487                         h_in_scope = false
2488                         for (i = 0; i < open_els.length; ++i) {
2489                                 el = open_els[i]
2490                                 if (h_tags[el.name] === el.namespace) {
2491                                         h_in_scope = true
2492                                         break
2493                                 }
2494                                 if (standard_scopers[el.name] === el.namespace) {
2495                                         break
2496                                 }
2497                         }
2498                         if (!h_in_scope) {
2499                                 parse_error()
2500                                 return
2501                         }
2502                         generate_implied_end_tags()
2503                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2504                                 parse_error()
2505                         }
2506                         while (true) {
2507                                 el = open_els.shift()
2508                                 if (h_tags[el.name] === el.namespace) {
2509                                         break
2510                                 }
2511                         }
2512                         return
2513                 }
2514                 // deep breath!
2515                 if (t.type === TYPE_START_TAG && t.name === 'a') {
2516                         // If the list of active formatting elements contains an a element
2517                         // between the end of the list and the last marker on the list (or
2518                         // the start of the list if there is no marker on the list), then
2519                         // this is a parse error; run the adoption agency algorithm for the
2520                         // tag name "a", then remove that element from the list of active
2521                         // formatting elements and the stack of open elements if the
2522                         // adoption agency algorithm didn't already remove it (it might not
2523                         // have if the element is not in table scope).
2524                         found = false
2525                         for (i = 0; i < afe.length; ++i) {
2526                                 el = afe[i]
2527                                 if (el.type === TYPE_AFE_MARKER) {
2528                                         break
2529                                 }
2530                                 if (el.name === 'a' && el.namespace === NS_HTML) {
2531                                         found = el
2532                                 }
2533                         }
2534                         if (found != null) {
2535                                 parse_error()
2536                                 adoption_agency('a')
2537                                 for (i = 0; i < afe.length; ++i) {
2538                                         el = afe[i]
2539                                         if (el === found) {
2540                                                 afe.splice(i, 1)
2541                                         }
2542                                 }
2543                                 for (i = 0; i < open_els.length; ++i) {
2544                                         el = open_els[i]
2545                                         if (el === found) {
2546                                                 open_els.splice(i, 1)
2547                                         }
2548                                 }
2549                         }
2550                         reconstruct_afe()
2551                         el = insert_html_element(t)
2552                         afe_push(el)
2553                         return
2554                 }
2555                 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2556                         reconstruct_afe()
2557                         el = insert_html_element(t)
2558                         afe_push(el)
2559                         return
2560                 }
2561                 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2562                         reconstruct_afe()
2563                         if (is_in_scope('nobr', NS_HTML)) {
2564                                 parse_error()
2565                                 adoption_agency('nobr')
2566                                 reconstruct_afe()
2567                         }
2568                         el = insert_html_element(t)
2569                         afe_push(el)
2570                         return
2571                 }
2572                 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2573                         adoption_agency(t.name)
2574                         return
2575                 }
2576                 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2577                         reconstruct_afe()
2578                         insert_html_element(t)
2579                         afe_push_marker()
2580                         flag_frameset_ok = false
2581                         return
2582                 }
2583                 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2584                         if (!is_in_scope(t.name, NS_HTML)) {
2585                                 parse_error()
2586                                 return
2587                         }
2588                         generate_implied_end_tags()
2589                         if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2590                                 parse_error()
2591                         }
2592                         while (true) {
2593                                 el = open_els.shift()
2594                                 if (el.name === t.name && el.namespace === NS_HTML) {
2595                                         break
2596                                 }
2597                         }
2598                         clear_afe_to_marker()
2599                         return
2600                 }
2601                 if (t.type === TYPE_START_TAG && t.name === 'table') {
2602                         if (doc.flag('quirks mode') !== QUIRKS_YES) {
2603                                 close_p_if_in_button_scope() // test
2604                         }
2605                         insert_html_element(t)
2606                         flag_frameset_ok = false
2607                         ins_mode = ins_mode_in_table
2608                         return
2609                 }
2610                 if (t.type === TYPE_END_TAG && t.name === 'br') {
2611                         parse_error()
2612                         // W3C: t.type = TYPE_START_TAG
2613                         t = new_open_tag('br') // WHATWG
2614                         // fall through
2615                 }
2616                 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2617                         reconstruct_afe()
2618                         insert_html_element(t)
2619                         open_els.shift()
2620                         t.acknowledge_self_closing()
2621                         flag_frameset_ok = false
2622                         return
2623                 }
2624                 if (t.type === TYPE_START_TAG && t.name === 'input') {
2625                         reconstruct_afe()
2626                         insert_html_element(t)
2627                         open_els.shift()
2628                         t.acknowledge_self_closing()
2629                         if (!is_input_hidden_tok(t)) {
2630                                 flag_frameset_ok = false
2631                         }
2632                         return
2633                 }
2634                 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2635                         // WHATWG adds 'menuitem' for this block
2636                         insert_html_element(t)
2637                         open_els.shift()
2638                         t.acknowledge_self_closing()
2639                         return
2640                 }
2641                 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2642                         close_p_if_in_button_scope()
2643                         insert_html_element(t)
2644                         open_els.shift()
2645                         t.acknowledge_self_closing()
2646                         flag_frameset_ok = false
2647                         return
2648                 }
2649                 if (t.type === TYPE_START_TAG && t.name === 'image') {
2650                         parse_error()
2651                         t.name = 'img'
2652                         process_token(t)
2653                         return
2654                 }
2655                 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2656                         parse_error()
2657                         if (template_tag_is_open() === false && form_element_pointer !== null) {
2658                                 return
2659                         }
2660                         t.acknowledge_self_closing()
2661                         flag_frameset_ok = false
2662                         close_p_if_in_button_scope()
2663                         el = insert_html_element(new_open_tag('form'))
2664                         if (!template_tag_is_open()) {
2665                                 form_element_pointer = el
2666                         }
2667                         for (i = 0; i < t.attrs_a.length; ++i) {
2668                                 a = t.attrs_a[i]
2669                                 if (a[0] === 'action') {
2670                                         el.attrs['action'] = a[1]
2671                                         break
2672                                 }
2673                         }
2674                         insert_html_element(new_open_tag('hr'))
2675                         open_els.shift()
2676                         reconstruct_afe()
2677                         insert_html_element(new_open_tag('label'))
2678                         // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2679                         input_el = new_open_tag('input')
2680                         prompt = null
2681                         for (i = 0; i < t.attrs_a.length; ++i) {
2682                                 a = t.attrs_a[i]
2683                                 if (a[0] === 'prompt') {
2684                                         prompt = a[1]
2685                                 }
2686                                 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2687                                         input_el.attrs_a.push([a[0], a[1]])
2688                                 }
2689                         }
2690                         input_el.attrs_a.push(['name', 'isindex'])
2691                         // fixfull this next bit is in english... internationalize?
2692                         if (prompt == null) {
2693                                 prompt = "This is a searchable index. Enter search keywords: "
2694                         }
2695                         insert_character(new_character_token(prompt)) // fixfull split
2696                         // TODO submit typo "balue" in spec
2697                         insert_html_element(input_el)
2698                         open_els.shift()
2699                         // insert_character('') // you can put chars here if prompt attr missing
2700                         open_els.shift()
2701                         insert_html_element(new_open_tag('hr'))
2702                         open_els.shift()
2703                         open_els.shift()
2704                         if (!template_tag_is_open()) {
2705                                 form_element_pointer = null
2706                         }
2707                         return
2708                 }
2709                 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2710                         insert_html_element(t)
2711                         eat_next_token_if_newline()
2712                         tok_state = tok_state_rcdata
2713                         original_ins_mode = ins_mode
2714                         flag_frameset_ok = false
2715                         ins_mode = ins_mode_text
2716                         return
2717                 }
2718                 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2719                         close_p_if_in_button_scope()
2720                         reconstruct_afe()
2721                         flag_frameset_ok = false
2722                         parse_generic_raw_text(t)
2723                         return
2724                 }
2725                 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2726                         flag_frameset_ok = false
2727                         parse_generic_raw_text(t)
2728                         return
2729                 }
2730                 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2731                         parse_generic_raw_text(t)
2732                         return
2733                 }
2734                 if (t.type === TYPE_START_TAG && t.name === 'select') {
2735                         reconstruct_afe()
2736                         insert_html_element(t)
2737                         flag_frameset_ok = false
2738                         if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2739                                 ins_mode = ins_mode_in_select_in_table
2740                         } else {
2741                                 ins_mode = ins_mode_in_select
2742                         }
2743                         return
2744                 }
2745                 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2746                         if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2747                                 open_els.shift()
2748                         }
2749                         reconstruct_afe()
2750                         insert_html_element(t)
2751                         return
2752                 }
2753 // this comment block implements the W3C spec
2754 //              if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2755 //                      if is_in_scope 'ruby', NS_HTML
2756 //                              generate_implied_end_tags()
2757 //                              unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2758 //                                      parse_error()
2759 //                      insert_html_element t
2760 //                      return
2761 //              if t.type === TYPE_START_TAG && t.name === 'rt'
2762 //                      if is_in_scope 'ruby', NS_HTML
2763 //                              generate_implied_end_tags 'rtc' // arg === exception
2764 //                              unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2765 //                                      parse_error()
2766 //                      insert_html_element t
2767 //                      return
2768 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2769                 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2770                         if (is_in_scope('ruby', NS_HTML)) {
2771                                 generate_implied_end_tags()
2772                                 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2773                                         parse_error()
2774                                 }
2775                         }
2776                         insert_html_element(t)
2777                         return
2778                 }
2779                 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2780                         if (is_in_scope('ruby', NS_HTML)) {
2781                                 generate_implied_end_tags('rtc')
2782                                 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2783                                         parse_error()
2784                                 }
2785                         }
2786                         insert_html_element(t)
2787                         return
2788                 }
2789 // end WHATWG chunk
2790                 if (t.type === TYPE_START_TAG && t.name === 'math') {
2791                         reconstruct_afe()
2792                         adjust_mathml_attributes(t)
2793                         adjust_foreign_attributes(t)
2794                         insert_foreign_element(t, NS_MATHML)
2795                         if (t.flag('self-closing')) {
2796                                 open_els.shift()
2797                                 t.acknowledge_self_closing()
2798                         }
2799                         return
2800                 }
2801                 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2802                         reconstruct_afe()
2803                         adjust_svg_attributes(t)
2804                         adjust_foreign_attributes(t)
2805                         insert_foreign_element(t, NS_SVG)
2806                         if (t.flag('self-closing')) {
2807                                 open_els.shift()
2808                                 t.acknowledge_self_closing()
2809                         }
2810                         return
2811                 }
2812                 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2813                         parse_error()
2814                         return
2815                 }
2816                 if (t.type === TYPE_START_TAG) { // any other start tag
2817                         reconstruct_afe()
2818                         insert_html_element(t)
2819                         return
2820                 }
2821                 if (t.type === TYPE_END_TAG) { // any other end tag
2822                         in_body_any_other_end_tag(t.name)
2823                         return
2824                 }
2825         }
2826
2827         // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
// Token handler for the "text" insertion mode. Callers save the previous
// mode in original_ins_mode before switching here (see the 'textarea'
// handling in ins_mode_in_body); this handler switches back to it once the
// element's end tag, or EOF, is seen.
//
// t: the token to process. Only TYPE_TEXT, TYPE_EOF and TYPE_END_TAG tokens
// are acted on; any other token type is silently ignored.
ins_mode_text = function (t) {
        var current
        switch (t.type) {
                case TYPE_TEXT:
                        insert_character(t)
                break
                case TYPE_EOF:
                        // input ended while the element was still open
                        parse_error()
                        current = open_els[0]
                        if (current.name === 'script' && current.namespace === NS_HTML) {
                                current.flag('already started', true)
                        }
                        open_els.shift()
                        ins_mode = original_ins_mode
                        // let the restored insertion mode see the EOF token too
                        process_token(t)
                break
                case TYPE_END_TAG:
                        // </script> and every other end tag are handled the same way:
                        // pop the current node and go back to the saved insertion mode.
                        // fixfull the spec seems to assume that I'm going to run the script
                        // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
                        open_els.shift()
                        ins_mode = original_ins_mode
                break
        }
}
2856
2857         // the functions below implement the tokenizer states described here:
2858         // http://www.w3.org/TR/html5/syntax.html#tokenization
2859
2860         // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
// Fallback handling used by the "in table" insertion mode for tokens it does
// not handle itself: records a parse error, then processes the token with the
// "in body" rules while flag_foster_parenting is switched on.
//
// t: the token to process.
ins_mode_in_table_else = function (t) {
        parse_error()
        flag_foster_parenting = true
        try {
                ins_mode_in_body(t)
        } finally {
                // switch the flag back off even if ins_mode_in_body throws, so a
                // failure here cannot leave foster parenting enabled for later tokens
                flag_foster_parenting = false
        }
}
2867         ins_mode_in_table = function (t) {
2868                 var el
2869                 switch (t.type) {
2870                         case TYPE_TEXT:
2871                                 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
2872                                         pending_table_character_tokens = []
2873                                         original_ins_mode = ins_mode
2874                                         ins_mode = ins_mode_in_table_text
2875                                         process_token(t)
2876                                 } else {
2877                                         ins_mode_in_table_else(t)
2878                                 }
2879                         break
2880                         case TYPE_COMMENT:
2881                                 insert_comment(t)
2882                         break
2883                         case TYPE_DOCTYPE:
2884                                 parse_error()
2885                         break
2886                         case TYPE_START_TAG:
2887                                 switch (t.name) {
2888                                         case 'caption':
2889                                                 clear_stack_to_table_context()
2890                                                 afe_push_marker()
2891                                                 insert_html_element(t)
2892                                                 ins_mode = ins_mode_in_caption
2893                                         break
2894                                         case 'colgroup':
2895                                                 clear_stack_to_table_context()
2896                                                 insert_html_element(t)
2897                                                 ins_mode = ins_mode_in_column_group
2898                                         break
2899                                         case 'col':
2900                                                 clear_stack_to_table_context()
2901                                                 insert_html_element(new_open_tag('colgroup'))
2902                                                 ins_mode = ins_mode_in_column_group
2903                                                 process_token(t)
2904                                         break
2905                                         case 'tbody':
2906                                         case 'tfoot':
2907                                         case 'thead':
2908                                                 clear_stack_to_table_context()
2909                                                 insert_html_element(t)
2910                                                 ins_mode = ins_mode_in_table_body
2911                                         break
2912                                         case 'td':
2913                                         case 'th':
2914                                         case 'tr':
2915                                                 clear_stack_to_table_context()
2916                                                 insert_html_element(new_open_tag('tbody'))
2917                                                 ins_mode = ins_mode_in_table_body
2918                                                 process_token(t)
2919                                         break
2920                                         case 'table':
2921                                                 parse_error()
2922                                                 if (is_in_table_scope('table', NS_HTML)) {
2923                                                         while (true) {
2924                                                                 el = open_els.shift()
2925                                                                 if (el.name === 'table' && el.namespace === NS_HTML) {
2926                                                                         break
2927                                                                 }
2928                                                         }
2929                                                         reset_ins_mode()
2930                                                         process_token(t)
2931                                                 }
2932                                         break
2933                                         case 'style':
2934                                         case 'script':
2935                                         case 'template':
2936                                                 ins_mode_in_head(t)
2937                                         break
2938                                         case 'input':
2939                                                 if (!is_input_hidden_tok(t)) {
2940                                                         ins_mode_in_table_else(t)
2941                                                 } else {
2942                                                         parse_error()
2943                                                         el = insert_html_element(t)
2944                                                         open_els.shift()
2945                                                         t.acknowledge_self_closing()
2946                                                 }
2947                                         break
2948                                         case 'form':
2949                                                 parse_error()
2950                                                 if (form_element_pointer != null) {
2951                                                         return
2952                                                 }
2953                                                 if (template_tag_is_open()) {
2954                                                         return
2955                                                 }
2956                                                 form_element_pointer = insert_html_element(t)
2957                                                 open_els.shift()
2958                                         break
2959                                         default:
2960                                                 ins_mode_in_table_else(t)
2961                                 }
2962                         break
2963                         case TYPE_END_TAG:
2964                                 switch (t.name) {
2965                                         case 'table':
2966                                                 if (is_in_table_scope('table', NS_HTML)) {
2967                                                         while (true) {
2968                                                                 el = open_els.shift()
2969                                                                 if (el.name === 'table' && el.namespace === NS_HTML) {
2970                                                                         break
2971                                                                 }
2972                                                         }
2973                                                         reset_ins_mode()
2974                                                 } else {
2975                                                         parse_error()
2976                                                 }
2977                                         break
2978                                         case 'body':
2979                                         case 'caption':
2980                                         case 'col':
2981                                         case 'colgroup':
2982                                         case 'html':
2983                                         case 'tbody':
2984                                         case 'td':
2985                                         case 'tfoot':
2986                                         case 'th':
2987                                         case 'thead':
2988                                         case 'tr':
2989                                                 parse_error()
2990                                         break
2991                                         case 'template':
2992                                                 ins_mode_in_head(t)
2993                                         break
2994                                         default:
2995                                                 ins_mode_in_table_else(t)
2996                                 }
2997                         break
2998                         case TYPE_EOF:
2999                                 ins_mode_in_body(t)
3000                         break
3001                         default:
3002                                 ins_mode_in_table_else(t)
3003                 }
3004         }
3005
3006         // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
// Insertion mode "in table text".
//
// Character tokens are collected in pending_table_character_tokens (a lone
// U+0000 is reported as a parse error and dropped). The first token of any
// other type flushes that buffer, restores original_ins_mode and is then
// reprocessed.
//
// t: the token to handle
ins_mode_in_table_text = function (t) {
        var idx, only_whitespace, pending
        if (t.type === TYPE_TEXT) {
                if (t.text === "\u0000") {
                        // from javascript?
                        parse_error()
                } else {
                        pending_table_character_tokens.push(t)
                }
                return
        }
        // Anything else: decide how the buffered text has to be flushed
        only_whitespace = true
        for (idx = 0; idx < pending_table_character_tokens.length; ++idx) {
                if (!is_space_tok(pending_table_character_tokens[idx])) {
                        only_whitespace = false
                        break
                }
        }
        // whitespace-only runs are inserted directly; if any buffered token is
        // not a space token, every buffered token goes through the "anything
        // else" rules of the "in table" mode instead
        for (idx = 0; idx < pending_table_character_tokens.length; ++idx) {
                pending = pending_table_character_tokens[idx]
                if (only_whitespace) {
                        insert_character(pending)
                } else {
                        ins_mode_in_table_else(pending)
                }
        }
        pending_table_character_tokens = []
        ins_mode = original_ins_mode
        process_token(t)
}
3042
3043         // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
// Insertion mode "in caption".
//
// </caption> closes the caption. Table-structure start tags and </table>
// close it and are then reprocessed. A fixed list of stray end tags is
// reported as a parse error and ignored. Every other token is handled by
// ins_mode_in_body.
//
// t: the token to handle
ins_mode_in_caption = function (t) {
        var closing_starts = ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
        var ignored_ends = ['body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG

        // Pops open elements up to and including the HTML caption, clears the
        // active formatting elements back to the last marker and switches to
        // the "in table" mode.
        function leave_caption() {
                var popped
                do {
                        popped = open_els.shift()
                } while (!(popped.name === 'caption' && popped.namespace === NS_HTML))
                clear_afe_to_marker()
                ins_mode = ins_mode_in_table
        }

        if (is_end && t.name === 'caption') {
                if (!is_in_table_scope('caption', NS_HTML)) {
                        // fragment case
                        parse_error()
                        return
                }
                generate_implied_end_tags()
                if (open_els[0].name !== 'caption') {
                        parse_error()
                }
                leave_caption()
                return
        }
        if ((is_start && closing_starts.indexOf(t.name) !== -1) || (is_end && t.name === 'table')) {
                parse_error()
                if (is_in_table_scope('caption', NS_HTML)) {
                        leave_caption()
                        process_token(t)
                }
                // otherwise: fragment case, the token is ignored
                return
        }
        if (is_end && ignored_ends.indexOf(t.name) !== -1) {
                parse_error()
                return
        }
        // Anything else
        ins_mode_in_body(t)
}
3089
3090         // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
// Insertion mode "in column group".
//
// Whitespace and comments are inserted, <col> is inserted and immediately
// popped, </colgroup> closes the colgroup, template tags and EOF are
// delegated to other modes. Any other token implicitly closes the colgroup
// (when it is the current node) and is reprocessed.
//
// t: the token to handle
ins_mode_in_column_group = function (t) {
        if (is_space_tok(t)) {
                insert_character(t)
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'html') {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG && t.name === 'col') {
                // void element: insert it, then pop it right away
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
                // The namespace has to be read from the current node
                // (open_els[0]), not from the open_els array itself; reading it
                // from the array made this test always fail, so </colgroup>
                // was always reported as a parse error.
                if (open_els[0].name === 'colgroup' && open_els[0].namespace === NS_HTML) {
                        open_els.shift()
                        ins_mode = ins_mode_in_table
                } else {
                        parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG && t.name === 'col') {
                parse_error()
                return
        }
        if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
                ins_mode_in_head(t)
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        // Anything else
        if (open_els[0].name !== 'colgroup') {
                parse_error()
                return
        }
        open_els.shift()
        ins_mode = ins_mode_in_table
        process_token(t)
}
3145
3146         // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
// Insertion mode "in table body".
//
// <tr> opens a row, <td>/<th> open an implied row first, section end tags
// and tokens that start a new table part close the current section. A fixed
// list of stray end tags is reported as a parse error and ignored. All other
// tokens are handled by ins_mode_in_table.
//
// t: the token to handle
ins_mode_in_table_body = function (t) {
        var sections = ['tbody', 'tfoot', 'thead']
        var section_closing_starts = ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead']
        var ignored_ends = ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr']
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG

        // Walks the stack of open elements from the current node downwards and
        // reports whether an HTML tbody/tfoot/thead is met before an element
        // listed in table_scopers.
        function section_in_table_scope() {
                var depth, node
                for (depth = 0; depth < open_els.length; ++depth) {
                        node = open_els[depth]
                        if (node.namespace === NS_HTML && sections.indexOf(node.name) !== -1) {
                                return true
                        }
                        if (table_scopers[node.name] === node.namespace) {
                                return false
                        }
                }
                return false
        }

        // Clears the stack back to a table body context, pops the current
        // node and switches to the "in table" mode.
        function close_section() {
                clear_stack_to_table_body_context()
                open_els.shift()
                ins_mode = ins_mode_in_table
        }

        if (is_start && t.name === 'tr') {
                clear_stack_to_table_body_context()
                insert_html_element(t)
                ins_mode = ins_mode_in_row
                return
        }
        if (is_start && (t.name === 'th' || t.name === 'td')) {
                // a cell without a row: act as if <tr> had been seen first
                parse_error()
                clear_stack_to_table_body_context()
                insert_html_element(new_open_tag('tr'))
                ins_mode = ins_mode_in_row
                process_token(t)
                return
        }
        if (is_end && sections.indexOf(t.name) !== -1) {
                if (is_in_table_scope(t.name, NS_HTML)) {
                        close_section()
                } else {
                        parse_error()
                }
                return
        }
        if ((is_start && section_closing_starts.indexOf(t.name) !== -1) || (is_end && t.name === 'table')) {
                if (section_in_table_scope()) {
                        close_section()
                        process_token(t)
                } else {
                        parse_error()
                }
                return
        }
        if (is_end && ignored_ends.indexOf(t.name) !== -1) {
                parse_error()
                return
        }
        // Anything else
        ins_mode_in_table(t)
}
3202
3203         // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
// Insertion mode "in row".
//
// <td>/<th> open a cell, </tr> closes the row, tokens that start another
// table part (or end the table/section) close the row and are reprocessed.
// A fixed list of stray end tags is reported as a parse error and ignored.
// All other tokens are handled by ins_mode_in_table.
//
// t: the token to handle
ins_mode_in_row = function (t) {
        var row_closing_starts = ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr']
        var sections = ['tbody', 'tfoot', 'thead']
        var ignored_ends = ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th']
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG

        // Clears the stack back to a table row context, pops the current node
        // and switches to the "in table body" mode.
        function close_row() {
                clear_stack_to_table_row_context()
                open_els.shift()
                ins_mode = ins_mode_in_table_body
        }

        if (is_start && (t.name === 'th' || t.name === 'td')) {
                clear_stack_to_table_row_context()
                insert_html_element(t)
                ins_mode = ins_mode_in_cell
                afe_push_marker()
                return
        }
        if (is_end && t.name === 'tr') {
                if (is_in_table_scope('tr', NS_HTML)) {
                        close_row()
                } else {
                        parse_error()
                }
                return
        }
        if ((is_start && row_closing_starts.indexOf(t.name) !== -1) || (is_end && t.name === 'table')) {
                if (!is_in_table_scope('tr', NS_HTML)) {
                        parse_error()
                        return
                }
                close_row()
                process_token(t)
                return
        }
        if (is_end && sections.indexOf(t.name) !== -1) {
                if (!is_in_table_scope(t.name, NS_HTML)) {
                        parse_error()
                        return
                }
                // without a tr in table scope the token is silently ignored
                if (is_in_table_scope('tr', NS_HTML)) {
                        close_row()
                        process_token(t)
                }
                return
        }
        if (is_end && ignored_ends.indexOf(t.name) !== -1) {
                parse_error()
                return
        }
        // Anything else
        ins_mode_in_table(t)
}
3253
3254         // http://www.w3.org/TR/html5/syntax.html#close-the-cell
// Closes the innermost open table cell.
//
// Generates implied end tags, reports a parse error when the current node is
// not an HTML td or th, pops open elements up to and including the first
// HTML td/th, clears the active formatting elements back to the last marker
// and switches to the "in row" mode.
close_the_cell = function () {
        var el
        generate_implied_end_tags()
        // Compare the current node's *name* against both cell names; comparing
        // the element object itself with 'th' can never be true and made every
        // closed <th> report a parse error.
        if (!((open_els[0].name === 'td' || open_els[0].name === 'th') && open_els[0].namespace === NS_HTML)) {
                parse_error()
        }
        do {
                el = open_els.shift()
        } while (!(el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')))
        clear_afe_to_marker()
        ins_mode = ins_mode_in_row
}
3270
3271         // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
// Insertion mode "in cell".
//
// </td> and </th> close the matching cell. Table-structure start tags and
// table-structure end tags close the open cell through close_the_cell and
// are reprocessed. A fixed list of stray end tags is reported as a parse
// error and ignored. All other tokens are handled by ins_mode_in_body.
//
// t: the token to handle
ins_mode_in_cell = function (t) {
        var cell_closing_starts = ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
        var ignored_ends = ['body', 'caption', 'col', 'colgroup', 'html']
        var cell_closing_ends = ['table', 'tbody', 'tfoot', 'thead', 'tr']
        var is_start = t.type === TYPE_START_TAG
        var is_end = t.type === TYPE_END_TAG
        var popped

        // Walks the stack of open elements from the current node downwards and
        // reports whether an HTML td/th is met before an element listed in
        // table_scopers.
        function cell_in_table_scope() {
                var depth, node
                for (depth = 0; depth < open_els.length; ++depth) {
                        node = open_els[depth]
                        if (node.namespace === NS_HTML && (node.name === 'td' || node.name === 'th')) {
                                return true
                        }
                        if (table_scopers[node.name] === node.namespace) {
                                return false
                        }
                }
                return false
        }

        if (is_end && (t.name === 'td' || t.name === 'th')) {
                if (!is_in_table_scope(t.name, NS_HTML)) {
                        parse_error()
                        return
                }
                generate_implied_end_tags()
                if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
                        parse_error()
                }
                // pop up to and including the cell named by the end tag
                do {
                        popped = open_els.shift()
                } while (popped.name !== t.name || popped.namespace !== NS_HTML)
                clear_afe_to_marker()
                ins_mode = ins_mode_in_row
                return
        }
        if (is_start && cell_closing_starts.indexOf(t.name) !== -1) {
                if (!cell_in_table_scope()) {
                        parse_error()
                        return
                }
                close_the_cell()
                process_token(t)
                return
        }
        if (is_end && ignored_ends.indexOf(t.name) !== -1) {
                parse_error()
                return
        }
        if (is_end && cell_closing_ends.indexOf(t.name) !== -1) {
                if (is_in_table_scope(t.name, NS_HTML)) {
                        close_the_cell()
                        process_token(t)
                } else {
                        parse_error()
                }
                return
        }
        // Anything Else
        ins_mode_in_body(t)
}
3329
3330         // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
// Insertion mode "in select".
//
// Text and comments are inserted; <option>/<optgroup> implicitly close a
// previously open option/optgroup; </select>, a nested <select> and
// <input>/<keygen>/<textarea> close the select; <script>, <template> and
// </template> are delegated to ins_mode_in_head; EOF is delegated to
// ins_mode_in_body. Everything else is a parse error and is ignored.
//
// t: the token to handle
ins_mode_in_select = function (t) {
        // True when the stack of open elements has, `depth` entries below the
        // current node, an HTML element called `name`.
        function node_is(depth, name) {
                var node = open_els[depth]
                return node != null && node.name === name && node.namespace === NS_HTML
        }

        // Pops open elements up to and including the HTML select.
        function pop_through_select() {
                var el
                do {
                        el = open_els.shift()
                } while (!(el.name === 'select' && el.namespace === NS_HTML))
        }

        if (t.type === TYPE_TEXT) {
                if (t.text === "\u0000") {
                        parse_error()
                } else {
                        insert_character(t)
                }
                return
        }
        if (t.type === TYPE_COMMENT) {
                insert_comment(t)
                return
        }
        if (t.type === TYPE_DOCTYPE) {
                parse_error()
                return
        }
        if (t.type === TYPE_EOF) {
                ins_mode_in_body(t)
                return
        }
        if (t.type === TYPE_START_TAG) {
                switch (t.name) {
                        case 'html':
                                ins_mode_in_body(t)
                        break
                        case 'option':
                                if (node_is(0, 'option')) {
                                        open_els.shift()
                                }
                                insert_html_element(t)
                        break
                        case 'optgroup':
                                if (node_is(0, 'option')) {
                                        open_els.shift()
                                }
                                if (node_is(0, 'optgroup')) {
                                        open_els.shift()
                                }
                                insert_html_element(t)
                        break
                        case 'select':
                                parse_error()
                                pop_through_select()
                                reset_ins_mode()
                                // spec says that this is the same as </select> but it doesn't say
                                // to check scope first
                        break
                        case 'input':
                        case 'keygen':
                        case 'textarea':
                                parse_error()
                                if (is_in_select_scope('select', NS_HTML)) {
                                        pop_through_select()
                                        reset_ins_mode()
                                        process_token(t)
                                }
                        break
                        case 'script':
                        case 'template':
                                ins_mode_in_head(t)
                        break
                        default:
                                // Anything else
                                parse_error()
                }
                return
        }
        if (t.type === TYPE_END_TAG) {
                switch (t.name) {
                        case 'optgroup':
                                // An open option is closed implicitly only when the
                                // node right below it is an HTML optgroup, so both
                                // name and namespace are read from open_els[1].
                                if (node_is(0, 'option') && node_is(1, 'optgroup')) {
                                        open_els.shift()
                                }
                                if (node_is(0, 'optgroup')) {
                                        open_els.shift()
                                } else {
                                        parse_error()
                                }
                        break
                        case 'option':
                                if (node_is(0, 'option')) {
                                        open_els.shift()
                                } else {
                                        parse_error()
                                }
                        break
                        case 'select':
                                if (is_in_select_scope('select', NS_HTML)) {
                                        pop_through_select()
                                        reset_ins_mode()
                                } else {
                                        parse_error()
                                }
                        break
                        case 'template':
                                // </template> follows the "in head" rules just like
                                // <template>; otherwise a template enclosing the
                                // select could not be closed from this mode
                                ins_mode_in_head(t)
                        break
                        default:
                                // Anything else
                                parse_error()
                }
                return
        }
        // Anything else
        parse_error()
}
3444
3445         // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// Insertion mode "in select in table".
//
// Table-structure start tags always close the select and are reprocessed;
// table-structure end tags do so only when the named element is in table
// scope. Both are parse errors. Every other token is handled by
// ins_mode_in_select.
//
// t: the token to handle
ins_mode_in_select_in_table = function (t) {
        var table_parts = ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th']
        var is_end = t.type === TYPE_END_TAG
        var popped
        if ((t.type === TYPE_START_TAG || is_end) && table_parts.indexOf(t.name) !== -1) {
                parse_error()
                if (is_end && !is_in_table_scope(t.name, NS_HTML)) {
                        // nothing to close: ignore the end tag
                        return
                }
                do {
                        popped = open_els.shift()
                } while (!(popped.name === 'select' && popped.namespace === NS_HTML))
                reset_ins_mode()
                process_token(t)
                return
        }
        // Anything else
        ins_mode_in_select(t)
}
3478
3479         // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// Insertion mode "in template".
//
// Text, comments and doctypes are handled by ins_mode_in_body; head-level
// start tags and </template> by ins_mode_in_head. Any other start tag
// replaces the first entry of template_ins_modes with the mode that fits
// the tag, switches to that mode and reprocesses the token. Other end tags
// are parse errors. EOF closes a still-open template and is reprocessed, or
// stops parsing when no template is open.
//
// t: the token to handle
ins_mode_in_template = function (t) {
        var next_mode, popped
        switch (t.type) {
                case TYPE_TEXT:
                case TYPE_COMMENT:
                case TYPE_DOCTYPE:
                        ins_mode_in_body(t)
                break
                case TYPE_START_TAG:
                        switch (t.name) {
                                case 'base':
                                case 'basefont':
                                case 'bgsound':
                                case 'link':
                                case 'meta':
                                case 'noframes':
                                case 'script':
                                case 'style':
                                case 'template':
                                case 'title':
                                        ins_mode_in_head(t)
                                return
                                case 'caption':
                                case 'colgroup':
                                case 'tbody':
                                case 'tfoot':
                                case 'thead':
                                        next_mode = ins_mode_in_table
                                break
                                case 'col':
                                        next_mode = ins_mode_in_column_group
                                break
                                case 'tr':
                                        next_mode = ins_mode_in_table_body
                                break
                                case 'td':
                                case 'th':
                                        next_mode = ins_mode_in_row
                                break
                                default:
                                        next_mode = ins_mode_in_body
                        }
                        // swap the first template insertion mode for the chosen
                        // one, then let that mode handle the token
                        template_ins_modes.shift()
                        template_ins_modes.unshift(next_mode)
                        ins_mode = next_mode
                        process_token(t)
                break
                case TYPE_END_TAG:
                        if (t.name === 'template') {
                                ins_mode_in_head(t)
                        } else {
                                parse_error()
                        }
                break
                case TYPE_EOF:
                        if (!template_tag_is_open()) {
                                stop_parsing()
                                return
                        }
                        parse_error()
                        // pop up to and including the HTML template
                        do {
                                popped = open_els.shift()
                        } while (!(popped.name === 'template' && popped.namespace === NS_HTML))
                        clear_afe_to_marker()
                        template_ins_modes.shift()
                        reset_ins_mode()
                        process_token(t)
                break
        }
}
3547
3548         // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
ins_mode_after_body = function (t) {
    // Tree-construction handler for the "after body" insertion mode.
    // t is the token to process; nothing is returned.
    var last_entry
    // Whitespace is delegated to the "in body" handler.
    if (is_space_tok(t)) {
        ins_mode_in_body(t)
        return
    }
    switch (t.type) {
        case TYPE_COMMENT:
            // The comment is appended as the last child of the last entry in
            // open_els (presumably the root html element -- confirm).
            last_entry = open_els[open_els.length - 1]
            insert_comment(t, [last_entry, last_entry.children.length])
            return
        case TYPE_DOCTYPE:
            parse_error()
            return
        case TYPE_EOF:
            stop_parsing()
            return
        case TYPE_START_TAG:
            if (t.name === 'html') {
                ins_mode_in_body(t)
                return
            }
            break
        case TYPE_END_TAG:
            if (t.name === 'html') {
                if (flag_fragment_parsing) {
                    // fragment case: the token is ignored
                    parse_error()
                } else {
                    ins_mode = ins_mode_after_after_body
                }
                return
            }
            break
    }
    // Anything else: report the error, go back to "in body" and reprocess.
    parse_error()
    ins_mode = ins_mode_in_body
    process_token(t)
}
3585
3586         // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
ins_mode_in_frameset = function (t) {
    // Tree-construction handler for the "in frameset" insertion mode.
    // t is the token to process; nothing is returned.
    if (is_space_tok(t)) {
        insert_character(t)
        return
    }
    if (t.type === TYPE_COMMENT) {
        insert_comment(t)
        return
    }
    if (t.type === TYPE_EOF) {
        // More than one open element at EOF is reported before stopping.
        if (open_els.length !== 1) {
            parse_error()
        }
        stop_parsing()
        return
    }
    if (t.type === TYPE_START_TAG) {
        switch (t.name) {
            case 'html':
                ins_mode_in_body(t)
                return
            case 'frameset':
                insert_html_element(t)
                return
            case 'frame':
                // Inserted and immediately removed from the front of open_els.
                insert_html_element(t)
                open_els.shift()
                t.acknowledge_self_closing()
                return
            case 'noframes':
                ins_mode_in_head(t)
                return
        }
    } else if (t.type === TYPE_END_TAG && t.name === 'frameset') {
        if (open_els.length === 1) {
            parse_error()
            return // fragment case
        }
        open_els.shift()
        // Leave this mode once the front of open_els is no longer a frameset
        // (never when parsing a fragment).
        if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
            ins_mode = ins_mode_after_frameset
        }
        return
    }
    // DOCTYPE tokens and anything else: parse error, token ignored.
    parse_error()
}
3639
3640         // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
ins_mode_after_frameset = function (t) {
    // Tree-construction handler for the "after frameset" insertion mode.
    // t is the token to process; nothing is returned.
    if (is_space_tok(t)) {
        insert_character(t)
        return
    }
    switch (t.type) {
        case TYPE_COMMENT:
            insert_comment(t)
            return
        case TYPE_EOF:
            stop_parsing()
            return
        case TYPE_START_TAG:
            if (t.name === 'html') {
                ins_mode_in_body(t)
                return
            }
            if (t.name === 'noframes') {
                ins_mode_in_head(t)
                return
            }
            break
        case TYPE_END_TAG:
            if (t.name === 'html') {
                ins_mode = ins_mode_after_after_frameset
                return
            }
            break
    }
    // DOCTYPE tokens and anything else: parse error, token ignored.
    parse_error()
}
3673
3674         // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
ins_mode_after_after_body = function (t) {
    // Tree-construction handler for the "after after body" insertion mode.
    // t is the token to process; nothing is returned.
    if (t.type === TYPE_COMMENT) {
        // Comments here become the last child of doc.
        insert_comment(t, [doc, doc.children.length])
    } else if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
        ins_mode_in_body(t)
    } else if (t.type === TYPE_EOF) {
        stop_parsing()
    } else {
        // Anything else: report the error, go back to "in body" and reprocess.
        parse_error()
        ins_mode = ins_mode_in_body
        process_token(t)
    }
}
3693
3694         // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
ins_mode_after_after_frameset = function (t) {
    // Tree-construction handler for the "after after frameset" insertion mode.
    // t is the token to process; nothing is returned.
    var is_start_tag
    if (t.type === TYPE_COMMENT) {
        // Comments here become the last child of doc.
        insert_comment(t, [doc, doc.children.length])
        return
    }
    is_start_tag = t.type === TYPE_START_TAG
    if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (is_start_tag && t.name === 'html')) {
        ins_mode_in_body(t)
    } else if (t.type === TYPE_EOF) {
        stop_parsing()
    } else if (is_start_tag && t.name === 'noframes') {
        ins_mode_in_head(t)
    } else {
        // Anything else: parse error, token ignored.
        parse_error()
    }
}
3716
3717         // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
has_color_face_or_size = function (t) {
    // Returns true when any entry of t.attrs_a has 'color', 'face' or 'size'
    // as its first element (the attribute name), false otherwise.
    var attrs = t.attrs_a
    var idx = 0
    while (idx < attrs.length) {
        switch (attrs[idx][0]) {
            case 'color':
            case 'face':
            case 'size':
                return true
        }
        idx += 1
    }
    return false
}
in_foreign_content_end_script = function () {
    // Handles the end of a script element in foreign content by removing the
    // front entry of open_els; no other step is performed here.
    // fixfull (marker kept from the original; presumably the remaining
    // script-processing steps are still to be implemented -- confirm)
    open_els.shift()
}
in_foreign_content_other_start = function (t) {
    // Handles "any other start tag" while in foreign content: adjusts the
    // token for the namespace of the adjusted current node, inserts it as a
    // foreign element in that namespace, and pops it again right away when
    // the token is flagged self-closing.
    // t is the start tag token (mutated in place); nothing is returned.
    var acn
    acn = adjusted_current_node()
    if (acn.namespace === NS_MATHML) {
        adjust_mathml_attributes(t)
    }
    if (acn.namespace === NS_SVG) {
        // Only own entries of svg_name_fixes count. A plain property lookup
        // would also match inherited Object.prototype members, so a tag such
        // as <constructor> would get a function assigned as its name.
        if (Object.prototype.hasOwnProperty.call(svg_name_fixes, t.name) && svg_name_fixes[t.name] != null) {
            t.name = svg_name_fixes[t.name]
        }
        adjust_svg_attributes(t)
    }
    adjust_foreign_attributes(t)
    insert_foreign_element(t, acn.namespace)
    if (t.flag('self-closing')) {
        if (t.name === 'script') {
            t.acknowledge_self_closing()
            in_foreign_content_end_script()
            // fixfull
        } else {
            // Remove the element that was just inserted from the front of open_els.
            open_els.shift()
            t.acknowledge_self_closing()
        }
    }
}
in_foreign_content = function (t) {
    // Tree-construction rules for tokens processed in foreign content.
    // t is the token to process; nothing is returned.
    var breakout_names, is_space, depth, candidate, popped

    // --- character tokens ---
    if (t.type === TYPE_TEXT && t.text === "\u0000") {
        parse_error()
        insert_character(new_character_token("\ufffd"))
        return
    }
    is_space = is_space_tok(t)
    if (is_space || t.type === TYPE_TEXT) {
        // Only non-whitespace text clears the frameset-ok flag.
        if (!is_space) {
            flag_frameset_ok = false
        }
        insert_character(t)
        return
    }

    if (t.type === TYPE_COMMENT) {
        insert_comment(t)
        return
    }
    if (t.type === TYPE_DOCTYPE) {
        parse_error()
        return
    }

    // --- start tags ---
    if (t.type === TYPE_START_TAG) {
        // Start tags that break out of foreign content ('font' only when it
        // carries a color, face or size attribute).
        breakout_names = [
            'b', 'big', 'blockquote', 'body', 'br', 'center', 'code', 'dd',
            'div', 'dl', 'dt', 'em', 'embed', 'h1', 'h2', 'h3', 'h4', 'h5',
            'h6', 'head', 'hr', 'i', 'img', 'li', 'listing', 'main', 'meta',
            'nobr', 'ol', 'p', 'pre', 'ruby', 's', 'small', 'span', 'strong',
            'strike', 'sub', 'sup', 'table', 'tt', 'u', 'ul', 'var'
        ]
        if (breakout_names.indexOf(t.name) !== -1 || (t.name === 'font' && has_color_face_or_size(t))) {
            parse_error()
            if (flag_fragment_parsing) {
                in_foreign_content_other_start(t)
                return
            }
            // Pop at least once, then keep popping until the front of open_els
            // is an integration point or an HTML-namespace element.
            do {
                open_els.shift()
            } while (!(is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML))
            process_token(t)
            return
        }
        in_foreign_content_other_start(t)
        return
    }

    // --- end tags (every other token type is left untouched) ---
    if (t.type !== TYPE_END_TAG) {
        return
    }
    if (t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
        in_foreign_content_end_script()
        return
    }
    depth = 0
    candidate = open_els[depth]
    if (candidate.name.toLowerCase() !== t.name) {
        parse_error()
    }
    // Walk from the front of open_els towards its last entry, looking for an
    // element whose lowercased name matches the end tag.
    do {
        if (candidate === open_els[open_els.length - 1]) {
            return
        }
        if (candidate.name.toLowerCase() === t.name) {
            // Pop everything up to and including the matching element.
            do {
                popped = open_els.shift()
            } while (popped !== candidate)
            return
        }
        depth += 1
        candidate = open_els[depth]
    } while (candidate.namespace !== NS_HTML)
    ins_mode(t) // explicitly call HTML insertion mode
}
3831
3832
3833         // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
tok_state_data = function () {
    // Tokenizer "data" state: consumes one character of txt at cur.
    // Returns the token to emit, or null when only the state changed.
    var ch = txt.charAt(cur++)
    if (ch === '<') {
        tok_state = tok_state_tag_open
        return null
    }
    if (ch === '') { // EOF
        return new_eof_token()
    }
    if (ch === '&') {
        return new_text_node(parse_character_reference())
    }
    if (ch === "\u0000") {
        // NUL is reported but still emitted unchanged in this state.
        parse_error()
    }
    return new_text_node(ch)
}
3855
3856         // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3857         // not needed: tok_state_character_reference_in_data = function () {
3858         // just call parse_character_reference()
3859
3860         // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
tok_state_rcdata = function () {
    // Tokenizer "RCDATA" state: consumes one character of txt at cur.
    // Returns the token to emit, or null when only the state changed.
    var ch = txt.charAt(cur++)
    if (ch === '<') {
        tok_state = tok_state_rcdata_less_than_sign
        return null
    }
    if (ch === '') { // EOF
        return new_eof_token()
    }
    if (ch === '&') {
        return new_text_node(parse_character_reference())
    }
    if (ch === "\u0000") {
        // NUL is replaced by U+FFFD in this state.
        parse_error()
        return new_character_token("\ufffd")
    }
    return new_character_token(ch)
}
3882
3883         // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3884         // not needed: tok_state_character_reference_in_rcdata = function () {
3885         // just call parse_character_reference()
3886
3887         // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
tok_state_rawtext = function () {
    // Tokenizer "RAWTEXT" state: consumes one character of txt at cur.
    // Returns the token to emit, or null when only the state changed.
    var ch = txt.charAt(cur++)
    if (ch === '<') {
        tok_state = tok_state_rawtext_less_than_sign
        return null
    }
    if (ch === '') { // EOF
        return new_eof_token()
    }
    if (ch === "\u0000") {
        // NUL is replaced by U+FFFD in this state.
        parse_error()
        return new_character_token("\ufffd")
    }
    return new_character_token(ch)
}
3906
3907         // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
tok_state_script_data = function () {
    // Tokenizer "script data" state: consumes one character of txt at cur.
    // Returns the token to emit, or null when only the state changed.
    var ch = txt.charAt(cur++)
    if (ch === '<') {
        tok_state = tok_state_script_data_less_than_sign
        return null
    }
    if (ch === '') { // EOF
        return new_eof_token()
    }
    if (ch === "\u0000") {
        // NUL is replaced by U+FFFD in this state.
        parse_error()
        return new_character_token("\ufffd")
    }
    return new_character_token(ch)
}
3926
3927         // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
tok_state_plaintext = function () {
    // Tokenizer "PLAINTEXT" state: consumes one character of txt at cur and
    // always returns a token; this state never switches to another one.
    var ch = txt.charAt(cur++)
    if (ch === '') { // EOF
        return new_eof_token()
    }
    if (ch === "\u0000") {
        // NUL is replaced by U+FFFD in this state.
        parse_error()
        return new_character_token("\ufffd")
    }
    return new_character_token(ch)
}
3943
3944         // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
tok_state_tag_open = function () {
    // Tokenizer "tag open" state (the character after '<').
    // Returns a '<' text node when the character cannot start a tag;
    // every other branch only updates tokenizer state and returns undefined.
    var ch = txt.charAt(cur++)
    var first_letter = null
    switch (ch) {
        case '!':
            tok_state = tok_state_markup_declaration_open
            return
        case '/':
            tok_state = tok_state_end_tag_open
            return
    }
    // Tag names are stored lowercased.
    if (is_uc_alpha(ch)) {
        first_letter = ch.toLowerCase()
    } else if (is_lc_alpha(ch)) {
        first_letter = ch
    }
    if (first_letter !== null) {
        tok_cur_tag = new_open_tag(first_letter)
        tok_state = tok_state_tag_name
        return
    }
    if (ch === '?') {
        parse_error()
        tok_cur_tag = new_comment_token('?') // FIXME right?
        tok_state = tok_state_bogus_comment
        return
    }
    // Anything else: emit the '<' as text and reconsume this character
    // in the data state.
    parse_error()
    tok_state = tok_state_data
    cur -= 1
    return new_text_node('<')
}
3978
3979         // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
tok_state_end_tag_open = function () {
    // Tokenizer "end tag open" state (the character after '</').
    // Returns a '</' text node at EOF, null when a bogus comment is started,
    // and undefined in every other branch.
    var ch = txt.charAt(cur++)
    var first_letter = null
    // Tag names are stored lowercased.
    if (is_uc_alpha(ch)) {
        first_letter = ch.toLowerCase()
    } else if (is_lc_alpha(ch)) {
        first_letter = ch
    }
    if (first_letter !== null) {
        tok_cur_tag = new_end_tag(first_letter)
        tok_state = tok_state_tag_name
        return
    }
    switch (ch) {
        case '>':
            // '</>' produces no token at all.
            parse_error()
            tok_state = tok_state_data
            return
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            return new_text_node('</')
    }
    // Anything else starts a bogus comment seeded with this character.
    parse_error()
    tok_cur_tag = new_comment_token(ch)
    tok_state = tok_state_bogus_comment
    return null
}
4009
4010         // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
tok_state_tag_name = function () {
    // Tokenizer "tag name" state: extends tok_cur_tag.name one character at a
    // time. Returns the finished tag token on '>', otherwise null.
    var ch = txt.charAt(cur++)
    var finished, piece
    if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
        tok_state = tok_state_before_attribute_name
        return null
    }
    if (ch === '/') {
        tok_state = tok_state_self_closing_start_tag
        return null
    }
    if (ch === '>') {
        // Hand the tag over and clear the tokenizer's reference to it.
        tok_state = tok_state_data
        finished = tok_cur_tag
        tok_cur_tag = null
        return finished
    }
    if (ch === "\u0000") {
        parse_error()
        tok_cur_tag.name += "\ufffd"
        return null
    }
    if (ch === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        return null
    }
    // Uppercase ASCII letters are folded to lowercase; anything else is kept.
    piece = is_uc_alpha(ch) ? ch.toLowerCase() : ch
    tok_cur_tag.name += piece
    return null
}
4046
4047         // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
tok_state_rcdata_less_than_sign = function () {
    // Tokenizer "RCDATA less-than sign" state (the character after '<').
    // Returns null when an end tag may be starting, otherwise a '<'
    // character token.
    if (txt.charAt(cur) === '/') {
        cur += 1
        temporary_buffer = ''
        tok_state = tok_state_rcdata_end_tag_open
        return null
    }
    // Anything else: the character is left unconsumed for the RCDATA state.
    tok_state = tok_state_rcdata
    return new_character_token('<')
}
4061
4062         // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
tok_state_rcdata_end_tag_open = function () {
    // Tokenizer "RCDATA end tag open" state (the character after '</').
    // Returns null when an end tag name has started, otherwise a '</'
    // character token.
    var ch = txt.charAt(cur++)
    var first_letter = null
    if (is_uc_alpha(ch)) {
        first_letter = ch.toLowerCase()
    } else if (is_lc_alpha(ch)) {
        first_letter = ch
    }
    if (first_letter === null) {
        // Anything else: back to RCDATA, reconsuming this character.
        tok_state = tok_state_rcdata
        cur -= 1
        return new_character_token("</") // fixfull separate these
    }
    // The tag name is lowercased; the buffer keeps the character as typed.
    tok_cur_tag = new_end_tag(first_letter)
    temporary_buffer += ch
    tok_state = tok_state_rcdata_end_tag_name
    return null
}
4083
4084         // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
is_appropriate_end_tag = function (t) {
    // Returns true when t is an end tag token whose name equals the name of
    // the front entry of open_els.
    // fixfull: this assumes that open_els[0].name is "the tag name of the last
    // start tag to have been emitted from this tokenizer"
    if (t.type !== TYPE_END_TAG) {
        return false
    }
    return t.name === open_els[0].name
}
4090
4091         // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
tok_state_rcdata_end_tag_name = function () {
    // Tokenizer "RCDATA end tag name" state.
    // Returns the end tag token on '>' when it is the appropriate end tag,
    // undefined when whitespace or '/' follows an appropriate end tag, null
    // while the name is still being read, and otherwise a character token
    // holding '</' plus the buffered characters.
    var ch = txt.charAt(cur++)
    var is_ws = ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' '
    // Whitespace, '/' and '>' only end the tag when it is the appropriate end
    // tag; otherwise they drop through to "Anything else" below.
    if ((is_ws || ch === '/' || ch === '>') && is_appropriate_end_tag(tok_cur_tag)) {
        if (is_ws) {
            tok_state = tok_state_before_attribute_name
            return
        }
        if (ch === '/') {
            tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
            return
        }
        tok_state = tok_state_data
        return tok_cur_tag
    }
    // Letters extend the (lowercased) tag name and the raw buffer.
    if (is_uc_alpha(ch)) {
        tok_cur_tag.name += ch.toLowerCase()
        temporary_buffer += ch
        return null
    }
    if (is_lc_alpha(ch)) {
        tok_cur_tag.name += ch
        temporary_buffer += ch
        return null
    }
    // Anything else: back to RCDATA, reconsuming this character.
    tok_state = tok_state_rcdata
    cur -= 1
    return new_character_token('</' + temporary_buffer) // fixfull separate these
}
4131
4132         // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
tok_state_rawtext_less_than_sign = function () {
    // Tokenizer "RAWTEXT less-than sign" state (the character after '<').
    // Returns null when an end tag may be starting, otherwise a '<'
    // character token.
    if (txt.charAt(cur) === '/') {
        cur += 1
        temporary_buffer = ''
        tok_state = tok_state_rawtext_end_tag_open
        return null
    }
    // Anything else: the character is left unconsumed for the RAWTEXT state.
    tok_state = tok_state_rawtext
    return new_character_token('<')
}
4146
4147         // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
tok_state_rawtext_end_tag_open = function () {
    // Tokenizer "RAWTEXT end tag open" state (the character after '</').
    // Returns null when an end tag name has started, otherwise a '</'
    // character token.
    // c is declared locally: without the declaration the assignment below
    // wrote to an implicit global, which throws in strict mode.
    var c
    c = txt.charAt(cur++)
    if (is_uc_alpha(c)) {
        // The tag name is lowercased; the buffer keeps the character as typed.
        tok_cur_tag = new_end_tag(c.toLowerCase())
        temporary_buffer += c
        tok_state = tok_state_rawtext_end_tag_name
        return null
    }
    if (is_lc_alpha(c)) {
        tok_cur_tag = new_end_tag(c)
        temporary_buffer += c
        tok_state = tok_state_rawtext_end_tag_name
        return null
    }
    // Anything else
    tok_state = tok_state_rawtext
    cur -= 1 // reconsume the input character
    return new_character_token("</") // fixfull separate these
}
4167
4168         // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
tok_state_rawtext_end_tag_name = function () {
    // Tokenizer "RAWTEXT end tag name" state.
    // Returns the end tag token on '>' when it is the appropriate end tag,
    // undefined when whitespace or '/' follows an appropriate end tag, null
    // while the name is still being read, and otherwise a character token
    // holding '</' plus the buffered characters.
    var ch = txt.charAt(cur++)
    var is_ws = ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' '
    // Whitespace, '/' and '>' only end the tag when it is the appropriate end
    // tag; otherwise they drop through to "Anything else" below.
    if ((is_ws || ch === '/' || ch === '>') && is_appropriate_end_tag(tok_cur_tag)) {
        if (is_ws) {
            tok_state = tok_state_before_attribute_name
            return
        }
        if (ch === '/') {
            tok_state = tok_state_self_closing_start_tag
            return
        }
        tok_state = tok_state_data
        return tok_cur_tag
    }
    // Letters extend the (lowercased) tag name and the raw buffer.
    if (is_uc_alpha(ch)) {
        tok_cur_tag.name += ch.toLowerCase()
        temporary_buffer += ch
        return null
    }
    if (is_lc_alpha(ch)) {
        tok_cur_tag.name += ch
        temporary_buffer += ch
        return null
    }
    // Anything else: back to RAWTEXT, reconsuming this character.
    tok_state = tok_state_rawtext
    cur -= 1
    return new_character_token('</' + temporary_buffer) // fixfull separate these
}
4208
4209         // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
// Script data less-than sign state: decides what a "<" inside script data
// starts. "/" begins a possible end tag, "!" begins a possible escape, and
// anything else makes the "<" plain text.
tok_state_script_data_less_than_sign = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '/':
                        temporary_buffer = ''
                        tok_state = tok_state_script_data_end_tag_open
                        return
                case '!':
                        tok_state = tok_state_script_data_escape_start
                        return new_character_token('<!') // fixfull split
                default:
                        // Anything else: step back so script data reads this character again
                        tok_state = tok_state_script_data
                        cur -= 1
                        return new_character_token('<')
        }
}
4227
4228         // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
// Script data end tag open state: an ASCII letter starts a candidate end tag,
// anything else hands the pending "</" back as a character token.
tok_state_script_data_end_tag_open = function () {
        var ch = txt.charAt(cur++)
        var upper = is_uc_alpha(ch)
        if (!upper && !is_lc_alpha(ch)) {
                // Anything else: step back so script data reads this character again
                tok_state = tok_state_script_data
                cur -= 1
                return new_character_token('</')
        }
        // the tag name is lowercased, the buffer keeps the character as written
        tok_cur_tag = new_end_tag(upper ? ch.toLowerCase() : ch)
        temporary_buffer += ch
        tok_state = tok_state_script_data_end_tag_name
        return
}
4249
4250         // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
// Script data end tag name state: collects the name of a candidate end tag. The
// tag is only accepted when is_appropriate_end_tag() approves it; otherwise
// everything consumed since "</" is handed back as one character token.
tok_state_script_data_end_tag_name = function () {
        var ch = txt.charAt(cur++)
        var upper
        switch (ch) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_before_attribute_name
                                return
                        }
                        break // not appropriate: handled as "Anything else"
                case '/':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_self_closing_start_tag
                                return
                        }
                        break // not appropriate: handled as "Anything else"
                case '>':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_data
                                return tok_cur_tag
                        }
                        break // not appropriate: handled as "Anything else"
        }
        upper = is_uc_alpha(ch)
        if (upper || is_lc_alpha(ch)) {
                // the tag name is lowercased, the buffer keeps the character as written
                tok_cur_tag.name += upper ? ch.toLowerCase() : ch
                temporary_buffer += ch
                return
        }
        // Anything else: back to script data, where this character is read again
        tok_state = tok_state_script_data
        cur -= 1
        return new_character_token("</" + temporary_buffer) // fixfull split
}
4290
4291         // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
// Script data escape start state: a "-" moves on to the escape-start-dash
// state and is emitted as text; any other character is left for the script
// data state.
tok_state_script_data_escape_start = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '-') {
                // Anything else: step back so script data reads this character again
                tok_state = tok_state_script_data
                cur -= 1
                return
        }
        tok_state = tok_state_script_data_escape_start_dash
        return new_character_token('-')
}
4303
4304         // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
// Script data escape start dash state: a second "-" enters the escaped
// dash-dash state and is emitted as text; any other character is left for the
// script data state.
tok_state_script_data_escape_start_dash = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '-') {
                // Anything else: step back so script data reads this character again
                tok_state = tok_state_script_data
                cur -= 1
                return
        }
        tok_state = tok_state_script_data_escaped_dash_dash
        return new_character_token('-')
}
4316
4317         // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
// Script data escaped state: emits characters one at a time, watching for "-"
// and "<", replacing NUL with U+FFFD and bailing out to the data state at EOF.
tok_state_script_data_escaped = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_escaped_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case "\u0000":
                        parse_error()
                        return new_character_token("\ufffd")
                case '': // EOF
                        tok_state = tok_state_data
                        parse_error()
                        cur -= 1 // the data state gets to see EOF again
                        return
        }
        // Anything else
        return new_character_token(ch)
}
4342
4343         // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
// Script data escaped dash state: one "-" has been seen inside escaped script
// data. Another "-" moves to dash-dash, "<" may start a tag, and every other
// character drops back to the plain escaped state.
tok_state_script_data_escaped_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_escaped_dash_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        tok_state = tok_state_data
                        parse_error()
                        cur -= 1 // the data state gets to see EOF again
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        return new_character_token(ch)
}
4370
4371         // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
// Script data escaped dash dash state: "--" has been seen inside escaped
// script data. ">" closes the escape and returns to script data, further "-"
// characters stay here, and other characters drop back to the escaped state.
tok_state_script_data_escaped_dash_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_escaped_less_than_sign
                        return
                case '>':
                        tok_state = tok_state_script_data
                        return new_character_token('>')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // the data state gets to see EOF again
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        return new_character_token(ch)
}
4401
4402         // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
// Script data escaped less-than sign state: "/" begins a possible end tag, an
// ASCII letter begins a possible double escape (the "<" and the letter are
// emitted as text), and anything else makes the "<" plain text.
tok_state_script_data_escaped_less_than_sign = function () {
        var ch = txt.charAt(cur++)
        var upper
        if (ch === '/') {
                temporary_buffer = ''
                tok_state = tok_state_script_data_escaped_end_tag_open
                return
        }
        upper = is_uc_alpha(ch)
        if (upper || is_lc_alpha(ch)) {
                // the buffer restarts with the lowercased letter (yes, really);
                // the emitted text keeps the letter as written
                temporary_buffer = upper ? ch.toLowerCase() : ch
                tok_state = tok_state_script_data_double_escape_start
                return new_character_token("<" + ch) // fixfull split
        }
        // Anything else: step back so the escaped state reads this character again
        tok_state = tok_state_script_data_escaped
        cur -= 1
        return new_character_token('<')
}
4426
4427         // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
// Script data escaped end tag open state: an ASCII letter starts a candidate
// end tag, anything else hands the pending "</" back as a character token.
tok_state_script_data_escaped_end_tag_open = function () {
        var ch = txt.charAt(cur++)
        var upper = is_uc_alpha(ch)
        if (!upper && !is_lc_alpha(ch)) {
                // Anything else: step back so the escaped state reads this character again
                tok_state = tok_state_script_data_escaped
                cur -= 1
                return new_character_token('</') // fixfull split
        }
        // the tag name is lowercased, the buffer keeps the character as written
        tok_cur_tag = new_end_tag(upper ? ch.toLowerCase() : ch)
        temporary_buffer += ch
        tok_state = tok_state_script_data_escaped_end_tag_name
        return
}
4448
4449         // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
// Script data escaped end tag name state: collects the name of a candidate end
// tag. The tag is only accepted when is_appropriate_end_tag() approves it;
// otherwise "</" plus the temporary buffer is handed back as one character
// token.
//
// Returns the finished tag token on an accepted ">", the character token in the
// "Anything else" case, and undefined while still collecting or switching state.
tok_state_script_data_escaped_end_tag_name = function () {
        var c, is_upper
        c = txt.charAt(cur++)
        switch (c) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_before_attribute_name
                                return
                        }
                        break // not appropriate: handled as "Anything else"
                case '/':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_self_closing_start_tag
                                return
                        }
                        break // not appropriate: handled as "Anything else"
                case '>':
                        if (is_appropriate_end_tag(tok_cur_tag)) {
                                tok_state = tok_state_data
                                return tok_cur_tag
                        }
                        break // not appropriate: handled as "Anything else"
        }
        is_upper = is_uc_alpha(c)
        if (is_upper || is_lc_alpha(c)) {
                tok_cur_tag.name += is_upper ? c.toLowerCase() : c
                // The buffer must keep the character exactly as written: it is
                // re-emitted as text below when the tag turns out not to be an
                // end tag, and lowercasing it here would alter that text.
                temporary_buffer += c
                return
        }
        // Anything else
        tok_state = tok_state_script_data_escaped
        cur -= 1 // reconsume
        return new_character_token("</" + temporary_buffer) // fixfull split
}
4489
4490         // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
// Script data double escape start state: gathers letters after "<" into the
// temporary buffer. When the word ends, "script" enters the double escaped
// state and any other word returns to the escaped state. Consumed characters
// are emitted as text.
tok_state_script_data_double_escape_start = function () {
        var ch = txt.charAt(cur++)
        var upper
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                case '/':
                case '>':
                        tok_state = (temporary_buffer === 'script')
                                ? tok_state_script_data_double_escaped
                                : tok_state_script_data_escaped
                        return new_character_token(ch)
        }
        upper = is_uc_alpha(ch)
        if (upper || is_lc_alpha(ch)) {
                // the buffer is lowercased (yes, really); the emitted text is not
                temporary_buffer += upper ? ch.toLowerCase() : ch
                return new_character_token(ch)
        }
        // Anything else: step back so the escaped state reads this character again
        tok_state = tok_state_script_data_escaped
        cur -= 1
}
4514
4515         // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
// Script data double escaped state: emits characters one at a time, watching
// for "-" and "<", replacing NUL with U+FFFD and bailing out to the data state
// at EOF.
tok_state_script_data_double_escaped = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_double_escaped_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case "\u0000":
                        parse_error()
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // the data state gets to see EOF again
                        return
        }
        // Anything else
        return new_character_token(ch)
}
4540
4541         // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
// Script data double escaped dash state: one "-" has been seen. Another "-"
// moves to dash-dash, "<" may start the closing tag, and every other character
// drops back to the plain double escaped state.
tok_state_script_data_double_escaped_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        tok_state = tok_state_script_data_double_escaped_dash_dash
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // the data state gets to see EOF again
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_double_escaped
        return new_character_token(ch)
}
4568
4569         // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
// Script data double escaped dash dash state: "--" has been seen. ">" returns
// to script data, further "-" characters stay here, and other characters drop
// back to the double escaped state.
tok_state_script_data_double_escaped_dash_dash = function () {
        var ch = txt.charAt(cur++)
        switch (ch) {
                case '-':
                        return new_character_token('-')
                case '<':
                        tok_state = tok_state_script_data_double_escaped_less_than_sign
                        return new_character_token('<')
                case '>':
                        tok_state = tok_state_script_data
                        return new_character_token('>')
                case "\u0000":
                        parse_error()
                        tok_state = tok_state_script_data_double_escaped
                        return new_character_token("\ufffd")
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 // the data state gets to see EOF again
                        return
        }
        // Anything else
        tok_state = tok_state_script_data_double_escaped
        return new_character_token(ch)
}
4599
4600         // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
// Script data double escaped less-than sign state: "/" clears the temporary
// buffer and starts looking for the end of the double escape; any other
// character is left for the double escaped state.
tok_state_script_data_double_escaped_less_than_sign = function () {
        var ch = txt.charAt(cur++)
        if (ch !== '/') {
                // Anything else: step back so this character is read again
                tok_state = tok_state_script_data_double_escaped
                cur -= 1
                return
        }
        temporary_buffer = ''
        tok_state = tok_state_script_data_double_escape_end
        return new_character_token('/')
}
4613
4614         // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
// Script data double escape end state: gathers letters after "</" into the
// temporary buffer. When the word ends, "script" drops back to the escaped
// state and any other word stays double escaped. Consumed characters are
// emitted as text.
tok_state_script_data_double_escape_end = function () {
        var ch = txt.charAt(cur++)
        var upper
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                case '/':
                case '>':
                        tok_state = (temporary_buffer === 'script')
                                ? tok_state_script_data_escaped
                                : tok_state_script_data_double_escaped
                        return new_character_token(ch)
        }
        upper = is_uc_alpha(ch)
        if (upper || is_lc_alpha(ch)) {
                // the buffer is lowercased (yes, really); the emitted text is not
                temporary_buffer += upper ? ch.toLowerCase() : ch
                return new_character_token(ch)
        }
        // Anything else: step back so this character is read again
        tok_state = tok_state_script_data_double_escaped
        cur -= 1
}
4638
4639         // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
// Before attribute name state: skips whitespace inside a tag, finishes the tag
// on ">", or starts a new attribute whose name begins with the consumed
// character. Returns the finished tag token on ">", null otherwise.
tok_state_before_attribute_name = function () {
        var ch = txt.charAt(cur++)
        var first_char, finished_tag
        if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
                return null
        }
        if (ch === '/') {
                tok_state = tok_state_self_closing_start_tag
                return null
        }
        if (ch === '>') {
                tok_state = tok_state_data
                finished_tag = tok_cur_tag
                tok_cur_tag = null
                return finished_tag
        }
        if (ch === '') { // EOF
                // cur is left one past the end here (no reconsume)
                parse_error()
                tok_state = tok_state_data
                return null
        }
        if (ch === "\u0000") {
                parse_error()
                first_char = "\ufffd"
        } else if (ch === '"' || ch === "'" || ch === '<' || ch === '=') {
                // still becomes the first character of the name
                parse_error()
                first_char = ch
        } else if (is_uc_alpha(ch)) {
                first_char = ch.toLowerCase()
        } else {
                first_char = ch
        }
        // the new [name, value] pair goes on the front of the attribute list
        tok_cur_tag.attrs_a.unshift([first_char, ''])
        tok_state = tok_state_attribute_name
        return null
}
4688
4689         // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
// Attribute name state: extends the name of the attribute at the front of
// tok_cur_tag.attrs_a until a delimiter is reached. Returns the finished tag
// token on ">", null otherwise.
tok_state_attribute_name = function () {
        var ch = txt.charAt(cur++)
        var addition, finished_tag
        if (ch === "\t" || ch === "\n" || ch === "\u000c" || ch === ' ') {
                tok_state = tok_state_after_attribute_name
                return null
        }
        if (ch === '/') {
                tok_state = tok_state_self_closing_start_tag
                return null
        }
        if (ch === '=') {
                tok_state = tok_state_before_attribute_value
                return null
        }
        if (ch === '>') {
                tok_state = tok_state_data
                finished_tag = tok_cur_tag
                tok_cur_tag = null
                return finished_tag
        }
        if (ch === '') { // EOF
                // cur is left one past the end here (no reconsume)
                parse_error()
                tok_state = tok_state_data
                return null
        }
        if (ch === "\u0000") {
                parse_error()
                addition = "\ufffd"
        } else if (ch === '"' || ch === "'" || ch === '<') {
                // still appended to the name
                parse_error()
                addition = ch
        } else if (is_uc_alpha(ch)) {
                addition = ch.toLowerCase()
        } else {
                addition = ch
        }
        tok_cur_tag.attrs_a[0][0] += addition
        return null
}
4734
4735         // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
// After attribute name state: skips whitespace after a name, then either
// switches to the value ("="), finishes the tag (">"), or starts a new
// attribute whose name begins with the consumed character. Returns the tag
// token on ">", undefined otherwise.
tok_state_after_attribute_name = function () {
        var ch = txt.charAt(cur++)
        var first_char
        switch (ch) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        return
                case '/':
                        tok_state = tok_state_self_closing_start_tag
                        return
                case '=':
                        tok_state = tok_state_before_attribute_value
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
        }
        if (is_uc_alpha(ch)) {
                first_char = ch.toLowerCase()
        } else if (ch === "\u0000") {
                parse_error()
                first_char = "\ufffd"
        } else if (ch === '') { // EOF
                parse_error()
                tok_state = tok_state_data
                cur -= 1 // the data state gets to see EOF again
                return
        } else {
                if (ch === '"' || ch === "'" || ch === '<') {
                        // still becomes the first character of the name
                        parse_error()
                }
                first_char = ch
        }
        // the new [name, value] pair goes on the front of the attribute list
        tok_cur_tag.attrs_a.unshift([first_char, ''])
        tok_state = tok_state_attribute_name
}
4779
4780         // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Before attribute value state: skips whitespace after "=", then picks the
// quoted or unquoted value state. Characters that begin an unquoted value are
// appended to the value of the attribute at the front of tok_cur_tag.attrs_a.
//
// Returns the finished tag token on ">", null otherwise.
//
// Every case the spec marks as a parse error calls parse_error(), the way the
// EOF branch always has; NUL and ">" previously only carried a comment.
tok_state_before_attribute_value = function () {
        var c, tmp
        switch (c = txt.charAt(cur++)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                        return null
                case '"':
                        tok_state = tok_state_attribute_value_double_quoted
                break
                case '&':
                        tok_state = tok_state_attribute_value_unquoted
                        cur -= 1 // the unquoted state handles the "&" itself
                break
                case "'":
                        tok_state = tok_state_attribute_value_single_quoted
                break
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                        tok_state = tok_state_attribute_value_unquoted
                break
                case '>':
                        // missing value: the tag is emitted as it stands
                        parse_error()
                        tok_state = tok_state_data
                        tmp = tok_cur_tag
                        tok_cur_tag = null
                        return tmp
                case '': // EOF
                        // NOTE(review): cur is not stepped back here, unlike the
                        // script data states; charAt() past the end still
                        // returns '' so the next state should see EOF as well
                        parse_error()
                        tok_state = tok_state_data
                break
                case '<':
                case '=':
                case '`':
                        parse_error()
                        // fall through to Anything else
                default:
                        tok_cur_tag.attrs_a[0][1] += c
                        tok_state = tok_state_attribute_value_unquoted
        }
        return null
}
4822
4823         // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
// Attribute value (double-quoted) state: appends characters to the value of
// the attribute at the front of tok_cur_tag.attrs_a until the closing '"'.
// Always returns null.
//
// The NUL branch calls parse_error() like the EOF branch does; it previously
// only carried a "Parse error" comment.
tok_state_attribute_value_double_quoted = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case '"':
                        tok_state = tok_state_after_attribute_value_quoted
                break
                case '&':
                        // whatever parse_character_reference() returns is appended to the value
                        tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
                break
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                break
                case '': // EOF
                        // NOTE(review): cur is not stepped back here; charAt()
                        // past the end still returns '' so the next state
                        // should see EOF as well
                        parse_error()
                        tok_state = tok_state_data
                break
                default:
                        tok_cur_tag.attrs_a[0][1] += c
        }
        return null
}
4846
4847         // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Attribute value (single-quoted) state: appends characters to the value of
// the attribute at the front of tok_cur_tag.attrs_a until the closing "'".
// Always returns null.
//
// The NUL branch calls parse_error() like the EOF branch does; it previously
// only carried a "Parse error" comment.
tok_state_attribute_value_single_quoted = function () {
        var c
        switch (c = txt.charAt(cur++)) {
                case "'":
                        tok_state = tok_state_after_attribute_value_quoted
                break
                case '&':
                        // whatever parse_character_reference() returns is appended to the value
                        tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
                break
                case "\u0000":
                        parse_error()
                        tok_cur_tag.attrs_a[0][1] += "\ufffd"
                break
                case '': // EOF
                        // NOTE(review): cur is not stepped back here; charAt()
                        // past the end still returns '' so the next state
                        // should see EOF as well
                        parse_error()
                        tok_state = tok_state_data
                break
                default:
                        tok_cur_tag.attrs_a[0][1] += c
        }
        return null
}
4870
4871         // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
4872         tok_state_attribute_value_unquoted = function () {
4873                 var c, tmp
4874                 switch (c = txt.charAt(cur++)) {
4875                         case "\t":
4876                         case "\n":
4877                         case "\u000c":
4878                         case ' ':
4879                                 tok_state = tok_state_before_attribute_name
4880                         break
4881                         case '&':
4882                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
4883                         break
4884                         case '>':
4885                                 tok_state = tok_state_data
4886                                 tmp = tok_cur_tag
4887                                 tok_cur_tag = null
4888                                 return tmp
4889                         break
4890                         case "\u0000":
4891                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4892                         break
4893                         case '': // EOF
4894                                 parse_error()
4895                                 tok_state = tok_state_data
4896                         break
4897                         default:
4898                                 // Parse Error if ', <, = or ` (backtick)
4899                                 tok_cur_tag.attrs_a[0][1] += c
4900                 }
4901                 return null
4902         }
4903
4904         // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
4905         tok_state_after_attribute_value_quoted = function () {
4906                 var c, tmp
4907                 switch (c = txt.charAt(cur++)) {
4908                         case "\t":
4909                         case "\n":
4910                         case "\u000c":
4911                         case ' ':
4912                                 tok_state = tok_state_before_attribute_name
4913                         break
4914                         case '/':
4915                                 tok_state = tok_state_self_closing_start_tag
4916                         break
4917                         case '>':
4918                                 tok_state = tok_state_data
4919                                 tmp = tok_cur_tag
4920                                 tok_cur_tag = null
4921                                 return tmp
4922                         break
4923                         case '': // EOF
4924                                 parse_error()
4925                                 tok_state = tok_state_data
4926                         break
4927                         default:
4928                                 // Parse Error
4929                                 tok_state = tok_state_before_attribute_name
4930                                 cur -= 1 // we didn't handle that char
4931                 }
4932                 return null
4933         }
4934
4935         // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
4936         tok_state_self_closing_start_tag = function () {
4937                 var c
4938                 c = txt.charAt(cur++)
4939                 if (c === '>') {
4940                         tok_cur_tag.flag('self-closing', true)
4941                         tok_state = tok_state_data
4942                         return tok_cur_tag
4943                 }
4944                 if (c === '') {
4945                         parse_error()
4946                         tok_state = tok_state_data
4947                         cur -= 1 // reconsume
4948                         return
4949                 }
4950                 // Anything else
4951                 parse_error()
4952                 tok_state = tok_state_before_attribute_name
4953                 cur -= 1 // reconsume
4954         }
4955
4956         // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
4957         // WARNING: put a comment token in tok_cur_tag before setting this state
4958         tok_state_bogus_comment = function () {
4959                 var next_gt, val
4960                 next_gt = txt.indexOf('>', cur)
4961                 if (next_gt === -1) {
4962                         val = txt.substr(cur)
4963                         cur = txt.length
4964                 } else {
4965                         val = txt.substr(cur, next_gt - cur)
4966                         cur = next_gt + 1
4967                 }
4968                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4969                 tok_cur_tag.text += val
4970                 tok_state = tok_state_data
4971                 return tok_cur_tag
4972         }
4973
4974         // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
4975         tok_state_markup_declaration_open = function () {
4976                 var acn
4977                 if (txt.substr(cur, 2) === '--') {
4978                         cur += 2
4979                         tok_cur_tag = new_comment_token('')
4980                         tok_state = tok_state_comment_start
4981                         return
4982                 }
4983                 if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
4984                         cur += 7
4985                         tok_state = tok_state_doctype
4986                         return
4987                 }
4988                 acn = adjusted_current_node()
4989                 if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
4990                         cur += 7
4991                         tok_state = tok_state_cdata_section
4992                         return
4993                 }
4994                 // Otherwise
4995                 parse_error()
4996                 tok_cur_tag = new_comment_token('')
4997                 tok_state = tok_state_bogus_comment
4998         }
4999
5000         // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
5001         tok_state_comment_start = function () {
5002                 var c
5003                 switch (c = txt.charAt(cur++)) {
5004                         case '-':
5005                                 tok_state = tok_state_comment_start_dash
5006                         break
5007                         case "\u0000":
5008                                 parse_error()
5009                                 tok_state = tok_state_comment
5010                                 return new_character_token("\ufffd")
5011                         break
5012                         case '>':
5013                                 parse_error()
5014                                 tok_state = tok_state_data
5015                                 return tok_cur_tag
5016                         break
5017                         case '': // EOF
5018                                 parse_error()
5019                                 tok_state = tok_state_data
5020                                 cur -= 1 // reconsume
5021                                 return tok_cur_tag
5022                         break
5023                         default:
5024                                 tok_cur_tag.text += c
5025                                 tok_state = tok_state_comment
5026                 }
5027                 return null
5028         }
5029
5030         // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
5031         tok_state_comment_start_dash = function () {
5032                 var c
5033                 switch (c = txt.charAt(cur++)) {
5034                         case '-':
5035                                 tok_state = tok_state_comment_end
5036                         break
5037                         case "\u0000":
5038                                 parse_error()
5039                                 tok_cur_tag.text += "-\ufffd"
5040                                 tok_state = tok_state_comment
5041                         break
5042                         case '>':
5043                                 parse_error()
5044                                 tok_state = tok_state_data
5045                                 return tok_cur_tag
5046                         break
5047                         case '': // EOF
5048                                 parse_error()
5049                                 tok_state = tok_state_data
5050                                 cur -= 1 // reconsume
5051                                 return tok_cur_tag
5052                         break
5053                         default:
5054                                 tok_cur_tag.text += "-" + c
5055                                 tok_state = tok_state_comment
5056                 }
5057                 return null
5058         }
5059
5060         // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
5061         tok_state_comment = function () {
5062                 var c
5063                 switch (c = txt.charAt(cur++)) {
5064                         case '-':
5065                                 tok_state = tok_state_comment_end_dash
5066                         break
5067                         case "\u0000":
5068                                 parse_error()
5069                                 tok_cur_tag.text += "\ufffd"
5070                         break
5071                         case '': // EOF
5072                                 parse_error()
5073                                 tok_state = tok_state_data
5074                                 cur -= 1 // reconsume
5075                                 return tok_cur_tag
5076                         break
5077                         default:
5078                                 tok_cur_tag.text += c
5079                 }
5080                 return null
5081         }
5082
5083         // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
5084         tok_state_comment_end_dash = function () {
5085                 var c
5086                 switch (c = txt.charAt(cur++)) {
5087                         case '-':
5088                                 tok_state = tok_state_comment_end
5089                         break
5090                         case "\u0000":
5091                                 parse_error()
5092                                 tok_cur_tag.text += "-\ufffd"
5093                                 tok_state = tok_state_comment
5094                         break
5095                         case '': // EOF
5096                                 parse_error()
5097                                 tok_state = tok_state_data
5098                                 cur -= 1 // reconsume
5099                                 return tok_cur_tag
5100                         break
5101                         default:
5102                                 tok_cur_tag.text += "-" + c
5103                                 tok_state = tok_state_comment
5104                 }
5105                 return null
5106         }
5107
5108         // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
5109         tok_state_comment_end = function () {
5110                 var c
5111                 switch (c = txt.charAt(cur++)) {
5112                         case '>':
5113                                 tok_state = tok_state_data
5114                                 return tok_cur_tag
5115                         break
5116                         case "\u0000":
5117                                 parse_error()
5118                                 tok_cur_tag.text += "--\ufffd"
5119                                 tok_state = tok_state_comment
5120                         break
5121                         case '!':
5122                                 parse_error()
5123                                 tok_state = tok_state_comment_end_bang
5124                         break
5125                         case '-':
5126                                 parse_error()
5127                                 tok_cur_tag.text += '-'
5128                         break
5129                         case '': // EOF
5130                                 parse_error()
5131                                 tok_state = tok_state_data
5132                                 cur -= 1 // reconsume
5133                                 return tok_cur_tag
5134                         break
5135                         default:
5136                                 parse_error()
5137                                 tok_cur_tag.text += "--" + c
5138                                 tok_state = tok_state_comment
5139                 }
5140                 return null
5141         }
5142
5143         // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
5144         tok_state_comment_end_bang = function () {
5145                 var c
5146                 switch (c = txt.charAt(cur++)) {
5147                         case '-':
5148                                 tok_cur_tag.text += "--!" + c
5149                                 tok_state = tok_state_comment_end_dash
5150                         break
5151                         case '>':
5152                                 tok_state = tok_state_data
5153                                 return tok_cur_tag
5154                         break
5155                         case "\u0000":
5156                                 parse_error()
5157                                 tok_cur_tag.text += "--!\ufffd"
5158                                 tok_state = tok_state_comment
5159                         break
5160                         case '': // EOF
5161                                 parse_error()
5162                                 tok_state = tok_state_data
5163                                 cur -= 1 // reconsume
5164                                 return tok_cur_tag
5165                         break
5166                         default:
5167                                 tok_cur_tag.text += "--!" + c
5168                                 tok_state = tok_state_comment
5169                 }
5170                 return null
5171         }
5172
5173         // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
5174         tok_state_doctype = function () {
5175                 var c, el
5176                 switch (c = txt.charAt(cur++)) {
5177                         case "\t":
5178                         case "\u000a":
5179                         case "\u000c":
5180                         case ' ':
5181                                 tok_state = tok_state_before_doctype_name
5182                         break
5183                         case '': // EOF
5184                                 parse_error()
5185                                 tok_state = tok_state_data
5186                                 el = new_doctype_token('')
5187                                 el.flag('force-quirks', true)
5188                                 cur -= 1 // reconsume
5189                                 return el
5190                         break
5191                         default:
5192                                 parse_error()
5193                                 tok_state = tok_state_before_doctype_name
5194                                 cur -= 1 // reconsume
5195                 }
5196                 return null
5197         }
5198
5199         // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
5200         tok_state_before_doctype_name = function () {
5201                 var c, el
5202                 c = txt.charAt(cur++)
5203                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5204                         return
5205                 }
5206                 if (is_uc_alpha(c)) {
5207                         tok_cur_tag = new_doctype_token(c.toLowerCase())
5208                         tok_state = tok_state_doctype_name
5209                         return
5210                 }
5211                 if (c === "\u0000") {
5212                         parse_error()
5213                         tok_cur_tag = new_doctype_token("\ufffd")
5214                         tok_state = tok_state_doctype_name
5215                         return
5216                 }
5217                 if (c === '>') {
5218                         parse_error()
5219                         el = new_doctype_token('')
5220                         el.flag('force-quirks', true)
5221                         tok_state = tok_state_data
5222                         return el
5223                 }
5224                 if (c === '') { // EOF
5225                         parse_error()
5226                         tok_state = tok_state_data
5227                         el = new_doctype_token('')
5228                         el.flag('force-quirks', true)
5229                         cur -= 1 // reconsume
5230                         return el
5231                 }
5232                 // Anything else
5233                 tok_cur_tag = new_doctype_token(c)
5234                 tok_state = tok_state_doctype_name
5235                 return null
5236         }
5237
5238         // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
5239         tok_state_doctype_name = function () {
5240                 var c
5241                 c = txt.charAt(cur++)
5242                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5243                         tok_state = tok_state_after_doctype_name
5244                         return
5245                 }
5246                 if (c === '>') {
5247                         tok_state = tok_state_data
5248                         return tok_cur_tag
5249                 }
5250                 if (is_uc_alpha(c)) {
5251                         tok_cur_tag.name += c.toLowerCase()
5252                         return
5253                 }
5254                 if (c === "\u0000") {
5255                         parse_error()
5256                         tok_cur_tag.name += "\ufffd"
5257                         return
5258                 }
5259                 if (c === '') { // EOF
5260                         parse_error()
5261                         tok_state = tok_state_data
5262                         tok_cur_tag.flag('force-quirks', true)
5263                         cur -= 1 // reconsume
5264                         return tok_cur_tag
5265                 }
5266                 // Anything else
5267                 tok_cur_tag.name += c
5268                 return null
5269         }
5270
5271         // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
5272         tok_state_after_doctype_name = function () {
5273                 var c
5274                 c = txt.charAt(cur++)
5275                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5276                         return
5277                 }
5278                 if (c === '>') {
5279                         tok_state = tok_state_data
5280                         return tok_cur_tag
5281                 }
5282                 if (c === '') { // EOF
5283                         parse_error()
5284                         tok_state = tok_state_data
5285                         tok_cur_tag.flag('force-quirks', true)
5286                         cur -= 1 // reconsume
5287                         return tok_cur_tag
5288                 }
5289                 // Anything else
5290                 if (txt.substr(cur - 1, 6).toLowerCase() === 'public') {
5291                         cur += 5
5292                         tok_state = tok_state_after_doctype_public_keyword
5293                         return
5294                 }
5295                 if (txt.substr(cur - 1, 6).toLowerCase() === 'system') {
5296                         cur += 5
5297                         tok_state = tok_state_after_doctype_system_keyword
5298                         return
5299                 }
5300                 parse_error()
5301                 tok_cur_tag.flag('force-quirks', true)
5302                 tok_state = tok_state_bogus_doctype
5303                 return null
5304         }
5305
5306         // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
5307         tok_state_after_doctype_public_keyword = function () {
5308                 var c
5309                 c = txt.charAt(cur++)
5310                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5311                         tok_state = tok_state_before_doctype_public_identifier
5312                         return
5313                 }
5314                 if (c === '"') {
5315                         parse_error()
5316                         tok_cur_tag.public_identifier = ''
5317                         tok_state = tok_state_doctype_public_identifier_double_quoted
5318                         return
5319                 }
5320                 if (c === "'") {
5321                         parse_error()
5322                         tok_cur_tag.public_identifier = ''
5323                         tok_state = tok_state_doctype_public_identifier_single_quoted
5324                         return
5325                 }
5326                 if (c === '>') {
5327                         parse_error()
5328                         tok_cur_tag.flag('force-quirks', true)
5329                         tok_state = tok_state_data
5330                         return tok_cur_tag
5331                 }
5332                 if (c === '') { // EOF
5333                         parse_error()
5334                         tok_state = tok_state_data
5335                         tok_cur_tag.flag('force-quirks', true)
5336                         cur -= 1 // reconsume
5337                         return tok_cur_tag
5338                 }
5339                 // Anything else
5340                 parse_error()
5341                 tok_cur_tag.flag('force-quirks', true)
5342                 tok_state = tok_state_bogus_doctype
5343                 return null
5344         }
5345
5346         // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
5347         tok_state_before_doctype_public_identifier = function () {
5348                 var c
5349                 c = txt.charAt(cur++)
5350                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5351                         return
5352                 }
5353                 if (c === '"') {
5354                         parse_error()
5355                         tok_cur_tag.public_identifier = ''
5356                         tok_state = tok_state_doctype_public_identifier_double_quoted
5357                         return
5358                 }
5359                 if (c === "'") {
5360                         parse_error()
5361                         tok_cur_tag.public_identifier = ''
5362                         tok_state = tok_state_doctype_public_identifier_single_quoted
5363                         return
5364                 }
5365                 if (c === '>') {
5366                         parse_error()
5367                         tok_cur_tag.flag('force-quirks', true)
5368                         tok_state = tok_state_data
5369                         return tok_cur_tag
5370                 }
5371                 if (c === '') { // EOF
5372                         parse_error()
5373                         tok_state = tok_state_data
5374                         tok_cur_tag.flag('force-quirks', true)
5375                         cur -= 1 // reconsume
5376                         return tok_cur_tag
5377                 }
5378                 // Anything else
5379                 parse_error()
5380                 tok_cur_tag.flag('force-quirks', true)
5381                 tok_state = tok_state_bogus_doctype
5382                 return null
5383         }
5384
5385
5386         // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
5387         tok_state_doctype_public_identifier_double_quoted = function () {
5388                 var c
5389                 c = txt.charAt(cur++)
5390                 if (c === '"') {
5391                         tok_state = tok_state_after_doctype_public_identifier
5392                         return
5393                 }
5394                 if (c === "\u0000") {
5395                         parse_error()
5396                         tok_cur_tag.public_identifier += "\ufffd"
5397                         return
5398                 }
5399                 if (c === '>') {
5400                         parse_error()
5401                         tok_cur_tag.flag('force-quirks', true)
5402                         tok_state = tok_state_data
5403                         return tok_cur_tag
5404                 }
5405                 if (c === '') { // EOF
5406                         parse_error()
5407                         tok_state = tok_state_data
5408                         tok_cur_tag.flag('force-quirks', true)
5409                         cur -= 1 // reconsume
5410                         return tok_cur_tag
5411                 }
5412                 // Anything else
5413                 tok_cur_tag.public_identifier += c
5414                 return null
5415         }
5416
5417         // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
5418         tok_state_doctype_public_identifier_single_quoted = function () {
5419                 var c
5420                 c = txt.charAt(cur++)
5421                 if (c === "'") {
5422                         tok_state = tok_state_after_doctype_public_identifier
5423                         return
5424                 }
5425                 if (c === "\u0000") {
5426                         parse_error()
5427                         tok_cur_tag.public_identifier += "\ufffd"
5428                         return
5429                 }
5430                 if (c === '>') {
5431                         parse_error()
5432                         tok_cur_tag.flag('force-quirks', true)
5433                         tok_state = tok_state_data
5434                         return tok_cur_tag
5435                 }
5436                 if (c === '') { // EOF
5437                         parse_error()
5438                         tok_state = tok_state_data
5439                         tok_cur_tag.flag('force-quirks', true)
5440                         cur -= 1 // reconsume
5441                         return tok_cur_tag
5442                 }
5443                 // Anything else
5444                 tok_cur_tag.public_identifier += c
5445                 return null
5446         }
5447
5448         // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
5449         tok_state_after_doctype_public_identifier = function () {
5450                 var c
5451                 c = txt.charAt(cur++)
5452                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5453                         tok_state = tok_state_between_doctype_public_and_system_identifiers
5454                         return
5455                 }
5456                 if (c === '>') {
5457                         tok_state = tok_state_data
5458                         return tok_cur_tag
5459                 }
5460                 if (c === '"') {
5461                         parse_error()
5462                         tok_cur_tag.system_identifier = ''
5463                         tok_state = tok_state_doctype_system_identifier_double_quoted
5464                         return
5465                 }
5466                 if (c === "'") {
5467                         parse_error()
5468                         tok_cur_tag.system_identifier = ''
5469                         tok_state = tok_state_doctype_system_identifier_single_quoted
5470                         return
5471                 }
5472                 if (c === '') { // EOF
5473                         parse_error()
5474                         tok_state = tok_state_data
5475                         tok_cur_tag.flag('force-quirks', true)
5476                         cur -= 1 // reconsume
5477                         return tok_cur_tag
5478                 }
5479                 // Anything else
5480                 parse_error()
5481                 tok_cur_tag.flag('force-quirks', true)
5482                 tok_state = tok_state_bogus_doctype
5483                 return null
5484         }
5485
5486         // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-&&-system-identifiers-state
5487         tok_state_between_doctype_public_and_system_identifiers = function () {
5488                 var c
5489                 c = txt.charAt(cur++)
5490                 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5491                         return
5492                 }
5493                 if (c === '>') {
5494                         tok_state = tok_state_data
5495                         return tok_cur_tag
5496                 }
5497                 if (c === '"') {
5498                         parse_error()
5499                         tok_cur_tag.system_identifier = ''
5500                         tok_state = tok_state_doctype_system_identifier_double_quoted
5501                         return
5502                 }
5503                 if (c === "'") {
5504                         parse_error()
5505                         tok_cur_tag.system_identifier = ''
5506                         tok_state = tok_state_doctype_system_identifier_single_quoted
5507                         return
5508                 }
5509                 if (c === '') { // EOF
5510                         parse_error()
5511                         tok_state = tok_state_data
5512                         tok_cur_tag.flag('force-quirks', true)
5513                         cur -= 1 // reconsume
5514                         return tok_cur_tag
5515                 }
5516                 // Anything else
5517                 parse_error()
5518                 tok_cur_tag.flag('force-quirks', true)
5519                 tok_state = tok_state_bogus_doctype
5520                 return null
5521         }
5522
// 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
//
// Tokenizer step: consumes one character of txt and either moves on to
// another doctype state (returning nothing/null, so the caller calls the
// tokenizer again) or finishes the doctype and returns tok_cur_tag.
tok_state_after_doctype_system_keyword = function () {
        var ch = txt.charAt(cur++) // '' once cur has run past the end of txt
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        tok_state = tok_state_before_doctype_system_identifier
                        return
                case '"':
                        // identifier starts right here; still recorded, but flagged
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_double_quoted
                        return
                case "'":
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_single_quoted
                        return
                case '>':
                        // doctype ended without a system identifier
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5562
// 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
//
// Tokenizer step: skips whitespace until the opening quote of the system
// identifier. Returns tok_cur_tag when the doctype ends here ('>' or EOF),
// otherwise nothing/null so the caller calls the tokenizer again.
tok_state_before_doctype_system_identifier = function () {
        var ch = txt.charAt(cur++) // '' once cur has run past the end of txt
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        // whitespace is ignored; stay in this state
                        return
                case '"':
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_double_quoted
                        return
                case "'":
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_single_quoted
                        return
                case '>':
                        // doctype ended without a system identifier
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5599
// 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
//
// Tokenizer step: appends one character to tok_cur_tag.system_identifier
// until the closing double quote. Returns tok_cur_tag when the doctype is
// cut short by '>' or EOF, otherwise nothing/null.
tok_state_doctype_system_identifier_double_quoted = function () {
        var ch = txt.charAt(cur++) // '' once cur has run past the end of txt
        switch (ch) {
                case '"':
                        tok_state = tok_state_after_doctype_system_identifier
                        return
                case "\u0000":
                        // NUL is recorded as the replacement character
                        parse_error()
                        tok_cur_tag.system_identifier += "\ufffd"
                        return
                case '>':
                        // identifier was never closed
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        tok_cur_tag.system_identifier += ch
                        return null
        }
}
5630
// 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
//
// Tokenizer step: appends one character to tok_cur_tag.system_identifier
// until the closing single quote. Returns tok_cur_tag when the doctype is
// cut short by '>' or EOF, otherwise nothing/null.
tok_state_doctype_system_identifier_single_quoted = function () {
        var ch = txt.charAt(cur++) // '' once cur has run past the end of txt
        switch (ch) {
                case "'":
                        tok_state = tok_state_after_doctype_system_identifier
                        return
                case "\u0000":
                        // NUL is recorded as the replacement character
                        parse_error()
                        tok_cur_tag.system_identifier += "\ufffd"
                        return
                case '>':
                        // identifier was never closed
                        parse_error()
                        tok_cur_tag.flag('force-quirks', true)
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        tok_cur_tag.system_identifier += ch
                        return null
        }
}
5661
// 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
//
// Tokenizer step: after the closing quote of the system identifier only
// whitespace and '>' are expected. Returns tok_cur_tag on '>' or EOF,
// otherwise nothing/null.
tok_state_after_doctype_system_identifier = function () {
        var ch = txt.charAt(cur++) // '' once cur has run past the end of txt
        switch (ch) {
                case "\t":
                case "\u000a":
                case "\u000c":
                case ' ':
                        // whitespace is ignored; stay in this state
                        return
                case '>':
                        tok_state = tok_state_data
                        return tok_cur_tag
                case '': // EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag('force-quirks', true)
                        cur -= 1 // reconsume
                        return tok_cur_tag
                default:
                        // Anything else
                        parse_error()
                        // unlike the neighbouring states, force-quirks is
                        // deliberately _not_ set here
                        tok_state = tok_state_bogus_doctype
                        return null
        }
}
5686
// 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
//
// Tokenizer step: discards characters until '>' or EOF, then switches to
// the data state and returns tok_cur_tag. Returns null for every
// discarded character.
tok_state_bogus_doctype = function () {
        var ch = txt.charAt(cur++) // '' once cur has run past the end of txt
        if (ch !== '>' && ch !== '') {
                // Anything else: skipped
                return null
        }
        tok_state = tok_state_data
        if (ch === '') { // EOF
                cur -= 1 // reconsume
        }
        return tok_cur_tag
}
5703
// 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
//
// Consumes everything up to (and including) the next "]]>", or up to the
// end of txt when there is none, and switches back to the data state.
// Returns one character token holding the consumed text (minus the
// "]]>", with NUL replaced by U+FFFD), or null when that text is empty.
tok_state_cdata_section = function () {
        var chars, end
        end = txt.indexOf(']]>', cur)
        tok_state = tok_state_data
        if (end === -1) {
                // unterminated section: take the rest of the input
                chars = txt.slice(cur)
                cur = txt.length
        } else {
                chars = txt.slice(cur, end)
                cur = end + 3 // skip past the "]]>"
        }
        chars = chars.split("\u0000").join("\ufffd")
        if (chars.length === 0) {
                return null
        }
        return new_character_token(chars) // fixfull split
}
5722
// 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
//
// Don't set this as a state, just call it.
//
// Tries to read a character reference starting at txt[cur] (presumably the
// character right after the "&" -- confirm against callers).
//
//   allowed_char  optional extra character that, when it is the next
//                 character, means "this is not a character reference"
//   in_attr       optional; truthy when called for an attribute value,
//                 which enables the legacy "&name=" / "&nameX" exceptions
//
// Returns a string (NOT a text node): the decoded character(s), with cur
// advanced past the reference, or '&' with cur left untouched when no
// reference was consumed.
parse_character_reference = function (allowed_char, in_attr) {
        var base, charset, code_point, decoded, i, max, name, next, start
        if (cur >= txt.length) {
                return '&'
        }
        switch (txt.charAt(cur)) {
                case "\t":
                case "\n":
                case "\u000c":
                case ' ':
                case '<':
                case '&':
                case allowed_char:
                        // explicitly not a parse error
                        return '&'
                case ';':
                        // there has to be "one or more" alnums between & and ; to be a parse error
                        return '&'
                case '#':
                        // numeric reference: &#123; or &#x7b;
                        // NOTE(review): the spec calls "&#" without digits a parse
                        // error; no parse_error() is raised for it here (same as
                        // before) -- confirm whether that is intentional
                        if (cur + 1 >= txt.length) {
                                return '&'
                        }
                        if (txt.charAt(cur + 1).toLowerCase() === 'x') {
                                base = 16
                                charset = hex_chars
                                start = cur + 2
                        } else {
                                base = 10
                                charset = digits
                                start = cur + 1
                        }
                        // count the digits
                        i = 0
                        while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
                                i += 1
                        }
                        if (i === 0) {
                                return '&'
                        }
                        cur = start + i
                        if (txt.charAt(cur) === ';') {
                                cur += 1
                        } else {
                                parse_error() // missing terminating ";"
                        }
                        // only digits of the chosen base get here, so leading zeros
                        // need no special treatment with an explicit radix
                        code_point = parseInt(txt.substr(start, i), base)
                        if (unicode_fixes[code_point] != null) {
                                parse_error()
                                return unicode_fixes[code_point]
                        }
                        if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
                                // surrogates and out-of-range values
                                parse_error()
                                return "\ufffd"
                        }
                        // control characters and noncharacters are passed through,
                        // but flagged. The last test matches U+xxFFFE and U+xxFFFF
                        // in every plane (code_point is <= 0x10ffff here).
                        if ((code_point >= 0x0001 && code_point <= 0x0008) ||
                                        code_point === 0x000b ||
                                        (code_point >= 0x000d && code_point <= 0x001f) ||
                                        (code_point >= 0x007f && code_point <= 0x009f) ||
                                        (code_point >= 0xfdd0 && code_point <= 0xfdef) ||
                                        (code_point & 0xfffe) === 0xfffe) {
                                parse_error()
                        }
                        return from_code_point(code_point)
                default:
                        // named reference: count the leading alnums (31 at most).
                        // The explicit bounds check matters: charAt() past the end
                        // gives '', and indexOf('') on a string reports a match.
                        for (i = 0; i < 31 && cur + i < txt.length; ++i) {
                                if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
                                        break
                                }
                        }
                        if (i === 0) {
                                // exit early, because parse_error() below needs at least one alnum
                                return '&'
                        }
                        if (txt.charAt(cur + i) === ';') {
                                decoded = decode_named_char_ref(txt.substr(cur, i))
                                i += 1 // scan past the ';' (after, so we don't pass it to decode)
                                if (decoded != null) {
                                        cur += i
                                        return decoded
                                }
                                // else FALL THROUGH (check for match without last char(s) or ";")
                        }
                        // no ';' terminator (only legacy char refs)
                        max = i
                        for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
                                name = txt.substr(cur, i)
                                // own properties only: names such as "toString" or
                                // "constructor" must not match inherited members
                                if (!Object.prototype.hasOwnProperty.call(legacy_char_refs, name)) {
                                        continue
                                }
                                decoded = legacy_char_refs[name]
                                if (decoded == null) {
                                        continue
                                }
                                if (in_attr) {
                                        next = txt.charAt(cur + i) // '' at end of input
                                        if (next === '=') {
                                                // "because some legacy user agents will
                                                // misinterpret the markup in those cases"
                                                parse_error()
                                                return '&'
                                        }
                                        if (next !== '' && alnum.indexOf(next) > -1) {
                                                // this makes attributes forgiving about url args
                                                return '&'
                                        }
                                }
                                // ok, and besides the weird exceptions for attributes...
                                // return the matching char
                                cur += i // consume entity chars
                                parse_error() // because no terminating ";"
                                return decoded
                        }
                        parse_error()
                        return '&'
        }
}
5848
// Runs the tokenizer until it produces the next token and consumes that
// token only if it is a newline. For any other token the tokenizer is put
// back exactly where it was: both the read position (cur) and the current
// state (tok_state) are restored, so the un-consumed input is tokenized
// again later under the state it was originally going to be read in.
// Returns nothing.
eat_next_token_if_newline = function () {
        var saved_cur, saved_state, t
        saved_cur = cur
        saved_state = tok_state
        t = null
        while (t == null) {
                t = tok_state()
        }
        if (t.type === TYPE_TEXT) {
                // definition of a newline depends on whether it was a character ref or not
                if (cur - saved_cur === 1) {
                        // not a character reference
                        if (t.text === "\u000d" || t.text === "\u000a") {
                                return
                        }
                } else if (t.text === "\u000a") {
                        return
                }
        }
        // not a "newline": un-consume the token
        cur = saved_cur
        tok_state = saved_state
}
5872
// tree constructor initialization
// see comments on TYPE_TAG/etc for the structure of this data
// (statement order is kept as-is; Node construction may depend on it)
txt = args_html
cur = 0 // read position within txt
doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
fragment_root = null // fragment parsing algorithm returns children of this
open_els = []
afe = [] // active formatting elements
template_ins_modes = []
ins_mode = ins_mode_initial
original_ins_mode = ins_mode // TODO check spec
// scripting is on unless the caller passed a value for it
// TODO might need an extra flag to get <noscript> to parse correctly
if (args.scripting != null) {
        flag_scripting = args.scripting
} else {
        flag_scripting = true
}
flag_frameset_ok = true
flag_parsing = true
flag_foster_parenting = false
form_element_pointer = null
temporary_buffer = null
pending_table_character_tokens = []
head_element_pointer = null
flag_fragment_parsing = false
context_element = null
prev_node_id = 0 // just for debugging

// tokenizer initialization
tok_state = tok_state_data
5899
// Prepares the parser state before the main loop runs: sets up fragment
// parsing when args.fragment (a string) or args.context (a Node) was
// given, and normalizes the newlines in txt. Returns nothing.
parse_init = function () {
        var ancestor, frag, frag_ns, source_doc, tag, tag_token

        // fragment parsing (text arg)
        if (args.fragment != null) {
                // this handles the fragment from the tests in the format described here:
                // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
                frag = args.fragment
                frag_ns = NS_HTML
                if (frag.substr(0, 5) === 'math ') {
                        frag = frag.substr(5)
                        frag_ns = NS_MATHML
                } else if (frag.substr(0, 4) === 'svg ') {
                        frag = frag.substr(4)
                        frag_ns = NS_SVG
                }
                tag_token = new_open_tag(frag)
                context_element = token_to_element(tag_token, frag_ns)
                context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
                context_element.document.flag('quirks mode', QUIRKS_NO)
        }

        // fragment parsing (Node arg); takes precedence over args.fragment
        if (args.context != null) {
                context_element = args.context
        }

        // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
        // fragment parsing algorithm
        if (context_element != null) {
                flag_fragment_parsing = true
                doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})

                // walk up from the context element looking for its document,
                // because this file only puts a "document" property on the
                // root element
                source_doc = null
                for (ancestor = context_element; ancestor; ancestor = ancestor.parent) {
                        if (ancestor.document != null) {
                                source_doc = ancestor.document
                                break
                        }
                }
                if (source_doc) {
                        doc.flag('quirks mode', source_doc.flag('quirks mode'))
                }

                // pick the tokenizer state that matches the context element
                if (context_element.namespace === NS_HTML) {
                        tag = context_element.name
                        if (tag === 'title' || tag === 'textarea') {
                                tok_state = tok_state_rcdata
                        } else if (tag === 'style' || tag === 'xmp' || tag === 'iframe' || tag === 'noembed' || tag === 'noframes') {
                                tok_state = tok_state_rawtext
                        } else if (tag === 'script') {
                                tok_state = tok_state_script_data
                        } else if (tag === 'noscript') {
                                if (flag_scripting) {
                                        tok_state = tok_state_rawtext
                                }
                        } else if (tag === 'plaintext') {
                                tok_state = tok_state_plaintext
                        }
                }

                // synthetic <html> root that collects the parsed fragment
                fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
                doc.children.push(fragment_root)
                fragment_root.document = doc
                open_els = [fragment_root]
                if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
                        template_ins_modes.unshift(ins_mode_in_template)
                }
                // fixfull create token for context (it should have its original one already)
                reset_ins_mode()

                // set form_element pointer... in the foreign doc?!
                // (nearest HTML <form>, starting at the context element itself)
                for (ancestor = context_element; ancestor; ancestor = ancestor.parent) {
                        if (ancestor.name === 'form' && ancestor.namespace === NS_HTML) {
                                form_element_pointer = ancestor
                                break
                        }
                }
        }

        // text pre-processing: CRLF pairs and lone CRs both become LF
        // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
        txt = txt.replace(new RegExp("\r\n", 'g'), "\n").replace(new RegExp("\r", 'g'), "\n")
}
6004
// http://www.w3.org/TR/html5/syntax.html#tree-construction
//
// Drives the parse: while flag_parsing is set, runs the current tokenizer
// state and passes every token it produces to process_token. Tokenizer
// steps that produce no token (null/undefined) are simply repeated.
parse_main_loop = function () {
        var token
        while (flag_parsing) {
                token = tok_state()
                if (token == null) {
                        continue
                }
                process_token(token)
                // fixfull parse error if has self-closing flag, but it wasn't acknowledged
        }
}
6016         parse_init()
6017         parse_main_loop()
6018
6019         if (flag_fragment_parsing) {
6020                 return fragment_root.children
6021         }
6022         return doc.children
6023 }
6024
// Everything this file makes available to the outside.
var this_module = {}
this_module.parse = parse_html
this_module.Node = Node

// Publish it: on window.peach_parser by default, or through module.exports
// when `context` says this file is being loaded as a module.
if (context !== 'module') {
        window.peach_parser = this_module
} else {
        module.exports = this_module
}
6035
6036 }).call(this)