1 // todo remove unused variables
2 // todo remove debug log, or make a way to access it
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
21 // This file implements a thorough parser for html5, meant to be used by a
24 // The implementation is a pretty direct implementation of the parsing algorithm
27 // http://www.w3.org/TR/html5/syntax.html
29 // except for some places marked "WHATWG" that are implemented as described here:
31 // https://html.spec.whatwg.org/multipage/syntax.html
33 // This code passes all of the tests in the .dat files at:
35 // https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
42 // See README.md for how to run this file in the browser or in node.js.
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
49 // peach_parser.parse("<p><b>hi</p>")
51 // Or, if you don't want <html><head><body>/etc, do this:
53 // peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
55 // The return value is an array of Nodes. A Node contains:
56 // type: one of: "tag", "text", "comment", "doctype"
57 // text: contents for text/comment nodes
58 // attrs: object of attributes, eg {href: "#main"}
59 // children: array of Nodes
60 // namespace: one of: "html", "mathml", "svg"
61 // parent: another Node or null
63 // This code is a work in progress, e.g. try searching this file for "fixfull",
67 // Notes: stacks/lists
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
73 // stacks grow downward (current element is index=0)
75 // example: open_els = [a, b, c, d, e, f, g]
77 // "grows downwards" means it's visualized like this: (index: el "names")
79 // 6: g "start of the list", "topmost", "first"
81 // 4: e "previous" (to d), "above", "before"
82 // 3: d (previous/next are relative to this element)
83 // 2: c "next", "after", "lower", "below"
85 // 0: a "end of the list", "current node", "bottommost", "last"
88 var NS_HTML, NS_MATHML, NS_SVG, QUIRKS_LIMITED, QUIRKS_NO, QUIRKS_YES, TYPE_AAA_BOOKMARK, TYPE_AFE_MARKER, TYPE_COMMENT, TYPE_DOCTYPE, TYPE_END_TAG, TYPE_EOF, TYPE_START_TAG, TYPE_TAG, TYPE_TEXT, _decode_named_char_ref, adjust_foreign_attributes, adjust_mathml_attributes, adjust_svg_attributes, adp_els, alnum, context, debug_log, debug_log_each, debug_log_reset, decode_named_char_ref, decode_named_char_ref_cache, decode_named_char_ref_el, digits, el_is_special, el_is_special_not_adp, end_tag_implied, exports, foreign_attr_fixes, formatting_elements, foster_parenting_targets, from_code_point, g_debug_log, h_tags, hex_chars, is_html_integration, is_input_hidden_tok, is_lc_alpha, is_mathml_text_integration_point, is_space, is_space_tok, is_uc_alpha, lc_alpha, legacy_char_refs, mathml_elements, mathml_text_integration, new_aaa_bookmark, new_afe_marker, new_character_token, new_comment_token, new_doctype_token, new_element, new_end_tag, new_eof_token, new_open_tag, new_text_node, parse_html, prev_node_id, quirks_yes_pi_prefixes, space_chars, special_elements, svg_attribute_fixes, svg_elements, svg_name_fixes, tag_name_chars, uc_alpha, unicode_fixes, whitespace_chars
// Environment detection: the test below is true when a CommonJS-style
// `module` object with a non-null `exports` property exists.
// NOTE(review): most statements inside the two branches are not visible in
// this excerpt. The one visible statement creates the `window.peach_parser`
// namespace object, presumably for the browser (non-module) case -- confirm
// against the complete source.
90 if ((typeof module) !== 'undefined' && (module.exports != null)) {
94 window.peach_parser = {}
// from_code_point(x): build a string from the Unicode code point `x`.
// The native String.fromCodePoint is used whenever the runtime provides it;
// the String.fromCharCode calls below are the fallback for older runtimes.
// NOTE(review): the conditions selecting between the two fallbacks, and any
// adjustment made to `x` before the last return, are not visible in this
// excerpt. The last return emits two UTF-16 code units (0xd800-based and
// 0xdc00-based), i.e. it looks like surrogate-pair encoding for code points
// that do not fit in a single unit -- confirm against the complete source.
97 from_code_point = function (x) {
98 if (String.fromCodePoint != null) {
99 return String.fromCodePoint(x)
102 return String.fromCharCode(x)
105 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
109 // Each node is an object of the Node class. Here are the Node types:
// (the types that may appear in the finished tree are strings)
110 TYPE_TAG = 'tag' // name, {attributes}, [children]
111 TYPE_TEXT = 'text' // "text"
112 TYPE_COMMENT = 'comment'
113 TYPE_DOCTYPE = 'doctype'
114 // the following types are emitted by the tokenizer, but shouldn't end up in the tree:
// (these are small integers, so they can never collide with the string types above)
115 TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
116 TYPE_END_TAG = 5 // name
118 TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
119 TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
121 // namespace constants
126 // quirks mode constants
128 QUIRKS_LIMITED = 'limited'
131 // queue up debug logs, so eg they can be shown only for tests that fail
// Debug-log queue helpers. Messages are accumulated in the g_debug_log array
// instead of being printed immediately.
//
// debug_log_reset(): body not visible in this excerpt; presumably it empties
// g_debug_log -- confirm.
133 debug_log_reset = function () {
// debug_log(str): append one message to the queue.
136 debug_log = function (str) {
137 g_debug_log.push(str)
// debug_log_each(cb): walk the queued messages in insertion order.
// NOTE(review): the loop body is not visible; presumably it calls cb with
// each queued message -- confirm.
139 debug_log_each = function (cb) {
141 for (i = 0; i < g_debug_log.length; ++i) {
// Node(type, args): constructor shared by tree nodes and tokenizer tokens.
//   type - one of the TYPE_* constants
//   args - bag of optional fields; every field read below falls back to a
//          default when it is null/undefined
// NOTE(review): the first lines of the body are not visible in this excerpt.
// Some helpers call `new Node(type)` with no second argument, so `args` is
// presumably defaulted to an empty object there -- confirm.
147 function Node (type, args) {
151 this.type = type // one of the TYPE_* constants above
152 this.name = args.name != null ? args.name : '' // tag name
153 this.text = args.text != null ? args.text : '' // contents for text/comment nodes
154 this.attrs = args.attrs != null ? args.attrs : {}
155 this.children = args.children != null ? args.children : []
156 this.namespace = args.namespace != null ? args.namespace : NS_HTML
157 this.parent = args.parent != null ? args.parent : null
// NOTE(review): the property is `attrs_a` but it is initialised from
// `args.attr_k`; check whether that name mismatch is intentional.
159 this.attrs_a = args.attr_k != null ? args.attr_k : [] // attrs in progress, TYPE_START_TAG only
160 this.token = args.token != null ? args.token : null
161 this.flags = args.flags != null ? args.flags : {}
// id: when an id is supplied, derive a new one by appending "+"; otherwise
// take the next value of the file-level prev_node_id counter, as a string.
162 if (args.id != null) {
163 this.id = args.id + "+"
165 this.id = "" + (++prev_node_id)
// acknowledge_self_closing(): record the 'did_self_close' flag. When this
// node has an associated token the flag is set on that token; the flag is
// also (or, presumably, otherwise -- the else line is not visible in this
// excerpt) set on the node itself.
169 Node.prototype.acknowledge_self_closing = function () {
170 if (this.token != null) {
171 this.token.flag('did_self_close', true)
173 this.flag('did_self_close', true)
// flag(key, value): accessor for the per-node `flags` map. One path stores
// `value` under `key`, the other returns the stored value.
// NOTE(review): the condition choosing between the two paths is not visible;
// presumably it depends on whether `value` was passed -- confirm.
177 Node.prototype.flag = function (key, value) {
179 this.flags[key] = value
181 return this.flags[key]
185 // helpers: (only take args that are normally known when parser creates nodes)
// Each helper wraps `new Node(...)` with the matching TYPE_* constant.

// start-tag token with the given tag name
186 new_open_tag = function (name) {
187 return new Node(TYPE_START_TAG, {name: name})
// end-tag token with the given tag name
189 new_end_tag = function (name) {
190 return new Node(TYPE_END_TAG, {name: name})
// element node (TYPE_TAG) with the given tag name
192 new_element = function (name) {
193 return new Node(TYPE_TAG, {name: name})
// text node carrying `txt`
195 new_text_node = function (txt) {
196 return new Node(TYPE_TEXT, {text: txt})
// character tokens and text nodes are built by the same function
198 new_character_token = new_text_node
// comment node/token carrying `txt`
199 new_comment_token = function (txt) {
200 return new Node(TYPE_COMMENT, {text: txt})
// doctype node/token with the given name
202 new_doctype_token = function (name) {
203 return new Node(TYPE_DOCTYPE, {name: name})
// The next three pass no `args` object to Node.
// end-of-file token
205 new_eof_token = function () {
206 return new Node(TYPE_EOF)
// marker entry for the list of active formatting elements
208 new_afe_marker = function () {
209 return new Node(TYPE_AFE_MARKER)
// bookmark entry used by the adoption agency algorithm
211 new_aaa_bookmark = function () {
212 return new Node(TYPE_AAA_BOOKMARK)
// Character-class strings. Membership is tested with indexOf().
215 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
216 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
217 digits = "0123456789"
218 alnum = lc_alpha + uc_alpha + digits
219 hex_chars = digits + "abcdefABCDEF"
// is_uc_alpha(str): true only for a one-character string that is an ASCII
// uppercase letter. The length check matters because indexOf() would also
// match multi-character substrings such as "AB".
221 is_uc_alpha = function (str) {
222 return str.length === 1 && uc_alpha.indexOf(str) > -1
// is_lc_alpha(str): same as above, for ASCII lowercase letters.
224 is_lc_alpha = function (str) {
225 return str.length === 1 && lc_alpha.indexOf(str) > -1
228 // some SVG elements have dashes in them
229 tag_name_chars = alnum + "-"
231 // http://www.w3.org/TR/html5/infrastructure.html#space-character
// tab, line feed, form feed, carriage return, space
232 space_chars = "\u0009\u000a\u000c\u000d\u0020"
// is_space(txt): true only when txt is exactly one character and that
// character is one of space_chars.
233 is_space = function (txt) {
234 return txt.length === 1 && space_chars.indexOf(txt) > -1
// is_space_tok(t): true only when t is a text token/node whose text is
// exactly one character from space_chars.
236 is_space_tok = function (t) {
237 return t.type === TYPE_TEXT && t.text.length === 1 && space_chars.indexOf(t.text) > -1
// is_input_hidden_tok(t): inspect a token's in-progress attribute list
// (attrs_a, an array of [key, value] pairs) for a `type` attribute whose
// value equals 'hidden', compared case-insensitively. Tokens that are not
// start tags are handled by the early type check.
// NOTE(review): the return statements and the assignment of `a` are not
// visible in this excerpt, and no check of the tag name is visible either;
// presumably callers only pass <input> tokens -- confirm.
240 is_input_hidden_tok = function (t) {
242 if (t.type !== TYPE_START_TAG) {
245 for (i = 0; i < t.attrs_a.length; ++i) {
247 if (a[0] === 'type') {
248 if (a[1].toLowerCase() === 'hidden') {
257 // https://en.wikipedia.org/wiki/Whitespace_character#Unicode
258 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
261 unicode_fixes[0x00] = "\uFFFD"
262 unicode_fixes[0x80] = "\u20AC"
263 unicode_fixes[0x82] = "\u201A"
264 unicode_fixes[0x83] = "\u0192"
265 unicode_fixes[0x84] = "\u201E"
266 unicode_fixes[0x85] = "\u2026"
267 unicode_fixes[0x86] = "\u2020"
268 unicode_fixes[0x87] = "\u2021"
269 unicode_fixes[0x88] = "\u02C6"
270 unicode_fixes[0x89] = "\u2030"
271 unicode_fixes[0x8A] = "\u0160"
272 unicode_fixes[0x8B] = "\u2039"
273 unicode_fixes[0x8C] = "\u0152"
274 unicode_fixes[0x8E] = "\u017D"
275 unicode_fixes[0x91] = "\u2018"
276 unicode_fixes[0x92] = "\u2019"
277 unicode_fixes[0x93] = "\u201C"
278 unicode_fixes[0x94] = "\u201D"
279 unicode_fixes[0x95] = "\u2022"
280 unicode_fixes[0x96] = "\u2013"
281 unicode_fixes[0x97] = "\u2014"
282 unicode_fixes[0x98] = "\u02DC"
283 unicode_fixes[0x99] = "\u2122"
284 unicode_fixes[0x9A] = "\u0161"
285 unicode_fixes[0x9B] = "\u203A"
286 unicode_fixes[0x9C] = "\u0153"
287 unicode_fixes[0x9E] = "\u017E"
288 unicode_fixes[0x9F] = "\u0178"
290 quirks_yes_pi_prefixes = [
291 "+//silmaril//dtd html pro v0r11 19970101//",
292 "-//as//dtd html 3.0 aswedit + extensions//",
293 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
294 "-//ietf//dtd html 2.0 level 1//",
295 "-//ietf//dtd html 2.0 level 2//",
296 "-//ietf//dtd html 2.0 strict level 1//",
297 "-//ietf//dtd html 2.0 strict level 2//",
298 "-//ietf//dtd html 2.0 strict//",
299 "-//ietf//dtd html 2.0//",
300 "-//ietf//dtd html 2.1e//",
301 "-//ietf//dtd html 3.0//",
302 "-//ietf//dtd html 3.2 final//",
303 "-//ietf//dtd html 3.2//",
304 "-//ietf//dtd html 3//",
305 "-//ietf//dtd html level 0//",
306 "-//ietf//dtd html level 1//",
307 "-//ietf//dtd html level 2//",
308 "-//ietf//dtd html level 3//",
309 "-//ietf//dtd html strict level 0//",
310 "-//ietf//dtd html strict level 1//",
311 "-//ietf//dtd html strict level 2//",
312 "-//ietf//dtd html strict level 3//",
313 "-//ietf//dtd html strict//",
314 "-//ietf//dtd html//",
315 "-//metrius//dtd metrius presentational//",
316 "-//microsoft//dtd internet explorer 2.0 html strict//",
317 "-//microsoft//dtd internet explorer 2.0 html//",
318 "-//microsoft//dtd internet explorer 2.0 tables//",
319 "-//microsoft//dtd internet explorer 3.0 html strict//",
320 "-//microsoft//dtd internet explorer 3.0 html//",
321 "-//microsoft//dtd internet explorer 3.0 tables//",
322 "-//netscape comm. corp.//dtd html//",
323 "-//netscape comm. corp.//dtd strict html//",
324 "-//o'reilly and associates//dtd html 2.0//",
325 "-//o'reilly and associates//dtd html extended 1.0//",
326 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
327 "-//sq//dtd html 2.0 hotmetal + extensions//",
328 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
329 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
330 "-//spyglass//dtd html 2.0 extended//",
331 "-//sun microsystems corp.//dtd hotjava html//",
332 "-//sun microsystems corp.//dtd hotjava strict html//",
333 "-//w3c//dtd html 3 1995-03-24//",
334 "-//w3c//dtd html 3.2 draft//",
335 "-//w3c//dtd html 3.2 final//",
336 "-//w3c//dtd html 3.2//",
337 "-//w3c//dtd html 3.2s draft//",
338 "-//w3c//dtd html 4.0 frameset//",
339 "-//w3c//dtd html 4.0 transitional//",
340 "-//w3c//dtd html experimental 19960712//",
341 "-//w3c//dtd html experimental 970421//",
342 "-//w3c//dtd w3 html//",
343 "-//w3o//dtd w3 html 3.0//",
344 "-//webtechs//dtd mozilla html 2.0//",
345 "-//webtechs//dtd mozilla html//",
348 // These are the character references that don't need a terminating semicolon
349 // min length: 2, max: 6, none are a prefix of any other.
351 Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
352 aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
353 aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
354 Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
355 curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
356 ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
357 euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
358 Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
359 igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
360 lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
361 Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
362 Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
363 Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
364 pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
365 shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
366 times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
367 ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
371 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
372 //raw_text_elements = ['script', 'style']
373 //escapable_raw_text_elements = ['textarea', 'title']
374 // http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
376 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
377 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
378 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
379 'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
380 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
381 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
382 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
383 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
384 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
385 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
386 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
387 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
388 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
389 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
393 // http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
395 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
396 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
397 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
398 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
399 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
400 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
401 'determinant', 'diff', 'divergence', 'divide', 'domain',
402 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
403 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
404 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
405 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
406 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
407 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
408 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
409 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
410 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
411 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
412 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
413 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
414 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
415 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
416 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
417 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
418 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
419 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
420 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
421 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
422 'vectorproduct', 'xor'
424 // foreign_elements = [svg_elements..., mathml_elements...]
425 //normal_elements = All other allowed HTML elements are normal elements.
429 address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
430 aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
431 blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
432 caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
433 details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
434 embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
435 footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
436 h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
437 header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
438 img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
439 listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,
441 menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these
443 meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
444 noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
445 plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
446 select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
447 table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
448 textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
449 tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,
452 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
453 'annotation-xml': NS_MATHML,
456 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
459 formatting_elements = {
460 a: true, b: true, big: true, code: true, em: true, font: true, i: true,
461 nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
// Lookup table: tag name -> namespace it must be in to count as a MathML
// text integration point.
465 mathml_text_integration = {
466 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
// is_mathml_text_integration_point(el): true when el's name is in the table
// above AND el is in the namespace recorded for that name.
468 is_mathml_text_integration_point = function (el) {
469 return mathml_text_integration[el.name] === el.namespace
// is_html_integration(el): tests whether an element is an HTML integration
// point. Two cases are examined:
//   - a MathML 'annotation-xml' element whose `encoding` attribute equals
//     'text/html' or 'application/xhtml+xml' (case-insensitive)
//   - an SVG 'foreignObject', 'desc' or 'title' element
// It reads el.attrs (the attribute object), presumably the reason tokens,
// which keep in-progress attributes in attrs_a, must not be passed.
// NOTE(review): the return statements are not visible in this excerpt.
471 is_html_integration = function (el) { // DON'T PASS A TOKEN
472 if (el.namespace === NS_MATHML) {
473 if (el.name === 'annotation-xml') {
474 if (el.attrs.encoding != null) {
475 if (el.attrs.encoding.toLowerCase() === 'text/html') {
478 if (el.attrs.encoding.toLowerCase() === 'application/xhtml+xml') {
485 if (el.namespace === NS_SVG) {
486 if (el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title') {
494 h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
497 foster_parenting_targets = {
// el_is_special(e): true when e's tag name is listed in special_elements and
// the namespace recorded there equals e's namespace.
518 el_is_special = function (e) {
519 return special_elements[e.name] === e.namespace
// adp = address, div, p (all in the HTML namespace)
522 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
// el_is_special_not_adp(el): like el_is_special, but false for the HTML
// elements address, div and p.
523 el_is_special_not_adp = function (el) {
524 return special_elements[el.name] === el.namespace && adp_els[el.name] !== el.namespace
528 altglyph: 'altGlyph',
529 altglyphdef: 'altGlyphDef',
530 altglyphitem: 'altGlyphItem',
531 animatecolor: 'animateColor',
532 animatemotion: 'animateMotion',
533 animatetransform: 'animateTransform',
534 clippath: 'clipPath',
536 fecolormatrix: 'feColorMatrix',
537 fecomponenttransfer: 'feComponentTransfer',
538 fecomposite: 'feComposite',
539 feconvolvematrix: 'feConvolveMatrix',
540 fediffuselighting: 'feDiffuseLighting',
541 fedisplacementmap: 'feDisplacementMap',
542 fedistantlight: 'feDistantLight',
543 fedropshadow: 'feDropShadow',
549 fegaussianblur: 'feGaussianBlur',
552 femergenode: 'feMergeNode',
553 femorphology: 'feMorphology',
554 feoffset: 'feOffset',
555 fepointlight: 'fePointLight',
556 fespecularlighting: 'feSpecularLighting',
557 fespotlight: 'feSpotLight',
559 feturbulence: 'feTurbulence',
560 foreignobject: 'foreignObject',
561 glyphref: 'glyphRef',
562 lineargradient: 'linearGradient',
563 radialgradient: 'radialGradient',
566 svg_attribute_fixes = {
567 attributename: 'attributeName',
568 attributetype: 'attributeType',
569 basefrequency: 'baseFrequency',
570 baseprofile: 'baseProfile',
571 calcmode: 'calcMode',
572 clippathunits: 'clipPathUnits',
573 contentscripttype: 'contentScriptType',
574 contentstyletype: 'contentStyleType',
575 diffuseconstant: 'diffuseConstant',
576 edgemode: 'edgeMode',
577 externalresourcesrequired: 'externalResourcesRequired',
578 // WHATWG removes this: filterres: 'filterRes',
579 filterunits: 'filterUnits',
580 glyphref: 'glyphRef',
581 gradienttransform: 'gradientTransform',
582 gradientunits: 'gradientUnits',
583 kernelmatrix: 'kernelMatrix',
584 kernelunitlength: 'kernelUnitLength',
585 keypoints: 'keyPoints',
586 keysplines: 'keySplines',
587 keytimes: 'keyTimes',
588 lengthadjust: 'lengthAdjust',
589 limitingconeangle: 'limitingConeAngle',
590 markerheight: 'markerHeight',
591 markerunits: 'markerUnits',
592 markerwidth: 'markerWidth',
593 maskcontentunits: 'maskContentUnits',
594 maskunits: 'maskUnits',
595 numoctaves: 'numOctaves',
596 pathlength: 'pathLength',
597 patterncontentunits: 'patternContentUnits',
598 patterntransform: 'patternTransform',
599 patternunits: 'patternUnits',
600 pointsatx: 'pointsAtX',
601 pointsaty: 'pointsAtY',
602 pointsatz: 'pointsAtZ',
603 preservealpha: 'preserveAlpha',
604 preserveaspectratio: 'preserveAspectRatio',
605 primitiveunits: 'primitiveUnits',
608 repeatcount: 'repeatCount',
609 repeatdur: 'repeatDur',
610 requiredextensions: 'requiredExtensions',
611 requiredfeatures: 'requiredFeatures',
612 specularconstant: 'specularConstant',
613 specularexponent: 'specularExponent',
614 spreadmethod: 'spreadMethod',
615 startoffset: 'startOffset',
616 stddeviation: 'stdDeviation',
617 stitchtiles: 'stitchTiles',
618 surfacescale: 'surfaceScale',
619 systemlanguage: 'systemLanguage',
620 tablevalues: 'tableValues',
623 textlength: 'textLength',
625 viewtarget: 'viewTarget',
626 xchannelselector: 'xChannelSelector',
627 ychannelselector: 'yChannelSelector',
628 zoomandpan: 'zoomAndPan'
630 foreign_attr_fixes = {
631 'xlink:actuate': 'xlink actuate',
632 'xlink:arcrole': 'xlink arcrole',
633 'xlink:href': 'xlink href',
634 'xlink:role': 'xlink role',
635 'xlink:show': 'xlink show',
636 'xlink:title': 'xlink title',
637 'xlink:type': 'xlink type',
638 'xml:base': 'xml base',
639 'xml:lang': 'xml lang',
640 'xml:space': 'xml space',
642 'xmlns:xlink': 'xmlns xlink'
// The three adjust_* helpers walk a token's in-progress attribute list
// (t.attrs_a, an array of [name, value] pairs) and rewrite attribute names
// in place.
// NOTE(review): the line assigning `a` inside each loop is not visible in
// this excerpt; presumably `a = t.attrs_a[i]` -- confirm.

// adjust_mathml_attributes(t): restore the mixed-case spelling of
// 'definitionURL'.
644 adjust_mathml_attributes = function (t) {
646 for (i = 0; i < t.attrs_a.length; ++i) {
648 if (a[0] === 'definitionurl') {
649 a[0] = 'definitionURL'
// adjust_svg_attributes(t): replace any name found in svg_attribute_fixes
// with the spelling stored there.
653 adjust_svg_attributes = function (t) {
655 for (i = 0; i < t.attrs_a.length; ++i) {
657 if (svg_attribute_fixes[a[0]] != null) {
658 a[0] = svg_attribute_fixes[a[0]]
// adjust_foreign_attributes(t): replace any name found in foreign_attr_fixes
// (e.g. 'xlink:href') with the value stored there (e.g. 'xlink href').
662 adjust_foreign_attributes = function (t) {
665 for (i = 0; i < t.attrs_a.length; ++i) {
667 if (foreign_attr_fixes[a[0]] != null) {
668 a[0] = foreign_attr_fixes[a[0]]
673 // decode_named_char_ref()
675 // The list of named character references is _huge_ so if we're running in a
676 // browser, we get the browser to decode them, rather than increasing the code
677 // size to include the table.
// Choose the implementation of _decode_named_char_ref:
//   - when context is 'module', load it from ./parser_no_browser_helper.js
//   - otherwise use a <textarea> element: "&name;" is assigned to innerHTML
//     and the decoded text is read back from .value
// The comparison `decoded === txt` detects the case where nothing was
// decoded, i.e. the name is not a known character reference.
// NOTE(review): what is returned in that case, and the normal return, are
// not visible in this excerpt.
678 if (context === 'module') {
679 _decode_named_char_ref = require('./parser_no_browser_helper.js')
681 decode_named_char_ref_el = document.createElement('textarea')
682 _decode_named_char_ref = function (txt) {
684 txt = "&" + txt + ";"
685 decode_named_char_ref_el.innerHTML = txt
686 decoded = decode_named_char_ref_el.value
687 if (decoded === txt) {
693 // Pass the name of a named entity _that has a terminating semicolon_
694 // Entities without terminating semicolons should use legacy_char_refs[]
695 // Do not include the "&" or ";" in your argument, eg pass "alpha"
// Results are memoised in decode_named_char_ref_cache, keyed by entity name,
// so each name reaches _decode_named_char_ref at most once per non-null
// result. A null/undefined result fails the `!= null` cache check, so it is
// looked up again on the next call.
696 decode_named_char_ref_cache = {}
697 decode_named_char_ref = function (txt) {
699 decoded = decode_named_char_ref_cache[txt]
700 if (decoded != null) {
703 decoded = _decode_named_char_ref(txt)
// store the result in the cache and return it in one expression
704 return decode_named_char_ref_cache[txt] = decoded
707 parse_html = function (args_html, args) {
708 var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, template_tag_is_open, 
temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
713 cur = null // index of next char in txt to be parsed
714 // declare doc and tokenizer variables so they're in scope below
716 open_els = null // stack of open elements
717 afe = null // active formatting elements
718 template_ins_modes = null
720 original_ins_mode = null
722 tok_cur_tag = null // partially parsed tag
723 flag_scripting = null
724 flag_frameset_ok = null
726 flag_foster_parenting = null
727 form_element_pointer = null
728 temporary_buffer = null
729 pending_table_character_tokens = null
730 head_element_pointer = null
731 flag_fragment_parsing = null
732 context_element = null
734 stop_parsing = function () {
738 parse_error = function () {
739 if (args.error_cb != null) {
744 // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
745 // "Noah's Ark clause" but with three
// afe_push(new_el): push new_el onto the list of active formatting elements
// (`afe`). The list is scanned from index 0; a marker entry is treated
// specially. An existing entry is compared with new_el by name, namespace
// and attributes, and the attribute comparison runs in both directions
// (every attr of el against new_el, then every attr of new_el against el).
// NOTE(review): the counting, removal and final insertion statements are not
// visible in this excerpt, and neither are the assignments of `el` and `v`.
// Presumably the scan stops at the marker -- confirm against the complete
// source.
746 afe_push = function (new_el) {
747 var attrs_match, el, i, j, k, matches, v
749 for (i = 0; i < afe.length; ++i) {
751 if (el.type === TYPE_AFE_MARKER) {
754 if (el.name === new_el.name && el.namespace === new_el.namespace) {
756 for (k in el.attrs) {
758 if (new_el.attrs[k] !== v) {
764 for (k in new_el.attrs) {
766 if (el.attrs[k] !== v) {
// afe_push_marker(): insert a fresh marker at index 0 of the list of active
// formatting elements (unshift adds at the front of the array).
784 afe_push_marker = function () {
785 afe.unshift(new_afe_marker())
788 // The functions below implement the Tree Construction algorithm
789 // http://www.w3.org/TR/html5/syntax.html#tree-construction
791 // But first... the helpers
// template_tag_is_open(): scan the stack of open elements for an HTML
// <template> element.
// NOTE(review): the assignment of `el` and the return statements are not
// visible in this excerpt; presumably it returns true on the first match and
// false otherwise -- confirm.
792 template_tag_is_open = function () {
794 for (i = 0; i < open_els.length; ++i) {
796 if (el.name === 'template' && el.namespace === NS_HTML) {
// is_in_scope_x(tag_name, scope, namespace): generic "has an element in
// scope" test. Walks open_els starting at index 0 and examines each element:
//   - it is the target when its name equals tag_name and `namespace` is
//     either null (any namespace accepted) or equal to its namespace
//   - it is a scope boundary when `scope` maps its name to its namespace
// NOTE(review): the assignment of `el` and the return statements are not
// visible in this excerpt; presumably a target hit returns true and a
// boundary hit returns false -- confirm.
802 is_in_scope_x = function (tag_name, scope, namespace) {
804 for (i = 0; i < open_els.length; ++i) {
806 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
809 if (scope[el.name] === el.namespace) {
// is_in_scope_x_y(tag_name, scope, scope2, namespace): same walk as
// is_in_scope_x, but an element is a boundary when it appears in either of
// the two boundary tables.
815 is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
817 for (i = 0; i < open_els.length; ++i) {
819 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
822 if (scope[el.name] === el.namespace) {
825 if (scope2[el.name] === el.namespace) {
832 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
833 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
836 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
837 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
839 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
841 button_scopers = { button: NS_HTML }
842 li_scopers = { ol: NS_HTML, ul: NS_HTML }
843 table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
844 is_in_scope = function (tag_name, namespace) {
845 if (namespace == null) {
848 return is_in_scope_x(tag_name, standard_scopers, namespace)
850 is_in_button_scope = function (tag_name, namespace) {
851 if (namespace == null) {
854 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
// "In table scope" test: boundaries come from table_scopers only.
// namespace is optional; the == null test below also catches undefined.
856 is_in_table_scope = function (tag_name, namespace) {
857 if (namespace == null) {
860 return is_in_scope_x(tag_name, table_scopers, namespace)
862 // aka is_in_list_item_scope
// "In list item scope" test: boundaries are standard_scopers plus li_scopers.
// namespace is optional; the == null test below also catches undefined.
863 is_in_li_scope = function (tag_name, namespace) {
864 if (namespace == null) {
867 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
869 is_in_select_scope = function (tag_name, namespace) {
871 if (namespace == null) {
874 for (i = 0; i < open_els.length; ++i) {
876 if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
879 if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
885 // this checks for a particular element, not by name
886 // this requires a namespace match
// needle is an element object; open_els is walked from index 0 and each entry
// is also checked against standard_scopers by name and namespace.
887 el_is_in_scope = function (needle) {
889 for (i = 0; i < open_els.length; ++i) {
894 if (standard_scopers[el.name] === el.namespace) {
901 clear_to_table_stopers = {
// Tests the current node (open_els[0]) against clear_to_table_stopers.
// NOTE(review): only the element name is looked up here, whereas
// clear_stack_to_table_body_context also compares the namespace - confirm
// whether that difference is intended.
906 clear_stack_to_table_context = function () {
908 if (clear_to_table_stopers[open_els[0].name] != null) {
914 clear_to_table_body_stopers = {
// Tests the current node (open_els[0]) against clear_to_table_body_stopers;
// both the name and the namespace have to match the table entry.
921 clear_stack_to_table_body_context = function () {
923 if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
929 clear_to_table_row_stopers = {
// Tests the current node (open_els[0]) against clear_to_table_row_stopers.
// NOTE(review): only the element name is looked up here (no namespace
// comparison) - confirm that is intended.
934 clear_stack_to_table_row_context = function () {
936 if (clear_to_table_row_stopers[open_els[0].name] != null) {
// Works through afe (list of active formatting elements) looking for a marker
// entry; an empty list is handled explicitly.
942 clear_afe_to_marker = function () {
945 if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
// a marker entry is recognised by its type
949 if (el.type === TYPE_AFE_MARKER) {
956 // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
957 reset_ins_mode = function () {
958 var ancestor, ancestor_i, last, node, node_i
959 // 1. Let last be false.
961 // 2. Let node be the last node in the stack of open elements.
963 node = open_els[node_i]
964 // 3. Loop: If node is the first node in the stack of open elements,
965 // then set last to true, and, if the parser was originally created as
966 // part of the HTML fragment parsing algorithm (fragment case) set node
967 // to the context element.
969 if (node_i === open_els.length - 1) {
971 if (flag_fragment_parsing) {
972 node = context_element
975 // 4. If node is a select element, run these substeps:
976 if (node.name === 'select' && node.namespace === NS_HTML) {
977 // 1. If last is true, jump to the step below labeled done.
979 // 2. Let ancestor be node.
982 // 3. Loop: If ancestor is the first node in the stack of
983 // open elements, jump to the step below labeled done.
985 if (ancestor_i === open_els.length - 1) {
988 // 4. Let ancestor be the node before ancestor in the stack
991 ancestor = open_els[ancestor_i]
992 // 5. If ancestor is a template node, jump to the step below
994 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
997 // 6. If ancestor is a table node, switch the insertion mode
998 // to "in select in table" and abort these steps.
999 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
1000 ins_mode = ins_mode_in_select_in_table
1003 // 7. Jump back to the step labeled loop.
1006 // 8. Done: Switch the insertion mode to "in select" and abort
1008 ins_mode = ins_mode_in_select
1011 // 5. If node is a td or th element and last is false, then switch
1012 // the insertion mode to "in cell" and abort these steps.
1013 if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1014 ins_mode = ins_mode_in_cell
1017 // 6. If node is a tr element, then switch the insertion mode to "in
1018 // row" and abort these steps.
1019 if (node.name === 'tr' && node.namespace === NS_HTML) {
1020 ins_mode = ins_mode_in_row
1023 // 7. If node is a tbody, thead, or tfoot element, then switch the
1024 // insertion mode to "in table body" and abort these steps.
1025 if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1026 ins_mode = ins_mode_in_table_body
1029 // 8. If node is a caption element, then switch the insertion mode
1030 // to "in caption" and abort these steps.
1031 if (node.name === 'caption' && node.namespace === NS_HTML) {
1032 ins_mode = ins_mode_in_caption
1035 // 9. If node is a colgroup element, then switch the insertion mode
1036 // to "in column group" and abort these steps.
1037 if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1038 ins_mode = ins_mode_in_column_group
1041 // 10. If node is a table element, then switch the insertion mode to
1042 // "in table" and abort these steps.
1043 if (node.name === 'table' && node.namespace === NS_HTML) {
1044 ins_mode = ins_mode_in_table
1047 // 11. If node is a template element, then switch the insertion mode
1048 // to the current template insertion mode and abort these steps.
1049 if (node.name === 'template' && node.namespace === NS_HTML) {
1050 ins_mode = template_ins_modes[0]
1053 // 12. If node is a head element and last is true, then switch the
1054 // insertion mode to "in body" ("in body"! not "in head"!) and abort
1055 // these steps. (fragment case)
1056 if (node.name === 'head' && node.namespace === NS_HTML && last) {
1057 ins_mode = ins_mode_in_body
1060 // 13. If node is a head element and last is false, then switch the
1061 // insertion mode to "in head" and abort these steps.
1062 if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1063 ins_mode = ins_mode_in_head
1066 // 14. If node is a body element, then switch the insertion mode to
1067 // "in body" and abort these steps.
1068 if (node.name === 'body' && node.namespace === NS_HTML) {
1069 ins_mode = ins_mode_in_body
1072 // 15. If node is a frameset element, then switch the insertion mode
1073 // to "in frameset" and abort these steps. (fragment case)
1074 if (node.name === 'frameset' && node.namespace === NS_HTML) {
1075 ins_mode = ins_mode_in_frameset
1078 // 16. If node is an html element, run these substeps:
1079 if (node.name === 'html' && node.namespace === NS_HTML) {
1080 // 1. If the head element pointer is null, switch the insertion
1081 // mode to "before head" and abort these steps. (fragment case)
1082 if (head_element_pointer === null) {
1083 ins_mode = ins_mode_before_head
1085 // 2. Otherwise, the head element pointer is not null,
1086 // switch the insertion mode to "after head" and abort these
1088 ins_mode = ins_mode_after_head
1092 // 17. If last is true, then switch the insertion mode to "in body"
1093 // and abort these steps. (fragment case)
1095 ins_mode = ins_mode_in_body
1098 // 18. Let node now be the node before node in the stack of open
1101 node = open_els[node_i]
1102 // 19. Return to the step labeled loop.
1108 // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
// When parsing a fragment and only one element is on the stack of open
// elements, the context element stands in for the current node.
1109 adjusted_current_node = function () {
1110 if (open_els.length === 1 && flag_fragment_parsing) {
1111 return context_element
1116 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1117 // this implementation is structured (mostly) as described at the link above.
1118 // capitalized comments are the "labels" described at the link above.
1119 reconstruct_afe = function () {
// empty list: checked first
1121 if (afe.length === 0) {
// entry 0 is a marker, or an element that is still in open_els
1124 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
// i has reached the last index of afe
1130 if (i === afe.length - 1) {
// same marker / still-open test, applied to entry i
1134 if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
// build and insert a new element from the token stored on the entry
1141 el = insert_html_element(afe[i].token)
1150 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1151 // adoption agency algorithm
1153 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1154 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1155 // http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1156 adoption_agency = function (subject) {
1157 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, s, t, u, w, y, z
1158 // this block implements the W3C spec
1159 // # 1. If the current node is an HTML element whose tag name is subject,
1160 // # then run these substeps:
1162 // # 1. Let element be the current node.
1164 // # 2. Pop element off the stack of open elements.
1166 // # 3. If element is also in the list of active formatting elements,
1167 // # remove the element from the list.
1169 // # 4. Abort the adoption agency algorithm.
1170 // if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1171 // el = open_els.shift()
1172 // # remove it from the list of active formatting elements (if found)
1178 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1179 // If the current node is an HTML element whose tag name is subject, and
1180 // the current node is not in the list of active formatting elements,
1181 // then pop the current node off the stack of open elements, and abort
1183 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1184 // remove it from the list of active formatting elements (if found)
1186 for (i = 0; i < afe.length; ++i) {
1188 if (el === open_els[0]) {
1206 // 5. Let formatting element be the last element in the list of
1207 // active formatting elements that: is between the end of the list
1208 // and the last scope marker in the list, if any, or the start of
1209 // the list otherwise, and has the tag name subject.
1211 for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1213 if (t.type === TYPE_AFE_MARKER) {
1216 if (t.name === subject) {
1221 // If there is no such element, then abort these steps and instead
1222 // act as described in the "any other end tag" entry above.
1224 in_body_any_other_end_tag(subject)
1227 // 6. If formatting element is not in the stack of open elements,
1228 // then this is a parse error; remove the element from the list, and
1229 // abort these steps.
1231 for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1232 t = open_els[fe_of_open_els]
1240 // "remove it from the list" must mean afe, since it's not in open_els
1241 afe.splice(fe_of_afe, 1)
1244 // 7. If formatting element is in the stack of open elements, but
1245 // the element is not in scope, then this is a parse error; abort
1247 if (!el_is_in_scope(fe)) {
1251 // 8. If formatting element is not the current node, this is a parse
1252 // error. (But do not abort these steps.)
1253 if (open_els[0] !== fe) {
1257 // 9. Let furthest block be the topmost node in the stack of open
1258 // elements that is lower in the stack than formatting element, and
1259 // is an element in the special category. There might not be one.
1261 fb_of_open_els = null
1262 for (i = 0; i < open_els.length; ++i) {
1267 if (el_is_special(t)) {
1270 // and continue, to see if there's one that's more "topmost"
1273 // 10. If there is no furthest block, then the UA must first pop all
1274 // the nodes from the bottom of the stack of open elements, from the
1275 // current node up to and including formatting element, then remove
1276 // formatting element from the list of active formatting elements,
1277 // and finally abort these steps.
1280 t = open_els.shift()
1282 afe.splice(fe_of_afe, 1)
1287 // 11. Let common ancestor be the element immediately above
1288 // formatting element in the stack of open elements.
1289 ca = open_els[fe_of_open_els + 1] // common ancestor
1291 node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1292 // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1293 bookmark = new_aaa_bookmark()
1294 for (i = 0; i < afe.length; ++i) {
1297 afe.splice(i, 0, bookmark)
1301 node = last_node = fb
1305 // 3. Let node be the element immediately above node in the
1306 // stack of open elements, or if node is no longer in the stack
1307 // of open elements (e.g. because it got removed by this
1308 // algorithm), the element that was immediately above node in
1309 // the stack of open elements before node was removed.
1311 for (i = 0; i < open_els.length; ++i) {
1314 node_next = open_els[i + 1]
1318 node = node_next != null ? node_next : node_above
1319 // TODO make sure node_above gets re-set if/when node is removed from open_els
1321 // 4. If node is formatting element, then go to the next step in
1322 // the overall algorithm.
1326 // 5. If inner loop counter is greater than three and node is in
1327 // the list of active formatting elements, then remove node from
1328 // the list of active formatting elements.
1330 if ((i = afe.indexOf(node)) !== -1) {
1337 // 6. If node is not in the list of active formatting elements,
1338 // then remove node from the stack of open elements and then go
1339 // back to the step labeled inner loop.
1341 if ((i = open_els.indexOf(node)) !== -1) {
1342 node_above = open_els[i + 1]
1343 open_els.splice(i, 1)
1347 // 7. create an element for the token for which the element node
1348 // was created, in the HTML namespace, with common ancestor as
1349 // the intended parent; replace the entry for node in the list
1350 // of active formatting elements with an entry for the new
1351 // element, replace the entry for node in the stack of open
1352 // elements with an entry for the new element, and let node be
1354 new_node = token_to_element(node.token, NS_HTML, ca)
1355 if ((i = afe.indexOf(node)) !== -1) {
1358 if ((i = open_els.indexOf(node)) !== -1) {
1359 node_above = open_els[i + 1]
1360 open_els[i] = new_node
1363 // 8. If last node is furthest block, then move the
1364 // aforementioned bookmark to be immediately after the new node
1365 // in the list of active formatting elements.
1366 if (last_node === fb) {
1367 if ((i = afe.indexOf(bookmark)) !== -1) {
1370 if ((i = afe.indexOf(node)) !== -1) {
1371 // "after" means lower
1372 afe.splice(i, 0, bookmark) // "after as <-
1375 // 9. Insert last node into node, first removing it from its
1376 // previous parent node if any.
1377 if (last_node.parent != null) {
1378 if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1379 last_node.parent.children.splice(i, 1)
1382 node.children.push(last_node)
1383 last_node.parent = node
1384 // 10. Let last node be node.
1386 // 11. Return to the step labeled inner loop.
1388 // 14. Insert whatever last node ended up being in the previous step
1389 // at the appropriate place for inserting a node, but using common
1390 // ancestor as the override target.
1392 // In the case where fe is immediately followed by fb:
1393 // * inner loop exits out early (node==fe)
1394 // * last_node is fb
1395 // * last_node is still in the tree (not a duplicate)
1396 if (last_node.parent != null) {
1397 if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1398 last_node.parent.children.splice(i, 1)
1401 // can't use standard insert token thing, because it's already in
1402 // open_els and must stay at its current position in open_els
1403 dest = adjusted_insertion_location(ca)
1404 dest[0].children.splice(dest[1], 0, last_node)
1405 last_node.parent = dest[0]
1406 // 15. Create an element for the token for which formatting element
1407 // was created, in the HTML namespace, with furthest block as the
1409 new_element = token_to_element(fe.token, NS_HTML, fb)
1410 // 16. Take all of the child nodes of furthest block and append them
1411 // to the element created in the last step.
1412 while (fb.children.length) {
1413 t = fb.children.shift()
1414 t.parent = new_element
1415 new_element.children.push(t)
1417 // 17. Append that new element to furthest block.
1418 new_element.parent = fb
1419 fb.children.push(new_element)
1420 // 18. Remove formatting element from the list of active formatting
1421 // elements, and insert the new element into the list of active
1422 // formatting elements at the position of the aforementioned
1424 if ((i = afe.indexOf(fe)) !== -1) {
1427 if ((i = afe.indexOf(bookmark)) !== -1) {
1428 afe[i] = new_element
1430 // 19. Remove formatting element from the stack of open elements,
1431 // and insert the new element into the stack of open elements
1432 // immediately below the position of furthest block in that stack.
1433 if ((i = open_els.indexOf(fe)) !== -1) {
1434 open_els.splice(i, 1)
1436 if ((i = open_els.indexOf(fb)) !== -1) {
1437 open_els.splice(i, 0, new_element)
1439 // 20. Jump back to the step labeled outer loop.
1443 // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
// Generates implied end tags (leaving 'p' alone), then pops open_els until an
// HTML <p> element has been popped.
1444 close_p_element = function () {
1445 generate_implied_end_tags('p') // arg is exception
// the current node is expected to be an HTML <p> at this point
1446 if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
// the length guard keeps at least one element on the stack
1449 while (open_els.length > 1) { // just in case
1450 el = open_els.shift()
1451 if (el.name === 'p' && el.namespace === NS_HTML) {
// Guarded on an HTML <p> element being in button scope.
1456 close_p_if_in_button_scope = function () {
1457 if (is_in_button_scope('p', NS_HTML)) {
1462 // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1463 // aka insert_a_character = function (t) {
// t is a text token/node. dest is the [parent, index] pair returned by
// adjusted_insertion_location().
1464 insert_character = function (t) {
1466 dest = adjusted_insertion_location()
1467 // fixfull check for Document node
// prev is the sibling just before the insertion index
1469 prev = dest[0].children[dest[1] - 1]
// special-cased when that sibling is itself a text node
1470 if (prev.type === TYPE_TEXT) {
// otherwise t is spliced into the parent's children at the insertion index
1475 dest[0].children.splice(dest[1], 0, t)
1479 // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
// Tree-construction entry point for one token t. A series of tests on the
// adjusted current node (acn) and on the token comes first; the last step
// shown hands the token to in_foreign_content.
1480 process_token = function (t) {
1482 acn = adjusted_current_node()
// acn is an element in the HTML namespace
1487 if (acn.namespace === NS_HTML) {
// acn is a MathML text integration point ...
1491 if (is_mathml_text_integration_point(acn)) {
// ... and t is a start tag other than mglyph / malignmark
1492 if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
// ... or t is text
1496 if (t.type === TYPE_TEXT) {
// acn is MathML annotation-xml and t is an <svg> start tag
1501 if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
// acn is an HTML integration point and t is a start tag or text
1505 if (is_html_integration(acn)) {
1506 if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
// end of file
1511 if (t.type === TYPE_EOF) {
1515 in_foreign_content(t)
1519 // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1520 // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
// Returns a two-element array [target, target_i]: the node to insert into and
// the index within target.children at which to insert.
//   override_target: optional node to use instead of the current node
1521 adjusted_insertion_location = function (override_target) {
1522 var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, target, target_i
1523 // 1. If there was an override target specified, then let target be the
1525 if (override_target != null) {
1526 target = override_target
1527 } else { // Otherwise, let target be the current node.
1528 target = open_els[0]
1530 // 2. Determine the adjusted insertion location using the first matching
1531 // steps from the following list:
1533 // If foster parenting is enabled and target is a table, tbody, tfoot,
1534 // thead, or tr element Foster parenting happens when content is
1535 // misnested in tables.
1536 if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
1537 while (true) { // once. this is here so we can ``break`` to "abort these substeps"
1538 // 1. Let last template be the last template element in the
1539 // stack of open elements, if any.
1540 last_template = null
1541 last_template_i = null
1542 for (i = 0; i < open_els.length; ++i) {
1544 if (el.name === 'template' && el.namespace === NS_HTML) {
1550 // 2. Let last table be the last table element in the stack of
1551 // open elements, if any.
1554 for (i = 0; i < open_els.length; ++i) {
1556 if (el.name === 'table' && el.namespace === NS_HTML) {
1562 // 3. If there is a last template and either there is no last
1563 // table, or there is one, but last template is lower (more
1564 // recently added) than last table in the stack of open
1565 // elements, then: let adjusted insertion location be inside
1566 // last template's template contents, after its last child (if
1567 // any), and abort these substeps.
1568 if (last_template && (last_table === null || last_template_i < last_table_i)) {
1569 target = last_template // fixfull should be its contents
1570 target_i = target.children.length
1573 // 4. If there is no last table, then let adjusted insertion
1574 // location be inside the first element in the stack of open
1575 // elements (the html element), after its last child (if any),
1576 // and abort these substeps. (fragment case)
1577 if (last_table === null) {
1579 target = open_els[open_els.length - 1]
1580 target_i = target.children.length
1583 // 5. If last table has a parent element, then let adjusted
1584 // insertion location be inside last table's parent element,
1585 // immediately before last table, and abort these substeps.
1586 if (last_table.parent != null) {
1587 for (i = 0; i < last_table.parent.children.length; ++i) {
1588 c = last_table.parent.children[i]
1589 if (c === last_table) {
1590 target = last_table.parent
1597 // 6. Let previous element be the element immediately above last
1598 // table in the stack of open elements.
1600 // huh? how could it not have a parent?
1601 previous_element = open_els[last_table_i + 1]
1602 // 7. Let adjusted insertion location be inside previous
1603 // element, after its last child (if any).
1604 target = previous_element
1605 target_i = target.children.length
1606 // Note: These steps are involved in part because it's possible
1607 // for elements, the table element in this case in particular,
1608 // to have been moved by a script around in the DOM, or indeed
1609 // removed from the DOM entirely, after the element was inserted
1611 break // don't really loop
1614 // Otherwise Let adjusted insertion location be inside target, after
1615 // its last child (if any).
1616 target_i = target.children.length
1619 // 3. If the adjusted insertion location is inside a template element,
1620 // let it instead be inside the template element's template contents,
1621 // after its last child (if any).
1622 // fixfull (template)
1624 // 4. Return the adjusted insertion location.
1625 return [target, target_i]
1628 // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1629 // aka create_an_element_for_token
// Builds a TYPE_TAG Node from start-tag token t.
//   t:               token; t.attrs_a is read as a list of [name, value] pairs
//   namespace:       namespace stored on the new element
//   intended_parent: NOTE(review): no use of this argument is apparent in the
//                    statements below - confirm
1630 token_to_element = function (t, namespace, intended_parent) {
1632 // convert attributes into a hash
1634 for (i = 0; i < t.attrs_a.length; ++i) {
1636 attrs[a[0]] = a[1] // TODO check what to do with duplicate attrs
// the originating token is kept on the node as .token
1638 el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
1640 // TODO 2. If the newly created element has an xmlns attribute in the
1641 // XMLNS namespace whose value is not exactly the same as the element's
1642 // namespace, that is a parse error. Similarly, if the newly created
1643 // element has an xmlns:xlink attribute in the XMLNS namespace whose
1644 // value is not the XLink Namespace, that is a parse error.
1646 // fixfull: the spec says stuff about form pointers and ownerDocument
1651 // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
// Creates an element for token in the given namespace, splices it into the
// tree at the adjusted insertion location and makes it the current node
// (front of open_els).
1652 insert_foreign_element = function (token, namespace) {
1653 var ail, ail_el, ail_i, el
1654 ail = adjusted_insertion_location()
1657 el = token_to_element(token, namespace, ail_el)
1658 // TODO skip this next step if it's broken (eg ail_el is document with child already)
1660 ail_el.children.splice(ail_i, 0, el)
1661 open_els.unshift(el)
1664 // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
// Convenience wrapper: insert_foreign_element with the HTML namespace.
1665 insert_html_element = function (token) {
1666 return insert_foreign_element(token, NS_HTML)
1669 // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1670 // position should be [node, index_within_children]
// Splices comment node t into position[0].children at index position[1].
// When position is omitted, the adjusted insertion location is used.
1671 insert_comment = function (t, position) {
1672 if (position == null) {
1673 position = adjusted_insertion_location()
1675 position[0].children.splice(position[1], 0, t)
1680 // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Inserts an HTML element for t, puts the tokenizer into the RAWTEXT state,
// remembers the current insertion mode in original_ins_mode and switches the
// insertion mode to "text".
1681 parse_generic_raw_text = function (t) {
1682 insert_html_element(t)
1683 tok_state = tok_state_rawtext
1684 original_ins_mode = ins_mode
1685 ins_mode = ins_mode_text
// Inserts an HTML element for t, puts the tokenizer into the RCDATA state,
// remembers the current insertion mode in original_ins_mode and switches the
// insertion mode to "text".
1687 parse_generic_rcdata_text = function (t) {
1688 insert_html_element(t)
1689 tok_state = tok_state_rcdata
1690 original_ins_mode = ins_mode
1691 ins_mode = ins_mode_text
1694 // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1695 // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
// except: optional element name that must be left alone.
// The loop runs while the current node (open_els[0]) is listed in
// end_tag_implied with a matching namespace and is not named `except`.
1696 generate_implied_end_tags = function (except) {
1697 if (except == null) {
1700 while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
1705 // 8.2.5.4 The rules for parsing tokens in HTML content
1706 // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1708 // 8.2.5.4.1 The "initial" insertion mode
1709 // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
// Quirks-mode tests on DOCTYPE token t. Identifiers are lower-cased before
// they are compared.
1710 is_quirks_yes_doctype = function (t) {
// force-quirks flag set on the token
1712 if (t.flag('force-quirks')) {
// doctype name is not "html"
1715 if (t.name !== 'html') {
1718 if (t.public_identifier != null) {
1719 pi = t.public_identifier.toLowerCase()
// public identifier starts with one of the listed prefixes
1720 for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
1721 p = quirks_yes_pi_prefixes[i]
1722 if (pi.substr(0, p.length) === p) {
// public identifier is exactly one of these
1726 if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
1730 if (t.system_identifier != null) {
1731 if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
// no system identifier, but a public identifier is present
1734 } else if (t.public_identifier != null) {
1735 // already did this: pi = t.public_identifier.toLowerCase()
1736 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
// Limited-quirks tests on DOCTYPE token t, based on the lower-cased public
// identifier.
1742 is_quirks_limited_doctype = function (t) {
1744 if (t.public_identifier != null) {
1745 pi = t.public_identifier.toLowerCase()
// XHTML 1.0 Frameset / Transitional prefixes
1746 if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
// HTML 4.01 Frameset / Transitional prefixes, only when a system identifier is present
1749 if (t.system_identifier != null) {
1750 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
// Handler for the "initial" insertion mode; t is the token being processed.
1757 ins_mode_initial = function (t) {
1758 if (is_space_tok(t)) {
1761 if (t.type === TYPE_COMMENT) {
// comments become children of the document node
1763 doc.children.push(t)
1766 if (t.type === TYPE_DOCTYPE) {
1767 // fixfull syntax error from first paragraph and following bullets
1768 // fixfull set doc.doctype
1769 // fixfull is the "not an iframe srcdoc" thing relevant?
// record the quirks mode on the document
1770 if (is_quirks_yes_doctype(t)) {
1771 doc.flag('quirks mode', QUIRKS_YES)
1772 } else if (is_quirks_limited_doctype(t)) {
1773 doc.flag('quirks mode', QUIRKS_LIMITED)
1775 doc.children.push(t)
1776 ins_mode = ins_mode_before_html
1780 // fixfull not iframe srcdoc?
// any other token: quirks mode, then on to "before html"
1782 doc.flag('quirks mode', QUIRKS_YES)
1783 ins_mode = ins_mode_before_html
1787 // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
// Handler for the "before html" insertion mode; t is the token being
// processed.
1788 ins_mode_before_html = function (t) {
1789 if (t.type === TYPE_DOCTYPE) {
1793 if (t.type === TYPE_COMMENT) {
1794 doc.children.push(t)
1797 if (is_space_tok(t)) {
// <html> start tag: the element is created from the token, appended to the
// document and put on the stack of open elements
1800 if (t.type === TYPE_START_TAG && t.name === 'html') {
1801 el = token_to_element(t, NS_HTML, doc)
1802 doc.children.push(el)
1804 open_els.unshift(el)
1805 // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1806 ins_mode = ins_mode_before_head
1809 if (t.type === TYPE_END_TAG) {
1810 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1811 // fall through to "anything else"
// anything else: an <html> element is synthesised from a new start tag token
1818 el = token_to_element(new_open_tag('html'), NS_HTML, doc)
1819 doc.children.push(el)
1821 open_els.unshift(el)
1822 // ?fixfull browsing context
1823 ins_mode = ins_mode_before_head
1827 // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
// Handler for the "before head" insertion mode; t is the token being
// processed.
1828 ins_mode_before_head = function (t) {
1830 if (is_space_tok(t)) {
1833 if (t.type === TYPE_COMMENT) {
1837 if (t.type === TYPE_DOCTYPE) {
1841 if (t.type === TYPE_START_TAG && t.name === 'html') {
// <head> start tag: insert it, remember it in head_element_pointer
1845 if (t.type === TYPE_START_TAG && t.name === 'head') {
1846 el = insert_html_element(t)
1847 head_element_pointer = el
1848 ins_mode = ins_mode_in_head
1851 if (t.type === TYPE_END_TAG) {
1852 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1853 // fall through to Anything else below
// anything else: a <head> element is synthesised from a new start tag token
1860 el = insert_html_element(new_open_tag('head'))
1861 head_element_pointer = el
1862 ins_mode = ins_mode_in_head
1866 // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
// "Anything else" branch of the "in head" mode: pops the current node and
// switches the insertion mode to "after head".
1867 ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
1868 open_els.shift() // spec says this will be a 'head' node
1869 ins_mode = ins_mode_after_head
1872 ins_mode_in_head = function (t) {
1874 if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
1878 if (t.type === TYPE_COMMENT) {
1882 if (t.type === TYPE_DOCTYPE) {
1886 if (t.type === TYPE_START_TAG && t.name === 'html') {
1890 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
1891 el = insert_html_element(t)
1893 t.acknowledge_self_closing()
1896 if (t.type === TYPE_START_TAG && t.name === 'meta') {
1897 el = insert_html_element(t)
1899 t.acknowledge_self_closing()
1900 // fixfull encoding stuff
1903 if (t.type === TYPE_START_TAG && t.name === 'title') {
1904 parse_generic_rcdata_text(t)
1907 if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
1908 parse_generic_raw_text(t)
1911 if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
1912 insert_html_element(t)
1913 ins_mode = ins_mode_in_head_noscript
1916 if (t.type === TYPE_START_TAG && t.name === 'script') {
1917 ail = adjusted_insertion_location()
1918 el = token_to_element(t, NS_HTML, ail)
1919 el.flag('parser-inserted', true)
1920 // fixfull frament case
1921 ail[0].children.splice(ail[1], 0, el)
1922 open_els.unshift(el)
1923 tok_state = tok_state_script_data
1924 original_ins_mode = ins_mode // make sure orig... is defined
1925 ins_mode = ins_mode_text
1928 if (t.type === TYPE_END_TAG && t.name === 'head') {
1929 open_els.shift() // will be a head element... spec says so
1930 ins_mode = ins_mode_after_head
1933 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
1934 ins_mode_in_head_else(t)
1937 if (t.type === TYPE_START_TAG && t.name === 'template') {
1938 insert_html_element(t)
1940 flag_frameset_ok = false
1941 ins_mode = ins_mode_in_template
1942 template_ins_modes.unshift(ins_mode_in_template)
1945 if (t.type === TYPE_END_TAG && t.name === 'template') {
1946 if (template_tag_is_open()) {
1947 generate_implied_end_tags
1948 if (open_els[0].name !== 'template') {
1952 el = open_els.shift()
1953 if (el.name === 'template' && el.namespace === NS_HTML) {
1957 clear_afe_to_marker()
1958 template_ins_modes.shift()
1965 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
1969 ins_mode_in_head_else(t)
1972 // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
// "Anything else" branch of the "in head noscript" mode; the insertion mode
// goes back to "in head".
1973 ins_mode_in_head_noscript_else = function (t) {
1976 ins_mode = ins_mode_in_head
// Handler for the "in head noscript" insertion mode; t is the token being
// processed.
1979 ins_mode_in_head_noscript = function (t) {
1980 if (t.type === TYPE_DOCTYPE) {
1984 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </noscript>: back to "in head"
1988 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
1990 ins_mode = ins_mode_in_head
// whitespace, comments and these head-level start tags share one branch
1993 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
// </br> is routed to the "anything else" helper
1997 if (t.type === TYPE_END_TAG && t.name === 'br') {
1998 ins_mode_in_head_noscript_else(t)
// <head>, <noscript> start tags and any remaining end tag
2001 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
2006 ins_mode_in_head_noscript_else(t)
2009 // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
// "Anything else" branch of the "after head" mode: a <body> element is
// inserted from a synthesised start tag token and the insertion mode becomes
// "in body".
2010 ins_mode_after_head_else = function (t) {
2012 body_tok = new_open_tag('body')
2013 insert_html_element(body_tok)
2014 ins_mode = ins_mode_in_body
// Token handler for the "after head" insertion mode. Each `if` below selects
// one rule by token type and tag name.
// @param t - the token being processed
2017 ins_mode_after_head = function (t) {
// rule: whitespace character token
2019 if (is_space_tok(t)) {
// rule: comment token
2023 if (t.type === TYPE_COMMENT) {
// rule: DOCTYPE token
2027 if (t.type === TYPE_DOCTYPE) {
// rule: <html> start tag
2031 if (t.type === TYPE_START_TAG && t.name === 'html') {
// rule: <body> -- insert it, clear frameset-ok, continue in "in body"
2035 if (t.type === TYPE_START_TAG && t.name === 'body') {
2036 insert_html_element(t)
2037 flag_frameset_ok = false
2038 ins_mode = ins_mode_in_body
// rule: <frameset> -- insert it, continue in "in frameset"
2041 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2042 insert_html_element(t)
2043 ins_mode = ins_mode_in_frameset
// rule: head-only start tags that appear after the head was closed
2046 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
// put the head element back at the front of open_els for this token...
2048 open_els.unshift(head_element_pointer)
// ...then scan the stack for that same element and remove it again
2050 for (i = 0; i < open_els.length; ++i) {
2052 if (el === head_element_pointer) {
2053 open_els.splice(i, 1)
// rule: </template>
2059 if (t.type === TYPE_END_TAG && t.name === 'template') {
// rule: </body>, </html>, </br> are routed to the shared fallback
2063 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2064 ins_mode_after_head_else(t)
// rule: <head> start tag and every remaining end tag
2067 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
// anything else: shared fallback (opens an implied <body>)
2072 ins_mode_after_head_else(t)
2075 // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
// "Any other end tag" handling for the "in body" insertion mode.
// @param name - tag name of the end tag being processed
2076 in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
// match: the inspected node is an HTML element with the requested name
2080 if (node.name === name && node.namespace === NS_HTML) {
2081 generate_implied_end_tags(name) // arg is exception
// after implied end tags the matched node may not be at the front of open_els
2082 if (node !== open_els[0]) {
2086 el = open_els.shift()
// a node in the "special" category is tested for here
2092 if (special_elements[node.name] === node.namespace) {
// NOTE(review): this scan over open_els advances `node` to the entry after
// index i -- confirm against the full loop body
2096 for (i = 0; i < open_els.length; ++i) {
2099 node = open_els[i + 1]
2105 ins_mode_in_body = function (t) {
2106 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, root_attrs, s, second, second_i, u, w, y, z
2107 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2111 if (is_space_tok(t)) {
2116 if (t.type === TYPE_TEXT) {
2119 flag_frameset_ok = false
2122 if (t.type === TYPE_COMMENT) {
2126 if (t.type === TYPE_DOCTYPE) {
2130 if (t.type === TYPE_START_TAG && t.name === 'html') {
2132 if (template_tag_is_open()) {
2135 root_attrs = open_els[open_els.length - 1].attrs
2136 for (i = 0; i < t.attrs_a.length; ++i) {
2138 if (root_attrs[a[0]] == null) {
2139 root_attrs[a[0]] = a[1]
2145 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2149 if (t.type === TYPE_START_TAG && t.name === 'body') {
2151 if (open_els.length < 2) {
2154 second = open_els[open_els.length - 2]
2155 if (second.namespace !== NS_HTML) {
2158 if (second.name !== 'body') {
2161 if (template_tag_is_open()) {
2164 flag_frameset_ok = false
2165 for (i = 0; i < t.attrs_a.length; ++i) {
2167 if (second.attrs[a[0]] == null) {
2168 second.attrs[a[0]] = a[1]
2173 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2175 if (open_els.length < 2) {
2178 second_i = open_els.length - 2
2179 second = open_els[second_i]
2180 if (second.namespace !== NS_HTML) {
2183 if (second.name !== 'body') {
2186 if (flag_frameset_ok === false) {
2189 if (second.parent != null) {
2190 for (i = 0; i < second.parent.children.length; ++i) {
2191 el = second.parent.children[i]
2192 if (el === second) {
2193 second.parent.children.splice(i, 1)
2198 open_els.splice(second_i, 1)
2199 // pop everything except the "root html element"
2200 while (open_els.length > 1) {
2203 insert_html_element(t)
2204 ins_mode = ins_mode_in_frameset
2207 if (t.type === TYPE_EOF) {
2209 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2210 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2211 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2213 for (i = 0; i < open_els.length; ++i) {
2215 if (ok_tags[t.name] !== el.namespace) {
2220 if (template_ins_modes.length > 0) {
2221 ins_mode_in_template(t)
2227 if (t.type === TYPE_END_TAG && t.name === 'body') {
2228 if (!is_in_scope('body', NS_HTML)) {
2233 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2234 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2235 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2236 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2239 for (i = 0; i < open_els.length; ++i) {
2241 if (ok_tags[t.name] !== el.namespace) {
2246 ins_mode = ins_mode_after_body
2249 if (t.type === TYPE_END_TAG && t.name === 'html') {
2250 if (!is_in_scope('body', NS_HTML)) {
2255 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2256 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2257 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2258 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2261 for (i = 0; i < open_els.length; ++i) {
2263 if (ok_tags[t.name] !== el.namespace) {
2268 ins_mode = ins_mode_after_body
2272 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2273 close_p_if_in_button_scope()
2274 insert_html_element(t)
2277 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2278 close_p_if_in_button_scope()
2279 if (h_tags[open_els[0].name] === open_els[0].namespace) {
2283 insert_html_element(t)
2286 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2287 close_p_if_in_button_scope()
2288 insert_html_element(t)
2289 eat_next_token_if_newline()
2290 flag_frameset_ok = false
2293 if (t.type === TYPE_START_TAG && t.name === 'form') {
2294 if (!(form_element_pointer === null || template_tag_is_open())) {
2298 close_p_if_in_button_scope()
2299 el = insert_html_element(t)
2300 if (!template_tag_is_open()) {
2301 form_element_pointer = el
2305 if (t.type === TYPE_START_TAG && t.name === 'li') {
2306 flag_frameset_ok = false
2307 for (i = 0; i < open_els.length; ++i) {
2309 if (node.name === 'li' && node.namespace === NS_HTML) {
2310 generate_implied_end_tags('li') // arg is exception
2311 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2315 el = open_els.shift()
2316 if (el.name === 'li' && el.namespace === NS_HTML) {
2322 if (el_is_special_not_adp(node)) {
2326 close_p_if_in_button_scope()
2327 insert_html_element(t)
2330 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2331 flag_frameset_ok = false
2332 for (i = 0; i < open_els.length; ++i) {
2334 if (node.name === 'dd' && node.namespace === NS_HTML) {
2335 generate_implied_end_tags('dd') // arg is exception
2336 if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2340 el = open_els.shift()
2341 if (el.name === 'dd' && el.namespace === NS_HTML) {
2347 if (node.name === 'dt' && node.namespace === NS_HTML) {
2348 generate_implied_end_tags('dt') // arg is exception
2349 if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2353 el = open_els.shift()
2354 if (el.name === 'dt' && el.namespace === NS_HTML) {
2360 if (el_is_special_not_adp(node)) {
2364 close_p_if_in_button_scope()
2365 insert_html_element(t)
2368 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2369 close_p_if_in_button_scope()
2370 insert_html_element(t)
2371 tok_state = tok_state_plaintext
2374 if (t.type === TYPE_START_TAG && t.name === 'button') {
2375 if (is_in_scope('button', NS_HTML)) {
2377 generate_implied_end_tags()
2379 el = open_els.shift()
2380 if (el.name === 'button' && el.namespace === NS_HTML) {
2386 insert_html_element(t)
2387 flag_frameset_ok = false
2390 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2391 if (!is_in_scope(t.name, NS_HTML)) {
2395 generate_implied_end_tags()
2396 if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2400 el = open_els.shift()
2401 if (el.name === t.name && el.namespace === NS_HTML) {
2407 if (t.type === TYPE_END_TAG && t.name === 'form') {
2408 if (!template_tag_is_open()) {
2409 node = form_element_pointer
2410 form_element_pointer = null
2411 if (node === null || !el_is_in_scope(node)) {
2415 generate_implied_end_tags()
2416 if (open_els[0] !== node) {
2419 for (i = 0; i < open_els.length; ++i) {
2422 open_els.splice(i, 1)
2427 if (!is_in_scope('form', NS_HTML)) {
2431 generate_implied_end_tags()
2432 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2436 el = open_els.shift()
2437 if (el.name === 'form' && el.namespace === NS_HTML) {
2444 if (t.type === TYPE_END_TAG && t.name === 'p') {
2445 if (!is_in_button_scope('p', NS_HTML)) {
2447 insert_html_element(new_open_tag('p'))
2452 if (t.type === TYPE_END_TAG && t.name === 'li') {
2453 if (!is_in_li_scope('li', NS_HTML)) {
2457 generate_implied_end_tags('li') // arg is exception
2458 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2462 el = open_els.shift()
2463 if (el.name === 'li' && el.namespace === NS_HTML) {
2469 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2470 if (!is_in_scope(t.name, NS_HTML)) {
2474 generate_implied_end_tags(t.name) // arg is exception
2475 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2479 el = open_els.shift()
2480 if (el.name === t.name && el.namespace === NS_HTML) {
2486 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2488 for (i = 0; i < open_els.length; ++i) {
2490 if (h_tags[el.name] === el.namespace) {
2494 if (standard_scopers[el.name] === el.namespace) {
2502 generate_implied_end_tags()
2503 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2507 el = open_els.shift()
2508 if (h_tags[el.name] === el.namespace) {
2515 if (t.type === TYPE_START_TAG && t.name === 'a') {
2516 // If the list of active formatting elements contains an a element
2517 // between the end of the list and the last marker on the list (or
2518 // the start of the list if there is no marker on the list), then
2519 // this is a parse error; run the adoption agency algorithm for the
2520 // tag name "a", then remove that element from the list of active
2521 // formatting elements and the stack of open elements if the
2522 // adoption agency algorithm didn't already remove it (it might not
2523 // have if the element is not in table scope).
2525 for (i = 0; i < afe.length; ++i) {
2527 if (el.type === TYPE_AFE_MARKER) {
2530 if (el.name === 'a' && el.namespace === NS_HTML) {
2534 if (found != null) {
2536 adoption_agency('a')
2537 for (i = 0; i < afe.length; ++i) {
2543 for (i = 0; i < open_els.length; ++i) {
2546 open_els.splice(i, 1)
2551 el = insert_html_element(t)
2555 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2557 el = insert_html_element(t)
2561 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2563 if (is_in_scope('nobr', NS_HTML)) {
2565 adoption_agency('nobr')
2568 el = insert_html_element(t)
2572 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2573 adoption_agency(t.name)
2576 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2578 insert_html_element(t)
2580 flag_frameset_ok = false
2583 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2584 if (!is_in_scope(t.name, NS_HTML)) {
2588 generate_implied_end_tags()
2589 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2593 el = open_els.shift()
2594 if (el.name === t.name && el.namespace === NS_HTML) {
2598 clear_afe_to_marker()
2601 if (t.type === TYPE_START_TAG && t.name === 'table') {
2602 if (doc.flag('quirks mode') !== QUIRKS_YES) {
2603 close_p_if_in_button_scope() // test
2605 insert_html_element(t)
2606 flag_frameset_ok = false
2607 ins_mode = ins_mode_in_table
2610 if (t.type === TYPE_END_TAG && t.name === 'br') {
2612 // W3C: t.type = TYPE_START_TAG
2613 t = new_open_tag('br') // WHATWG
2616 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2618 insert_html_element(t)
2620 t.acknowledge_self_closing()
2621 flag_frameset_ok = false
2624 if (t.type === TYPE_START_TAG && t.name === 'input') {
2626 insert_html_element(t)
2628 t.acknowledge_self_closing()
2629 if (!is_input_hidden_tok(t)) {
2630 flag_frameset_ok = false
2634 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2635 // WHATWG adds 'menuitem' for this block
2636 insert_html_element(t)
2638 t.acknowledge_self_closing()
2641 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2642 close_p_if_in_button_scope()
2643 insert_html_element(t)
2645 t.acknowledge_self_closing()
2646 flag_frameset_ok = false
2649 if (t.type === TYPE_START_TAG && t.name === 'image') {
2655 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2657 if (template_tag_is_open() === false && form_element_pointer !== null) {
2660 t.acknowledge_self_closing()
2661 flag_frameset_ok = false
2662 close_p_if_in_button_scope()
2663 el = insert_html_element(new_open_tag('form'))
2664 if (!template_tag_is_open()) {
2665 form_element_pointer = el
2667 for (i = 0; i < t.attrs_a.length; ++i) {
2669 if (a[0] === 'action') {
2670 el.attrs['action'] = a[1]
2674 insert_html_element(new_open_tag('hr'))
2677 insert_html_element(new_open_tag('label'))
2678 // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2679 input_el = new_open_tag('input')
2681 for (i = 0; i < t.attrs_a.length; ++i) {
2683 if (a[0] === 'prompt') {
2686 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2687 input_el.attrs_a.push([a[0], a[1]])
2690 input_el.attrs_a.push(['name', 'isindex'])
2691 // fixfull this next bit is in english... internationalize?
2692 if (prompt == null) {
2693 prompt = "This is a searchable index. Enter search keywords: "
2695 insert_character(new_character_token(prompt)) // fixfull split
2696 // TODO submit typo "balue" in spec
2697 insert_html_element(input_el)
2699 // insert_character('') // you can put chars here if prompt attr missing
2701 insert_html_element(new_open_tag('hr'))
2704 if (!template_tag_is_open()) {
2705 form_element_pointer = null
2709 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2710 insert_html_element(t)
2711 eat_next_token_if_newline()
2712 tok_state = tok_state_rcdata
2713 original_ins_mode = ins_mode
2714 flag_frameset_ok = false
2715 ins_mode = ins_mode_text
2718 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2719 close_p_if_in_button_scope()
2721 flag_frameset_ok = false
2722 parse_generic_raw_text(t)
2725 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2726 flag_frameset_ok = false
2727 parse_generic_raw_text(t)
2730 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2731 parse_generic_raw_text(t)
2734 if (t.type === TYPE_START_TAG && t.name === 'select') {
2736 insert_html_element(t)
2737 flag_frameset_ok = false
2738 if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2739 ins_mode = ins_mode_in_select_in_table
2741 ins_mode = ins_mode_in_select
2745 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2746 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2750 insert_html_element(t)
2753 // this comment block implements the W3C spec
2754 // if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2755 // if is_in_scope 'ruby', NS_HTML
2756 // generate_implied_end_tags()
2757 // unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2759 // insert_html_element t
2761 // if t.type === TYPE_START_TAG && t.name === 'rt'
2762 // if is_in_scope 'ruby', NS_HTML
2763 // generate_implied_end_tags 'rtc' // arg === exception
2764 // unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2766 // insert_html_element t
2768 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2769 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2770 if (is_in_scope('ruby', NS_HTML)) {
2771 generate_implied_end_tags()
2772 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2776 insert_html_element(t)
2779 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2780 if (is_in_scope('ruby', NS_HTML)) {
2781 generate_implied_end_tags('rtc')
2782 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2786 insert_html_element(t)
2790 if (t.type === TYPE_START_TAG && t.name === 'math') {
2792 adjust_mathml_attributes(t)
2793 adjust_foreign_attributes(t)
2794 insert_foreign_element(t, NS_MATHML)
2795 if (t.flag('self-closing')) {
2797 t.acknowledge_self_closing()
2801 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2803 adjust_svg_attributes(t)
2804 adjust_foreign_attributes(t)
2805 insert_foreign_element(t, NS_SVG)
2806 if (t.flag('self-closing')) {
2808 t.acknowledge_self_closing()
2812 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2816 if (t.type === TYPE_START_TAG) { // any other start tag
2818 insert_html_element(t)
2821 if (t.type === TYPE_END_TAG) { // any other end tag
2822 in_body_any_other_end_tag(t.name)
2827 // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
// Token handler for the "text" insertion mode. The EOF and end-tag rules
// shown here restore the insertion mode saved in original_ins_mode.
// @param t - the token being processed
2828 ins_mode_text = function (t) {
// rule: character token
2829 if (t.type === TYPE_TEXT) {
// rule: end of file
2833 if (t.type === TYPE_EOF) {
// when the current node is a <script>, it gets flagged "already started"
2835 if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
2836 open_els[0].flag('already started', true)
2839 ins_mode = original_ins_mode
// rule: </script>
2843 if (t.type === TYPE_END_TAG && t.name === 'script') {
2845 ins_mode = original_ins_mode
2846 // fixfull the spec seems to assume that I'm going to run the script
2847 // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
// rule: any other end tag
2850 if (t.type === TYPE_END_TAG) {
2852 ins_mode = original_ins_mode
2857 // the functions below implement the tokenizer states described here:
2858 // http://www.w3.org/TR/html5/syntax.html#tokenization
2860 // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
// Shared fallback for the "in table" insertion mode. The "in table" handler
// calls it, and the "in table text" handler calls it for buffered character
// tokens.
// @param t - the token being processed
2861 ins_mode_in_table_else = function (t) {
// foster parenting is switched on for the fallback processing, then off again
2863 flag_foster_parenting = true
2865 flag_foster_parenting = false
// Token handler for the "in table" insertion mode.
// @param t - the token being processed
2867 ins_mode_in_table = function (t) {
// when the current node is a table / tbody / tfoot / thead / tr, start
// buffering: empty the pending list, remember this mode, and switch to
// "in table text"
2871 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
2872 pending_table_character_tokens = []
2873 original_ins_mode = ins_mode
2874 ins_mode = ins_mode_in_table_text
2877 ins_mode_in_table_else(t)
2886 case TYPE_START_TAG:
// rule ending in "in caption": clear the stack to table context, insert the token's element
2889 clear_stack_to_table_context()
2891 insert_html_element(t)
2892 ins_mode = ins_mode_in_caption
// rule ending in "in column group": the token's own element is inserted
2895 clear_stack_to_table_context()
2896 insert_html_element(t)
2897 ins_mode = ins_mode_in_column_group
// rule ending in "in column group": an implied <colgroup> is inserted instead
2900 clear_stack_to_table_context()
2901 insert_html_element(new_open_tag('colgroup'))
2902 ins_mode = ins_mode_in_column_group
// rule ending in "in table body": the token's own element is inserted
2908 clear_stack_to_table_context()
2909 insert_html_element(t)
2910 ins_mode = ins_mode_in_table_body
// rule ending in "in table body": an implied <tbody> is inserted instead
2915 clear_stack_to_table_context()
2916 insert_html_element(new_open_tag('tbody'))
2917 ins_mode = ins_mode_in_table_body
// with a table in table scope, elements are popped and tested for being the HTML table element
2922 if (is_in_table_scope('table', NS_HTML)) {
2924 el = open_els.shift()
2925 if (el.name === 'table' && el.namespace === NS_HTML) {
// an input that is not a hidden input goes to the fallback
2939 if (!is_input_hidden_tok(t)) {
2940 ins_mode_in_table_else(t)
2943 el = insert_html_element(t)
2945 t.acknowledge_self_closing()
// form handling: the form element pointer and the template state are both tested
2950 if (form_element_pointer != null) {
2953 if (template_tag_is_open()) {
2956 form_element_pointer = insert_html_element(t)
2960 ins_mode_in_table_else(t)
// second place where elements are popped and tested for being the table element
2966 if (is_in_table_scope('table', NS_HTML)) {
2968 el = open_els.shift()
2969 if (el.name === 'table' && el.namespace === NS_HTML) {
2995 ins_mode_in_table_else(t)
3002 ins_mode_in_table_else(t)
3006 // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
// Token handler for the "in table text" insertion mode. Character tokens are
// collected in pending_table_character_tokens and processed later as a batch.
// @param t - the token being processed
3007 ins_mode_in_table_text = function (t) {
3008 var all_space, i, l, m, old
// rule: NUL character token
3009 if (t.type === TYPE_TEXT && t.text === "\u0000") {
// rule: any other character token is buffered
3014 if (t.type === TYPE_TEXT) {
3015 pending_table_character_tokens.push(t)
// scan the buffer for a token that is not whitespace
3020 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3021 old = pending_table_character_tokens[i]
3022 if (!is_space_tok(old)) {
// one pass inserts every buffered token as character data
3028 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3029 old = pending_table_character_tokens[i]
3030 insert_character(old)
// the other pass sends every buffered token through the "in table" fallback
3033 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3034 old = pending_table_character_tokens[i]
3035 ins_mode_in_table_else(old)
// empty the buffer and go back to the mode that was active before buffering
3038 pending_table_character_tokens = []
3039 ins_mode = original_ins_mode
3043 // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
// Token handler for the "in caption" insertion mode.
// @param t - the token being processed
3044 ins_mode_in_caption = function (t) {
// rule: </caption>
3046 if (t.type === TYPE_END_TAG && t.name === 'caption') {
3047 if (is_in_table_scope('caption', NS_HTML)) {
3048 generate_implied_end_tags()
// NOTE(review): this test compares only the name; sibling checks in this
// file also compare the namespace with NS_HTML -- confirm whether that is intended
3049 if (open_els[0].name !== 'caption') {
3053 el = open_els.shift()
3054 if (el.name === 'caption' && el.namespace === NS_HTML) {
3058 clear_afe_to_marker()
3059 ins_mode = ins_mode_in_table
// rule: table-structure start tags and </table> close the caption
3066 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3068 if (is_in_table_scope('caption', NS_HTML)) {
3070 el = open_els.shift()
3071 if (el.name === 'caption' && el.namespace === NS_HTML) {
3075 clear_afe_to_marker()
3076 ins_mode = ins_mode_in_table
3079 // else fragment case
// rule: the listed end tags (body, col, colgroup, html, tbody, td, tfoot, th, thead, tr)
3082 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3090 // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
3091 ins_mode_in_column_group = function (t) {
3093 if (is_space_tok(t)) {
3097 if (t.type === TYPE_COMMENT) {
3101 if (t.type === TYPE_DOCTYPE) {
3105 if (t.type === TYPE_START_TAG && t.name === 'html') {
3109 if (t.type === TYPE_START_TAG && t.name === 'col') {
3110 el = insert_html_element(t)
3112 t.acknowledge_self_closing()
3115 if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
3116 if (open_els[0].name === 'colgroup' && open_els.namespace === NS_HTML) {
3118 ins_mode = ins_mode_in_table
3124 if (t.type === TYPE_END_TAG && t.name === 'col') {
3128 if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
3132 if (t.type === TYPE_EOF) {
3137 if (open_els[0].name !== 'colgroup') {
3142 ins_mode = ins_mode_in_table
3146 // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
// Token handler for the "in table body" insertion mode.
// @param t - the token being processed
3147 ins_mode_in_table_body = function (t) {
// rule: <tr> -- inserted, then "in row"
3149 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3150 clear_stack_to_table_body_context()
3151 insert_html_element(t)
3152 ins_mode = ins_mode_in_row
// rule: <th>, <td> without a row -- an implied <tr> is inserted first
3155 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3157 clear_stack_to_table_body_context()
3158 insert_html_element(new_open_tag('tr'))
3159 ins_mode = ins_mode_in_row
// rule: </tbody>, </tfoot>, </thead>
3163 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3164 if (!is_in_table_scope(t.name, NS_HTML)) {
3168 clear_stack_to_table_body_context()
3170 ins_mode = ins_mode_in_table
// rule: table-structure start tags and </table>
3173 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) {
// scan the stack for an HTML tbody / tfoot / thead; elements listed in
// table_scopers are also tested for
3175 for (i = 0; i < open_els.length; ++i) {
3177 if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) {
3181 if (table_scopers[el.name] === el.namespace) {
3189 clear_stack_to_table_body_context()
3191 ins_mode = ins_mode_in_table
// rule: the listed end tags (body, caption, col, colgroup, html, td, th, tr)
3195 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) {
// anything else: handled by the "in table" rules
3200 ins_mode_in_table(t)
3203 // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
// Token handler for the "in row" insertion mode.
// @param t - the token being processed
3204 ins_mode_in_row = function (t) {
// rule: <th>, <td> -- inserted, then "in cell"
3205 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3206 clear_stack_to_table_row_context()
3207 insert_html_element(t)
3208 ins_mode = ins_mode_in_cell
// rule: </tr>
3212 if (t.type === TYPE_END_TAG && t.name === 'tr') {
3213 if (is_in_table_scope('tr', NS_HTML)) {
3214 clear_stack_to_table_row_context()
3216 ins_mode = ins_mode_in_table_body
// rule: table-structure start tags and </table>
3222 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3223 if (is_in_table_scope('tr', NS_HTML)) {
3224 clear_stack_to_table_row_context()
3226 ins_mode = ins_mode_in_table_body
// rule: </tbody>, </tfoot>, </thead> -- both the named section and a tr are tested for table scope
3233 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3234 if (is_in_table_scope(t.name, NS_HTML)) {
3235 if (is_in_table_scope('tr', NS_HTML)) {
3236 clear_stack_to_table_row_context()
3238 ins_mode = ins_mode_in_table_body
// rule: the listed end tags (body, caption, col, colgroup, html, td, th)
3246 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th')) {
// anything else: handled by the "in table" rules
3251 ins_mode_in_table(t)
3254 // http://www.w3.org/TR/html5/syntax.html#close-the-cell
3255 close_the_cell = function () {
3257 generate_implied_end_tags()
3258 if (!((open_els[0].name === 'td' || open_els[0] === 'th') && open_els[0].namespace === NS_HTML)) {
3262 el = open_els.shift()
3263 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3267 clear_afe_to_marker()
3268 ins_mode = ins_mode_in_row
3271 // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
// Token handler for the "in cell" insertion mode.
// @param t - the token being processed
3272 ins_mode_in_cell = function (t) {
// rule: </td>, </th>
3274 if (t.type === TYPE_END_TAG && (t.name === 'td' || t.name === 'th')) {
3275 if (is_in_table_scope(t.name, NS_HTML)) {
3276 generate_implied_end_tags()
// mismatch check: the current node should be the HTML element named by the end tag
3277 if (!((open_els[0].name === t.name) && open_els[0].namespace === NS_HTML)) {
3281 el = open_els.shift()
3282 if (el.name === t.name && el.namespace === NS_HTML) {
3286 clear_afe_to_marker()
3287 ins_mode = ins_mode_in_row
// rule: table-structure start tags
3293 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
// scan the stack for an HTML td / th; elements listed in table_scopers are also tested for
3295 for (i = 0; i < open_els.length; ++i) {
3297 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3301 if (table_scopers[el.name] === el.namespace) {
// rule: the listed end tags (body, caption, col, colgroup, html)
3313 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html')) {
// rule: </table>, </tbody>, </tfoot>, </thead>, </tr>
3317 if (t.type === TYPE_END_TAG && (t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) {
3318 if (is_in_table_scope(t.name, NS_HTML)) {
3330 // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
3331 ins_mode_in_select = function (t) {
3333 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3337 if (t.type === TYPE_TEXT) {
3341 if (t.type === TYPE_COMMENT) {
3345 if (t.type === TYPE_DOCTYPE) {
3349 if (t.type === TYPE_START_TAG && t.name === 'html') {
3353 if (t.type === TYPE_START_TAG && t.name === 'option') {
3354 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3357 insert_html_element(t)
3360 if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
3361 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3364 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3367 insert_html_element(t)
3370 if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
3371 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3372 if (open_els[1].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3376 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3383 if (t.type === TYPE_END_TAG && t.name === 'option') {
3384 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3391 if (t.type === TYPE_END_TAG && t.name === 'select') {
3392 if (is_in_select_scope('select', NS_HTML)) {
3394 el = open_els.shift()
3395 if (el.name === 'select' && el.namespace === NS_HTML) {
3405 if (t.type === TYPE_START_TAG && t.name === 'select') {
3408 el = open_els.shift()
3409 if (el.name === 'select' && el.namespace === NS_HTML) {
3414 // spec says that this is the same as </select> but it doesn't say
3415 // to check scope first
3418 if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
3420 if (!is_in_select_scope('select', NS_HTML)) {
3424 el = open_els.shift()
3425 if (el.name === 'select' && el.namespace === NS_HTML) {
3433 if (t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) {
3437 if (t.type === TYPE_EOF) {
3445 // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// Token handler for the "in select in table" insertion mode. Table-structure
// tags get their own rules here; every other token is handled by the
// "in select" rules.
// @param t - the token being processed
3446 ins_mode_in_select_in_table = function (t) {
// rule: table-structure start tags -- elements are popped and tested for being the select
3448 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3451 el = open_els.shift()
3452 if (el.name === 'select' && el.namespace === NS_HTML) {
// rule: table-structure end tags -- the named element's table scope is tested first
3460 if (t.type === TYPE_END_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3462 if (!is_in_table_scope(t.name, NS_HTML)) {
3466 el = open_els.shift()
3467 if (el.name === 'select' && el.namespace === NS_HTML) {
// anything else: handled by the "in select" rules
3476 ins_mode_in_select(t)
3479 // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// Handler for the "in template" insertion mode; t is the current token.
// For the start-tag groups below, the first entry of template_ins_modes is
// replaced (shift + unshift) and ins_mode is set to the same handler.
// NOTE(review): this listing omits some interior lines, so only the visible
// steps are annotated.
3480 ins_mode_in_template = function (t) {
3482 if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
3486 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
// caption / colgroup / tbody / tfoot / thead -> "in table"
3490 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3491 template_ins_modes.shift()
3492 template_ins_modes.unshift(ins_mode_in_table)
3493 ins_mode = ins_mode_in_table
// col -> "in column group"
3497 if (t.type === TYPE_START_TAG && t.name === 'col') {
3498 template_ins_modes.shift()
3499 template_ins_modes.unshift(ins_mode_in_column_group)
3500 ins_mode = ins_mode_in_column_group
// tr -> "in table body"
3504 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3505 template_ins_modes.shift()
3506 template_ins_modes.unshift(ins_mode_in_table_body)
3507 ins_mode = ins_mode_in_table_body
// td / th -> "in row"
3511 if (t.type === TYPE_START_TAG && (t.name === 'td' || t.name === 'th')) {
3512 template_ins_modes.shift()
3513 template_ins_modes.unshift(ins_mode_in_row)
3514 ins_mode = ins_mode_in_row
// any other start tag -> "in body"
3518 if (t.type === TYPE_START_TAG) {
3519 template_ins_modes.shift()
3520 template_ins_modes.unshift(ins_mode_in_body)
3521 ins_mode = ins_mode_in_body
3525 if (t.type === TYPE_END_TAG) {
// EOF: checks whether a template is open, shifts entries off open_els testing
// for the HTML <template>, then clears the active formatting elements to the
// last marker and drops the first template insertion mode.
3529 if (t.type === TYPE_EOF) {
3530 if (!template_tag_is_open()) {
3536 el = open_els.shift()
3537 if (el.name === 'template' && el.namespace === NS_HTML) {
3541 clear_afe_to_marker()
3542 template_ins_modes.shift()
3548 // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
// Handler for the "after body" insertion mode; t is the current token.
// NOTE(review): this listing omits some interior lines, so only the visible
// steps are annotated.
3549 ins_mode_after_body = function (t) {
3551 if (is_space_tok(t)) {
// Comments are inserted as the last child of the final entry in open_els.
3555 if (t.type === TYPE_COMMENT) {
3556 first = open_els[open_els.length - 1]
3557 insert_comment(t, [first, first.children.length])
3560 if (t.type === TYPE_DOCTYPE) {
3564 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </html>: the fragment-parsing flag is tested before the switch to
// "after after body".
3568 if (t.type === TYPE_END_TAG && t.name === 'html') {
3569 if (flag_fragment_parsing) {
3573 ins_mode = ins_mode_after_after_body
3576 if (t.type === TYPE_EOF) {
// Last visible step: switch the insertion mode back to "in body".
3582 ins_mode = ins_mode_in_body
3586 // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
// Handler for the "in frameset" insertion mode; t is the current token.
// NOTE(review): this listing omits some interior lines, so only the visible
// steps are annotated.
3587 ins_mode_in_frameset = function (t) {
3588 if (is_space_tok(t)) {
3592 if (t.type === TYPE_COMMENT) {
3596 if (t.type === TYPE_DOCTYPE) {
3600 if (t.type === TYPE_START_TAG && t.name === 'html') {
// <frameset>: insert the element.
3604 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
3605 insert_html_element(t)
// </frameset>: a one-entry open_els is treated as the fragment case.
3608 if (t.type === TYPE_END_TAG && t.name === 'frameset') {
3609 if (open_els.length === 1) {
3611 return // fragment case
// Outside fragment parsing, once open_els[0] is no longer a frameset, move to
// "after frameset".
3614 if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
3615 ins_mode = ins_mode_after_frameset
// <frame>: insert the element and acknowledge the token's self-closing flag.
3619 if (t.type === TYPE_START_TAG && t.name === 'frame') {
3620 insert_html_element(t)
3622 t.acknowledge_self_closing()
3625 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3629 if (t.type === TYPE_EOF) {
3630 if (open_els.length !== 1) {
3640 // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
// Handler for the "after frameset" insertion mode; t is the current token.
// NOTE(review): the bodies of most branches are omitted from this listing.
3641 ins_mode_after_frameset = function (t) {
3642 if (is_space_tok(t)) {
3646 if (t.type === TYPE_COMMENT) {
3650 if (t.type === TYPE_DOCTYPE) {
3654 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </html> moves the parser to "after after frameset".
3658 if (t.type === TYPE_END_TAG && t.name === 'html') {
3659 ins_mode = ins_mode_after_after_frameset
3662 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3666 if (t.type === TYPE_EOF) {
3674 // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
// Handler for the "after after body" insertion mode; t is the current token.
// NOTE(review): the bodies of most branches are omitted from this listing.
3675 ins_mode_after_after_body = function (t) {
// Comments are inserted as the last child of doc.
3676 if (t.type === TYPE_COMMENT) {
3677 insert_comment(t, [doc, doc.children.length])
3680 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3684 if (t.type === TYPE_EOF) {
// Last visible step: switch the insertion mode back to "in body".
3690 ins_mode = ins_mode_in_body
3694 // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
// Handler for the "after after frameset" insertion mode; t is the current token.
// NOTE(review): the bodies of most branches are omitted from this listing.
3695 ins_mode_after_after_frameset = function (t) {
// Comments are inserted as the last child of doc.
3696 if (t.type === TYPE_COMMENT) {
3697 insert_comment(t, [doc, doc.children.length])
3700 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3704 if (t.type === TYPE_EOF) {
3708 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3717 // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
// Walks t.attrs_a by index and tests whether an entry's first element (a[0],
// the attribute name) is 'color', 'face' or 'size'.
// NOTE(review): the assignment of `a` and the return statements are omitted
// from this listing; presumably returns a boolean - confirm.
3718 has_color_face_or_size = function (t) {
3720 for (i = 0; i < t.attrs_a.length; ++i) {
3722 if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
// Called from the foreign-content handlers when a self-closing script start
// tag or an SVG script end tag is processed.
// NOTE(review): the body of this function is omitted from this listing.
3728 in_foreign_content_end_script = function () {
// Handles "any other start tag" in foreign content: adjusts the token for the
// namespace of the adjusted current node, inserts it as a foreign element in
// that namespace, then deals with the self-closing flag.
// NOTE(review): this listing omits some interior lines (closing braces and an
// apparent else branch), so only the visible steps are annotated.
3732 in_foreign_content_other_start = function (t) {
3734 acn = adjusted_current_node()
3735 if (acn.namespace === NS_MATHML) {
3736 adjust_mathml_attributes(t)
// SVG tag names listed in svg_name_fixes are rewritten to the mapped name.
3738 if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra parens because Coffeescript did
3739 t.name = svg_name_fixes[t.name]
3741 if (acn.namespace === NS_SVG) {
3742 adjust_svg_attributes(t)
3744 adjust_foreign_attributes(t)
3745 insert_foreign_element(t, acn.namespace)
// Self-closing tokens are acknowledged; a self-closing script additionally
// runs the end-script hook.
3746 if (t.flag('self-closing')) {
3747 if (t.name === 'script') {
3748 t.acknowledge_self_closing()
3749 in_foreign_content_end_script()
3753 t.acknowledge_self_closing()
// Token handler used while parsing foreign (non-HTML namespace) content;
// t is the current token.
// NOTE(review): this listing omits some interior lines, so only the visible
// steps are annotated.
3757 in_foreign_content = function (t) {
// A NUL text token is inserted as U+FFFD.
3759 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3761 insert_character(new_character_token("\ufffd"))
3764 if (is_space_tok(t)) {
// Any other text token clears flag_frameset_ok.
3768 if (t.type === TYPE_TEXT) {
3769 flag_frameset_ok = false
3773 if (t.type === TYPE_COMMENT) {
3777 if (t.type === TYPE_DOCTYPE) {
// Start tags of the listed HTML element names (and <font> carrying a color,
// face or size attribute).
3781 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
// In fragment parsing the token is handled like any other foreign start tag.
3783 if (flag_fragment_parsing) {
3784 in_foreign_content_other_start(t)
// The loop tests open_els[0] for an integration point or an HTML-namespace
// element; the rest of the loop body is not visible here.
3787 while (true) { // is this safe?
3789 if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
3796 if (t.type === TYPE_START_TAG) {
3797 in_foreign_content_other_start(t)
3800 if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
3801 in_foreign_content_end_script()
// Other end tags: node names are compared case-insensitively against t.name;
// reaching an HTML-namespace node hands the token to the current insertion mode.
3804 if (t.type === TYPE_END_TAG) {
3807 if (node.name.toLowerCase() !== t.name) {
3811 if (node === open_els[open_els.length - 1]) {
3814 if (node.name.toLowerCase() === t.name) {
3816 el = open_els.shift()
3824 if (node.namespace === NS_HTML) {
3828 ins_mode(t) // explicitly call HTML insertion mode
3833 // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
// Tokenizer handler for the data state: switches on the next character of txt
// (advancing cur) and returns text-node or EOF tokens, or moves to the
// tag-open state.
// NOTE(review): the case labels are omitted from this listing.
3834 tok_state_data = function () {
3836 switch (c = txt.charAt(cur++)) {
// Character references are resolved via parse_character_reference().
3838 return new_text_node(parse_character_reference())
3841 tok_state = tok_state_tag_open
3845 return new_text_node(c)
3848 return new_eof_token()
3851 return new_text_node(c)
3856 // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3857 // not needed: tok_state_character_reference_in_data = function () {
3858 // just call parse_character_reference()
3860 // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
// Tokenizer handler for the RCDATA state: switches on the next character of
// txt (advancing cur).
// NOTE(review): the case labels are omitted from this listing.
3861 tok_state_rcdata = function () {
3863 switch (c = txt.charAt(cur++)) {
// Character references are resolved via parse_character_reference().
3865 return new_text_node(parse_character_reference())
3868 tok_state = tok_state_rcdata_less_than_sign
// One visible branch emits U+FFFD as a replacement character.
3872 return new_character_token("\ufffd")
3875 return new_eof_token()
3878 return new_character_token(c)
3883 // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3884 // not needed: tok_state_character_reference_in_rcdata = function () {
3885 // just call parse_character_reference()
3887 // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
// Tokenizer handler for the RAWTEXT state: switches on the next character of
// txt (advancing cur); visible outcomes are a move to the RAWTEXT
// less-than-sign state, a U+FFFD token, an EOF token, or the character itself.
// NOTE(review): the case labels are omitted from this listing.
3888 tok_state_rawtext = function () {
3890 switch (c = txt.charAt(cur++)) {
3892 tok_state = tok_state_rawtext_less_than_sign
3896 return new_character_token("\ufffd")
3899 return new_eof_token()
3902 return new_character_token(c)
3907 // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
// Tokenizer handler for the script data state: switches on the next character
// of txt (advancing cur); visible outcomes are a move to the script data
// less-than-sign state, a U+FFFD token, an EOF token, or the character itself.
// NOTE(review): the case labels are omitted from this listing.
3908 tok_state_script_data = function () {
3910 switch (c = txt.charAt(cur++)) {
3912 tok_state = tok_state_script_data_less_than_sign
3916 return new_character_token("\ufffd")
3919 return new_eof_token()
3922 return new_character_token(c)
3927 // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
// Tokenizer handler for the PLAINTEXT state: switches on the next character of
// txt (advancing cur) and returns a U+FFFD token, an EOF token, or the
// character itself. No state change is visible in this handler.
// NOTE(review): the case labels are omitted from this listing.
3928 tok_state_plaintext = function () {
3930 switch (c = txt.charAt(cur++)) {
3933 return new_character_token("\ufffd")
3936 return new_eof_token()
3939 return new_character_token(c)
3944 // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
// Tokenizer handler for the tag open state (the character after '<').
// NOTE(review): several branch conditions are omitted from this listing.
3945 tok_state_tag_open = function () {
3947 c = txt.charAt(cur++)
3949 tok_state = tok_state_markup_declaration_open
3953 tok_state = tok_state_end_tag_open
// A letter starts a new open-tag token; the tag name is stored lowercased.
3956 if (is_uc_alpha(c)) {
3957 tok_cur_tag = new_open_tag(c.toLowerCase())
3958 tok_state = tok_state_tag_name
3961 if (is_lc_alpha(c)) {
3962 tok_cur_tag = new_open_tag(c)
3963 tok_state = tok_state_tag_name
// Starts a bogus comment whose text begins with '?'.
3968 tok_cur_tag = new_comment_token('?') // FIXME right?
3969 tok_state = tok_state_bogus_comment
// Fallback: emit the '<' as text and re-read the current character in the
// data state.
3974 tok_state = tok_state_data
3975 cur -= 1 // we didn't parse/handle the char after <
3976 return new_text_node('<')
3979 // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
// Tokenizer handler for the end tag open state (the character after '</').
// NOTE(review): some branch conditions are omitted from this listing.
3980 tok_state_end_tag_open = function () {
3982 c = txt.charAt(cur++)
// A letter starts a new end-tag token; the tag name is stored lowercased.
3983 if (is_uc_alpha(c)) {
3984 tok_cur_tag = new_end_tag(c.toLowerCase())
3985 tok_state = tok_state_tag_name
3988 if (is_lc_alpha(c)) {
3989 tok_cur_tag = new_end_tag(c)
3990 tok_state = tok_state_tag_name
3995 tok_state = tok_state_data
// At end of input the pending '</' is emitted as text.
3998 if (c === '') { // EOF
4000 tok_state = tok_state_data
4001 return new_text_node('</')
// Otherwise the character starts a bogus comment.
4005 tok_cur_tag = new_comment_token(c)
4006 tok_state = tok_state_bogus_comment
4010 // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
// Tokenizer handler for the tag name state: extends tok_cur_tag.name or leaves
// the state, depending on the next character of txt.
// NOTE(review): the case labels are omitted from this listing.
4011 tok_state_tag_name = function () {
4013 switch (c = txt.charAt(cur++)) {
4018 tok_state = tok_state_before_attribute_name
4021 tok_state = tok_state_self_closing_start_tag
4024 tok_state = tok_state_data
// One visible branch appends U+FFFD to the tag name.
4031 tok_cur_tag.name += "\ufffd"
4035 tok_state = tok_state_data
// Uppercase letters are folded to lowercase before being appended.
4038 if (is_uc_alpha(c)) {
4039 tok_cur_tag.name += c.toLowerCase()
4041 tok_cur_tag.name += c
4047 // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
// Tokenizer handler for the character following '<' in RCDATA.
// NOTE(review): the condition guarding the first branch is omitted from this
// listing.
4048 tok_state_rcdata_less_than_sign = function () {
4050 c = txt.charAt(cur++)
// Possible end tag: reset the temporary buffer and continue in the RCDATA end
// tag open state.
4052 temporary_buffer = ''
4053 tok_state = tok_state_rcdata_end_tag_open
// Otherwise the '<' was plain text.
4057 tok_state = tok_state_rcdata
4058 cur -= 1 // reconsume the input character
4059 return new_character_token('<')
4062 // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
// Tokenizer handler for the character following '</' in RCDATA.
4063 tok_state_rcdata_end_tag_open = function () {
4065 c = txt.charAt(cur++)
// A letter starts an end-tag token: the tag name is lowercased while the
// temporary buffer receives the character as written.
4066 if (is_uc_alpha(c)) {
4067 tok_cur_tag = new_end_tag(c.toLowerCase())
4068 temporary_buffer += c
4069 tok_state = tok_state_rcdata_end_tag_name
4072 if (is_lc_alpha(c)) {
4073 tok_cur_tag = new_end_tag(c)
4074 temporary_buffer += c
4075 tok_state = tok_state_rcdata_end_tag_name
// Otherwise the '</' was plain text.
4079 tok_state = tok_state_rcdata
4080 cur -= 1 // reconsume the input character
4081 return new_character_token("</") // fixfull separate these
4084 // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
// Returns true when t is an end-tag token whose name equals the name of
// open_els[0].
4085 is_appropriate_end_tag = function (t) {
4086 // fixfull: this assumes that open_els[0].name is "the tag name of the last
4087 // start tag to have been emitted from this tokenizer"
4088 return t.type === TYPE_END_TAG && t.name === open_els[0].name
4091 // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
// Tokenizer handler that accumulates a possible end-tag name inside RCDATA.
// The tag is only honoured when is_appropriate_end_tag(tok_cur_tag) holds;
// otherwise the buffered characters are emitted as text.
// NOTE(review): two of the three branch conditions are omitted from this
// listing.
4092 tok_state_rcdata_end_tag_name = function () {
4094 c = txt.charAt(cur++)
4095 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4096 if (is_appropriate_end_tag(tok_cur_tag)) {
4097 tok_state = tok_state_before_attribute_name
4100 // else fall through to "Anything else"
4103 if (is_appropriate_end_tag(tok_cur_tag)) {
4104 tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
4107 // else fall through to "Anything else"
4110 if (is_appropriate_end_tag(tok_cur_tag)) {
4111 tok_state = tok_state_data
4114 // else fall through to "Anything else"
// Letters extend the (lowercased) tag name; the temporary buffer keeps the
// character as written.
4116 if (is_uc_alpha(c)) {
4117 tok_cur_tag.name += c.toLowerCase()
4118 temporary_buffer += c
4121 if (is_lc_alpha(c)) {
4122 tok_cur_tag.name += c
4123 temporary_buffer += c
// Anything else: return to RCDATA and emit '</' plus the buffered text.
4127 tok_state = tok_state_rcdata
4128 cur -= 1 // reconsume the input character
4129 return new_character_token('</' + temporary_buffer) // fixfull separate these
4132 // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
// Tokenizer handler for the character following '<' in RAWTEXT.
// NOTE(review): the condition guarding the first branch is omitted from this
// listing.
4133 tok_state_rawtext_less_than_sign = function () {
4135 c = txt.charAt(cur++)
// Possible end tag: reset the temporary buffer and continue in the RAWTEXT end
// tag open state.
4137 temporary_buffer = ''
4138 tok_state = tok_state_rawtext_end_tag_open
// Otherwise the '<' was plain text.
4142 tok_state = tok_state_rawtext
4143 cur -= 1 // reconsume the input character
4144 return new_character_token('<')
4147 // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
// Tokenizer handler for the character following '</' in RAWTEXT.
4148 tok_state_rawtext_end_tag_open = function () {
4149 c = txt.charAt(cur++)
// A letter starts an end-tag token: the tag name is lowercased while the
// temporary buffer receives the character as written.
4150 if (is_uc_alpha(c)) {
4151 tok_cur_tag = new_end_tag(c.toLowerCase())
4152 temporary_buffer += c
4153 tok_state = tok_state_rawtext_end_tag_name
4156 if (is_lc_alpha(c)) {
4157 tok_cur_tag = new_end_tag(c)
4158 temporary_buffer += c
4159 tok_state = tok_state_rawtext_end_tag_name
// Otherwise the '</' was plain text.
4163 tok_state = tok_state_rawtext
4164 cur -= 1 // reconsume the input character
4165 return new_character_token("</") // fixfull separate these
4168 // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
// Tokenizer handler that accumulates a possible end-tag name inside RAWTEXT.
// The tag is only honoured when is_appropriate_end_tag(tok_cur_tag) holds;
// otherwise the buffered characters are emitted as text.
// NOTE(review): two of the three branch conditions are omitted from this
// listing.
4169 tok_state_rawtext_end_tag_name = function () {
4171 c = txt.charAt(cur++)
4172 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4173 if (is_appropriate_end_tag(tok_cur_tag)) {
4174 tok_state = tok_state_before_attribute_name
4177 // else fall through to "Anything else"
4180 if (is_appropriate_end_tag(tok_cur_tag)) {
4181 tok_state = tok_state_self_closing_start_tag
4184 // else fall through to "Anything else"
4187 if (is_appropriate_end_tag(tok_cur_tag)) {
4188 tok_state = tok_state_data
4191 // else fall through to "Anything else"
// Letters extend the (lowercased) tag name; the temporary buffer keeps the
// character as written.
4193 if (is_uc_alpha(c)) {
4194 tok_cur_tag.name += c.toLowerCase()
4195 temporary_buffer += c
4198 if (is_lc_alpha(c)) {
4199 tok_cur_tag.name += c
4200 temporary_buffer += c
// Anything else: return to RAWTEXT and emit '</' plus the buffered text.
4204 tok_state = tok_state_rawtext
4205 cur -= 1 // reconsume the input character
4206 return new_character_token('</' + temporary_buffer) // fixfull separate these
4209 // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
// Tokenizer handler for the character following '<' in script data.
// NOTE(review): the conditions guarding the first two branches are omitted
// from this listing.
4210 tok_state_script_data_less_than_sign = function () {
4212 c = txt.charAt(cur++)
// Possible end tag: reset the temporary buffer.
4214 temporary_buffer = ''
4215 tok_state = tok_state_script_data_end_tag_open
// Start of an escape sequence: '<!' is emitted as text.
4219 tok_state = tok_state_script_data_escape_start
4220 return new_character_token('<!') // fixfull split
// Otherwise the '<' was plain text.
4223 tok_state = tok_state_script_data
4224 cur -= 1 // reconsume
4225 return new_character_token('<')
4228 // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
// Tokenizer handler for the character following '</' in script data.
4229 tok_state_script_data_end_tag_open = function () {
4231 c = txt.charAt(cur++)
// A letter starts an end-tag token: the tag name is lowercased while the
// temporary buffer receives the character as written.
4232 if (is_uc_alpha(c)) {
4233 tok_cur_tag = new_end_tag(c.toLowerCase())
4234 temporary_buffer += c
4235 tok_state = tok_state_script_data_end_tag_name
4238 if (is_lc_alpha(c)) {
4239 tok_cur_tag = new_end_tag(c)
4240 temporary_buffer += c
4241 tok_state = tok_state_script_data_end_tag_name
// Otherwise the '</' was plain text.
4245 tok_state = tok_state_script_data
4246 cur -= 1 // reconsume
4247 return new_character_token('</')
4250 // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
// Tokenizer handler that accumulates a possible end-tag name inside script
// data. The tag is only honoured when is_appropriate_end_tag(tok_cur_tag)
// holds; otherwise the buffered characters are emitted as text.
// NOTE(review): two of the three branch conditions are omitted from this
// listing.
4251 tok_state_script_data_end_tag_name = function () {
4253 c = txt.charAt(cur++)
4254 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4255 if (is_appropriate_end_tag(tok_cur_tag)) {
4256 tok_state = tok_state_before_attribute_name
4262 if (is_appropriate_end_tag(tok_cur_tag)) {
4263 tok_state = tok_state_self_closing_start_tag
4269 if (is_appropriate_end_tag(tok_cur_tag)) {
4270 tok_state = tok_state_data
// Letters extend the (lowercased) tag name; the temporary buffer keeps the
// character as written.
4275 if (is_uc_alpha(c)) {
4276 tok_cur_tag.name += c.toLowerCase()
4277 temporary_buffer += c
4280 if (is_lc_alpha(c)) {
4281 tok_cur_tag.name += c
4282 temporary_buffer += c
// Anything else: return to script data and emit '</' plus the buffered text.
4286 tok_state = tok_state_script_data
4287 cur -= 1 // reconsume
4288 return new_character_token("</" + temporary_buffer) // fixfull split
4291 // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
// Tokenizer handler for the script data escape start state.
// NOTE(review): the condition guarding the first branch is omitted from this
// listing.
4292 tok_state_script_data_escape_start = function () {
4294 c = txt.charAt(cur++)
// A '-' is emitted and the escape-start-dash state is entered.
4296 tok_state = tok_state_script_data_escape_start_dash
4297 return new_character_token('-')
// Otherwise fall back to plain script data and re-read the character.
4300 tok_state = tok_state_script_data
4301 cur -= 1 // reconsume
4304 // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
// Tokenizer handler for the script data escape start dash state.
// NOTE(review): the condition guarding the first branch is omitted from this
// listing.
4305 tok_state_script_data_escape_start_dash = function () {
4307 c = txt.charAt(cur++)
// A '-' is emitted and the escaped-dash-dash state is entered.
4309 tok_state = tok_state_script_data_escaped_dash_dash
4310 return new_character_token('-')
// Otherwise fall back to plain script data and re-read the character.
4313 tok_state = tok_state_script_data
4314 cur -= 1 // reconsume
4317 // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
// Tokenizer handler for the script data escaped state.
// NOTE(review): the conditions guarding the first two branches are omitted
// from this listing.
4318 tok_state_script_data_escaped = function () {
4320 c = txt.charAt(cur++)
4322 tok_state = tok_state_script_data_escaped_dash
4323 return new_character_token('-')
4326 tok_state = tok_state_script_data_escaped_less_than_sign
// NUL is emitted as U+FFFD.
4329 if (c === "\u0000") {
4331 return new_character_token("\ufffd")
// End of input: hand over to the data state, which re-reads the position.
4333 if (c === '') { // EOF
4334 tok_state = tok_state_data
4336 cur -= 1 // reconsume
// Any other character is emitted unchanged.
4340 return new_character_token(c)
4343 // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
// Tokenizer handler for the script data escaped dash state.
// NOTE(review): the conditions guarding the first two branches are omitted
// from this listing.
4344 tok_state_script_data_escaped_dash = function () {
4346 c = txt.charAt(cur++)
4348 tok_state = tok_state_script_data_escaped_dash_dash
4349 return new_character_token('-')
4352 tok_state = tok_state_script_data_escaped_less_than_sign
// NUL is emitted as U+FFFD and the escaped state resumes.
4355 if (c === "\u0000") {
4357 tok_state = tok_state_script_data_escaped
4358 return new_character_token("\ufffd")
// End of input: hand over to the data state, which re-reads the position.
4360 if (c === '') { // EOF
4361 tok_state = tok_state_data
4363 cur -= 1 // reconsume
// Any other character is emitted and the escaped state resumes.
4367 tok_state = tok_state_script_data_escaped
4368 return new_character_token(c)
4371 // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
// Tokenizer handler for the script data escaped dash dash state.
// NOTE(review): the conditions guarding the first three branches are omitted
// from this listing.
4372 tok_state_script_data_escaped_dash_dash = function () {
4374 c = txt.charAt(cur++)
4376 return new_character_token('-')
4379 tok_state = tok_state_script_data_escaped_less_than_sign
// A '>' is emitted and the tokenizer returns to plain script data.
4383 tok_state = tok_state_script_data
4384 return new_character_token('>')
// NUL is emitted as U+FFFD and the escaped state resumes.
4386 if (c === "\u0000") {
4388 tok_state = tok_state_script_data_escaped
4389 return new_character_token("\ufffd")
// End of input: hand over to the data state, which re-reads the position.
4391 if (c === '') { // EOF
4393 tok_state = tok_state_data
4394 cur -= 1 // reconsume
// Any other character is emitted and the escaped state resumes.
4398 tok_state = tok_state_script_data_escaped
4399 return new_character_token(c)
4402 // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
// Tokenizer handler for the character following '<' in escaped script data.
// NOTE(review): the condition guarding the first branch is omitted from this
// listing.
4403 tok_state_script_data_escaped_less_than_sign = function () {
4405 c = txt.charAt(cur++)
// Possible end tag: reset the temporary buffer.
4407 temporary_buffer = ''
4408 tok_state = tok_state_script_data_escaped_end_tag_open
// A letter may begin a nested "<script": the buffer is restarted with the
// lowercased letter, while '<' plus the letter as written is emitted as text.
4411 if (is_uc_alpha(c)) {
4412 temporary_buffer = c.toLowerCase() // yes, really
4413 tok_state = tok_state_script_data_double_escape_start
4414 return new_character_token("<" + c) // fixfull split
4416 if (is_lc_alpha(c)) {
4417 temporary_buffer = c
4418 tok_state = tok_state_script_data_double_escape_start
4419 return new_character_token("<" + c) // fixfull split
// Otherwise the '<' was plain text.
4422 tok_state = tok_state_script_data_escaped
4423 cur -= 1 // reconsume
4424 return new_character_token('<')
4427 // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
// Tokenizer handler for the character following '</' in escaped script data.
4428 tok_state_script_data_escaped_end_tag_open = function () {
4430 c = txt.charAt(cur++)
// A letter starts an end-tag token: the tag name is lowercased while the
// temporary buffer receives the character as written.
4431 if (is_uc_alpha(c)) {
4432 tok_cur_tag = new_end_tag(c.toLowerCase())
4433 temporary_buffer += c
4434 tok_state = tok_state_script_data_escaped_end_tag_name
4437 if (is_lc_alpha(c)) {
4438 tok_cur_tag = new_end_tag(c)
4439 temporary_buffer += c
4440 tok_state = tok_state_script_data_escaped_end_tag_name
// Otherwise the '</' was plain text.
4444 tok_state = tok_state_script_data_escaped
4445 cur -= 1 // reconsume
4446 return new_character_token('</') // fixfull split
4449 // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
4450 tok_state_script_data_escaped_end_tag_name = function () {
4452 c = txt.charAt(cur++)
4453 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
4454 if (is_appropriate_end_tag(tok_cur_tag)) {
4455 tok_state = tok_state_before_attribute_name
4461 if (is_appropriate_end_tag(tok_cur_tag)) {
4462 tok_state = tok_state_self_closing_start_tag
4468 if (is_appropriate_end_tag(tok_cur_tag)) {
4469 tok_state = tok_state_data
4474 if (is_uc_alpha(c)) {
4475 tok_cur_tag.name += c.toLowerCase()
4476 temporary_buffer += c.toLowerCase()
4479 if (is_lc_alpha(c)) {
4480 tok_cur_tag.name += c
4481 temporary_buffer += c.toLowerCase()
4485 tok_state = tok_state_script_data_escaped
4486 cur -= 1 // reconsume
4487 return new_character_token("</" + temporary_buffer) // fixfull split
4490 // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
// Tokenizer handler that decides whether a tag-like run inside escaped script
// data spells "script"; every consumed character is also emitted as text.
4491 tok_state_script_data_double_escape_start = function () {
4493 c = txt.charAt(cur++)
// A delimiter ends the name: "script" enters the double-escaped state,
// anything else stays escaped.
4494 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4495 if (temporary_buffer === 'script') {
4496 tok_state = tok_state_script_data_double_escaped
4498 tok_state = tok_state_script_data_escaped
4500 return new_character_token(c)
// Letters are buffered lowercased (for the comparison above) but emitted as
// written.
4502 if (is_uc_alpha(c)) {
4503 temporary_buffer += c.toLowerCase() // yes, really lowercase
4504 return new_character_token(c)
4506 if (is_lc_alpha(c)) {
4507 temporary_buffer += c
4508 return new_character_token(c)
// Anything else: back to the escaped state, re-reading the character.
4511 tok_state = tok_state_script_data_escaped
4512 cur -= 1 // reconsume
4515 // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
// Tokenizer handler for the script data double escaped state.
// NOTE(review): the conditions guarding the first two branches are omitted
// from this listing.
4516 tok_state_script_data_double_escaped = function () {
4518 c = txt.charAt(cur++)
4520 tok_state = tok_state_script_data_double_escaped_dash
4521 return new_character_token('-')
4524 tok_state = tok_state_script_data_double_escaped_less_than_sign
4525 return new_character_token('<')
// NUL is emitted as U+FFFD.
4527 if (c === "\u0000") {
4529 return new_character_token("\ufffd")
// End of input: hand over to the data state, which re-reads the position.
4531 if (c === '') { // EOF
4533 tok_state = tok_state_data
4534 cur -= 1 // reconsume
// Any other character is emitted unchanged.
4538 return new_character_token(c)
4541 // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
// Tokenizer handler for the script data double escaped dash state.
// NOTE(review): the conditions guarding the first two branches are omitted
// from this listing.
4542 tok_state_script_data_double_escaped_dash = function () {
4544 c = txt.charAt(cur++)
4546 tok_state = tok_state_script_data_double_escaped_dash_dash
4547 return new_character_token('-')
4550 tok_state = tok_state_script_data_double_escaped_less_than_sign
4551 return new_character_token('<')
// NUL is emitted as U+FFFD and the double-escaped state resumes.
4553 if (c === "\u0000") {
4555 tok_state = tok_state_script_data_double_escaped
4556 return new_character_token("\ufffd")
// End of input: hand over to the data state, which re-reads the position.
4558 if (c === '') { // EOF
4560 tok_state = tok_state_data
4561 cur -= 1 // reconsume
// Any other character is emitted and the double-escaped state resumes.
4565 tok_state = tok_state_script_data_double_escaped
4566 return new_character_token(c)
4569 // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
// Tokenizer handler for the script data double escaped dash dash state.
// NOTE(review): the conditions guarding the first three branches are omitted
// from this listing.
4570 tok_state_script_data_double_escaped_dash_dash = function () {
4572 c = txt.charAt(cur++)
4574 return new_character_token('-')
4577 tok_state = tok_state_script_data_double_escaped_less_than_sign
4578 return new_character_token('<')
// A '>' is emitted and the tokenizer returns to plain script data.
4581 tok_state = tok_state_script_data
4582 return new_character_token('>')
// NUL is emitted as U+FFFD and the double-escaped state resumes.
4584 if (c === "\u0000") {
4586 tok_state = tok_state_script_data_double_escaped
4587 return new_character_token("\ufffd")
// End of input: hand over to the data state, which re-reads the position.
4589 if (c === '') { // EOF
4591 tok_state = tok_state_data
4592 cur -= 1 // reconsume
// Any other character is emitted and the double-escaped state resumes.
4596 tok_state = tok_state_script_data_double_escaped
4597 return new_character_token(c)
4600 // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
// Tokenizer handler for the character following '<' in double-escaped script
// data.
// NOTE(review): the condition guarding the first branch is omitted from this
// listing.
4601 tok_state_script_data_double_escaped_less_than_sign = function () {
4603 c = txt.charAt(cur++)
// A '/' is emitted, the temporary buffer is reset and the double-escape-end
// state is entered.
4605 temporary_buffer = ''
4606 tok_state = tok_state_script_data_double_escape_end
4607 return new_character_token('/')
// Otherwise resume the double-escaped state and re-read the character.
4610 tok_state = tok_state_script_data_double_escaped
4611 cur -= 1 // reconsume
4614 // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
// Tokenizer handler that decides whether a "</..." run inside double-escaped
// script data spells "script"; every consumed character is also emitted as
// text.
4615 tok_state_script_data_double_escape_end = function () {
4617 c = txt.charAt(cur++)
// A delimiter ends the name: "script" drops back to the (single) escaped
// state, anything else stays double-escaped.
4618 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4619 if (temporary_buffer === 'script') {
4620 tok_state = tok_state_script_data_escaped
4622 tok_state = tok_state_script_data_double_escaped
4624 return new_character_token(c)
// Letters are buffered lowercased (for the comparison above) but emitted as
// written.
4626 if (is_uc_alpha(c)) {
4627 temporary_buffer += c.toLowerCase() // yes, really lowercase
4628 return new_character_token(c)
4630 if (is_lc_alpha(c)) {
4631 temporary_buffer += c
4632 return new_character_token(c)
// Anything else: resume the double-escaped state, re-reading the character.
4635 tok_state = tok_state_script_data_double_escaped
4636 cur -= 1 // reconsume
4639 // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
// Tokenizer handler for the before attribute name state.
// New attributes are unshifted onto tok_cur_tag.attrs_a as [name, value]
// pairs, so index 0 is always the attribute currently being built.
// NOTE(review): the case labels are omitted from this listing.
4640 tok_state_before_attribute_name = function () {
4641 var attr_name, c, tmp
4643 switch (c = txt.charAt(cur++)) {
4651 tok_state = tok_state_self_closing_start_tag
4655 tok_state = tok_state_data
4662 attr_name = "\ufffd"
4673 tok_state = tok_state_data
4676 if (is_uc_alpha(c)) {
4677 attr_name = c.toLowerCase()
// When a branch above chose a first character, start a new attribute with an
// empty value and continue in the attribute name state.
4682 if (attr_name != null) {
4683 tok_cur_tag.attrs_a.unshift([attr_name, ''])
4684 tok_state = tok_state_attribute_name
4689 // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
// Tokenizer handler for the attribute name state: extends the name of the
// attribute at tok_cur_tag.attrs_a[0] or leaves the state.
// NOTE(review): the case labels are omitted from this listing.
4690 tok_state_attribute_name = function () {
4692 switch (c = txt.charAt(cur++)) {
4697 tok_state = tok_state_after_attribute_name
4700 tok_state = tok_state_self_closing_start_tag
4703 tok_state = tok_state_before_attribute_value
4706 tok_state = tok_state_data
// One visible branch appends U+FFFD to the name.
4713 tok_cur_tag.attrs_a[0][0] += "\ufffd"
4719 tok_cur_tag.attrs_a[0][0] += c
4723 tok_state = tok_state_data
// Uppercase letters are folded to lowercase before being appended.
4726 if (is_uc_alpha(c)) {
4727 tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
4729 tok_cur_tag.attrs_a[0][0] += c
4735 // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
// Tokenizer handler for the after attribute name state.
// NOTE(review): the conditions guarding three of the branches are omitted from
// this listing.
4736 tok_state_after_attribute_name = function () {
4738 c = txt.charAt(cur++)
4739 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4743 tok_state = tok_state_self_closing_start_tag
4747 tok_state = tok_state_before_attribute_value
4751 tok_state = tok_state_data
// An uppercase letter starts a new attribute with a lowercased name.
4754 if (is_uc_alpha(c)) {
4755 tok_cur_tag.attrs_a.unshift([c.toLowerCase(), ''])
4756 tok_state = tok_state_attribute_name
// NUL starts a new attribute whose name begins with U+FFFD.
4759 if (c === "\u0000") {
4761 tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
4762 tok_state = tok_state_attribute_name
4765 if (c === '') { // EOF
4767 tok_state = tok_state_data
4768 cur -= 1 // reconsume
4771 if (c === '"' || c === "'" || c === '<') {
4773 // fall through to Anything else
// Anything else starts a new attribute named with the character as written.
4776 tok_cur_tag.attrs_a.unshift([c, ''])
4777 tok_state = tok_state_attribute_name
4780 // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Tokenizer handler for the before attribute value state: picks the quoted or
// unquoted value state, or starts the value of tok_cur_tag.attrs_a[0].
// NOTE(review): the case labels are omitted from this listing.
4781 tok_state_before_attribute_value = function () {
4783 switch (c = txt.charAt(cur++)) {
4791 tok_state = tok_state_attribute_value_double_quoted
4794 tok_state = tok_state_attribute_value_unquoted
4798 tok_state = tok_state_attribute_value_single_quoted
// One visible branch starts an unquoted value with U+FFFD.
4802 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4803 tok_state = tok_state_attribute_value_unquoted
4807 tok_state = tok_state_data
4814 tok_state = tok_state_data
// Last visible branch: the character becomes the start of an unquoted value.
4817 tok_cur_tag.attrs_a[0][1] += c
4818 tok_state = tok_state_attribute_value_unquoted
4823 // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
// Tokenizer handler for a double-quoted attribute value: appends to the value
// of tok_cur_tag.attrs_a[0] or leaves the state.
// NOTE(review): the case labels are omitted from this listing.
4824 tok_state_attribute_value_double_quoted = function () {
4826 switch (c = txt.charAt(cur++)) {
4828 tok_state = tok_state_after_attribute_value_quoted
// Character references are resolved with '"' passed as the first argument.
4831 tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
4835 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4839 tok_state = tok_state_data
4842 tok_cur_tag.attrs_a[0][1] += c
4847 // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Tokenizer handler for a single-quoted attribute value: appends to the value
// of tok_cur_tag.attrs_a[0] or leaves the state.
// NOTE(review): the case labels are omitted from this listing.
4848 tok_state_attribute_value_single_quoted = function () {
4850 switch (c = txt.charAt(cur++)) {
4852 tok_state = tok_state_after_attribute_value_quoted
// Character references are resolved with "'" passed as the first argument.
4855 tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
4859 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4863 tok_state = tok_state_data
4866 tok_cur_tag.attrs_a[0][1] += c
4871 // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// Tokenizer handler for an unquoted attribute value: appends to the value of
// tok_cur_tag.attrs_a[0] or leaves the state.
// NOTE(review): the case labels are omitted from this listing.
4872 tok_state_attribute_value_unquoted = function () {
4874 switch (c = txt.charAt(cur++)) {
4879 tok_state = tok_state_before_attribute_name
// Character references are resolved with '>' passed as the first argument.
4882 tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
4885 tok_state = tok_state_data
4891 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4895 tok_state = tok_state_data
4898 // Parse Error if ', <, = or ` (backtick)
4899 tok_cur_tag.attrs_a[0][1] += c
4904 // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
// Tokenizer handler for the state following a closing attribute quote.
// NOTE(review): the case labels are omitted from this listing.
4905 tok_state_after_attribute_value_quoted = function () {
4907 switch (c = txt.charAt(cur++)) {
4912 tok_state = tok_state_before_attribute_name
4915 tok_state = tok_state_self_closing_start_tag
4918 tok_state = tok_state_data
4925 tok_state = tok_state_data
// Last visible branch: treat the character as the start of the next attribute
// by re-reading it in the before attribute name state.
4929 tok_state = tok_state_before_attribute_name
4930 cur -= 1 // we didn't handle that char
4935 // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
// Tokenizer handler for the character following '/' inside a tag.
// NOTE(review): the branch conditions are omitted from this listing.
4936 tok_state_self_closing_start_tag = function () {
4938 c = txt.charAt(cur++)
// Marks the current tag token as self-closing and returns to the data state.
4940 tok_cur_tag.flag('self-closing', true)
4941 tok_state = tok_state_data
4946 tok_state = tok_state_data
4947 cur -= 1 // reconsume
// Otherwise the character is re-read as the start of an attribute.
4952 tok_state = tok_state_before_attribute_name
4953 cur -= 1 // reconsume
4956 // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
4957 // WARNING: put a comment token in tok_cur_tag before setting this state
// Takes the text from cur up to the next '>' (or to the end of txt when there
// is none), replaces every NUL with U+FFFD, appends the result to
// tok_cur_tag.text and returns to the data state.
// NOTE(review): the lines that advance cur and return the token are omitted
// from this listing.
4958 tok_state_bogus_comment = function () {
4960 next_gt = txt.indexOf('>', cur)
4961 if (next_gt === -1) {
4962 val = txt.substr(cur)
4965 val = txt.substr(cur, next_gt - cur)
4968 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4969 tok_cur_tag.text += val
4970 tok_state = tok_state_data
4974 // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
// Tokenizer handler for the text following '<!': looks ahead in txt (without
// a per-character read) to choose between comment, doctype, CDATA section and
// bogus comment.
// NOTE(review): the lines that advance cur past the matched text are omitted
// from this listing.
4975 tok_state_markup_declaration_open = function () {
// '--' starts a comment with empty text.
4977 if (txt.substr(cur, 2) === '--') {
4979 tok_cur_tag = new_comment_token('')
4980 tok_state = tok_state_comment_start
// 'doctype' is matched case-insensitively.
4983 if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
4985 tok_state = tok_state_doctype
// '[CDATA[' is matched case-sensitively and only when there is an adjusted
// current node outside the HTML namespace.
4988 acn = adjusted_current_node()
4989 if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
4991 tok_state = tok_state_cdata_section
// Otherwise: bogus comment, starting from an empty comment token.
4996 tok_cur_tag = new_comment_token('')
4997 tok_state = tok_state_bogus_comment
5000 // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
5001 tok_state_comment_start = function () {
5003 switch (c = txt.charAt(cur++)) {
5005 tok_state = tok_state_comment_start_dash
5009 tok_state = tok_state_comment
5010 return new_character_token("\ufffd")
5014 tok_state = tok_state_data
5019 tok_state = tok_state_data
5020 cur -= 1 // reconsume
5024 tok_cur_tag.text += c
5025 tok_state = tok_state_comment
5030 // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
5031 tok_state_comment_start_dash = function () {
// Comment start dash tokenizer state. Dispatches on the next character c.
// Visible outcomes: a move to the comment end state; appending "-" followed
// by U+FFFD, or "-" followed by c, to the comment text and continuing in the
// comment state; and two returns to the data state, the second of which
// reconsumes the character. Case labels are on lines not shown here.
5033 switch (c = txt.charAt(cur++)) {
5035 tok_state = tok_state_comment_end
5039 tok_cur_tag.text += "-\ufffd"
5040 tok_state = tok_state_comment
5044 tok_state = tok_state_data
5049 tok_state = tok_state_data
5050 cur -= 1 // reconsume
5054 tok_cur_tag.text += "-" + c
5055 tok_state = tok_state_comment
5060 // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
5061 tok_state_comment = function () {
// Comment tokenizer state. Dispatches on the next character c.
// Visible outcomes: a move to the comment end dash state; appending U+FFFD to
// the comment text; a return to the data state that reconsumes the character;
// and appending c itself to the comment text (no state change is visible for
// the two appending branches). Case labels are on lines not shown here.
5063 switch (c = txt.charAt(cur++)) {
5065 tok_state = tok_state_comment_end_dash
5069 tok_cur_tag.text += "\ufffd"
5073 tok_state = tok_state_data
5074 cur -= 1 // reconsume
5078 tok_cur_tag.text += c
5083 // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
5084 tok_state_comment_end_dash = function () {
// Comment end dash tokenizer state. Dispatches on the next character c.
// Visible outcomes: a move to the comment end state; appending "-" followed
// by U+FFFD, or "-" followed by c, to the comment text and going back to the
// comment state; and a return to the data state that reconsumes the character.
// Case labels are on lines not shown here.
5086 switch (c = txt.charAt(cur++)) {
5088 tok_state = tok_state_comment_end
5092 tok_cur_tag.text += "-\ufffd"
5093 tok_state = tok_state_comment
5097 tok_state = tok_state_data
5098 cur -= 1 // reconsume
5102 tok_cur_tag.text += "-" + c
5103 tok_state = tok_state_comment
5108 // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
5109 tok_state_comment_end = function () {
// Comment end tokenizer state. Dispatches on the next character c.
// Visible outcomes: a return to the data state; appending "--" followed by
// U+FFFD, or "--" followed by c, to the comment text and going back to the
// comment state; a move to the comment end bang state; appending a single '-'
// with no visible state change; and a return to the data state that
// reconsumes the character. Case labels are on lines not shown here.
5111 switch (c = txt.charAt(cur++)) {
5113 tok_state = tok_state_data
5118 tok_cur_tag.text += "--\ufffd"
5119 tok_state = tok_state_comment
5123 tok_state = tok_state_comment_end_bang
5127 tok_cur_tag.text += '-'
5131 tok_state = tok_state_data
5132 cur -= 1 // reconsume
5137 tok_cur_tag.text += "--" + c
5138 tok_state = tok_state_comment
5143 // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
5144 tok_state_comment_end_bang = function () {
5146 switch (c = txt.charAt(cur++)) {
5148 tok_cur_tag.text += "--!" + c
5149 tok_state = tok_state_comment_end_dash
5152 tok_state = tok_state_data
5157 tok_cur_tag.text += "--!\ufffd"
5158 tok_state = tok_state_comment
5162 tok_state = tok_state_data
5163 cur -= 1 // reconsume
5167 tok_cur_tag.text += "--!" + c
5168 tok_state = tok_state_comment
5173 // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
5174 tok_state_doctype = function () {
// DOCTYPE tokenizer state. Dispatches on the next character c.
// Visible outcomes: a move to the before-DOCTYPE-name state; a branch that
// returns to the data state, builds an empty doctype token in el with its
// 'force-quirks' flag set and reconsumes the character; and a branch that
// reconsumes the character in the before-DOCTYPE-name state.
// Case labels are on lines not shown here.
5176 switch (c = txt.charAt(cur++)) {
5181 tok_state = tok_state_before_doctype_name
5185 tok_state = tok_state_data
5186 el = new_doctype_token('')
5187 el.flag('force-quirks', true)
5188 cur -= 1 // reconsume
5193 tok_state = tok_state_before_doctype_name
5194 cur -= 1 // reconsume
5199 // 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
5200 tok_state_before_doctype_name = function () {
// Before-DOCTYPE-name tokenizer state. Reads one character into c and:
//  - tests for tab, LF, FF or space first
//  - for an uppercase letter, starts a doctype token named with its lowercase form
//  - for U+0000, starts a doctype token named U+FFFD
//  - in one branch (its test is not visible here), builds an empty
//    force-quirks doctype token in el and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, starts a doctype token named c
// Every branch that starts a named token continues in the DOCTYPE name state.
5202 c = txt.charAt(cur++)
5203 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5206 if (is_uc_alpha(c)) {
5207 tok_cur_tag = new_doctype_token(c.toLowerCase())
5208 tok_state = tok_state_doctype_name
5211 if (c === "\u0000") {
5213 tok_cur_tag = new_doctype_token("\ufffd")
5214 tok_state = tok_state_doctype_name
5219 el = new_doctype_token('')
5220 el.flag('force-quirks', true)
5221 tok_state = tok_state_data
5224 if (c === '') { // EOF
5226 tok_state = tok_state_data
5227 el = new_doctype_token('')
5228 el.flag('force-quirks', true)
5229 cur -= 1 // reconsume
5233 tok_cur_tag = new_doctype_token(c)
5234 tok_state = tok_state_doctype_name
5238 // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
5239 tok_state_doctype_name = function () {
// DOCTYPE name tokenizer state. Reads one character into c and:
//  - on tab, LF, FF or space, moves to the after-DOCTYPE-name state
//  - in one branch (its test is not visible here), returns to the data state
//  - appends the lowercase form of an uppercase letter to tok_cur_tag.name
//  - appends U+FFFD for U+0000
//  - at EOF, returns to the data state, sets 'force-quirks' and reconsumes
//  - in the last visible branch, appends c unchanged
5241 c = txt.charAt(cur++)
5242 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5243 tok_state = tok_state_after_doctype_name
5247 tok_state = tok_state_data
5250 if (is_uc_alpha(c)) {
5251 tok_cur_tag.name += c.toLowerCase()
5254 if (c === "\u0000") {
5256 tok_cur_tag.name += "\ufffd"
5259 if (c === '') { // EOF
5261 tok_state = tok_state_data
5262 tok_cur_tag.flag('force-quirks', true)
5263 cur -= 1 // reconsume
5267 tok_cur_tag.name += c
5271 // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
5272 tok_state_after_doctype_name = function () {
// After-DOCTYPE-name tokenizer state. Reads one character into c and:
//  - tests for tab, LF, FF or space first
//  - in one branch (its test is not visible here), returns to the data state
//  - at EOF, returns to the data state, sets 'force-quirks' and reconsumes
//  - on the keyword 'public' or 'system' (any letter case), moves to the
//    matching after-keyword state
//  - in the last visible branch, sets 'force-quirks' and enters the bogus
//    doctype state
5274 c = txt.charAt(cur++)
5275 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5279 tok_state = tok_state_data
5282 if (c === '') { // EOF
5284 tok_state = tok_state_data
5285 tok_cur_tag.flag('force-quirks', true)
5286 cur -= 1 // reconsume
// c has already been consumed, so the keyword comparison starts at cur - 1
5290 if (txt.substr(cur - 1, 6).toLowerCase() === 'public') {
5292 tok_state = tok_state_after_doctype_public_keyword
5295 if (txt.substr(cur - 1, 6).toLowerCase() === 'system') {
5297 tok_state = tok_state_after_doctype_system_keyword
5301 tok_cur_tag.flag('force-quirks', true)
5302 tok_state = tok_state_bogus_doctype
5306 // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
5307 tok_state_after_doctype_public_keyword = function () {
// After-DOCTYPE-public-keyword tokenizer state. Reads one character into c and:
//  - on tab, LF, FF or space, moves to the before-public-identifier state
//  - in two branches, resets public_identifier to '' and enters the double-
//    or single-quoted public identifier state
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, sets 'force-quirks' and enters the bogus
//    doctype state
// Tests other than whitespace and EOF are on lines not shown here.
5309 c = txt.charAt(cur++)
5310 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5311 tok_state = tok_state_before_doctype_public_identifier
5316 tok_cur_tag.public_identifier = ''
5317 tok_state = tok_state_doctype_public_identifier_double_quoted
5322 tok_cur_tag.public_identifier = ''
5323 tok_state = tok_state_doctype_public_identifier_single_quoted
5328 tok_cur_tag.flag('force-quirks', true)
5329 tok_state = tok_state_data
5332 if (c === '') { // EOF
5334 tok_state = tok_state_data
5335 tok_cur_tag.flag('force-quirks', true)
5336 cur -= 1 // reconsume
5341 tok_cur_tag.flag('force-quirks', true)
5342 tok_state = tok_state_bogus_doctype
5346 // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
5347 tok_state_before_doctype_public_identifier = function () {
// Before-DOCTYPE-public-identifier tokenizer state. Reads one character into c and:
//  - tests for tab, LF, FF or space first
//  - in two branches, resets public_identifier to '' and enters the double-
//    or single-quoted public identifier state
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, sets 'force-quirks' and enters the bogus
//    doctype state
// Tests other than whitespace and EOF are on lines not shown here.
5349 c = txt.charAt(cur++)
5350 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5355 tok_cur_tag.public_identifier = ''
5356 tok_state = tok_state_doctype_public_identifier_double_quoted
5361 tok_cur_tag.public_identifier = ''
5362 tok_state = tok_state_doctype_public_identifier_single_quoted
5367 tok_cur_tag.flag('force-quirks', true)
5368 tok_state = tok_state_data
5371 if (c === '') { // EOF
5373 tok_state = tok_state_data
5374 tok_cur_tag.flag('force-quirks', true)
5375 cur -= 1 // reconsume
5380 tok_cur_tag.flag('force-quirks', true)
5381 tok_state = tok_state_bogus_doctype
5386 // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
5387 tok_state_doctype_public_identifier_double_quoted = function () {
// DOCTYPE public identifier (double-quoted) tokenizer state. Reads one
// character into c and:
//  - in one branch, moves to the after-public-identifier state
//  - appends U+FFFD to public_identifier for U+0000
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, appends c to public_identifier
// Tests other than U+0000 and EOF are on lines not shown here.
5389 c = txt.charAt(cur++)
5391 tok_state = tok_state_after_doctype_public_identifier
5394 if (c === "\u0000") {
5396 tok_cur_tag.public_identifier += "\ufffd"
5401 tok_cur_tag.flag('force-quirks', true)
5402 tok_state = tok_state_data
5405 if (c === '') { // EOF
5407 tok_state = tok_state_data
5408 tok_cur_tag.flag('force-quirks', true)
5409 cur -= 1 // reconsume
5413 tok_cur_tag.public_identifier += c
5417 // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
5418 tok_state_doctype_public_identifier_single_quoted = function () {
// DOCTYPE public identifier (single-quoted) tokenizer state. Reads one
// character into c and:
//  - in one branch, moves to the after-public-identifier state
//  - appends U+FFFD to public_identifier for U+0000
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, appends c to public_identifier
// Tests other than U+0000 and EOF are on lines not shown here.
5420 c = txt.charAt(cur++)
5422 tok_state = tok_state_after_doctype_public_identifier
5425 if (c === "\u0000") {
5427 tok_cur_tag.public_identifier += "\ufffd"
5432 tok_cur_tag.flag('force-quirks', true)
5433 tok_state = tok_state_data
5436 if (c === '') { // EOF
5438 tok_state = tok_state_data
5439 tok_cur_tag.flag('force-quirks', true)
5440 cur -= 1 // reconsume
5444 tok_cur_tag.public_identifier += c
5448 // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
5449 tok_state_after_doctype_public_identifier = function () {
// After-DOCTYPE-public-identifier tokenizer state. Reads one character into c and:
//  - on tab, LF, FF or space, moves to the between-public-and-system-
//    identifiers state
//  - in one branch, returns to the data state (no force-quirks is visible there)
//  - in two branches, resets system_identifier to '' and enters the double-
//    or single-quoted system identifier state
//  - at EOF, returns to the data state, sets 'force-quirks' and reconsumes
//  - in the last visible branch, sets 'force-quirks' and enters the bogus
//    doctype state
// Tests other than whitespace and EOF are on lines not shown here.
5451 c = txt.charAt(cur++)
5452 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5453 tok_state = tok_state_between_doctype_public_and_system_identifiers
5457 tok_state = tok_state_data
5462 tok_cur_tag.system_identifier = ''
5463 tok_state = tok_state_doctype_system_identifier_double_quoted
5468 tok_cur_tag.system_identifier = ''
5469 tok_state = tok_state_doctype_system_identifier_single_quoted
5472 if (c === '') { // EOF
5474 tok_state = tok_state_data
5475 tok_cur_tag.flag('force-quirks', true)
5476 cur -= 1 // reconsume
5481 tok_cur_tag.flag('force-quirks', true)
5482 tok_state = tok_state_bogus_doctype
5486 // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
5487 tok_state_between_doctype_public_and_system_identifiers = function () {
// Between-DOCTYPE-public-and-system-identifiers tokenizer state. Reads one
// character into c and:
//  - tests for tab, LF, FF or space first
//  - in one branch, returns to the data state (no force-quirks is visible there)
//  - in two branches, resets system_identifier to '' and enters the double-
//    or single-quoted system identifier state
//  - at EOF, returns to the data state, sets 'force-quirks' and reconsumes
//  - in the last visible branch, sets 'force-quirks' and enters the bogus
//    doctype state
// Tests other than whitespace and EOF are on lines not shown here.
5489 c = txt.charAt(cur++)
5490 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5494 tok_state = tok_state_data
5499 tok_cur_tag.system_identifier = ''
5500 tok_state = tok_state_doctype_system_identifier_double_quoted
5505 tok_cur_tag.system_identifier = ''
5506 tok_state = tok_state_doctype_system_identifier_single_quoted
5509 if (c === '') { // EOF
5511 tok_state = tok_state_data
5512 tok_cur_tag.flag('force-quirks', true)
5513 cur -= 1 // reconsume
5518 tok_cur_tag.flag('force-quirks', true)
5519 tok_state = tok_state_bogus_doctype
5523 // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
5524 tok_state_after_doctype_system_keyword = function () {
// After-DOCTYPE-system-keyword tokenizer state. Reads one character into c and:
//  - on tab, LF, FF or space, moves to the before-system-identifier state
//  - in two branches, resets system_identifier to '' and enters the double-
//    or single-quoted system identifier state
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, sets 'force-quirks' and enters the bogus
//    doctype state
// Tests other than whitespace and EOF are on lines not shown here.
5526 c = txt.charAt(cur++)
5527 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5528 tok_state = tok_state_before_doctype_system_identifier
5533 tok_cur_tag.system_identifier = ''
5534 tok_state = tok_state_doctype_system_identifier_double_quoted
5539 tok_cur_tag.system_identifier = ''
5540 tok_state = tok_state_doctype_system_identifier_single_quoted
5545 tok_cur_tag.flag('force-quirks', true)
5546 tok_state = tok_state_data
5549 if (c === '') { // EOF
5551 tok_state = tok_state_data
5552 tok_cur_tag.flag('force-quirks', true)
5553 cur -= 1 // reconsume
5558 tok_cur_tag.flag('force-quirks', true)
5559 tok_state = tok_state_bogus_doctype
5563 // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
5564 tok_state_before_doctype_system_identifier = function () {
// Before-DOCTYPE-system-identifier tokenizer state. Reads one character into c and:
//  - tests for tab, LF, FF or space first
//  - in two branches, resets system_identifier to '' and enters the double-
//    or single-quoted system identifier state
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, sets 'force-quirks' and enters the bogus
//    doctype state
// Tests other than whitespace and EOF are on lines not shown here.
5566 c = txt.charAt(cur++)
5567 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5571 tok_cur_tag.system_identifier = ''
5572 tok_state = tok_state_doctype_system_identifier_double_quoted
5576 tok_cur_tag.system_identifier = ''
5577 tok_state = tok_state_doctype_system_identifier_single_quoted
5582 tok_cur_tag.flag('force-quirks', true)
5583 tok_state = tok_state_data
5586 if (c === '') { // EOF
5588 tok_state = tok_state_data
5589 tok_cur_tag.flag('force-quirks', true)
5590 cur -= 1 // reconsume
5595 tok_cur_tag.flag('force-quirks', true)
5596 tok_state = tok_state_bogus_doctype
5600 // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
5601 tok_state_doctype_system_identifier_double_quoted = function () {
// DOCTYPE system identifier (double-quoted) tokenizer state. Reads one
// character into c and:
//  - in one branch, moves to the after-system-identifier state
//  - appends U+FFFD to system_identifier for U+0000
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, appends c to system_identifier
// Tests other than U+0000 and EOF are on lines not shown here.
5603 c = txt.charAt(cur++)
5605 tok_state = tok_state_after_doctype_system_identifier
5608 if (c === "\u0000") {
5610 tok_cur_tag.system_identifier += "\ufffd"
5615 tok_cur_tag.flag('force-quirks', true)
5616 tok_state = tok_state_data
5619 if (c === '') { // EOF
5621 tok_state = tok_state_data
5622 tok_cur_tag.flag('force-quirks', true)
5623 cur -= 1 // reconsume
5627 tok_cur_tag.system_identifier += c
5631 // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
5632 tok_state_doctype_system_identifier_single_quoted = function () {
// DOCTYPE system identifier (single-quoted) tokenizer state. Reads one
// character into c and:
//  - in one branch, moves to the after-system-identifier state
//  - appends U+FFFD to system_identifier for U+0000
//  - in one branch, sets 'force-quirks' and returns to the data state
//  - at EOF, does the same and also reconsumes
//  - in the last visible branch, appends c to system_identifier
// Tests other than U+0000 and EOF are on lines not shown here.
5634 c = txt.charAt(cur++)
5636 tok_state = tok_state_after_doctype_system_identifier
5639 if (c === "\u0000") {
5641 tok_cur_tag.system_identifier += "\ufffd"
5646 tok_cur_tag.flag('force-quirks', true)
5647 tok_state = tok_state_data
5650 if (c === '') { // EOF
5652 tok_state = tok_state_data
5653 tok_cur_tag.flag('force-quirks', true)
5654 cur -= 1 // reconsume
5658 tok_cur_tag.system_identifier += c
5662 // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
5663 tok_state_after_doctype_system_identifier = function () {
// After-DOCTYPE-system-identifier tokenizer state. Reads one character into c and:
//  - tests for tab, LF, FF or space first
//  - in one branch (its test is not visible here), returns to the data state
//  - at EOF, returns to the data state, sets 'force-quirks' and reconsumes
//  - in the last visible branch, enters the bogus doctype state, deliberately
//    without setting 'force-quirks'
5665 c = txt.charAt(cur++)
5666 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5670 tok_state = tok_state_data
5673 if (c === '') { // EOF
5675 tok_state = tok_state_data
5676 tok_cur_tag.flag('force-quirks', true)
5677 cur -= 1 // reconsume
5682 // do _not_ tok_cur_tag.flag 'force-quirks', true
5683 tok_state = tok_state_bogus_doctype
5687 // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
5688 tok_state_bogus_doctype = function () {
// Bogus DOCTYPE tokenizer state. Reads one character into c and:
//  - in one branch (its test is not visible here), returns to the data state
//  - at EOF, returns to the data state and reconsumes
// No 'force-quirks' change is visible in this state.
5690 c = txt.charAt(cur++)
5692 tok_state = tok_state_data
5695 if (c === '') { // EOF
5696 tok_state = tok_state_data
5697 cur -= 1 // reconsume
5704 // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
5705 tok_state_cdata_section = function () {
// CDATA section tokenizer state. Switches back to the data state up front,
// then takes the text from cur up to the next ']]>' (or everything that
// remains when there is none), replaces each U+0000 with U+FFFD and, when the
// result is non-empty, returns it as one character token.
5707 tok_state = tok_state_data
5708 next_gt = txt.indexOf(']]>', cur)
5709 if (next_gt === -1) {
5710 val = txt.substr(cur)
5713 val = txt.substr(cur, next_gt - cur)
5716 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5717 if (val.length > 0) {
5718 return new_character_token(val) // fixfull split
5723 // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5724 // Don't set this as a state, just call it
5725 // returns a string (NOT a text node)
5726 parse_character_reference = function (allowed_char, in_attr) {
// Decodes the character reference found at cur in txt.
// Parameters: allowed_char and in_attr are both optional; each is checked
// against null/undefined below (the defaults assigned are on lines not shown
// in this excerpt).
// Numeric references: digits are scanned from position start using charset,
// leading zeros are stripped, and the value is parsed in the chosen base. A
// unicode_fixes entry for the code point takes priority; otherwise the
// visible return path is from_code_point(code_point).
// Named references: up to 31 characters are checked against alnum. A run that
// ends in ';' is looked up with decode_named_char_ref; otherwise prefixes of
// length 2..max are looked up in legacy_char_refs.
5727 var base, c, charset, code_point, decoded, i, max, start
5728 if (allowed_char == null) {
5731 if (in_attr == null) {
5734 if (cur >= txt.length) {
5737 switch (c = txt.charAt(cur)) {
5746 // explicitly not a parse error
5750 // there has to be "one or more" alnums between & and ; to be a parse error
5754 if (cur + 1 >= txt.length) {
5757 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
5767 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5774 if (txt.charAt(start + i) === ';') {
5779 code_point = txt.substr(start, i)
// strip leading zeros, always keeping at least one digit
5780 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5781 code_point = code_point.substr(1)
5783 code_point = parseInt(code_point, base)
5784 if (unicode_fixes[code_point] != null) {
5786 return unicode_fixes[code_point]
// surrogate range, or beyond the last Unicode code point
5788 if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
// control-character ranges, U+000B, U+FDD0..U+FDEF and the xFFFE/xFFFF pair of every plane
5792 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5795 return from_code_point(code_point)
5801 for (i = 0; i < 31; ++i) {
5802 if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5807 // exit early, because parse_error() below needs at least one alnum
5810 if (txt.charAt(cur + i) === ';') {
5811 decoded = decode_named_char_ref(txt.substr(cur, i))
5812 i += 1 // scan past the ';' (after, so we don't pass it to decode)
5813 if (decoded != null) {
5817 // else FALL THROUGH (check for match without last char(s) or ";")
5819 // no ';' terminator (only legacy char refs)
5821 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5822 c = legacy_char_refs[txt.substr(cur, i)]
5825 if (txt.charAt(cur + i) === '=') {
5826 // "because some legacy user agents will
5827 // misinterpret the markup in those cases"
5831 if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5832 // this makes attributes forgiving about url args
5836 // ok, and besides the weird exceptions for attributes...
5837 // return the matching char
5838 cur += i // consume entity chars
5839 parse_error() // because no terminating ";"
5849 eat_next_token_if_newline = function () {
// Checks whether a token t is a text token holding a newline. The lines that
// obtain t and record old_cur, and the action taken on a match, are not shown
// in this excerpt.
// When exactly one input character was consumed (cur - old_cur === 1) the
// text came straight from the input and either CR or LF counts as a newline;
// otherwise it came from a character reference and only LF counts.
5856 if (t.type === TYPE_TEXT) {
5857 // definition of a newline depends on whether it was a character ref or not
5858 if (cur - old_cur === 1) {
5859 // not a character reference
5860 if (t.text === "\u000d" || t.text === "\u000a") {
5864 if (t.text === "\u000a") {
5873 // tree constructor initialization
5874 // see comments on TYPE_TAG/etc for the structure of this data
5877 doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5878 doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5879 fragment_root = null // fragment parsing algorithm returns children of this
5881 afe = [] // active formatting elements
5882 template_ins_modes = []
// parsing starts in the initial insertion mode
5883 ins_mode = ins_mode_initial
5884 original_ins_mode = ins_mode // TODO check spec
// scripting flag: taken from args.scripting when the caller provides it, otherwise true
5885 flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5886 flag_frameset_ok = true
5888 flag_foster_parenting = false
5889 form_element_pointer = null
5890 temporary_buffer = null
5891 pending_table_character_tokens = []
5892 head_element_pointer = null
// fragment-parsing state starts off; parse_init assigns both when a context element is available
5893 flag_fragment_parsing = false
5894 context_element = null
5895 prev_node_id = 0 // just for debugging
5897 // tokenizer initialization
5898 tok_state = tok_state_data
5900 parse_init = function () {
// Prepares parser state before the main loop runs.
//  - args.fragment (text form): builds context_element with token_to_element
//    and gives it a fresh no-quirks document node. Prefixes 'math ' and 'svg '
//    are tested; presumably they choose the namespace ns (confirm on the
//    lines not shown).
//  - args.context (Node form): uses that node as context_element.
//  - With a context element: turns on fragment parsing, makes a new 'html'
//    root as doc, copies 'quirks mode' from the context's document, picks the
//    initial tokenizer state from the context element's name, creates
//    fragment_root as the only open element, and sets form_element_pointer
//    from a 'form' element found starting at the context element.
//  - Finally converts CRLF and lone CR in txt to LF.
5901 var el, f, ns, old_doc, t
5902 // fragment parsing (text arg)
5903 if (args.fragment != null) {
5904 // this handles the fragment from the tests in the format described here:
5905 // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
5908 if (f.substr(0, 5) === 'math ') {
5911 } else if (f.substr(0, 4) === 'svg ') {
5916 context_element = token_to_element(t, ns)
5917 context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5918 context_element.document.flag('quirks mode', QUIRKS_NO)
5920 // fragment parsing (Node arg)
5921 if (args.context != null) {
5922 context_element = args.context
5925 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5926 // fragment parsing algorithm
5927 if (context_element != null) {
5928 flag_fragment_parsing = true
5929 doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5930 // search up the tree from context, to try to find its document,
5931 // because this file only puts a "document" property on the root
5934 el = context_element
5936 if (el.document != null) {
5937 old_doc = el.document
5947 doc.flag('quirks mode', old_doc.flag('quirks mode'))
// only HTML-namespace context elements change the initial tokenizer state
5950 if (context_element.namespace === NS_HTML) {
5951 switch (context_element.name) {
5954 tok_state = tok_state_rcdata
5961 tok_state = tok_state_rawtext
5964 tok_state = tok_state_script_data
5967 if (flag_scripting) {
5968 tok_state = tok_state_rawtext
5972 tok_state = tok_state_plaintext
5975 fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5976 doc.children.push(fragment_root)
5977 fragment_root.document = doc
5978 open_els = [fragment_root]
5979 if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
5980 template_ins_modes.unshift(ins_mode_in_template)
5982 // fixfull create token for context (it should have its original one already)
5984 // set form_element pointer... in the foreign doc?!
5985 el = context_element
5987 if (el.name === 'form' && el.namespace === NS_HTML) {
5988 form_element_pointer = el
5999 // text pre-processing
6000 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
6001 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6002 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6005 // http://www.w3.org/TR/html5/syntax.html#tree-construction
6006 parse_main_loop = function () {
// Runs the tree-construction loop for as long as flag_parsing stays set (the
// loop body is on lines not shown in this excerpt). When parsing a fragment,
// the result returned is the children of fragment_root.
6008 while (flag_parsing) {
6012 // fixfull parse error if has self-closing flag, but it wasn't acknowledged
6019 if (flag_fragment_parsing) {
6020 return fragment_root.children
// Publish the parser: as module.exports when context is 'module'; the other
// visible path attaches it to window as peach_parser.
6030 if (context === 'module') {
6031 module.exports = this_module
6033 window.peach_parser = this_module