1 // todo remove unused variables
2 // todo remove debug log, or make a way to access it
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
21 // This file implements a thorough parser for html5, meant to be used by a
24 // The implementation is a pretty direct implementation of the parsing algorithm
27 // http://www.w3.org/TR/html5/syntax.html
29 // except for some places marked "WHATWG" that are implemented as described here:
31 // https://html.spec.whatwg.org/multipage/syntax.html
33 // This code passes all of the tests in the .dat files at:
35 // https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
42 // See README.md for how to run this file in the browser or in node.js.
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
49 // peach_parser.parse("<p><b>hi</p>")
51 // Or, if you don't want <html><head><body>/etc, do this:
53 // peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
55 // The return value is an array of Nodes. A Node contains:
56 // type: one of: "tag", "text", "comment", "doctype"
57 // text: contents for text/comment nodes
58 // attrs: object of attributes, eg {href: "#main"}
59 // children: array of Nodes
60 // namespace: one of: "html", "mathml", "svg"
61 // parent: another Node or null
63 // This code is a work in progress, eg try search this file for "fixfull",
67 // Notes: stacks/lists
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
73 // stacks grow downward (current element is index=0)
75 // example: open_els = [a, b, c, d, e, f, g]
77 // "grows downwards" means it's visualized like this: (index: el "names")
79 // 6: g "start of the list", "topmost", "first"
81 // 4: e "previous" (to d), "above", "before"
82 // 3: d (previous/next are relative to this element)
83 // 2: c "next", "after", "lower", "below"
85 // 0: a "end of the list", "current node", "bottommost", "last"
88 var NS_HTML, NS_MATHML, NS_SVG, QUIRKS_LIMITED, QUIRKS_NO, QUIRKS_YES, TYPE_AAA_BOOKMARK, TYPE_AFE_MARKER, TYPE_COMMENT, TYPE_DOCTYPE, TYPE_END_TAG, TYPE_EOF, TYPE_START_TAG, TYPE_TAG, TYPE_TEXT, _decode_named_char_ref, adjust_foreign_attributes, adjust_mathml_attributes, adjust_svg_attributes, adp_els, alnum, context, debug_log, debug_log_each, debug_log_reset, decode_named_char_ref, decode_named_char_ref_cache, decode_named_char_ref_el, digits, el_is_special, el_is_special_not_adp, end_tag_implied, exports, foreign_attr_fixes, formatting_elements, foster_parenting_targets, from_code_point, g_debug_log, h_tags, hex_chars, is_html_integration, is_input_hidden_tok, is_lc_alpha, is_mathml_text_integration_point, is_space, is_space_tok, is_uc_alpha, lc_alpha, legacy_char_refs, mathml_elements, mathml_text_integration, new_aaa_bookmark, new_afe_marker, new_character_token, new_comment_token, new_doctype_token, new_element, new_end_tag, new_eof_token, new_open_tag, new_text_node, parse_html, prev_node_id, quirks_yes_pi_prefixes, space_chars, special_elements, svg_attribute_fixes, svg_elements, svg_name_fixes, tag_name_chars, uc_alpha, unicode_fixes, whitespace_chars
90 if ((typeof module) !== 'undefined' && (module.exports != null)) {
94 window.peach_parser = {}
// Convert a Unicode code point to a JavaScript (UTF-16) string.
//
// x: integer code point
// returns: String.fromCodePoint(x) when the engine provides it; otherwise a
// single UTF-16 code unit for x < 0x10000, or a surrogate pair for higher
// code points.
from_code_point = function (x) {
  var offset
  if (String.fromCodePoint != null) {
    return String.fromCodePoint(x)
  }
  // fallback for engines without String.fromCodePoint
  if (x < 0x10000) {
    return String.fromCharCode(x)
  }
  // above the BMP: split into high/low surrogates
  offset = x - 0x10000
  return String.fromCharCode((offset >> 10) + 0xd800, (offset % 0x400) + 0xdc00)
}
109 // Each node is an object of the Node class. Here are the Node types:
110 TYPE_TAG = 'tag' // name, {attributes}, [children]
111 TYPE_TEXT = 'text' // "text"
112 TYPE_COMMENT = 'comment'
113 TYPE_DOCTYPE = 'doctype'
114 // the following types are emitted by the tokenizer, but shouldn't end up in the tree:
115 TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
116 TYPE_END_TAG = 5 // name
118 TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
119 TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
121 // namespace constants
126 // quirks mode constants
128 QUIRKS_LIMITED = 'limited'
131 // queue up debug logs, so eg they can be shown only for tests that fail
133 debug_log_reset = function () {
136 debug_log = function (str) {
137 g_debug_log.push(str)
139 debug_log_each = function (cb) {
141 for (i = 0; i < g_debug_log.length; ++i) {
// Node is used both for tokens and for nodes of the resulting tree.
//
// type: one of the TYPE_* constants
// args: optional object supplying initial values for any of:
//   name, text, attrs, children, namespace, parent, attrs_a, token, flags, id
//   ("attr_k" is still accepted as an older spelling of "attrs_a")
// Anything not supplied gets an empty/neutral default; namespace defaults to
// NS_HTML.
function Node (type, args) {
  if (args == null) {
    args = {} // eg new Node(TYPE_EOF) is called without args
  }
  this.type = type // one of the TYPE_* constants above
  this.name = args.name != null ? args.name : '' // tag name
  this.text = args.text != null ? args.text : '' // contents for text/comment nodes
  this.attrs = args.attrs != null ? args.attrs : {}
  this.children = args.children != null ? args.children : []
  this.namespace = args.namespace != null ? args.namespace : NS_HTML
  this.parent = args.parent != null ? args.parent : null

  // attrs in progress, TYPE_START_TAG only
  // Read the key that matches the property name first, then the older
  // "attr_k" key so existing callers keep working.
  if (args.attrs_a != null) {
    this.attrs_a = args.attrs_a
  } else if (args.attr_k != null) {
    this.attrs_a = args.attr_k
  } else {
    this.attrs_a = []
  }
  this.token = args.token != null ? args.token : null
  this.flags = args.flags != null ? args.flags : {}
  if (args.id != null) {
    // a caller-supplied id gets a "+" suffix
    this.id = args.id + "+"
  } else {
    // otherwise take the next id from the file-level counter
    this.id = "" + (++prev_node_id)
  }
}
// Set the 'did_self_close' flag to true on this node's token when it has one,
// otherwise on the node itself.
Node.prototype.acknowledge_self_closing = function () {
  var target = this.token != null ? this.token : this
  target.flag('did_self_close', true)
}
177 Node.prototype.flag = function (key, value) {
179 this.flags[key] = value
181 return this.flags[key]
// helpers: (only take args that are normally known when parser creates nodes)
// Each one returns a fresh Node of the matching TYPE_* constant.

// start tag token with the given tag name
new_open_tag = function (name) {
  return new Node(TYPE_START_TAG, {name: name})
}
// end tag token with the given tag name
new_end_tag = function (name) {
  return new Node(TYPE_END_TAG, {name: name})
}
// element node with the given tag name
new_element = function (name) {
  return new Node(TYPE_TAG, {name: name})
}
// text node holding txt
new_text_node = function (txt) {
  return new Node(TYPE_TEXT, {text: txt})
}
// character tokens and text nodes are built by the same function
new_character_token = new_text_node
// comment node holding txt
new_comment_token = function (txt) {
  return new Node(TYPE_COMMENT, {text: txt})
}
// doctype node with the given name
new_doctype_token = function (name) {
  return new Node(TYPE_DOCTYPE, {name: name})
}
// end-of-file token
new_eof_token = function () {
  return new Node(TYPE_EOF)
}
// marker entry for the list of active formatting elements
new_afe_marker = function () {
  return new Node(TYPE_AFE_MARKER)
}
// bookmark entry used by the adoption agency algorithm
new_aaa_bookmark = function () {
  return new Node(TYPE_AAA_BOOKMARK)
}
// character classes, stored as strings so membership is a simple indexOf()
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = lc_alpha + uc_alpha + digits
hex_chars = digits + "abcdefABCDEF"

// true if str is exactly one character from A-Z
is_uc_alpha = function (str) {
  return str.length === 1 && uc_alpha.indexOf(str) > -1
}
// true if str is exactly one character from a-z
is_lc_alpha = function (str) {
  return str.length === 1 && lc_alpha.indexOf(str) > -1
}

// some SVG elements have dashes in them
tag_name_chars = alnum + "-"
// http://www.w3.org/TR/html5/infrastructure.html#space-character
// tab, line feed, form feed, carriage return, space
space_chars = "\u0009\u000a\u000c\u000d\u0020"

// true if txt is exactly one space character (see space_chars)
is_space = function (txt) {
  return txt.length === 1 && space_chars.indexOf(txt) > -1
}
// true if t is a TYPE_TEXT node/token whose text is exactly one space character
is_space_tok = function (t) {
  return t.type === TYPE_TEXT && is_space(t.text)
}
// Returns true if t is a start tag token carrying a "type" attribute whose
// value is "hidden" (compared case-insensitively).
//
// t: a token; its in-progress attributes are read from t.attrs_a as
//    [name, value] pairs
// Only the first "type" entry found in t.attrs_a decides the result. The tag
// name is not checked here.
is_input_hidden_tok = function (t) {
  var i, attr
  if (t.type !== TYPE_START_TAG) {
    return false
  }
  for (i = 0; i < t.attrs_a.length; ++i) {
    attr = t.attrs_a[i]
    if (attr[0] === 'type') {
      return attr[1].toLowerCase() === 'hidden'
    }
  }
  return false
}
257 // https://en.wikipedia.org/wiki/Whitespace_character#Unicode
258 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
261 unicode_fixes[0x00] = "\uFFFD"
262 unicode_fixes[0x80] = "\u20AC"
263 unicode_fixes[0x82] = "\u201A"
264 unicode_fixes[0x83] = "\u0192"
265 unicode_fixes[0x84] = "\u201E"
266 unicode_fixes[0x85] = "\u2026"
267 unicode_fixes[0x86] = "\u2020"
268 unicode_fixes[0x87] = "\u2021"
269 unicode_fixes[0x88] = "\u02C6"
270 unicode_fixes[0x89] = "\u2030"
271 unicode_fixes[0x8A] = "\u0160"
272 unicode_fixes[0x8B] = "\u2039"
273 unicode_fixes[0x8C] = "\u0152"
274 unicode_fixes[0x8E] = "\u017D"
275 unicode_fixes[0x91] = "\u2018"
276 unicode_fixes[0x92] = "\u2019"
277 unicode_fixes[0x93] = "\u201C"
278 unicode_fixes[0x94] = "\u201D"
279 unicode_fixes[0x95] = "\u2022"
280 unicode_fixes[0x96] = "\u2013"
281 unicode_fixes[0x97] = "\u2014"
282 unicode_fixes[0x98] = "\u02DC"
283 unicode_fixes[0x99] = "\u2122"
284 unicode_fixes[0x9A] = "\u0161"
285 unicode_fixes[0x9B] = "\u203A"
286 unicode_fixes[0x9C] = "\u0153"
287 unicode_fixes[0x9E] = "\u017E"
288 unicode_fixes[0x9F] = "\u0178"
290 quirks_yes_pi_prefixes = [
291 "+//silmaril//dtd html pro v0r11 19970101//",
292 "-//as//dtd html 3.0 aswedit + extensions//",
293 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
294 "-//ietf//dtd html 2.0 level 1//",
295 "-//ietf//dtd html 2.0 level 2//",
296 "-//ietf//dtd html 2.0 strict level 1//",
297 "-//ietf//dtd html 2.0 strict level 2//",
298 "-//ietf//dtd html 2.0 strict//",
299 "-//ietf//dtd html 2.0//",
300 "-//ietf//dtd html 2.1e//",
301 "-//ietf//dtd html 3.0//",
302 "-//ietf//dtd html 3.2 final//",
303 "-//ietf//dtd html 3.2//",
304 "-//ietf//dtd html 3//",
305 "-//ietf//dtd html level 0//",
306 "-//ietf//dtd html level 1//",
307 "-//ietf//dtd html level 2//",
308 "-//ietf//dtd html level 3//",
309 "-//ietf//dtd html strict level 0//",
310 "-//ietf//dtd html strict level 1//",
311 "-//ietf//dtd html strict level 2//",
312 "-//ietf//dtd html strict level 3//",
313 "-//ietf//dtd html strict//",
314 "-//ietf//dtd html//",
315 "-//metrius//dtd metrius presentational//",
316 "-//microsoft//dtd internet explorer 2.0 html strict//",
317 "-//microsoft//dtd internet explorer 2.0 html//",
318 "-//microsoft//dtd internet explorer 2.0 tables//",
319 "-//microsoft//dtd internet explorer 3.0 html strict//",
320 "-//microsoft//dtd internet explorer 3.0 html//",
321 "-//microsoft//dtd internet explorer 3.0 tables//",
322 "-//netscape comm. corp.//dtd html//",
323 "-//netscape comm. corp.//dtd strict html//",
324 "-//o'reilly and associates//dtd html 2.0//",
325 "-//o'reilly and associates//dtd html extended 1.0//",
326 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
327 "-//sq//dtd html 2.0 hotmetal + extensions//",
328 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
329 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
330 "-//spyglass//dtd html 2.0 extended//",
331 "-//sun microsystems corp.//dtd hotjava html//",
332 "-//sun microsystems corp.//dtd hotjava strict html//",
333 "-//w3c//dtd html 3 1995-03-24//",
334 "-//w3c//dtd html 3.2 draft//",
335 "-//w3c//dtd html 3.2 final//",
336 "-//w3c//dtd html 3.2//",
337 "-//w3c//dtd html 3.2s draft//",
338 "-//w3c//dtd html 4.0 frameset//",
339 "-//w3c//dtd html 4.0 transitional//",
340 "-//w3c//dtd html experimental 19960712//",
341 "-//w3c//dtd html experimental 970421//",
342 "-//w3c//dtd w3 html//",
343 "-//w3o//dtd w3 html 3.0//",
344 "-//webtechs//dtd mozilla html 2.0//",
345 "-//webtechs//dtd mozilla html//",
348 // These are the character references that don't need a terminating semicolon
349 // min length: 2, max: 6, none are a prefix of any other.
351 Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
352 aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
353 aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
354 Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
355 curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
356 ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
357 euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
358 Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
359 igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
360 lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
361 Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
362 Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
363 Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
364 pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
365 shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
366 times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
367 ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
371 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
372 //raw_text_elements = ['script', 'style']
373 //escapable_raw_text_elements = ['textarea', 'title']
374 // http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
376 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
377 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
378 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
379 'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
380 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
381 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
382 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
383 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
384 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
385 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
386 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
387 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
388 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
389 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
393 // http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
395 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
396 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
397 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
398 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
399 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
400 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
401 'determinant', 'diff', 'divergence', 'divide', 'domain',
402 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
403 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
404 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
405 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
406 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
407 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
408 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
409 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
410 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
411 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
412 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
413 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
414 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
415 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
416 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
417 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
418 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
419 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
420 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
421 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
422 'vectorproduct', 'xor'
424 // foreign_elements = [svg_elements..., mathml_elements...]
425 //normal_elements = All other allowed HTML elements are normal elements.
429 address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
430 aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
431 blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
432 caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
433 details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
434 embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
435 footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
436 h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
437 header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
438 img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
439 listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,
441 menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these
443 meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
444 noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
445 plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
446 select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
447 table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
448 textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
449 tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,
452 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
453 'annotation-xml': NS_MATHML,
456 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
459 formatting_elements = {
460 a: true, b: true, big: true, code: true, em: true, font: true, i: true,
461 nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
465 mathml_text_integration = {
466 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
// true if el's name is listed in mathml_text_integration under el's namespace
is_mathml_text_integration_point = function (el) {
  return mathml_text_integration[el.name] === el.namespace
}
// Returns true if el is one of:
//   - a MathML annotation-xml element whose "encoding" attribute is
//     "text/html" or "application/xhtml+xml" (compared case-insensitively)
//   - an SVG foreignObject, desc or title element
//
// el: an element; reads el.namespace, el.name and el.attrs
// NOTE(review): the "don't pass a token" warning is presumably because this
// reads el.attrs, while a token's in-progress attributes are in attrs_a.
is_html_integration = function (el) { // DON'T PASS A TOKEN
  var encoding
  if (el.namespace === NS_MATHML) {
    if (el.name === 'annotation-xml' && el.attrs.encoding != null) {
      encoding = el.attrs.encoding.toLowerCase()
      return encoding === 'text/html' || encoding === 'application/xhtml+xml'
    }
    return false
  }
  if (el.namespace === NS_SVG) {
    return el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title'
  }
  return false
}
494 h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
497 foster_parenting_targets = {
// true if e's name is listed in special_elements under e's namespace
el_is_special = function (e) {
  return special_elements[e.name] === e.namespace
}
// the three special elements that el_is_special_not_adp() excludes
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }

// true if el is listed in special_elements (name and namespace both match)
// and is not an HTML address, div or p element
el_is_special_not_adp = function (el) {
  return special_elements[el.name] === el.namespace && adp_els[el.name] !== el.namespace
}
528 altglyph: 'altGlyph',
529 altglyphdef: 'altGlyphDef',
530 altglyphitem: 'altGlyphItem',
531 animatecolor: 'animateColor',
532 animatemotion: 'animateMotion',
533 animatetransform: 'animateTransform',
534 clippath: 'clipPath',
536 fecolormatrix: 'feColorMatrix',
537 fecomponenttransfer: 'feComponentTransfer',
538 fecomposite: 'feComposite',
539 feconvolvematrix: 'feConvolveMatrix',
540 fediffuselighting: 'feDiffuseLighting',
541 fedisplacementmap: 'feDisplacementMap',
542 fedistantlight: 'feDistantLight',
543 fedropshadow: 'feDropShadow',
549 fegaussianblur: 'feGaussianBlur',
552 femergenode: 'feMergeNode',
553 femorphology: 'feMorphology',
554 feoffset: 'feOffset',
555 fepointlight: 'fePointLight',
556 fespecularlighting: 'feSpecularLighting',
557 fespotlight: 'feSpotLight',
559 feturbulence: 'feTurbulence',
560 foreignobject: 'foreignObject',
561 glyphref: 'glyphRef',
562 lineargradient: 'linearGradient',
563 radialgradient: 'radialGradient',
566 svg_attribute_fixes = {
567 attributename: 'attributeName',
568 attributetype: 'attributeType',
569 basefrequency: 'baseFrequency',
570 baseprofile: 'baseProfile',
571 calcmode: 'calcMode',
572 clippathunits: 'clipPathUnits',
573 contentscripttype: 'contentScriptType',
574 contentstyletype: 'contentStyleType',
575 diffuseconstant: 'diffuseConstant',
576 edgemode: 'edgeMode',
577 externalresourcesrequired: 'externalResourcesRequired',
578 // WHATWG removes this: filterres: 'filterRes',
579 filterunits: 'filterUnits',
580 glyphref: 'glyphRef',
581 gradienttransform: 'gradientTransform',
582 gradientunits: 'gradientUnits',
583 kernelmatrix: 'kernelMatrix',
584 kernelunitlength: 'kernelUnitLength',
585 keypoints: 'keyPoints',
586 keysplines: 'keySplines',
587 keytimes: 'keyTimes',
588 lengthadjust: 'lengthAdjust',
589 limitingconeangle: 'limitingConeAngle',
590 markerheight: 'markerHeight',
591 markerunits: 'markerUnits',
592 markerwidth: 'markerWidth',
593 maskcontentunits: 'maskContentUnits',
594 maskunits: 'maskUnits',
595 numoctaves: 'numOctaves',
596 pathlength: 'pathLength',
597 patterncontentunits: 'patternContentUnits',
598 patterntransform: 'patternTransform',
599 patternunits: 'patternUnits',
600 pointsatx: 'pointsAtX',
601 pointsaty: 'pointsAtY',
602 pointsatz: 'pointsAtZ',
603 preservealpha: 'preserveAlpha',
604 preserveaspectratio: 'preserveAspectRatio',
605 primitiveunits: 'primitiveUnits',
608 repeatcount: 'repeatCount',
609 repeatdur: 'repeatDur',
610 requiredextensions: 'requiredExtensions',
611 requiredfeatures: 'requiredFeatures',
612 specularconstant: 'specularConstant',
613 specularexponent: 'specularExponent',
614 spreadmethod: 'spreadMethod',
615 startoffset: 'startOffset',
616 stddeviation: 'stdDeviation',
617 stitchtiles: 'stitchTiles',
618 surfacescale: 'surfaceScale',
619 systemlanguage: 'systemLanguage',
620 tablevalues: 'tableValues',
623 textlength: 'textLength',
625 viewtarget: 'viewTarget',
626 xchannelselector: 'xChannelSelector',
627 ychannelselector: 'yChannelSelector',
628 zoomandpan: 'zoomAndPan'
630 foreign_attr_fixes = {
631 'xlink:actuate': 'xlink actuate',
632 'xlink:arcrole': 'xlink arcrole',
633 'xlink:href': 'xlink href',
634 'xlink:role': 'xlink role',
635 'xlink:show': 'xlink show',
636 'xlink:title': 'xlink title',
637 'xlink:type': 'xlink type',
638 'xml:base': 'xml base',
639 'xml:lang': 'xml lang',
640 'xml:space': 'xml space',
642 'xmlns:xlink': 'xmlns xlink'
// Rename the lowercased MathML attribute "definitionurl" to "definitionURL".
//
// t: a token; its t.attrs_a entries ([name, value] pairs) are modified in place
adjust_mathml_attributes = function (t) {
  var i, attr
  for (i = 0; i < t.attrs_a.length; ++i) {
    attr = t.attrs_a[i]
    if (attr[0] === 'definitionurl') {
      attr[0] = 'definitionURL'
    }
  }
}
// Restore the mixed-case spelling of SVG attribute names, using the
// svg_attribute_fixes table (lowercase name -> proper name).
//
// t: a token; its t.attrs_a entries ([name, value] pairs) are modified in place
adjust_svg_attributes = function (t) {
  var i, attr
  for (i = 0; i < t.attrs_a.length; ++i) {
    attr = t.attrs_a[i]
    // Own-property check: attribute names come from the document, and a
    // plain lookup would also match inherited members such as "constructor"
    // or "toString", replacing the name with a function.
    if (Object.prototype.hasOwnProperty.call(svg_attribute_fixes, attr[0])) {
      attr[0] = svg_attribute_fixes[attr[0]]
    }
  }
}
// Rewrite the names of namespaced attributes (xlink:*, xml:*, xmlns:*) on a
// foreign element's token, using the foreign_attr_fixes table.
//
// t: a token; its t.attrs_a entries ([name, value] pairs) are modified in place
adjust_foreign_attributes = function (t) {
  var i, attr
  for (i = 0; i < t.attrs_a.length; ++i) {
    attr = t.attrs_a[i]
    // Own-property check: attribute names come from the document, and a
    // plain lookup would also match inherited members such as "constructor"
    // or "toString", replacing the name with a function.
    if (Object.prototype.hasOwnProperty.call(foreign_attr_fixes, attr[0])) {
      attr[0] = foreign_attr_fixes[attr[0]]
    }
  }
}
673 // decode_named_char_ref()
675 // The list of named character references is _huge_ so if we're running in a
676 // browser, we get the browser to decode them, rather than increasing the code
677 // size to include the table.
678 if (context === 'module') {
679 _decode_named_char_ref = require('./parser_no_browser_helper.js')
681 decode_named_char_ref_el = document.createElement('textarea')
682 _decode_named_char_ref = function (txt) {
684 txt = "&" + txt + ";"
685 decode_named_char_ref_el.innerHTML = txt
686 decoded = decode_named_char_ref_el.value
687 if (decoded === txt) {
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Returns whatever _decode_named_char_ref(txt) returns for that name. Each
// distinct name is passed to _decode_named_char_ref() at most once; later
// calls are answered from the cache.
//
// The cache has no prototype: names come from the document, and with a plain
// {} a name such as "constructor", "toString" or "__proto__" would hit an
// inherited Object.prototype member and return it as the "decoded" value.
decode_named_char_ref_cache = Object.create(null)
decode_named_char_ref = function (txt) {
  var decoded
  if (txt in decode_named_char_ref_cache) {
    return decode_named_char_ref_cache[txt]
  }
  decoded = _decode_named_char_ref(txt)
  decode_named_char_ref_cache[txt] = decoded
  return decoded
}
707 parse_html = function (args_html, args) {
708 var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, template_tag_is_open, 
temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
713 cur = null // index of next char in txt to be parsed
714 // declare doc and tokenizer variables so they're in scope below
716 open_els = null // stack of open elements
717 afe = null // active formatting elements
718 template_ins_modes = null
720 original_ins_mode = null
722 tok_cur_tag = null // partially parsed tag
723 flag_scripting = null
724 flag_frameset_ok = null
726 flag_foster_parenting = null
727 form_element_pointer = null
728 temporary_buffer = null
729 pending_table_character_tokens = null
730 head_element_pointer = null
731 flag_fragment_parsing = null
732 context_element = null
734 stop_parsing = function () {
738 parse_error = function () {
739 if (args.error_cb != null) {
744 // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
745 // "Noah's Ark clause" but with three
746 afe_push = function (new_el) {
747 var attrs_match, el, i, j, k, matches, v
749 for (i = 0; i < afe.length; ++i) {
751 if (el.type === TYPE_AFE_MARKER) {
754 if (el.name === new_el.name && el.namespace === new_el.namespace) {
756 for (k in el.attrs) {
758 if (new_el.attrs[k] !== v) {
764 for (k in new_el.attrs) {
766 if (el.attrs[k] !== v) {
784 afe_push_marker = function () {
785 afe.unshift(new_afe_marker())
788 // the functions below implement the Tree Construction algorithm
789 // http://www.w3.org/TR/html5/syntax.html#tree-construction
791 // But first... the helpers
792 template_tag_is_open = function () {
794 for (i = 0; i < open_els.length; ++i) {
796 if (el.name === 'template' && el.namespace === NS_HTML) {
802 is_in_scope_x = function (tag_name, scope, namespace) {
804 for (i = 0; i < open_els.length; ++i) {
806 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
809 if (scope[el.name] === el.namespace) {
815 is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
817 for (i = 0; i < open_els.length; ++i) {
819 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
822 if (scope[el.name] === el.namespace) {
825 if (scope2[el.name] === el.namespace) {
832 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
833 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
836 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
837 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
839 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
841 button_scopers = { button: NS_HTML }
842 li_scopers = { ol: NS_HTML, ul: NS_HTML }
843 table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
844 is_in_scope = function (tag_name, namespace) {
845 if (namespace == null) {
848 return is_in_scope_x(tag_name, standard_scopers, namespace)
850 is_in_button_scope = function (tag_name, namespace) {
851 if (namespace == null) {
854 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
// Scope test using table_scopers only (html, table, template); returns
// is_in_scope_x's result.
//   tag_name:  element name to look for
//   namespace: optional namespace the match must have
856 is_in_table_scope = function (tag_name, namespace) {
857 if (namespace == null) {
860 return is_in_scope_x(tag_name, table_scopers, namespace)
862 // aka is_in_list_item_scope
// Scope test using standard_scopers plus li_scopers (ol, ul); returns
// is_in_scope_x_y's result.
//   tag_name:  element name to look for
//   namespace: optional namespace the match must have
863 is_in_li_scope = function (tag_name, namespace) {
864 if (namespace == null) {
867 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
// Scope test for the "select scope": scans open_els from index 0 for an
// element named tag_name (in `namespace`, or in any namespace when it is null).
//   tag_name:  element name to look for
//   namespace: optional namespace the match must have
// NOTE(review): the boundary test below joins all three conditions with &&,
// so it only fires for a non-HTML element that is also not named
// optgroup/option. If the intent is "every element except HTML optgroup and
// option is a boundary", the first && would need to be || -- confirm.
869 is_in_select_scope = function (tag_name, namespace) {
871 if (namespace == null) {
874 for (i = 0; i < open_els.length; ++i) {
876 if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
879 if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
885 // this checks for a particular element, not by name
886 // this requires a namespace match
//   needle: the element to look for while scanning open_els from index 0;
//           elements listed in standard_scopers (name mapped to their own
//           namespace) are tested along the way.
// NOTE(review): presumably returns true when needle is reached before any
// standard_scopers element, false otherwise -- confirm.
887 el_is_in_scope = function (needle) {
889 for (i = 0; i < open_els.length; ++i) {
894 if (standard_scopers[el.name] === el.namespace) {
// clear_to_table_stopers is the lookup table for clear_stack_to_table_context,
// which tests the current node (open_els[0]) against it by name only.
// NOTE(review): unlike clear_stack_to_table_body_context, the element's
// namespace is not compared with the table value here -- confirm intended.
901 clear_to_table_stopers = {
906 clear_stack_to_table_context = function () {
908 if (clear_to_table_stopers[open_els[0].name] != null) {
// clear_to_table_body_stopers is the lookup table for
// clear_stack_to_table_body_context, which tests whether the current node's
// (open_els[0]) name maps to that node's own namespace.
914 clear_to_table_body_stopers = {
921 clear_stack_to_table_body_context = function () {
923 if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
// clear_to_table_row_stopers is the lookup table for
// clear_stack_to_table_row_context, which tests the current node
// (open_els[0]) against it by name only.
// NOTE(review): unlike clear_stack_to_table_body_context, the element's
// namespace is not compared with the table value here -- confirm intended.
929 clear_to_table_row_stopers = {
934 clear_stack_to_table_row_context = function () {
936 if (clear_to_table_row_stopers[open_els[0].name] != null) {
// Works through the afe list until an entry of type TYPE_AFE_MARKER is seen;
// an empty afe list is handled explicitly (see the fragment-case note below).
// NOTE(review): presumably removes entries from the front of afe up to and
// including the marker -- confirm.
942 clear_afe_to_marker = function () {
945 if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
949 if (el.type === TYPE_AFE_MARKER) {
956 // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
// Sets ins_mode from the elements in open_els (and from context_element when
// flag_fragment_parsing is set), following the numbered spec steps quoted
// below. The "first node in the stack" is the entry at index
// open_els.length - 1.
957 reset_ins_mode = function () {
958 var ancestor, ancestor_i, last, node, node_i
959 // 1. Let last be false.
961 // 2. Let node be the last node in the stack of open elements.
963 node = open_els[node_i]
964 // 3. Loop: If node is the first node in the stack of open elements,
965 // then set last to true, and, if the parser was originally created as
966 // part of the HTML fragment parsing algorithm (fragment case) set node
967 // to the context element.
969 if (node_i === open_els.length - 1) {
971 if (flag_fragment_parsing) {
972 node = context_element
975 // 4. If node is a select element, run these substeps:
976 if (node.name === 'select' && node.namespace === NS_HTML) {
977 // 1. If last is true, jump to the step below labeled done.
979 // 2. Let ancestor be node.
982 // 3. Loop: If ancestor is the first node in the stack of
983 // open elements, jump to the step below labeled done.
985 if (ancestor_i === open_els.length - 1) {
988 // 4. Let ancestor be the node before ancestor in the stack
991 ancestor = open_els[ancestor_i]
992 // 5. If ancestor is a template node, jump to the step below
994 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
997 // 6. If ancestor is a table node, switch the insertion mode
998 // to "in select in table" and abort these steps.
999 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
1000 ins_mode = ins_mode_in_select_in_table
1003 // 7. Jump back to the step labeled loop.
1006 // 8. Done: Switch the insertion mode to "in select" and abort
1008 ins_mode = ins_mode_in_select
1011 // 5. If node is a td or th element and last is false, then switch
1012 // the insertion mode to "in cell" and abort these steps.
1013 if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1014 ins_mode = ins_mode_in_cell
1017 // 6. If node is a tr element, then switch the insertion mode to "in
1018 // row" and abort these steps.
1019 if (node.name === 'tr' && node.namespace === NS_HTML) {
1020 ins_mode = ins_mode_in_row
1023 // 7. If node is a tbody, thead, or tfoot element, then switch the
1024 // insertion mode to "in table body" and abort these steps.
1025 if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1026 ins_mode = ins_mode_in_table_body
1029 // 8. If node is a caption element, then switch the insertion mode
1030 // to "in caption" and abort these steps.
1031 if (node.name === 'caption' && node.namespace === NS_HTML) {
1032 ins_mode = ins_mode_in_caption
1035 // 9. If node is a colgroup element, then switch the insertion mode
1036 // to "in column group" and abort these steps.
1037 if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1038 ins_mode = ins_mode_in_column_group
1041 // 10. If node is a table element, then switch the insertion mode to
1042 // "in table" and abort these steps.
1043 if (node.name === 'table' && node.namespace === NS_HTML) {
1044 ins_mode = ins_mode_in_table
1047 // 11. If node is a template element, then switch the insertion mode
1048 // to the current template insertion mode and abort these steps.
// the current template insertion mode is kept at index 0 of template_ins_modes
1049 if (node.name === 'template' && node.namespace === NS_HTML) {
1050 ins_mode = template_ins_modes[0]
1053 // 12. If node is a head element and last is true, then switch the
1054 // insertion mode to "in body" ("in body"! not "in head"!) and abort
1055 // these steps. (fragment case)
1056 if (node.name === 'head' && node.namespace === NS_HTML && last) {
1057 ins_mode = ins_mode_in_body
1060 // 13. If node is a head element and last is false, then switch the
1061 // insertion mode to "in head" and abort these steps.
1062 if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1063 ins_mode = ins_mode_in_head
1066 // 14. If node is a body element, then switch the insertion mode to
1067 // "in body" and abort these steps.
1068 if (node.name === 'body' && node.namespace === NS_HTML) {
1069 ins_mode = ins_mode_in_body
1072 // 15. If node is a frameset element, then switch the insertion mode
1073 // to "in frameset" and abort these steps. (fragment case)
1074 if (node.name === 'frameset' && node.namespace === NS_HTML) {
1075 ins_mode = ins_mode_in_frameset
1078 // 16. If node is an html element, run these substeps:
1079 if (node.name === 'html' && node.namespace === NS_HTML) {
1080 // 1. If the head element pointer is null, switch the insertion
1081 // mode to "before head" and abort these steps. (fragment case)
1082 if (head_element_pointer === null) {
1083 ins_mode = ins_mode_before_head
1085 // 2. Otherwise, the head element pointer is not null,
1086 // switch the insertion mode to "after head" and abort these
1088 ins_mode = ins_mode_after_head
1092 // 17. If last is true, then switch the insertion mode to "in body"
1093 // and abort these steps. (fragment case)
1095 ins_mode = ins_mode_in_body
1098 // 18. Let node now be the node before node in the stack of open
1101 node = open_els[node_i]
1102 // 19. Return to the step labeled loop.
1108 // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
// Returns context_element when fragment parsing is active and exactly one
// element is on open_els.
// NOTE(review): presumably returns open_els[0] in every other case -- confirm.
1109 adjusted_current_node = function () {
1110 if (open_els.length === 1 && flag_fragment_parsing) {
1111 return context_element
1116 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1117 // this implementation is structured (mostly) as described at the link above.
1118 // capitalized comments are the "labels" described at the link above.
// Entries of afe that are markers (TYPE_AFE_MARKER) or are already present in
// open_els are the stopping points tested below; other entries are re-created
// by passing their stored token to insert_html_element.
1119 reconstruct_afe = function () {
1121 if (afe.length === 0) {
1124 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
1130 if (i === afe.length - 1) {
1134 if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
1141 el = insert_html_element(afe[i].token)
1150 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1151 // adoption agency algorithm
1153 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1154 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1155 // http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
//   subject: tag name; compared with element names in open_els and afe, and
//            handed to in_body_any_other_end_tag when no formatting element
//            is found.
// Works on the shared parser state: open_els (index 0 is the current node),
// afe, and the parent/children links of the nodes involved.
// NOTE(review): new_element (assigned in step 15) is not in the var list
// below, so it resolves to an outer-scope variable -- confirm one is declared.
1156 adoption_agency = function (subject) {
1157 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, s, t, u, w, y, z
1158 // this block implements the W3C spec
1159 // # 1. If the current node is an HTML element whose tag name is subject,
1160 // # then run these substeps:
1162 // # 1. Let element be the current node.
1164 // # 2. Pop element off the stack of open elements.
1166 // # 3. If element is also in the list of active formatting elements,
1167 // # remove the element from the list.
1169 // # 4. Abort the adoption agency algorithm.
1170 // if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1171 // el = open_els.shift()
1172 // # remove it from the list of active formatting elements (if found)
1178 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1179 // If the current node is an HTML element whose tag name is subject, and
1180 // the current node is not in the list of active formatting elements,
1181 // then pop the current node off the stack of open elements, and abort
1183 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1184 // remove it from the list of active formatting elements (if found)
1186 for (i = 0; i < afe.length; ++i) {
1188 if (el === open_els[0]) {
1206 // 5. Let formatting element be the last element in the list of
1207 // active formatting elements that: is between the end of the list
1208 // and the last scope marker in the list, if any, or the start of
1209 // the list otherwise, and has the tag name subject.
1211 for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1213 if (t.type === TYPE_AFE_MARKER) {
1216 if (t.name === subject) {
1221 // If there is no such element, then abort these steps and instead
1222 // act as described in the "any other end tag" entry above.
1224 in_body_any_other_end_tag(subject)
1227 // 6. If formatting element is not in the stack of open elements,
1228 // then this is a parse error; remove the element from the list, and
1229 // abort these steps.
1231 for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1232 t = open_els[fe_of_open_els]
1240 // "remove it from the list" must mean afe, since it's not in open_els
1241 afe.splice(fe_of_afe, 1)
1244 // 7. If formatting element is in the stack of open elements, but
1245 // the element is not in scope, then this is a parse error; abort
1247 if (!el_is_in_scope(fe)) {
1251 // 8. If formatting element is not the current node, this is a parse
1252 // error. (But do not abort these steps.)
1253 if (open_els[0] !== fe) {
1257 // 9. Let furthest block be the topmost node in the stack of open
1258 // elements that is lower in the stack than formatting element, and
1259 // is an element in the special category. There might not be one.
1261 fb_of_open_els = null
1262 for (i = 0; i < open_els.length; ++i) {
1267 if (el_is_special(t)) {
1270 // and continue, to see if there's one that's more "topmost"
1273 // 10. If there is no furthest block, then the UA must first pop all
1274 // the nodes from the bottom of the stack of open elements, from the
1275 // current node up to and including formatting element, then remove
1276 // formatting element from the list of active formatting elements,
1277 // and finally abort these steps.
1280 t = open_els.shift()
1282 afe.splice(fe_of_afe, 1)
1287 // 11. Let common ancestor be the element immediately above
1288 // formatting element in the stack of open elements.
// "above" in the stack is the next higher index in open_els
1289 ca = open_els[fe_of_open_els + 1] // common ancestor
1291 node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1292 // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1293 bookmark = new_aaa_bookmark()
1294 for (i = 0; i < afe.length; ++i) {
1297 afe.splice(i, 0, bookmark)
1301 node = last_node = fb
1305 // 3. Let node be the element immediately above node in the
1306 // stack of open elements, or if node is no longer in the stack
1307 // of open elements (e.g. because it got removed by this
1308 // algorithm), the element that was immediately above node in
1309 // the stack of open elements before node was removed.
1311 for (i = 0; i < open_els.length; ++i) {
1314 node_next = open_els[i + 1]
1318 node = node_next != null ? node_next : node_above
1319 // TODO make sure node_above gets re-set if/when node is removed from open_els
1321 // 4. If node is formatting element, then go to the next step in
1322 // the overall algorithm.
1326 // 5. If inner loop counter is greater than three and node is in
1327 // the list of active formatting elements, then remove node from
1328 // the list of active formatting elements.
1330 if ((i = afe.indexOf(node)) !== -1) {
1337 // 6. If node is not in the list of active formatting elements,
1338 // then remove node from the stack of open elements and then go
1339 // back to the step labeled inner loop.
1341 if ((i = open_els.indexOf(node)) !== -1) {
1342 node_above = open_els[i + 1]
1343 open_els.splice(i, 1)
1347 // 7. create an element for the token for which the element node
1348 // was created, in the HTML namespace, with common ancestor as
1349 // the intended parent; replace the entry for node in the list
1350 // of active formatting elements with an entry for the new
1351 // element, replace the entry for node in the stack of open
1352 // elements with an entry for the new element, and let node be
1354 new_node = token_to_element(node.token, NS_HTML, ca)
1355 if ((i = afe.indexOf(node)) !== -1) {
1358 if ((i = open_els.indexOf(node)) !== -1) {
1359 node_above = open_els[i + 1]
1360 open_els[i] = new_node
1363 // 8. If last node is furthest block, then move the
1364 // aforementioned bookmark to be immediately after the new node
1365 // in the list of active formatting elements.
1366 if (last_node === fb) {
1367 if ((i = afe.indexOf(bookmark)) !== -1) {
1370 if ((i = afe.indexOf(node)) !== -1) {
1371 // "after" means lower
1372 afe.splice(i, 0, bookmark) // "after as <-
1375 // 9. Insert last node into node, first removing it from its
1376 // previous parent node if any.
1377 if (last_node.parent != null) {
1378 if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1379 last_node.parent.children.splice(i, 1)
1382 node.children.push(last_node)
1383 last_node.parent = node
1384 // 10. Let last node be node.
1386 // 11. Return to the step labeled inner loop.
1388 // 14. Insert whatever last node ended up being in the previous step
1389 // at the appropriate place for inserting a node, but using common
1390 // ancestor as the override target.
1392 // In the case where fe is immediately followed by fb:
1393 // * inner loop exits out early (node==fe)
1394 // * last_node is fb
1395 // * last_node is still in the tree (not a duplicate)
1396 if (last_node.parent != null) {
1397 if ((i = last_node.parent.children.indexOf(last_node)) !== -1) {
1398 last_node.parent.children.splice(i, 1)
1401 // can't use standard insert token thing, because it's already in
1402 // open_els and must stay at its current position in open_els
// dest is [parent node, index within parent.children]
1403 dest = adjusted_insertion_location(ca)
1404 dest[0].children.splice(dest[1], 0, last_node)
1405 last_node.parent = dest[0]
1406 // 15. Create an element for the token for which formatting element
1407 // was created, in the HTML namespace, with furthest block as the
1409 new_element = token_to_element(fe.token, NS_HTML, fb)
1410 // 16. Take all of the child nodes of furthest block and append them
1411 // to the element created in the last step.
1412 while (fb.children.length) {
1413 t = fb.children.shift()
1414 t.parent = new_element
1415 new_element.children.push(t)
1417 // 17. Append that new element to furthest block.
1418 new_element.parent = fb
1419 fb.children.push(new_element)
1420 // 18. Remove formatting element from the list of active formatting
1421 // elements, and insert the new element into the list of active
1422 // formatting elements at the position of the aforementioned
1424 if ((i = afe.indexOf(fe)) !== -1) {
1427 if ((i = afe.indexOf(bookmark)) !== -1) {
1428 afe[i] = new_element
1430 // 19. Remove formatting element from the stack of open elements,
1431 // and insert the new element into the stack of open elements
1432 // immediately below the position of furthest block in that stack.
1433 if ((i = open_els.indexOf(fe)) !== -1) {
1434 open_els.splice(i, 1)
1436 if ((i = open_els.indexOf(fb)) !== -1) {
// inserting at fb's own index places new_element just before fb in open_els
1437 open_els.splice(i, 0, new_element)
1439 // 20. Jump back to the step labeled outer loop.
1443 // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
// Generates implied end tags (leaving 'p' alone), checks whether the current
// node is an HTML 'p', then shifts elements off open_els until an HTML 'p'
// has been removed. The loop never removes the last remaining element.
1444 close_p_element = function () {
1445 generate_implied_end_tags('p') // arg is exception
1446 if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
1449 while (open_els.length > 1) { // just in case
1450 el = open_els.shift()
1451 if (el.name === 'p' && el.namespace === NS_HTML) {
// Acts only when an HTML 'p' element is in button scope.
// NOTE(review): presumably calls close_p_element() in that case -- confirm.
1456 close_p_if_in_button_scope = function () {
1457 if (is_in_button_scope('p', NS_HTML)) {
1462 // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1463 // aka insert_a_character = function (t) {
//   t: the text token/node to insert.
// Looks at the sibling just before the adjusted insertion location; when it
// is not handled as an existing text node, t is spliced into the parent's
// children at that location.
// NOTE(review): a preceding TYPE_TEXT sibling presumably has t's text
// appended to it instead of inserting a new node -- confirm.
1464 insert_character = function (t) {
1466 dest = adjusted_insertion_location()
1467 // fixfull check for Document node
1469 prev = dest[0].children[dest[1] - 1]
1470 if (prev.type === TYPE_TEXT) {
1475 dest[0].children.splice(dest[1], 0, t)
1479 // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
//   t: the token to dispatch.
// Decides how to treat t by looking at the adjusted current node (acn): its
// namespace, whether it is a MathML text integration point or an HTML
// integration point, and the token's type/name. in_foreign_content(t) is the
// last handler reached.
// NOTE(review): the earlier branches presumably pass t to the current
// ins_mode instead -- confirm.
1480 process_token = function (t) {
1482 acn = adjusted_current_node()
1487 if (acn.namespace === NS_HTML) {
1491 if (is_mathml_text_integration_point(acn)) {
1492 if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
1496 if (t.type === TYPE_TEXT) {
1501 if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
1505 if (is_html_integration(acn)) {
1506 if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
1511 if (t.type === TYPE_EOF) {
1515 in_foreign_content(t)
1519 // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1520 // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
//   override_target: optional node to use as the target instead of the
//                    current node (open_els[0]).
// Returns a two-element array [target, target_i]: the parent node and the
// index within target.children at which a new node should be inserted.
1521 adjusted_insertion_location = function (override_target) {
1522 var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, target, target_i
1523 // 1. If there was an override target specified, then let target be the
1525 if (override_target != null) {
1526 target = override_target
1527 } else { // Otherwise, let target be the current node.
1528 target = open_els[0]
1530 // 2. Determine the adjusted insertion location using the first matching
1531 // steps from the following list:
1533 // If foster parenting is enabled and target is a table, tbody, tfoot,
1534 // thead, or tr element Foster parenting happens when content is
1535 // misnested in tables.
1536 if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
1537 while (true) { // once. this is here so we can ``break`` to "abort these substeps"
1538 // 1. Let last template be the last template element in the
1539 // stack of open elements, if any.
1540 last_template = null
1541 last_template_i = null
1542 for (i = 0; i < open_els.length; ++i) {
1544 if (el.name === 'template' && el.namespace === NS_HTML) {
1550 // 2. Let last table be the last table element in the stack of
1551 // open elements, if any.
1554 for (i = 0; i < open_els.length; ++i) {
1556 if (el.name === 'table' && el.namespace === NS_HTML) {
1562 // 3. If there is a last template and either there is no last
1563 // table, or there is one, but last template is lower (more
1564 // recently added) than last table in the stack of open
1565 // elements, then: let adjusted insertion location be inside
1566 // last template's template contents, after its last child (if
1567 // any), and abort these substeps.
// a smaller open_els index means more recently added
1568 if (last_template && (last_table === null || last_template_i < last_table_i)) {
1569 target = last_template // fixfull should be its contents
1570 target_i = target.children.length
1573 // 4. If there is no last table, then let adjusted insertion
1574 // location be inside the first element in the stack of open
1575 // elements (the html element), after its last child (if any),
1576 // and abort these substeps. (fragment case)
1577 if (last_table === null) {
1579 target = open_els[open_els.length - 1]
1580 target_i = target.children.length
1583 // 5. If last table has a parent element, then let adjusted
1584 // insertion location be inside last table's parent element,
1585 // immediately before last table, and abort these substeps.
1586 if (last_table.parent != null) {
1587 for (i = 0; i < last_table.parent.children.length; ++i) {
1588 c = last_table.parent.children[i]
1589 if (c === last_table) {
1590 target = last_table.parent
1597 // 6. Let previous element be the element immediately above last
1598 // table in the stack of open elements.
1600 // huh? how could it not have a parent?
1601 previous_element = open_els[last_table_i + 1]
1602 // 7. Let adjusted insertion location be inside previous
1603 // element, after its last child (if any).
1604 target = previous_element
1605 target_i = target.children.length
1606 // Note: These steps are involved in part because it's possible
1607 // for elements, the table element in this case in particular,
1608 // to have been moved by a script around in the DOM, or indeed
1609 // removed from the DOM entirely, after the element was inserted
1611 break // don't really loop
1614 // Otherwise Let adjusted insertion location be inside target, after
1615 // its last child (if any).
1616 target_i = target.children.length
1619 // 3. If the adjusted insertion location is inside a template element,
1620 // let it instead be inside the template element's template contents,
1621 // after its last child (if any).
1622 // fixfull (template)
1624 // 4. Return the adjusted insertion location.
1625 return [target, target_i]
1628 // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1629 // aka create_an_element_for_token
//   t:               tag token; t.name and t.attrs_a are read, and t itself is
//                    stored on the new node as `token`
//   namespace:       namespace recorded on the new node
//   intended_parent: NOTE(review): not referenced in the statements shown
//                    here -- confirm whether it is used at all
// Builds a Node of type TYPE_TAG from the token.
1630 token_to_element = function (t, namespace, intended_parent) {
1632 // convert attributes into a hash
// each attrs_a entry is read as a [name, value] pair; a repeated name
// overwrites the earlier value
1634 for (i = 0; i < t.attrs_a.length; ++i) {
1636 attrs[a[0]] = a[1] // TODO check what to do with duplicate attrs
1638 el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
1640 // TODO 2. If the newly created element has an xmlns attribute in the
1641 // XMLNS namespace whose value is not exactly the same as the element's
1642 // namespace, that is a parse error. Similarly, if the newly created
1643 // element has an xmlns:xlink attribute in the XMLNS namespace whose
1644 // value is not the XLink Namespace, that is a parse error.
1646 // fixfull: the spec says stuff about form pointers and ownerDocument
1651 // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
//   token:     tag token to build the element from
//   namespace: namespace for the new element
// Creates the element with token_to_element, splices it into the children of
// the adjusted insertion location, and makes it the current node by
// unshifting it onto open_els.
// NOTE(review): callers use the return value (e.g. `el = insert_html_element(t)`),
// so this presumably returns the new element -- confirm.
1652 insert_foreign_element = function (token, namespace) {
1653 var ail, ail_el, ail_i, el
1654 ail = adjusted_insertion_location()
1657 el = token_to_element(token, namespace, ail_el)
1658 // TODO skip this next step if it's broken (eg ail_el is document with child already)
1660 ail_el.children.splice(ail_i, 0, el)
1661 open_els.unshift(el)
1664 // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
// Shorthand for insert_foreign_element with the HTML namespace; returns
// whatever that call returns.
//   token: tag token to build the element from
1665 insert_html_element = function (token) {
1666 return insert_foreign_element(token, NS_HTML)
1669 // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1670 // position should be [node, index_within_children]
//   t:        the comment node to insert
//   position: optional; defaults to adjusted_insertion_location()
// Splices t into position[0].children at index position[1] and sets t.parent.
1671 insert_comment = function (t, position) {
1672 if (position == null) {
1673 position = adjusted_insertion_location()
1675 position[0].children.splice(position[1], 0, t)
1676 t.parent = position[0]
1681 // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Inserts an HTML element for token t, switches the tokenizer to
// tok_state_rawtext, saves the current insertion mode in original_ins_mode
// and switches the insertion mode to ins_mode_text.
1682 parse_generic_raw_text = function (t) {
1683 insert_html_element(t)
1684 tok_state = tok_state_rawtext
1685 original_ins_mode = ins_mode
1686 ins_mode = ins_mode_text
// Same as parse_generic_raw_text, except the tokenizer is switched to
// tok_state_rcdata: inserts an HTML element for token t, saves the current
// insertion mode in original_ins_mode and switches to ins_mode_text.
1688 parse_generic_rcdata_text = function (t) {
1689 insert_html_element(t)
1690 tok_state = tok_state_rcdata
1691 original_ins_mode = ins_mode
1692 ins_mode = ins_mode_text
1695 // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1696 // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
//   except: optional tag name that stops the loop when it is the current node
// Loops while the current node (open_els[0]) has an end_tag_implied entry
// equal to its own namespace and is not named `except`.
// NOTE(review): the loop body presumably pops the current node off
// open_els -- confirm.
1697 generate_implied_end_tags = function (except) {
1698 if (except == null) {
1701 while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
1706 // 8.2.5.4 The rules for parsing tokens in HTML content
1707 // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1709 // 8.2.5.4.1 The "initial" insertion mode
1710 // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
//   t: doctype token; reads its 'force-quirks' flag, name, public_identifier
//      and system_identifier.
// Used by ins_mode_initial to decide whether to flag the document QUIRKS_YES.
// Identifiers are lower-cased before comparison, so matching is
// case-insensitive.
// NOTE(review): each test below presumably leads to `return true`, with
// `return false` at the end -- confirm.
1711 is_quirks_yes_doctype = function (t) {
1713 if (t.flag('force-quirks')) {
1716 if (t.name !== 'html') {
1719 if (t.public_identifier != null) {
1720 pi = t.public_identifier.toLowerCase()
// prefix match against every entry of quirks_yes_pi_prefixes
1721 for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
1722 p = quirks_yes_pi_prefixes[i]
1723 if (pi.substr(0, p.length) === p) {
// exact (whole-string) public identifier matches
1727 if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
1731 if (t.system_identifier != null) {
1732 if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
1735 } else if (t.public_identifier != null) {
1736 // already did this: pi = t.public_identifier.toLowerCase()
1737 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
//   t: doctype token; reads its public_identifier and system_identifier.
// Used by ins_mode_initial to decide whether to flag the document
// QUIRKS_LIMITED. The public identifier is lower-cased before the prefix
// comparisons.
// NOTE(review): pi is only assigned when public_identifier is non-null; the
// system_identifier test that reads pi is presumably nested inside that
// branch -- confirm.
1743 is_quirks_limited_doctype = function (t) {
1745 if (t.public_identifier != null) {
1746 pi = t.public_identifier.toLowerCase()
1747 if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
1750 if (t.system_identifier != null) {
1751 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
// Token handler for the "initial" insertion mode.
//   t: the token being processed
// Comment and doctype tokens are appended to doc.children; a doctype also
// sets the document's 'quirks mode' flag when one of the quirks predicates
// matches, then switches to ins_mode_before_html. The final statements flag
// QUIRKS_YES and switch to ins_mode_before_html.
1758 ins_mode_initial = function (t) {
1759 if (is_space_tok(t)) {
1762 if (t.type === TYPE_COMMENT) {
1764 doc.children.push(t)
1767 if (t.type === TYPE_DOCTYPE) {
1768 // fixfull syntax error from first paragraph and following bullets
1769 // fixfull set doc.doctype
1770 // fixfull is the "not an iframe srcdoc" thing relevant?
1771 if (is_quirks_yes_doctype(t)) {
1772 doc.flag('quirks mode', QUIRKS_YES)
1773 } else if (is_quirks_limited_doctype(t)) {
1774 doc.flag('quirks mode', QUIRKS_LIMITED)
1776 doc.children.push(t)
1777 ins_mode = ins_mode_before_html
1781 // fixfull not iframe srcdoc?
1783 doc.flag('quirks mode', QUIRKS_YES)
1784 ins_mode = ins_mode_before_html
1788 // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
//   t: the token being processed
// An 'html' start tag becomes the root element: it is appended to
// doc.children, unshifted onto open_els, and the mode switches to
// ins_mode_before_head. The final statements do the same with a synthesized
// 'html' start tag (new_open_tag('html')).
1789 ins_mode_before_html = function (t) {
1790 if (t.type === TYPE_DOCTYPE) {
1794 if (t.type === TYPE_COMMENT) {
1795 doc.children.push(t)
1798 if (is_space_tok(t)) {
1801 if (t.type === TYPE_START_TAG && t.name === 'html') {
1802 el = token_to_element(t, NS_HTML, doc)
1803 doc.children.push(el)
1805 open_els.unshift(el)
1806 // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1807 ins_mode = ins_mode_before_head
1810 if (t.type === TYPE_END_TAG) {
1811 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1812 // fall through to "anything else"
1819 el = token_to_element(new_open_tag('html'), NS_HTML, doc)
1820 doc.children.push(el)
1822 open_els.unshift(el)
1823 // ?fixfull browsing context
1824 ins_mode = ins_mode_before_head
1828 // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
//   t: the token being processed
// A 'head' start tag is inserted, recorded in head_element_pointer, and the
// mode switches to ins_mode_in_head. The final statements do the same with a
// synthesized 'head' start tag (new_open_tag('head')).
1829 ins_mode_before_head = function (t) {
1831 if (is_space_tok(t)) {
1834 if (t.type === TYPE_COMMENT) {
1838 if (t.type === TYPE_DOCTYPE) {
1842 if (t.type === TYPE_START_TAG && t.name === 'html') {
1846 if (t.type === TYPE_START_TAG && t.name === 'head') {
1847 el = insert_html_element(t)
1848 head_element_pointer = el
1849 ins_mode = ins_mode_in_head
1852 if (t.type === TYPE_END_TAG) {
1853 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1854 // fall through to Anything else below
1861 el = insert_html_element(new_open_tag('head'))
1862 head_element_pointer = el
1863 ins_mode = ins_mode_in_head
1867 // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
// "Anything else" branch of the "in head" mode: pops the current node and
// switches to ins_mode_after_head.
//   t: the token being processed
// NOTE(review): presumably then reprocesses t in the new mode -- confirm.
1868 ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
1869 open_els.shift() // spec says this will be a 'head' node
1870 ins_mode = ins_mode_after_head
1873 ins_mode_in_head = function (t) {
1875 if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
1879 if (t.type === TYPE_COMMENT) {
1883 if (t.type === TYPE_DOCTYPE) {
1887 if (t.type === TYPE_START_TAG && t.name === 'html') {
1891 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
1892 el = insert_html_element(t)
1894 t.acknowledge_self_closing()
1897 if (t.type === TYPE_START_TAG && t.name === 'meta') {
1898 el = insert_html_element(t)
1900 t.acknowledge_self_closing()
1901 // fixfull encoding stuff
1904 if (t.type === TYPE_START_TAG && t.name === 'title') {
1905 parse_generic_rcdata_text(t)
1908 if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
1909 parse_generic_raw_text(t)
1912 if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
1913 insert_html_element(t)
1914 ins_mode = ins_mode_in_head_noscript
1917 if (t.type === TYPE_START_TAG && t.name === 'script') {
1918 ail = adjusted_insertion_location()
1919 el = token_to_element(t, NS_HTML, ail)
1920 el.flag('parser-inserted', true)
1921 // fixfull frament case
1922 ail[0].children.splice(ail[1], 0, el)
1924 open_els.unshift(el)
1925 tok_state = tok_state_script_data
1926 original_ins_mode = ins_mode // make sure orig... is defined
1927 ins_mode = ins_mode_text
1930 if (t.type === TYPE_END_TAG && t.name === 'head') {
1931 open_els.shift() // will be a head element... spec says so
1932 ins_mode = ins_mode_after_head
1935 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
1936 ins_mode_in_head_else(t)
1939 if (t.type === TYPE_START_TAG && t.name === 'template') {
1940 insert_html_element(t)
1942 flag_frameset_ok = false
1943 ins_mode = ins_mode_in_template
1944 template_ins_modes.unshift(ins_mode_in_template)
1947 if (t.type === TYPE_END_TAG && t.name === 'template') {
1948 if (template_tag_is_open()) {
1949 generate_implied_end_tags
1950 if (open_els[0].name !== 'template') {
1954 el = open_els.shift()
1955 if (el.name === 'template' && el.namespace === NS_HTML) {
1959 clear_afe_to_marker()
1960 template_ins_modes.shift()
1967 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
1971 ins_mode_in_head_else(t)
1974 // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
// "Anything else" branch of the "in head noscript" mode: switches back to
// ins_mode_in_head.
//   t: the token being processed
// NOTE(review): presumably also pops the current node and reprocesses
// t -- confirm.
1975 ins_mode_in_head_noscript_else = function (t) {
1978 ins_mode = ins_mode_in_head
// Token handler for the "in head noscript" insertion mode.
//   t: the token being processed
// A 'noscript' end tag switches back to ins_mode_in_head; a 'br' end tag and
// any token not matched by an earlier branch go to
// ins_mode_in_head_noscript_else(t).
1981 ins_mode_in_head_noscript = function (t) {
1982 if (t.type === TYPE_DOCTYPE) {
1986 if (t.type === TYPE_START_TAG && t.name === 'html') {
1990 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
1992 ins_mode = ins_mode_in_head
1995 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
1999 if (t.type === TYPE_END_TAG && t.name === 'br') {
2000 ins_mode_in_head_noscript_else(t)
2003 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
2008 ins_mode_in_head_noscript_else(t)
2011 // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
2012 ins_mode_after_head_else = function (t) { // fallback path of the "after head" mode; called from ins_mode_after_head with the current token
2014 body_tok = new_open_tag('body') // synthesize a <body> start tag, since the document did not supply one here
2015 insert_html_element(body_tok)
2016 ins_mode = ins_mode_in_body // subsequent tokens are handled by the "in body" mode
2019 ins_mode_after_head = function (t) {
2021 if (is_space_tok(t)) {
2025 if (t.type === TYPE_COMMENT) {
2029 if (t.type === TYPE_DOCTYPE) {
2033 if (t.type === TYPE_START_TAG && t.name === 'html') {
2037 if (t.type === TYPE_START_TAG && t.name === 'body') {
2038 insert_html_element(t)
2039 flag_frameset_ok = false
2040 ins_mode = ins_mode_in_body
2043 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2044 insert_html_element(t)
2045 ins_mode = ins_mode_in_frameset
2048 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
2050 open_els.unshift(head_element_pointer)
2052 for (i = 0; i < open_els.length; ++i) {
2054 if (el === head_element_pointer) {
2055 open_els.splice(i, 1)
2061 if (t.type === TYPE_END_TAG && t.name === 'template') {
2065 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2066 ins_mode_after_head_else(t)
2069 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2074 ins_mode_after_head_else(t)
2077 // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
2078 in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
2082 if (node.name === name && node.namespace === NS_HTML) {
2083 generate_implied_end_tags(name) // arg is exception
2084 if (node !== open_els[0]) {
2088 el = open_els.shift()
2094 if (special_elements[node.name] === node.namespace) {
2098 for (i = 0; i < open_els.length; ++i) {
2101 node = open_els[i + 1]
2107 ins_mode_in_body = function (t) {
2108 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, root_attrs, s, second, second_i, u, w, y, z
2109 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2113 if (is_space_tok(t)) {
2118 if (t.type === TYPE_TEXT) {
2121 flag_frameset_ok = false
2124 if (t.type === TYPE_COMMENT) {
2128 if (t.type === TYPE_DOCTYPE) {
2132 if (t.type === TYPE_START_TAG && t.name === 'html') {
2134 if (template_tag_is_open()) {
2137 root_attrs = open_els[open_els.length - 1].attrs
2138 for (i = 0; i < t.attrs_a.length; ++i) {
2140 if (root_attrs[a[0]] == null) {
2141 root_attrs[a[0]] = a[1]
2147 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2151 if (t.type === TYPE_START_TAG && t.name === 'body') {
2153 if (open_els.length < 2) {
2156 second = open_els[open_els.length - 2]
2157 if (second.namespace !== NS_HTML) {
2160 if (second.name !== 'body') {
2163 if (template_tag_is_open()) {
2166 flag_frameset_ok = false
2167 for (i = 0; i < t.attrs_a.length; ++i) {
2169 if (second.attrs[a[0]] == null) {
2170 second.attrs[a[0]] = a[1]
2175 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2177 if (open_els.length < 2) {
2180 second_i = open_els.length - 2
2181 second = open_els[second_i]
2182 if (second.namespace !== NS_HTML) {
2185 if (second.name !== 'body') {
2188 if (flag_frameset_ok === false) {
2191 if (second.parent != null) {
2192 for (i = 0; i < second.parent.children.length; ++i) {
2193 el = second.parent.children[i]
2194 if (el === second) {
2195 second.parent.children.splice(i, 1)
2200 open_els.splice(second_i, 1)
2201 // pop everything except the "root html element"
2202 while (open_els.length > 1) {
2205 insert_html_element(t)
2206 ins_mode = ins_mode_in_frameset
2209 if (t.type === TYPE_EOF) {
2211 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2212 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2213 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2215 for (i = 0; i < open_els.length; ++i) {
2217 if (ok_tags[t.name] !== el.namespace) {
2222 if (template_ins_modes.length > 0) {
2223 ins_mode_in_template(t)
2229 if (t.type === TYPE_END_TAG && t.name === 'body') {
2230 if (!is_in_scope('body', NS_HTML)) {
2235 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2236 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2237 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2238 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2241 for (i = 0; i < open_els.length; ++i) {
2243 if (ok_tags[t.name] !== el.namespace) {
2248 ins_mode = ins_mode_after_body
2251 if (t.type === TYPE_END_TAG && t.name === 'html') {
2252 if (!is_in_scope('body', NS_HTML)) {
2257 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2258 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2259 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2260 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2263 for (i = 0; i < open_els.length; ++i) {
2265 if (ok_tags[t.name] !== el.namespace) {
2270 ins_mode = ins_mode_after_body
2274 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2275 close_p_if_in_button_scope()
2276 insert_html_element(t)
2279 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2280 close_p_if_in_button_scope()
2281 if (h_tags[open_els[0].name] === open_els[0].namespace) {
2285 insert_html_element(t)
2288 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2289 close_p_if_in_button_scope()
2290 insert_html_element(t)
2291 eat_next_token_if_newline()
2292 flag_frameset_ok = false
2295 if (t.type === TYPE_START_TAG && t.name === 'form') {
2296 if (!(form_element_pointer === null || template_tag_is_open())) {
2300 close_p_if_in_button_scope()
2301 el = insert_html_element(t)
2302 if (!template_tag_is_open()) {
2303 form_element_pointer = el
2307 if (t.type === TYPE_START_TAG && t.name === 'li') {
2308 flag_frameset_ok = false
2309 for (i = 0; i < open_els.length; ++i) {
2311 if (node.name === 'li' && node.namespace === NS_HTML) {
2312 generate_implied_end_tags('li') // arg is exception
2313 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2317 el = open_els.shift()
2318 if (el.name === 'li' && el.namespace === NS_HTML) {
2324 if (el_is_special_not_adp(node)) {
2328 close_p_if_in_button_scope()
2329 insert_html_element(t)
2332 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2333 flag_frameset_ok = false
2334 for (i = 0; i < open_els.length; ++i) {
2336 if (node.name === 'dd' && node.namespace === NS_HTML) {
2337 generate_implied_end_tags('dd') // arg is exception
2338 if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2342 el = open_els.shift()
2343 if (el.name === 'dd' && el.namespace === NS_HTML) {
2349 if (node.name === 'dt' && node.namespace === NS_HTML) {
2350 generate_implied_end_tags('dt') // arg is exception
2351 if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2355 el = open_els.shift()
2356 if (el.name === 'dt' && el.namespace === NS_HTML) {
2362 if (el_is_special_not_adp(node)) {
2366 close_p_if_in_button_scope()
2367 insert_html_element(t)
2370 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2371 close_p_if_in_button_scope()
2372 insert_html_element(t)
2373 tok_state = tok_state_plaintext
2376 if (t.type === TYPE_START_TAG && t.name === 'button') {
2377 if (is_in_scope('button', NS_HTML)) {
2379 generate_implied_end_tags()
2381 el = open_els.shift()
2382 if (el.name === 'button' && el.namespace === NS_HTML) {
2388 insert_html_element(t)
2389 flag_frameset_ok = false
2392 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2393 if (!is_in_scope(t.name, NS_HTML)) {
2397 generate_implied_end_tags()
2398 if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2402 el = open_els.shift()
2403 if (el.name === t.name && el.namespace === NS_HTML) {
2409 if (t.type === TYPE_END_TAG && t.name === 'form') {
2410 if (!template_tag_is_open()) {
2411 node = form_element_pointer
2412 form_element_pointer = null
2413 if (node === null || !el_is_in_scope(node)) {
2417 generate_implied_end_tags()
2418 if (open_els[0] !== node) {
2421 for (i = 0; i < open_els.length; ++i) {
2424 open_els.splice(i, 1)
2429 if (!is_in_scope('form', NS_HTML)) {
2433 generate_implied_end_tags()
2434 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2438 el = open_els.shift()
2439 if (el.name === 'form' && el.namespace === NS_HTML) {
2446 if (t.type === TYPE_END_TAG && t.name === 'p') {
2447 if (!is_in_button_scope('p', NS_HTML)) {
2449 insert_html_element(new_open_tag('p'))
2454 if (t.type === TYPE_END_TAG && t.name === 'li') {
2455 if (!is_in_li_scope('li', NS_HTML)) {
2459 generate_implied_end_tags('li') // arg is exception
2460 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2464 el = open_els.shift()
2465 if (el.name === 'li' && el.namespace === NS_HTML) {
2471 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2472 if (!is_in_scope(t.name, NS_HTML)) {
2476 generate_implied_end_tags(t.name) // arg is exception
2477 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2481 el = open_els.shift()
2482 if (el.name === t.name && el.namespace === NS_HTML) {
2488 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2490 for (i = 0; i < open_els.length; ++i) {
2492 if (h_tags[el.name] === el.namespace) {
2496 if (standard_scopers[el.name] === el.namespace) {
2504 generate_implied_end_tags()
2505 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2509 el = open_els.shift()
2510 if (h_tags[el.name] === el.namespace) {
2517 if (t.type === TYPE_START_TAG && t.name === 'a') {
2518 // If the list of active formatting elements contains an a element
2519 // between the end of the list and the last marker on the list (or
2520 // the start of the list if there is no marker on the list), then
2521 // this is a parse error; run the adoption agency algorithm for the
2522 // tag name "a", then remove that element from the list of active
2523 // formatting elements and the stack of open elements if the
2524 // adoption agency algorithm didn't already remove it (it might not
2525 // have if the element is not in table scope).
2527 for (i = 0; i < afe.length; ++i) {
2529 if (el.type === TYPE_AFE_MARKER) {
2532 if (el.name === 'a' && el.namespace === NS_HTML) {
2536 if (found != null) {
2538 adoption_agency('a')
2539 for (i = 0; i < afe.length; ++i) {
2545 for (i = 0; i < open_els.length; ++i) {
2548 open_els.splice(i, 1)
2553 el = insert_html_element(t)
2557 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2559 el = insert_html_element(t)
2563 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2565 if (is_in_scope('nobr', NS_HTML)) {
2567 adoption_agency('nobr')
2570 el = insert_html_element(t)
2574 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2575 adoption_agency(t.name)
2578 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2580 insert_html_element(t)
2582 flag_frameset_ok = false
2585 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2586 if (!is_in_scope(t.name, NS_HTML)) {
2590 generate_implied_end_tags()
2591 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2595 el = open_els.shift()
2596 if (el.name === t.name && el.namespace === NS_HTML) {
2600 clear_afe_to_marker()
2603 if (t.type === TYPE_START_TAG && t.name === 'table') {
2604 if (doc.flag('quirks mode') !== QUIRKS_YES) {
2605 close_p_if_in_button_scope() // test
2607 insert_html_element(t)
2608 flag_frameset_ok = false
2609 ins_mode = ins_mode_in_table
2612 if (t.type === TYPE_END_TAG && t.name === 'br') {
2614 // W3C: t.type = TYPE_START_TAG
2615 t = new_open_tag('br') // WHATWG
2618 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2620 insert_html_element(t)
2622 t.acknowledge_self_closing()
2623 flag_frameset_ok = false
2626 if (t.type === TYPE_START_TAG && t.name === 'input') {
2628 insert_html_element(t)
2630 t.acknowledge_self_closing()
2631 if (!is_input_hidden_tok(t)) {
2632 flag_frameset_ok = false
2636 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2637 // WHATWG adds 'menuitem' for this block
2638 insert_html_element(t)
2640 t.acknowledge_self_closing()
2643 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2644 close_p_if_in_button_scope()
2645 insert_html_element(t)
2647 t.acknowledge_self_closing()
2648 flag_frameset_ok = false
2651 if (t.type === TYPE_START_TAG && t.name === 'image') {
2657 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2659 if (template_tag_is_open() === false && form_element_pointer !== null) {
2662 t.acknowledge_self_closing()
2663 flag_frameset_ok = false
2664 close_p_if_in_button_scope()
2665 el = insert_html_element(new_open_tag('form'))
2666 if (!template_tag_is_open()) {
2667 form_element_pointer = el
2669 for (i = 0; i < t.attrs_a.length; ++i) {
2671 if (a[0] === 'action') {
2672 el.attrs['action'] = a[1]
2676 insert_html_element(new_open_tag('hr'))
2679 insert_html_element(new_open_tag('label'))
2680 // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2681 input_el = new_open_tag('input')
2683 for (i = 0; i < t.attrs_a.length; ++i) {
2685 if (a[0] === 'prompt') {
2688 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2689 input_el.attrs_a.push([a[0], a[1]])
2692 input_el.attrs_a.push(['name', 'isindex'])
2693 // fixfull this next bit is in english... internationalize?
2694 if (prompt == null) {
2695 prompt = "This is a searchable index. Enter search keywords: "
2697 insert_character(new_character_token(prompt)) // fixfull split
2698 // TODO submit typo "balue" in spec
2699 insert_html_element(input_el)
2701 // insert_character('') // you can put chars here if prompt attr missing
2703 insert_html_element(new_open_tag('hr'))
2706 if (!template_tag_is_open()) {
2707 form_element_pointer = null
2711 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2712 insert_html_element(t)
2713 eat_next_token_if_newline()
2714 tok_state = tok_state_rcdata
2715 original_ins_mode = ins_mode
2716 flag_frameset_ok = false
2717 ins_mode = ins_mode_text
2720 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2721 close_p_if_in_button_scope()
2723 flag_frameset_ok = false
2724 parse_generic_raw_text(t)
2727 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2728 flag_frameset_ok = false
2729 parse_generic_raw_text(t)
2732 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2733 parse_generic_raw_text(t)
2736 if (t.type === TYPE_START_TAG && t.name === 'select') {
2738 insert_html_element(t)
2739 flag_frameset_ok = false
2740 if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2741 ins_mode = ins_mode_in_select_in_table
2743 ins_mode = ins_mode_in_select
2747 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2748 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2752 insert_html_element(t)
2755 // this comment block implements the W3C spec
2756 // if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2757 // if is_in_scope 'ruby', NS_HTML
2758 // generate_implied_end_tags()
2759 // unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2761 // insert_html_element t
2763 // if t.type === TYPE_START_TAG && t.name === 'rt'
2764 // if is_in_scope 'ruby', NS_HTML
2765 // generate_implied_end_tags 'rtc' // arg === exception
2766 // unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2768 // insert_html_element t
2770 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2771 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2772 if (is_in_scope('ruby', NS_HTML)) {
2773 generate_implied_end_tags()
2774 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2778 insert_html_element(t)
2781 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2782 if (is_in_scope('ruby', NS_HTML)) {
2783 generate_implied_end_tags('rtc')
2784 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2788 insert_html_element(t)
2792 if (t.type === TYPE_START_TAG && t.name === 'math') {
2794 adjust_mathml_attributes(t)
2795 adjust_foreign_attributes(t)
2796 insert_foreign_element(t, NS_MATHML)
2797 if (t.flag('self-closing')) {
2799 t.acknowledge_self_closing()
2803 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2805 adjust_svg_attributes(t)
2806 adjust_foreign_attributes(t)
2807 insert_foreign_element(t, NS_SVG)
2808 if (t.flag('self-closing')) {
2810 t.acknowledge_self_closing()
2814 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2818 if (t.type === TYPE_START_TAG) { // any other start tag
2820 insert_html_element(t)
2823 if (t.type === TYPE_END_TAG) { // any other end tag
2824 in_body_any_other_end_tag(t.name)
2829 // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2830 ins_mode_text = function (t) { // "text" insertion mode: handles one token t while inside raw text / RCDATA / script content
2831 if (t.type === TYPE_TEXT) {
2835 if (t.type === TYPE_EOF) {
2837 if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) { // open_els[0] is the current node (the stack grows at the front)
2838 open_els[0].flag('already started', true) // mark the unfinished script element so it is treated as already started
2841 ins_mode = original_ins_mode // leave text mode: restore the mode saved when text mode was entered
2845 if (t.type === TYPE_END_TAG && t.name === 'script') {
2847 ins_mode = original_ins_mode
2848 // fixfull the spec seems to assume that I'm going to run the script
2849 // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2852 if (t.type === TYPE_END_TAG) { // any other end tag
2854 ins_mode = original_ins_mode
2859 // the functions below implement the tokenizer states described here:
2860 // http://www.w3.org/TR/html5/syntax.html#tokenization
2862 // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2863 ins_mode_in_table_else = function (t) {
2865 flag_foster_parenting = true
2867 flag_foster_parenting = false
2869 ins_mode_in_table = function (t) {
2873 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
2874 pending_table_character_tokens = []
2875 original_ins_mode = ins_mode
2876 ins_mode = ins_mode_in_table_text
2879 ins_mode_in_table_else(t)
2888 case TYPE_START_TAG:
2891 clear_stack_to_table_context()
2893 insert_html_element(t)
2894 ins_mode = ins_mode_in_caption
2897 clear_stack_to_table_context()
2898 insert_html_element(t)
2899 ins_mode = ins_mode_in_column_group
2902 clear_stack_to_table_context()
2903 insert_html_element(new_open_tag('colgroup'))
2904 ins_mode = ins_mode_in_column_group
2910 clear_stack_to_table_context()
2911 insert_html_element(t)
2912 ins_mode = ins_mode_in_table_body
2917 clear_stack_to_table_context()
2918 insert_html_element(new_open_tag('tbody'))
2919 ins_mode = ins_mode_in_table_body
2924 if (is_in_table_scope('table', NS_HTML)) {
2926 el = open_els.shift()
2927 if (el.name === 'table' && el.namespace === NS_HTML) {
2941 if (!is_input_hidden_tok(t)) {
2942 ins_mode_in_table_else(t)
2945 el = insert_html_element(t)
2947 t.acknowledge_self_closing()
2952 if (form_element_pointer != null) {
2955 if (template_tag_is_open()) {
2958 form_element_pointer = insert_html_element(t)
2962 ins_mode_in_table_else(t)
2968 if (is_in_table_scope('table', NS_HTML)) {
2970 el = open_els.shift()
2971 if (el.name === 'table' && el.namespace === NS_HTML) {
2997 ins_mode_in_table_else(t)
3004 ins_mode_in_table_else(t)
3008 // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
3009 ins_mode_in_table_text = function (t) { // "in table text" insertion mode: handles one token t
3010 var all_space, i, l, m, old
3011 if (t.type === TYPE_TEXT && t.text === "\u0000") { // NUL character token gets its own branch
3016 if (t.type === TYPE_TEXT) {
3017 pending_table_character_tokens.push(t) // buffer the text token rather than inserting it right away
3022 for (i = 0; i < pending_table_character_tokens.length; ++i) { // first pass: inspect the buffered tokens
3023 old = pending_table_character_tokens[i]
3024 if (!is_space_tok(old)) { // found a buffered token that is not whitespace (presumably recorded in all_space -- confirm against the full source)
3030 for (i = 0; i < pending_table_character_tokens.length; ++i) { // flush path A: insert every buffered token as-is
3031 old = pending_table_character_tokens[i]
3032 insert_character(old)
3035 for (i = 0; i < pending_table_character_tokens.length; ++i) { // flush path B: send every buffered token through the in-table fallback handler
3036 old = pending_table_character_tokens[i]
3037 ins_mode_in_table_else(old)
3040 pending_table_character_tokens = [] // buffer is consumed
3041 ins_mode = original_ins_mode // return to the mode that was saved before switching into this one
3045 // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
3046 ins_mode_in_caption = function (t) {
3048 if (t.type === TYPE_END_TAG && t.name === 'caption') {
3049 if (is_in_table_scope('caption', NS_HTML)) {
3050 generate_implied_end_tags()
3051 if (open_els[0].name !== 'caption') {
3055 el = open_els.shift()
3056 if (el.name === 'caption' && el.namespace === NS_HTML) {
3060 clear_afe_to_marker()
3061 ins_mode = ins_mode_in_table
3068 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3070 if (is_in_table_scope('caption', NS_HTML)) {
3072 el = open_els.shift()
3073 if (el.name === 'caption' && el.namespace === NS_HTML) {
3077 clear_afe_to_marker()
3078 ins_mode = ins_mode_in_table
3081 // else fragment case
3084 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3092 // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
3093 ins_mode_in_column_group = function (t) {
3095 if (is_space_tok(t)) {
3099 if (t.type === TYPE_COMMENT) {
3103 if (t.type === TYPE_DOCTYPE) {
3107 if (t.type === TYPE_START_TAG && t.name === 'html') {
3111 if (t.type === TYPE_START_TAG && t.name === 'col') {
3112 el = insert_html_element(t)
3114 t.acknowledge_self_closing()
3117 if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
3118 if (open_els[0].name === 'colgroup' && open_els.namespace === NS_HTML) {
3120 ins_mode = ins_mode_in_table
3126 if (t.type === TYPE_END_TAG && t.name === 'col') {
3130 if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
3134 if (t.type === TYPE_EOF) {
3139 if (open_els[0].name !== 'colgroup') {
3144 ins_mode = ins_mode_in_table
3148 // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
3149 ins_mode_in_table_body = function (t) {
3151 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3152 clear_stack_to_table_body_context()
3153 insert_html_element(t)
3154 ins_mode = ins_mode_in_row
3157 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3159 clear_stack_to_table_body_context()
3160 insert_html_element(new_open_tag('tr'))
3161 ins_mode = ins_mode_in_row
3165 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3166 if (!is_in_table_scope(t.name, NS_HTML)) {
3170 clear_stack_to_table_body_context()
3172 ins_mode = ins_mode_in_table
3175 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) {
3177 for (i = 0; i < open_els.length; ++i) {
3179 if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) {
3183 if (table_scopers[el.name] === el.namespace) {
3191 clear_stack_to_table_body_context()
3193 ins_mode = ins_mode_in_table
3197 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) {
3202 ins_mode_in_table(t)
3205 // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
3206 ins_mode_in_row = function (t) {
3207 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3208 clear_stack_to_table_row_context()
3209 insert_html_element(t)
3210 ins_mode = ins_mode_in_cell
3214 if (t.type === TYPE_END_TAG && t.name === 'tr') {
3215 if (is_in_table_scope('tr', NS_HTML)) {
3216 clear_stack_to_table_row_context()
3218 ins_mode = ins_mode_in_table_body
3224 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3225 if (is_in_table_scope('tr', NS_HTML)) {
3226 clear_stack_to_table_row_context()
3228 ins_mode = ins_mode_in_table_body
3235 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3236 if (is_in_table_scope(t.name, NS_HTML)) {
3237 if (is_in_table_scope('tr', NS_HTML)) {
3238 clear_stack_to_table_row_context()
3240 ins_mode = ins_mode_in_table_body
3248 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th')) {
3253 ins_mode_in_table(t)
3256 // http://www.w3.org/TR/html5/syntax.html#close-the-cell
3257 close_the_cell = function () {
3259 generate_implied_end_tags()
3260 if (!((open_els[0].name === 'td' || open_els[0] === 'th') && open_els[0].namespace === NS_HTML)) {
3264 el = open_els.shift()
3265 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3269 clear_afe_to_marker()
3270 ins_mode = ins_mode_in_row
3273 // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
3274 ins_mode_in_cell = function (t) {
3276 if (t.type === TYPE_END_TAG && (t.name === 'td' || t.name === 'th')) {
3277 if (is_in_table_scope(t.name, NS_HTML)) {
3278 generate_implied_end_tags()
3279 if (!((open_els[0].name === t.name) && open_els[0].namespace === NS_HTML)) {
3283 el = open_els.shift()
3284 if (el.name === t.name && el.namespace === NS_HTML) {
3288 clear_afe_to_marker()
3289 ins_mode = ins_mode_in_row
3295 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3297 for (i = 0; i < open_els.length; ++i) {
3299 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3303 if (table_scopers[el.name] === el.namespace) {
3315 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html')) {
3319 if (t.type === TYPE_END_TAG && (t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) {
3320 if (is_in_table_scope(t.name, NS_HTML)) {
3332 // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
3333 ins_mode_in_select = function (t) {
3335 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3339 if (t.type === TYPE_TEXT) {
3343 if (t.type === TYPE_COMMENT) {
3347 if (t.type === TYPE_DOCTYPE) {
3351 if (t.type === TYPE_START_TAG && t.name === 'html') {
3355 if (t.type === TYPE_START_TAG && t.name === 'option') {
3356 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3359 insert_html_element(t)
3362 if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
3363 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3366 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3369 insert_html_element(t)
3372 if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
3373 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3374 if (open_els[1].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3378 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3385 if (t.type === TYPE_END_TAG && t.name === 'option') {
3386 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3393 if (t.type === TYPE_END_TAG && t.name === 'select') {
3394 if (is_in_select_scope('select', NS_HTML)) {
3396 el = open_els.shift()
3397 if (el.name === 'select' && el.namespace === NS_HTML) {
3407 if (t.type === TYPE_START_TAG && t.name === 'select') {
3410 el = open_els.shift()
3411 if (el.name === 'select' && el.namespace === NS_HTML) {
3416 // spec says that this is the same as </select> but it doesn't say
3417 // to check scope first
3420 if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
3422 if (!is_in_select_scope('select', NS_HTML)) {
3426 el = open_els.shift()
3427 if (el.name === 'select' && el.namespace === NS_HTML) {
3435 if (t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) {
3439 if (t.type === TYPE_EOF) {
3447 // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// Insertion mode "in select in table" for token t.
// Table-related start tags (caption, table, tbody, tfoot, thead, tr, td, th)
// and the matching end tags both shift an element off open_els and test
// whether it is the HTML <select>; the end-tag branch first consults
// is_in_table_scope(t.name, NS_HTML). The trailing ins_mode_in_select(t) call
// appears to be the fallback to the plain "in select" rules.
3448 ins_mode_in_select_in_table = function (t) {
3450 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3453 el = open_els.shift()
3454 if (el.name === 'select' && el.namespace === NS_HTML) {
3462 if (t.type === TYPE_END_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3464 if (!is_in_table_scope(t.name, NS_HTML)) {
3468 el = open_els.shift()
3469 if (el.name === 'select' && el.namespace === NS_HTML) {
3478 ins_mode_in_select(t)
3481 // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// Insertion mode "in template" for token t.
// For table-structure start tags the head of template_ins_modes is replaced
// (shift, then unshift) and ins_mode is set to the same mode:
//   caption/colgroup/tbody/tfoot/thead -> ins_mode_in_table
//   col                                -> ins_mode_in_column_group
//   tr                                 -> ins_mode_in_table_body
//   td/th                              -> ins_mode_in_row
//   remaining start tags               -> ins_mode_in_body
// At EOF it consults template_tag_is_open(), shifts open_els while testing for
// the HTML <template>, then calls clear_afe_to_marker() and drops the head of
// template_ins_modes.
3482 ins_mode_in_template = function (t) {
3484 if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
3488 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
3492 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3493 template_ins_modes.shift()
3494 template_ins_modes.unshift(ins_mode_in_table)
3495 ins_mode = ins_mode_in_table
3499 if (t.type === TYPE_START_TAG && t.name === 'col') {
3500 template_ins_modes.shift()
3501 template_ins_modes.unshift(ins_mode_in_column_group)
3502 ins_mode = ins_mode_in_column_group
3506 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3507 template_ins_modes.shift()
3508 template_ins_modes.unshift(ins_mode_in_table_body)
3509 ins_mode = ins_mode_in_table_body
3513 if (t.type === TYPE_START_TAG && (t.name === 'td' || t.name === 'th')) {
3514 template_ins_modes.shift()
3515 template_ins_modes.unshift(ins_mode_in_row)
3516 ins_mode = ins_mode_in_row
3520 if (t.type === TYPE_START_TAG) {
3521 template_ins_modes.shift()
3522 template_ins_modes.unshift(ins_mode_in_body)
3523 ins_mode = ins_mode_in_body
3527 if (t.type === TYPE_END_TAG) {
3531 if (t.type === TYPE_EOF) {
3532 if (!template_tag_is_open()) {
3538 el = open_els.shift()
3539 if (el.name === 'template' && el.namespace === NS_HTML) {
3543 clear_afe_to_marker()
3544 template_ins_modes.shift()
3550 // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
// Insertion mode "after body" for token t.
// Dispatches on whitespace, comment, doctype, <html>, </html> and EOF tokens.
// A comment is inserted as the last child of the element stored at the END of
// open_els. </html> checks flag_fragment_parsing and switches ins_mode to
// ins_mode_after_after_body. The trailing assignment sends the parser back to
// ins_mode_in_body.
3551 ins_mode_after_body = function (t) {
3553 if (is_space_tok(t)) {
3557 if (t.type === TYPE_COMMENT) {
3558 first = open_els[open_els.length - 1]
3559 insert_comment(t, [first, first.children.length])
3562 if (t.type === TYPE_DOCTYPE) {
3566 if (t.type === TYPE_START_TAG && t.name === 'html') {
3570 if (t.type === TYPE_END_TAG && t.name === 'html') {
3571 if (flag_fragment_parsing) {
3575 ins_mode = ins_mode_after_after_body
3578 if (t.type === TYPE_EOF) {
3584 ins_mode = ins_mode_in_body
3588 // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
// Insertion mode "in frameset" for token t.
// <frameset> and <frame> start tags are inserted with insert_html_element(t);
// <frame> additionally acknowledges its self-closing flag.
// For </frameset>, open_els.length === 1 is treated as the fragment case and
// returns early; otherwise, when not fragment parsing and open_els[0] is not a
// frameset, ins_mode becomes ins_mode_after_frameset.
// EOF tests open_els.length !== 1.
3589 ins_mode_in_frameset = function (t) {
3590 if (is_space_tok(t)) {
3594 if (t.type === TYPE_COMMENT) {
3598 if (t.type === TYPE_DOCTYPE) {
3602 if (t.type === TYPE_START_TAG && t.name === 'html') {
3606 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
3607 insert_html_element(t)
3610 if (t.type === TYPE_END_TAG && t.name === 'frameset') {
3611 if (open_els.length === 1) {
3613 return // fragment case
3616 if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
3617 ins_mode = ins_mode_after_frameset
3621 if (t.type === TYPE_START_TAG && t.name === 'frame') {
3622 insert_html_element(t)
3624 t.acknowledge_self_closing()
3627 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3631 if (t.type === TYPE_EOF) {
3632 if (open_els.length !== 1) {
3642 // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
// Insertion mode "after frameset" for token t.
// Dispatches on whitespace, comment, doctype, <html>, </html>, <noframes> and
// EOF tokens; </html> switches ins_mode to ins_mode_after_after_frameset.
3643 ins_mode_after_frameset = function (t) {
3644 if (is_space_tok(t)) {
3648 if (t.type === TYPE_COMMENT) {
3652 if (t.type === TYPE_DOCTYPE) {
3656 if (t.type === TYPE_START_TAG && t.name === 'html') {
3660 if (t.type === TYPE_END_TAG && t.name === 'html') {
3661 ins_mode = ins_mode_after_after_frameset
3664 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3668 if (t.type === TYPE_EOF) {
3676 // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
// Insertion mode "after after body" for token t.
// Comments are appended to the document node (doc) as its last child.
// Doctype, whitespace and <html> start tags share one branch; EOF has its own.
// The trailing assignment switches ins_mode back to ins_mode_in_body.
3677 ins_mode_after_after_body = function (t) {
3678 if (t.type === TYPE_COMMENT) {
3679 insert_comment(t, [doc, doc.children.length])
3682 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3686 if (t.type === TYPE_EOF) {
3692 ins_mode = ins_mode_in_body
3696 // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
// Insertion mode "after after frameset" for token t.
// Comments are appended to the document node (doc) as its last child.
// Doctype, whitespace and <html> start tags share one branch; EOF and
// <noframes> start tags each have their own.
3697 ins_mode_after_after_frameset = function (t) {
3698 if (t.type === TYPE_COMMENT) {
3699 insert_comment(t, [doc, doc.children.length])
3702 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3706 if (t.type === TYPE_EOF) {
3710 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3719 // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
// Helper for in_foreign_content: walks t.attrs_a (entries are indexed so that
// a[0] is the attribute name) looking for an attribute called "color", "face"
// or "size". in_foreign_content uses the result to decide whether a <font>
// start tag is handled like the listed HTML-only tags.
// NOTE(review): presumably returns true on a match and false otherwise.
3720 has_color_face_or_size = function (t) {
3722 for (i = 0; i < t.attrs_a.length; ++i) {
3724 if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
// Hook for the end of a script element in foreign content. It takes no
// arguments and is called from two places: in_foreign_content_other_start
// (self-closing <script> start tag) and in_foreign_content (</script> while
// open_els[0] is an SVG script element).
3730 in_foreign_content_end_script = function () {
// Handles a start tag t that is to be inserted as a foreign (MathML/SVG)
// element. Based on the namespace of the adjusted current node it:
//   - adjusts MathML attributes (MathML namespace),
//   - renames the tag via svg_name_fixes and adjusts SVG attributes (SVG),
//   - adjusts foreign attributes,
// then inserts the element in that same namespace. If the token carries the
// 'self-closing' flag, the flag is acknowledged; for a tag named "script"
// in_foreign_content_end_script() is also run.
3734 in_foreign_content_other_start = function (t) {
3736 acn = adjusted_current_node()
3737 if (acn.namespace === NS_MATHML) {
3738 adjust_mathml_attributes(t)
3740 if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra parens because Coffeescript did
3741 t.name = svg_name_fixes[t.name]
3743 if (acn.namespace === NS_SVG) {
3744 adjust_svg_attributes(t)
3746 adjust_foreign_attributes(t)
3747 insert_foreign_element(t, acn.namespace)
3748 if (t.flag('self-closing')) {
3749 if (t.name === 'script') {
3750 t.acknowledge_self_closing()
3751 in_foreign_content_end_script()
3755 t.acknowledge_self_closing()
// Tree-construction rules for tokens processed in foreign (MathML/SVG)
// content.
//   - A NUL text token is replaced by a U+FFFD character.
//   - Non-whitespace text clears flag_frameset_ok.
//   - Start tags from the long HTML-only list (plus <font> with a color, face
//     or size attribute): when fragment parsing they are treated like any
//     other foreign start tag; otherwise a loop runs until open_els[0] is a
//     MathML text integration point, an HTML integration point, or an HTML
//     element.
//   - Any other start tag goes to in_foreign_content_other_start(t).
//   - </script> with an SVG script as open_els[0] runs
//     in_foreign_content_end_script().
//   - Other end tags compare node.name.toLowerCase() with t.name, test for the
//     last entry of open_els, shift open_els on a match, and hand the token to
//     the current HTML insertion mode once an HTML-namespace node is reached.
3759 in_foreign_content = function (t) {
3761 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3763 insert_character(new_character_token("\ufffd"))
3766 if (is_space_tok(t)) {
3770 if (t.type === TYPE_TEXT) {
3771 flag_frameset_ok = false
3775 if (t.type === TYPE_COMMENT) {
3779 if (t.type === TYPE_DOCTYPE) {
3783 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
3785 if (flag_fragment_parsing) {
3786 in_foreign_content_other_start(t)
3789 while (true) { // is this safe?
3791 if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
3798 if (t.type === TYPE_START_TAG) {
3799 in_foreign_content_other_start(t)
3802 if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
3803 in_foreign_content_end_script()
3806 if (t.type === TYPE_END_TAG) {
3809 if (node.name.toLowerCase() !== t.name) {
3813 if (node === open_els[open_els.length - 1]) {
3816 if (node.name.toLowerCase() === t.name) {
3818 el = open_els.shift()
3826 if (node.namespace === NS_HTML) {
3830 ins_mode(t) // explicitly call HTML insertion mode
3835 // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
// Tokenizer "data" state. Consumes one character from txt (advancing cur) and
// either returns a text node built from parse_character_reference(), switches
// to tok_state_tag_open, returns a text node for the character itself, or
// returns the EOF token.
3836 tok_state_data = function () {
3838 switch (c = txt.charAt(cur++)) {
3840 return new_text_node(parse_character_reference())
3843 tok_state = tok_state_tag_open
3847 return new_text_node(c)
3850 return new_eof_token()
3853 return new_text_node(c)
3858 // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3859 // not needed: tok_state_character_reference_in_data = function () {
3860 // just call parse_character_reference()
3862 // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
// Tokenizer "RCDATA" state. Consumes one character and either returns a text
// node built from parse_character_reference(), switches to
// tok_state_rcdata_less_than_sign, returns a U+FFFD character token, returns
// the EOF token, or returns a character token for the character itself.
3863 tok_state_rcdata = function () {
3865 switch (c = txt.charAt(cur++)) {
3867 return new_text_node(parse_character_reference())
3870 tok_state = tok_state_rcdata_less_than_sign
3874 return new_character_token("\ufffd")
3877 return new_eof_token()
3880 return new_character_token(c)
3885 // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3886 // not needed: tok_state_character_reference_in_rcdata = function () {
3887 // just call parse_character_reference()
3889 // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
// Tokenizer "RAWTEXT" state. Consumes one character and either switches to
// tok_state_rawtext_less_than_sign, returns a U+FFFD character token, returns
// the EOF token, or returns a character token for the character itself.
// Unlike the RCDATA state there is no character-reference branch.
3890 tok_state_rawtext = function () {
3892 switch (c = txt.charAt(cur++)) {
3894 tok_state = tok_state_rawtext_less_than_sign
3898 return new_character_token("\ufffd")
3901 return new_eof_token()
3904 return new_character_token(c)
3909 // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
// Tokenizer "script data" state. Consumes one character and either switches
// to tok_state_script_data_less_than_sign, returns a U+FFFD character token,
// returns the EOF token, or returns a character token for the character.
3910 tok_state_script_data = function () {
3912 switch (c = txt.charAt(cur++)) {
3914 tok_state = tok_state_script_data_less_than_sign
3918 return new_character_token("\ufffd")
3921 return new_eof_token()
3924 return new_character_token(c)
3929 // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
// Tokenizer "PLAINTEXT" state. Consumes one character and returns a U+FFFD
// character token, the EOF token, or a character token for the character.
// This state never assigns tok_state, so the tokenizer stays in it.
3930 tok_state_plaintext = function () {
3932 switch (c = txt.charAt(cur++)) {
3935 return new_character_token("\ufffd")
3938 return new_eof_token()
3941 return new_character_token(c)
3946 // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
// Tokenizer "tag open" state (entered from tok_state_data).
// A letter starts a new open-tag token in tok_cur_tag (uppercase letters are
// lowercased) and moves to tok_state_tag_name. Other branches hand off to
// tok_state_markup_declaration_open, tok_state_end_tag_open, or start a bogus
// comment whose text begins with '?'.
// Fallback: return to the data state, un-consume the character, and emit the
// '<' as text.
3947 tok_state_tag_open = function () {
3949 c = txt.charAt(cur++)
3951 tok_state = tok_state_markup_declaration_open
3955 tok_state = tok_state_end_tag_open
3958 if (is_uc_alpha(c)) {
3959 tok_cur_tag = new_open_tag(c.toLowerCase())
3960 tok_state = tok_state_tag_name
3963 if (is_lc_alpha(c)) {
3964 tok_cur_tag = new_open_tag(c)
3965 tok_state = tok_state_tag_name
3970 tok_cur_tag = new_comment_token('?') // FIXME right?
3971 tok_state = tok_state_bogus_comment
3976 tok_state = tok_state_data
3977 cur -= 1 // we didn't parse/handle the char after <
3978 return new_text_node('<')
3981 // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
// Tokenizer "end tag open" state (reached after "</").
// A letter starts a new end-tag token in tok_cur_tag (uppercase letters are
// lowercased) and moves to tok_state_tag_name. At EOF (charAt returns '') the
// state returns to data and emits the literal "</" as text. Otherwise the
// character becomes the first character of a bogus comment.
3982 tok_state_end_tag_open = function () {
3984 c = txt.charAt(cur++)
3985 if (is_uc_alpha(c)) {
3986 tok_cur_tag = new_end_tag(c.toLowerCase())
3987 tok_state = tok_state_tag_name
3990 if (is_lc_alpha(c)) {
3991 tok_cur_tag = new_end_tag(c)
3992 tok_state = tok_state_tag_name
3997 tok_state = tok_state_data
4000 if (c === '') { // EOF
4002 tok_state = tok_state_data
4003 return new_text_node('</')
4007 tok_cur_tag = new_comment_token(c)
4008 tok_state = tok_state_bogus_comment
4012 // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
// Tokenizer "tag name" state. Extends tok_cur_tag.name one character at a
// time: uppercase letters are lowercased, U+FFFD is appended in one branch,
// and other characters are appended unchanged. Other branches leave for
// tok_state_before_attribute_name, tok_state_self_closing_start_tag, or
// tok_state_data.
4013 tok_state_tag_name = function () {
4015 switch (c = txt.charAt(cur++)) {
4020 tok_state = tok_state_before_attribute_name
4023 tok_state = tok_state_self_closing_start_tag
4026 tok_state = tok_state_data
4033 tok_cur_tag.name += "\ufffd"
4037 tok_state = tok_state_data
4040 if (is_uc_alpha(c)) {
4041 tok_cur_tag.name += c.toLowerCase()
4043 tok_cur_tag.name += c
4049 // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
// Tokenizer "RCDATA less-than sign" state. One branch clears temporary_buffer
// and moves to tok_state_rcdata_end_tag_open; otherwise the state returns to
// tok_state_rcdata, un-consumes the character, and emits '<'.
4050 tok_state_rcdata_less_than_sign = function () {
4052 c = txt.charAt(cur++)
4054 temporary_buffer = ''
4055 tok_state = tok_state_rcdata_end_tag_open
4059 tok_state = tok_state_rcdata
4060 cur -= 1 // reconsume the input character
4061 return new_character_token('<')
4064 // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
// Tokenizer "RCDATA end tag open" state. A letter starts an end-tag token in
// tok_cur_tag (name lowercased), is appended to temporary_buffer in its
// ORIGINAL case, and moves to tok_state_rcdata_end_tag_name. Anything else
// returns to tok_state_rcdata, un-consumes the character, and emits "</" as a
// single character token.
4065 tok_state_rcdata_end_tag_open = function () {
4067 c = txt.charAt(cur++)
4068 if (is_uc_alpha(c)) {
4069 tok_cur_tag = new_end_tag(c.toLowerCase())
4070 temporary_buffer += c
4071 tok_state = tok_state_rcdata_end_tag_name
4074 if (is_lc_alpha(c)) {
4075 tok_cur_tag = new_end_tag(c)
4076 temporary_buffer += c
4077 tok_state = tok_state_rcdata_end_tag_name
4081 tok_state = tok_state_rcdata
4082 cur -= 1 // reconsume the input character
4083 return new_character_token("</") // fixfull separate these
4086 // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
// Returns true when t is an end-tag token whose name equals the name of
// open_els[0]; used by the RCDATA/RAWTEXT/script-data end-tag-name states.
// NOTE(review): open_els[0] is dereferenced unconditionally, so this would
// throw if open_els were empty -- confirm callers guarantee a non-empty stack.
4087 is_appropriate_end_tag = function (t) {
4088 // fixfull: this assumes that open_els[0].name is "the tag name of the last
4089 // start tag to have been emitted from this tokenizer"
4090 return t.type === TYPE_END_TAG && t.name === open_els[0].name
4093 // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
// Tokenizer "RCDATA end tag name" state. Three branches (the first one for
// whitespace) only take effect when tok_cur_tag is an appropriate end tag,
// moving to before-attribute-name, self-closing-start-tag or data
// respectively; otherwise they fall through. Letters extend tok_cur_tag.name
// (lowercased) and temporary_buffer (original case). The final fallback
// returns to tok_state_rcdata, un-consumes the character, and emits "</"
// followed by everything buffered so far.
4094 tok_state_rcdata_end_tag_name = function () {
4096 c = txt.charAt(cur++)
4097 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4098 if (is_appropriate_end_tag(tok_cur_tag)) {
4099 tok_state = tok_state_before_attribute_name
4102 // else fall through to "Anything else"
4105 if (is_appropriate_end_tag(tok_cur_tag)) {
4106 tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
4109 // else fall through to "Anything else"
4112 if (is_appropriate_end_tag(tok_cur_tag)) {
4113 tok_state = tok_state_data
4116 // else fall through to "Anything else"
4118 if (is_uc_alpha(c)) {
4119 tok_cur_tag.name += c.toLowerCase()
4120 temporary_buffer += c
4123 if (is_lc_alpha(c)) {
4124 tok_cur_tag.name += c
4125 temporary_buffer += c
4129 tok_state = tok_state_rcdata
4130 cur -= 1 // reconsume the input character
4131 return new_character_token('</' + temporary_buffer) // fixfull separate these
4134 // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
// Tokenizer "RAWTEXT less-than sign" state. One branch clears
// temporary_buffer and moves to tok_state_rawtext_end_tag_open; otherwise the
// state returns to tok_state_rawtext, un-consumes the character, and emits '<'.
4135 tok_state_rawtext_less_than_sign = function () {
4137 c = txt.charAt(cur++)
4139 temporary_buffer = ''
4140 tok_state = tok_state_rawtext_end_tag_open
4144 tok_state = tok_state_rawtext
4145 cur -= 1 // reconsume the input character
4146 return new_character_token('<')
4149 // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
// Tokenizer "RAWTEXT end tag open" state. A letter starts an end-tag token in
// tok_cur_tag (name lowercased), is appended to temporary_buffer in its
// original case, and moves to tok_state_rawtext_end_tag_name. Anything else
// returns to tok_state_rawtext, un-consumes the character, and emits "</".
4150 tok_state_rawtext_end_tag_open = function () {
4151 c = txt.charAt(cur++)
4152 if (is_uc_alpha(c)) {
4153 tok_cur_tag = new_end_tag(c.toLowerCase())
4154 temporary_buffer += c
4155 tok_state = tok_state_rawtext_end_tag_name
4158 if (is_lc_alpha(c)) {
4159 tok_cur_tag = new_end_tag(c)
4160 temporary_buffer += c
4161 tok_state = tok_state_rawtext_end_tag_name
4165 tok_state = tok_state_rawtext
4166 cur -= 1 // reconsume the input character
4167 return new_character_token("</") // fixfull separate these
4170 // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
// Tokenizer "RAWTEXT end tag name" state. Mirrors the RCDATA end-tag-name
// state: three branches (the first one for whitespace) take effect only for an
// appropriate end tag, moving to before-attribute-name,
// self-closing-start-tag or data; letters extend tok_cur_tag.name (lowercased)
// and temporary_buffer (original case); the fallback returns to
// tok_state_rawtext, un-consumes the character, and emits "</" plus the
// buffered characters.
4171 tok_state_rawtext_end_tag_name = function () {
4173 c = txt.charAt(cur++)
4174 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4175 if (is_appropriate_end_tag(tok_cur_tag)) {
4176 tok_state = tok_state_before_attribute_name
4179 // else fall through to "Anything else"
4182 if (is_appropriate_end_tag(tok_cur_tag)) {
4183 tok_state = tok_state_self_closing_start_tag
4186 // else fall through to "Anything else"
4189 if (is_appropriate_end_tag(tok_cur_tag)) {
4190 tok_state = tok_state_data
4193 // else fall through to "Anything else"
4195 if (is_uc_alpha(c)) {
4196 tok_cur_tag.name += c.toLowerCase()
4197 temporary_buffer += c
4200 if (is_lc_alpha(c)) {
4201 tok_cur_tag.name += c
4202 temporary_buffer += c
4206 tok_state = tok_state_rawtext
4207 cur -= 1 // reconsume the input character
4208 return new_character_token('</' + temporary_buffer) // fixfull separate these
4211 // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
// Tokenizer "script data less-than sign" state. One branch clears
// temporary_buffer and moves to tok_state_script_data_end_tag_open; another
// moves to tok_state_script_data_escape_start and emits "<!" as one character
// token; otherwise the state returns to tok_state_script_data, un-consumes
// the character, and emits '<'.
4212 tok_state_script_data_less_than_sign = function () {
4214 c = txt.charAt(cur++)
4216 temporary_buffer = ''
4217 tok_state = tok_state_script_data_end_tag_open
4221 tok_state = tok_state_script_data_escape_start
4222 return new_character_token('<!') // fixfull split
4225 tok_state = tok_state_script_data
4226 cur -= 1 // reconsume
4227 return new_character_token('<')
4230 // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
// Tokenizer "script data end tag open" state. A letter starts an end-tag
// token in tok_cur_tag (name lowercased), is appended to temporary_buffer in
// its original case, and moves to tok_state_script_data_end_tag_name.
// Anything else returns to tok_state_script_data, un-consumes the character,
// and emits "</".
4231 tok_state_script_data_end_tag_open = function () {
4233 c = txt.charAt(cur++)
4234 if (is_uc_alpha(c)) {
4235 tok_cur_tag = new_end_tag(c.toLowerCase())
4236 temporary_buffer += c
4237 tok_state = tok_state_script_data_end_tag_name
4240 if (is_lc_alpha(c)) {
4241 tok_cur_tag = new_end_tag(c)
4242 temporary_buffer += c
4243 tok_state = tok_state_script_data_end_tag_name
4247 tok_state = tok_state_script_data
4248 cur -= 1 // reconsume
4249 return new_character_token('</')
4252 // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
// Tokenizer "script data end tag name" state. Three branches (the first one
// for whitespace) take effect only for an appropriate end tag, moving to
// before-attribute-name, self-closing-start-tag or data. Letters extend
// tok_cur_tag.name (lowercased) and temporary_buffer (original case). The
// fallback returns to tok_state_script_data, un-consumes the character, and
// emits "</" plus the buffered characters.
4253 tok_state_script_data_end_tag_name = function () {
4255 c = txt.charAt(cur++)
4256 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4257 if (is_appropriate_end_tag(tok_cur_tag)) {
4258 tok_state = tok_state_before_attribute_name
4264 if (is_appropriate_end_tag(tok_cur_tag)) {
4265 tok_state = tok_state_self_closing_start_tag
4271 if (is_appropriate_end_tag(tok_cur_tag)) {
4272 tok_state = tok_state_data
4277 if (is_uc_alpha(c)) {
4278 tok_cur_tag.name += c.toLowerCase()
4279 temporary_buffer += c
4282 if (is_lc_alpha(c)) {
4283 tok_cur_tag.name += c
4284 temporary_buffer += c
4288 tok_state = tok_state_script_data
4289 cur -= 1 // reconsume
4290 return new_character_token("</" + temporary_buffer) // fixfull split
4293 // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
// Tokenizer "script data escape start" state. One branch moves to
// tok_state_script_data_escape_start_dash and emits '-'; otherwise the state
// returns to tok_state_script_data and un-consumes the character.
4294 tok_state_script_data_escape_start = function () {
4296 c = txt.charAt(cur++)
4298 tok_state = tok_state_script_data_escape_start_dash
4299 return new_character_token('-')
4302 tok_state = tok_state_script_data
4303 cur -= 1 // reconsume
4306 // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
// Tokenizer "script data escape start dash" state. One branch moves to
// tok_state_script_data_escaped_dash_dash and emits '-'; otherwise the state
// returns to tok_state_script_data and un-consumes the character.
4307 tok_state_script_data_escape_start_dash = function () {
4309 c = txt.charAt(cur++)
4311 tok_state = tok_state_script_data_escaped_dash_dash
4312 return new_character_token('-')
4315 tok_state = tok_state_script_data
4316 cur -= 1 // reconsume
4319 // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
// Tokenizer "script data escaped" state. Branches move to
// tok_state_script_data_escaped_dash (emitting '-') or to
// tok_state_script_data_escaped_less_than_sign. NUL is emitted as U+FFFD. At
// EOF (charAt returns '') the state switches to data and un-consumes so the
// data state sees the EOF. Any other character is emitted as-is.
4320 tok_state_script_data_escaped = function () {
4322 c = txt.charAt(cur++)
4324 tok_state = tok_state_script_data_escaped_dash
4325 return new_character_token('-')
4328 tok_state = tok_state_script_data_escaped_less_than_sign
4331 if (c === "\u0000") {
4333 return new_character_token("\ufffd")
4335 if (c === '') { // EOF
4336 tok_state = tok_state_data
4338 cur -= 1 // reconsume
4342 return new_character_token(c)
4345 // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
// Tokenizer "script data escaped dash" state. Branches move to
// tok_state_script_data_escaped_dash_dash (emitting '-') or to
// tok_state_script_data_escaped_less_than_sign. NUL returns to the escaped
// state and is emitted as U+FFFD. EOF switches to data and un-consumes. Any
// other character returns to the escaped state and is emitted as-is.
4346 tok_state_script_data_escaped_dash = function () {
4348 c = txt.charAt(cur++)
4350 tok_state = tok_state_script_data_escaped_dash_dash
4351 return new_character_token('-')
4354 tok_state = tok_state_script_data_escaped_less_than_sign
4357 if (c === "\u0000") {
4359 tok_state = tok_state_script_data_escaped
4360 return new_character_token("\ufffd")
4362 if (c === '') { // EOF
4363 tok_state = tok_state_data
4365 cur -= 1 // reconsume
4369 tok_state = tok_state_script_data_escaped
4370 return new_character_token(c)
4373 // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
// Tokenizer "script data escaped dash dash" state. One branch emits '-'
// without changing state; another moves to
// tok_state_script_data_escaped_less_than_sign; another returns to
// tok_state_script_data and emits '>'. NUL returns to the escaped state as
// U+FFFD. EOF switches to data and un-consumes. Any other character returns
// to the escaped state and is emitted as-is.
4374 tok_state_script_data_escaped_dash_dash = function () {
4376 c = txt.charAt(cur++)
4378 return new_character_token('-')
4381 tok_state = tok_state_script_data_escaped_less_than_sign
4385 tok_state = tok_state_script_data
4386 return new_character_token('>')
4388 if (c === "\u0000") {
4390 tok_state = tok_state_script_data_escaped
4391 return new_character_token("\ufffd")
4393 if (c === '') { // EOF
4395 tok_state = tok_state_data
4396 cur -= 1 // reconsume
4400 tok_state = tok_state_script_data_escaped
4401 return new_character_token(c)
4404 // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
// Tokenizer "script data escaped less-than sign" state. One branch clears
// temporary_buffer and moves to tok_state_script_data_escaped_end_tag_open.
// A letter seeds temporary_buffer with its lowercase form, moves to
// tok_state_script_data_double_escape_start, and emits "<" plus the letter in
// its original case. Anything else returns to the escaped state, un-consumes
// the character, and emits '<'.
4405 tok_state_script_data_escaped_less_than_sign = function () {
4407 c = txt.charAt(cur++)
4409 temporary_buffer = ''
4410 tok_state = tok_state_script_data_escaped_end_tag_open
4413 if (is_uc_alpha(c)) {
4414 temporary_buffer = c.toLowerCase() // yes, really
4415 tok_state = tok_state_script_data_double_escape_start
4416 return new_character_token("<" + c) // fixfull split
4418 if (is_lc_alpha(c)) {
4419 temporary_buffer = c
4420 tok_state = tok_state_script_data_double_escape_start
4421 return new_character_token("<" + c) // fixfull split
4424 tok_state = tok_state_script_data_escaped
4425 cur -= 1 // reconsume
4426 return new_character_token('<')
4429 // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
// Tokenizer "script data escaped end tag open" state. A letter starts an
// end-tag token in tok_cur_tag (name lowercased), is appended to
// temporary_buffer in its original case, and moves to
// tok_state_script_data_escaped_end_tag_name. Anything else returns to the
// escaped state, un-consumes the character, and emits "</".
4430 tok_state_script_data_escaped_end_tag_open = function () {
4432 c = txt.charAt(cur++)
4433 if (is_uc_alpha(c)) {
4434 tok_cur_tag = new_end_tag(c.toLowerCase())
4435 temporary_buffer += c
4436 tok_state = tok_state_script_data_escaped_end_tag_name
4439 if (is_lc_alpha(c)) {
4440 tok_cur_tag = new_end_tag(c)
4441 temporary_buffer += c
4442 tok_state = tok_state_script_data_escaped_end_tag_name
4446 tok_state = tok_state_script_data_escaped
4447 cur -= 1 // reconsume
4448 return new_character_token('</') // fixfull split
4451 // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
// Tokenizer "script data escaped end tag name" state. Three branches (the
// first one for whitespace) take effect only for an appropriate end tag,
// moving to before-attribute-name, self-closing-start-tag or data. Letters
// extend tok_cur_tag.name and temporary_buffer. The fallback returns to the
// escaped state, un-consumes the character, and emits "</" plus the buffer.
// NOTE(review): this state appends c.toLowerCase() to temporary_buffer, while
// the RCDATA/RAWTEXT/script-data end-tag-name states append c unchanged. As
// the buffer is re-emitted as text on the fallback path, uppercase letters
// would come back lowercased here -- confirm against the spec.
4452 tok_state_script_data_escaped_end_tag_name = function () {
4454 c = txt.charAt(cur++)
4455 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
4456 if (is_appropriate_end_tag(tok_cur_tag)) {
4457 tok_state = tok_state_before_attribute_name
4463 if (is_appropriate_end_tag(tok_cur_tag)) {
4464 tok_state = tok_state_self_closing_start_tag
4470 if (is_appropriate_end_tag(tok_cur_tag)) {
4471 tok_state = tok_state_data
4476 if (is_uc_alpha(c)) {
4477 tok_cur_tag.name += c.toLowerCase()
4478 temporary_buffer += c.toLowerCase()
4481 if (is_lc_alpha(c)) {
4482 tok_cur_tag.name += c
4483 temporary_buffer += c.toLowerCase()
4487 tok_state = tok_state_script_data_escaped
4488 cur -= 1 // reconsume
4489 return new_character_token("</" + temporary_buffer) // fixfull split
4492 // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
// Tokenizer "script data double escape start" state. On whitespace, '/' or
// '>' the state becomes double-escaped when temporary_buffer spells "script",
// otherwise escaped; the character is emitted either way. Letters are added
// to temporary_buffer in lowercase but emitted in their original case.
// Anything else returns to the escaped state and un-consumes the character.
4493 tok_state_script_data_double_escape_start = function () {
4495 c = txt.charAt(cur++)
4496 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4497 if (temporary_buffer === 'script') {
4498 tok_state = tok_state_script_data_double_escaped
4500 tok_state = tok_state_script_data_escaped
4502 return new_character_token(c)
4504 if (is_uc_alpha(c)) {
4505 temporary_buffer += c.toLowerCase() // yes, really lowercase
4506 return new_character_token(c)
4508 if (is_lc_alpha(c)) {
4509 temporary_buffer += c
4510 return new_character_token(c)
4513 tok_state = tok_state_script_data_escaped
4514 cur -= 1 // reconsume
4517 // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
// Tokenizer "script data double escaped" state. Branches move to the
// double-escaped dash state (emitting '-') or the double-escaped
// less-than-sign state (emitting '<'). NUL is emitted as U+FFFD. EOF switches
// to data and un-consumes. Any other character is emitted as-is.
4518 tok_state_script_data_double_escaped = function () {
4520 c = txt.charAt(cur++)
4522 tok_state = tok_state_script_data_double_escaped_dash
4523 return new_character_token('-')
4526 tok_state = tok_state_script_data_double_escaped_less_than_sign
4527 return new_character_token('<')
4529 if (c === "\u0000") {
4531 return new_character_token("\ufffd")
4533 if (c === '') { // EOF
4535 tok_state = tok_state_data
4536 cur -= 1 // reconsume
4540 return new_character_token(c)
4543 // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
// Tokenizer "script data double escaped dash" state. Branches move to the
// double-escaped dash-dash state (emitting '-') or the double-escaped
// less-than-sign state (emitting '<'). NUL returns to the double-escaped
// state as U+FFFD. EOF switches to data and un-consumes. Any other character
// returns to the double-escaped state and is emitted as-is.
4544 tok_state_script_data_double_escaped_dash = function () {
4546 c = txt.charAt(cur++)
4548 tok_state = tok_state_script_data_double_escaped_dash_dash
4549 return new_character_token('-')
4552 tok_state = tok_state_script_data_double_escaped_less_than_sign
4553 return new_character_token('<')
4555 if (c === "\u0000") {
4557 tok_state = tok_state_script_data_double_escaped
4558 return new_character_token("\ufffd")
4560 if (c === '') { // EOF
4562 tok_state = tok_state_data
4563 cur -= 1 // reconsume
4567 tok_state = tok_state_script_data_double_escaped
4568 return new_character_token(c)
4571 // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
// Tokenizer "script data double escaped dash dash" state. One branch emits
// '-' without changing state; another moves to the double-escaped
// less-than-sign state (emitting '<'); another returns to
// tok_state_script_data and emits '>'. NUL returns to the double-escaped
// state as U+FFFD. EOF switches to data and un-consumes. Any other character
// returns to the double-escaped state and is emitted as-is.
4572 tok_state_script_data_double_escaped_dash_dash = function () {
4574 c = txt.charAt(cur++)
4576 return new_character_token('-')
4579 tok_state = tok_state_script_data_double_escaped_less_than_sign
4580 return new_character_token('<')
4583 tok_state = tok_state_script_data
4584 return new_character_token('>')
4586 if (c === "\u0000") {
4588 tok_state = tok_state_script_data_double_escaped
4589 return new_character_token("\ufffd")
4591 if (c === '') { // EOF
4593 tok_state = tok_state_data
4594 cur -= 1 // reconsume
4598 tok_state = tok_state_script_data_double_escaped
4599 return new_character_token(c)
4602 // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
// Tokenizer "script data double escaped less-than sign" state. One branch
// clears temporary_buffer, moves to tok_state_script_data_double_escape_end
// and emits '/'; otherwise the state returns to the double-escaped state and
// un-consumes the character.
4603 tok_state_script_data_double_escaped_less_than_sign = function () {
4605 c = txt.charAt(cur++)
4607 temporary_buffer = ''
4608 tok_state = tok_state_script_data_double_escape_end
4609 return new_character_token('/')
4612 tok_state = tok_state_script_data_double_escaped
4613 cur -= 1 // reconsume
4616 // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
// Tokenizer "script data double escape end" state; the mirror image of the
// double-escape-start state. On whitespace, '/' or '>' the state becomes
// escaped when temporary_buffer spells "script", otherwise stays
// double-escaped; the character is emitted either way. Letters are buffered in
// lowercase but emitted in their original case. Anything else returns to the
// double-escaped state and un-consumes the character.
4617 tok_state_script_data_double_escape_end = function () {
4619 c = txt.charAt(cur++)
4620 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4621 if (temporary_buffer === 'script') {
4622 tok_state = tok_state_script_data_escaped
4624 tok_state = tok_state_script_data_double_escaped
4626 return new_character_token(c)
4628 if (is_uc_alpha(c)) {
4629 temporary_buffer += c.toLowerCase() // yes, really lowercase
4630 return new_character_token(c)
4632 if (is_lc_alpha(c)) {
4633 temporary_buffer += c
4634 return new_character_token(c)
4637 tok_state = tok_state_script_data_double_escaped
4638 cur -= 1 // reconsume
4641 // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
// Tokenizer "before attribute name" state. Branches leave for
// tok_state_self_closing_start_tag or tok_state_data. When a branch assigns
// attr_name (a lowercased letter or U+FFFD among the possibilities), a new
// [name, ''] pair is pushed onto the FRONT of tok_cur_tag.attrs_a and the
// state moves to tok_state_attribute_name. The attribute-name and
// attribute-value states therefore always edit attrs_a[0].
4642 tok_state_before_attribute_name = function () {
4643 var attr_name, c, tmp
4645 switch (c = txt.charAt(cur++)) {
4653 tok_state = tok_state_self_closing_start_tag
4657 tok_state = tok_state_data
4664 attr_name = "\ufffd"
4675 tok_state = tok_state_data
4678 if (is_uc_alpha(c)) {
4679 attr_name = c.toLowerCase()
4684 if (attr_name != null) {
4685 tok_cur_tag.attrs_a.unshift([attr_name, ''])
4686 tok_state = tok_state_attribute_name
4691 // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
// Tokenizer "attribute name" state. Extends the name of the attribute
// currently being built (tok_cur_tag.attrs_a[0][0]): uppercase letters are
// lowercased, U+FFFD is appended in one branch, other characters are appended
// unchanged. Other branches leave for after-attribute-name,
// self-closing-start-tag, before-attribute-value, or data.
4692 tok_state_attribute_name = function () {
4694 switch (c = txt.charAt(cur++)) {
4699 tok_state = tok_state_after_attribute_name
4702 tok_state = tok_state_self_closing_start_tag
4705 tok_state = tok_state_before_attribute_value
4708 tok_state = tok_state_data
4715 tok_cur_tag.attrs_a[0][0] += "\ufffd"
4721 tok_cur_tag.attrs_a[0][0] += c
4725 tok_state = tok_state_data
4728 if (is_uc_alpha(c)) {
4729 tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
4731 tok_cur_tag.attrs_a[0][0] += c
4737 // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
// Tokenizer "after attribute name" state. Whitespace has its own branch.
// Other branches leave for self-closing-start-tag, before-attribute-value, or
// data. A new attribute is started (unshift onto tok_cur_tag.attrs_a, then
// tok_state_attribute_name) by an uppercase letter (lowercased), by NUL
// (stored as U+FFFD), and by any other character in the final fallback; the
// quote and '<' characters fall through to that fallback too. EOF switches
// to data and un-consumes.
4738 tok_state_after_attribute_name = function () {
4740 c = txt.charAt(cur++)
4741 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4745 tok_state = tok_state_self_closing_start_tag
4749 tok_state = tok_state_before_attribute_value
4753 tok_state = tok_state_data
4756 if (is_uc_alpha(c)) {
4757 tok_cur_tag.attrs_a.unshift([c.toLowerCase(), ''])
4758 tok_state = tok_state_attribute_name
4761 if (c === "\u0000") {
4763 tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
4764 tok_state = tok_state_attribute_name
4767 if (c === '') { // EOF
4769 tok_state = tok_state_data
4770 cur -= 1 // reconsume
4773 if (c === '"' || c === "'" || c === '<') {
4775 // fall through to Anything else
4778 tok_cur_tag.attrs_a.unshift([c, ''])
4779 tok_state = tok_state_attribute_name
4782 // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Tokenizer "before attribute value" state. Chooses how the value of the
// current attribute (tok_cur_tag.attrs_a[0][1]) will be read: branches lead
// to the double-quoted, single-quoted and unquoted value states, or back to
// data. Two branches also append to the value (U+FFFD, or the character
// itself) before entering the unquoted state.
4783 tok_state_before_attribute_value = function () {
4785 switch (c = txt.charAt(cur++)) {
4793 tok_state = tok_state_attribute_value_double_quoted
4796 tok_state = tok_state_attribute_value_unquoted
4800 tok_state = tok_state_attribute_value_single_quoted
4804 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4805 tok_state = tok_state_attribute_value_unquoted
4809 tok_state = tok_state_data
4816 tok_state = tok_state_data
4819 tok_cur_tag.attrs_a[0][1] += c
4820 tok_state = tok_state_attribute_value_unquoted
4825 // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
// Tokenizer "attribute value (double-quoted)" state. Appends to the current
// attribute's value (attrs_a[0][1]): the result of
// parse_character_reference('"', true), U+FFFD, or the character itself.
// Other branches leave for tok_state_after_attribute_value_quoted or data.
4826 tok_state_attribute_value_double_quoted = function () {
4828 switch (c = txt.charAt(cur++)) {
4830 tok_state = tok_state_after_attribute_value_quoted
4833 tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
4837 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4841 tok_state = tok_state_data
4844 tok_cur_tag.attrs_a[0][1] += c
4849 // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Tokenizer "attribute value (single-quoted)" state. Same shape as the
// double-quoted state, except that parse_character_reference is called with
// "'" as its first argument.
4850 tok_state_attribute_value_single_quoted = function () {
4852 switch (c = txt.charAt(cur++)) {
4854 tok_state = tok_state_after_attribute_value_quoted
4857 tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
4861 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4865 tok_state = tok_state_data
4868 tok_cur_tag.attrs_a[0][1] += c
4873 // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// Tokenizer "attribute value (unquoted)" state. Appends to the current
// attribute's value (attrs_a[0][1]): the result of
// parse_character_reference('>', true), U+FFFD, or the character itself.
// Other branches leave for tok_state_before_attribute_name or data.
4874 tok_state_attribute_value_unquoted = function () {
4876 switch (c = txt.charAt(cur++)) {
4881 tok_state = tok_state_before_attribute_name
4884 tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
4887 tok_state = tok_state_data
4893 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4897 tok_state = tok_state_data
4900 // Parse Error if ', <, = or ` (backtick)
4901 tok_cur_tag.attrs_a[0][1] += c
4906 // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
// Tokenizer "after attribute value (quoted)" state. Branches leave for
// before-attribute-name, self-closing-start-tag, or data. The fallback moves
// to before-attribute-name and un-consumes the character so that state
// handles it.
4907 tok_state_after_attribute_value_quoted = function () {
4909 switch (c = txt.charAt(cur++)) {
4914 tok_state = tok_state_before_attribute_name
4917 tok_state = tok_state_self_closing_start_tag
4920 tok_state = tok_state_data
4927 tok_state = tok_state_data
4931 tok_state = tok_state_before_attribute_name
4932 cur -= 1 // we didn't handle that char
4937 // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
// Tokenizer "self-closing start tag" state. One branch sets the
// 'self-closing' flag on tok_cur_tag and returns to data. Another returns to
// data and un-consumes the character. The fallback moves to
// before-attribute-name and un-consumes the character.
4938 tok_state_self_closing_start_tag = function () {
4940 c = txt.charAt(cur++)
4942 tok_cur_tag.flag('self-closing', true)
4943 tok_state = tok_state_data
4948 tok_state = tok_state_data
4949 cur -= 1 // reconsume
4954 tok_state = tok_state_before_attribute_name
4955 cur -= 1 // reconsume
4958 // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
4959 // WARNING: put a comment token in tok_cur_tag before setting this state
// Tokenizer "bogus comment" state. Takes the text from cur up to the next
// '>' (or the whole remainder of txt when there is no '>'), replaces every
// NUL with U+FFFD, appends the result to tok_cur_tag.text, and returns to the
// data state.
4960 tok_state_bogus_comment = function () {
4962 next_gt = txt.indexOf('>', cur)
4963 if (next_gt === -1) {
4964 val = txt.substr(cur)
4967 val = txt.substr(cur, next_gt - cur)
4970 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4971 tok_cur_tag.text += val
4972 tok_state = tok_state_data
4976 // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
// Tokenizer "markup declaration open" state (entered from tok_state_tag_open).
// Inspects txt at cur:
//   "--"                       -> empty comment token, tok_state_comment_start
//   "doctype" (any letter case) -> tok_state_doctype
//   "[CDATA[" (exact case), only when an adjusted current node exists and is
//   not in the HTML namespace   -> tok_state_cdata_section
//   otherwise                   -> empty comment token, tok_state_bogus_comment
4977 tok_state_markup_declaration_open = function () {
4979 if (txt.substr(cur, 2) === '--') {
4981 tok_cur_tag = new_comment_token('')
4982 tok_state = tok_state_comment_start
4985 if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
4987 tok_state = tok_state_doctype
4990 acn = adjusted_current_node()
4991 if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
4993 tok_state = tok_state_cdata_section
4998 tok_cur_tag = new_comment_token('')
4999 tok_state = tok_state_bogus_comment
5002 // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
// Tokenizer "comment start" state. Branches move to
// tok_state_comment_start_dash, to tok_state_comment, or back to data (one of
// them un-consuming the character). The fallback appends the character to the
// comment text and moves to tok_state_comment.
// NOTE(review): the branch that switches to tok_state_comment and returns
// new_character_token("\ufffd") emits U+FFFD as a character token, whereas the
// other comment states append "\ufffd" to tok_cur_tag.text. Verify whether
// this should be `tok_cur_tag.text += "\ufffd"` instead.
5003 tok_state_comment_start = function () {
5005 switch (c = txt.charAt(cur++)) {
5007 tok_state = tok_state_comment_start_dash
5011 tok_state = tok_state_comment
5012 return new_character_token("\ufffd")
5016 tok_state = tok_state_data
5021 tok_state = tok_state_data
5022 cur -= 1 // reconsume
5026 tok_cur_tag.text += c
5027 tok_state = tok_state_comment
5032 // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
5033 tok_state_comment_start_dash = function () {
5035 switch (c = txt.charAt(cur++)) {
5037 tok_state = tok_state_comment_end
5041 tok_cur_tag.text += "-\ufffd"
5042 tok_state = tok_state_comment
5046 tok_state = tok_state_data
5051 tok_state = tok_state_data
5052 cur -= 1 // reconsume
5056 tok_cur_tag.text += "-" + c
5057 tok_state = tok_state_comment
5062 // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
5063 tok_state_comment = function () {
5065 switch (c = txt.charAt(cur++)) {
5067 tok_state = tok_state_comment_end_dash
5071 tok_cur_tag.text += "\ufffd"
5075 tok_state = tok_state_data
5076 cur -= 1 // reconsume
5080 tok_cur_tag.text += c
5085 // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
5086 tok_state_comment_end_dash = function () {
5088 switch (c = txt.charAt(cur++)) {
5090 tok_state = tok_state_comment_end
5094 tok_cur_tag.text += "-\ufffd"
5095 tok_state = tok_state_comment
5099 tok_state = tok_state_data
5100 cur -= 1 // reconsume
5104 tok_cur_tag.text += "-" + c
5105 tok_state = tok_state_comment
5110 // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
5111 tok_state_comment_end = function () {
5113 switch (c = txt.charAt(cur++)) {
5115 tok_state = tok_state_data
5120 tok_cur_tag.text += "--\ufffd"
5121 tok_state = tok_state_comment
5125 tok_state = tok_state_comment_end_bang
5129 tok_cur_tag.text += '-'
5133 tok_state = tok_state_data
5134 cur -= 1 // reconsume
5139 tok_cur_tag.text += "--" + c
5140 tok_state = tok_state_comment
5145 // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
5146 tok_state_comment_end_bang = function () {
5148 switch (c = txt.charAt(cur++)) {
5150 tok_cur_tag.text += "--!" + c
5151 tok_state = tok_state_comment_end_dash
5154 tok_state = tok_state_data
5159 tok_cur_tag.text += "--!\ufffd"
5160 tok_state = tok_state_comment
5164 tok_state = tok_state_data
5165 cur -= 1 // reconsume
5169 tok_cur_tag.text += "--!" + c
5170 tok_state = tok_state_comment
5175 // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
5176 tok_state_doctype = function () {
5178 switch (c = txt.charAt(cur++)) {
5183 tok_state = tok_state_before_doctype_name
5187 tok_state = tok_state_data
5188 el = new_doctype_token('')
5189 el.flag('force-quirks', true)
5190 cur -= 1 // reconsume
5195 tok_state = tok_state_before_doctype_name
5196 cur -= 1 // reconsume
5201 // 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
5202 tok_state_before_doctype_name = function () {
5204 c = txt.charAt(cur++)
5205 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5208 if (is_uc_alpha(c)) {
5209 tok_cur_tag = new_doctype_token(c.toLowerCase())
5210 tok_state = tok_state_doctype_name
5213 if (c === "\u0000") {
5215 tok_cur_tag = new_doctype_token("\ufffd")
5216 tok_state = tok_state_doctype_name
5221 el = new_doctype_token('')
5222 el.flag('force-quirks', true)
5223 tok_state = tok_state_data
5226 if (c === '') { // EOF
5228 tok_state = tok_state_data
5229 el = new_doctype_token('')
5230 el.flag('force-quirks', true)
5231 cur -= 1 // reconsume
5235 tok_cur_tag = new_doctype_token(c)
5236 tok_state = tok_state_doctype_name
5240 // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
5241 tok_state_doctype_name = function () {
5243 c = txt.charAt(cur++)
5244 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5245 tok_state = tok_state_after_doctype_name
5249 tok_state = tok_state_data
5252 if (is_uc_alpha(c)) {
5253 tok_cur_tag.name += c.toLowerCase()
5256 if (c === "\u0000") {
5258 tok_cur_tag.name += "\ufffd"
5261 if (c === '') { // EOF
5263 tok_state = tok_state_data
5264 tok_cur_tag.flag('force-quirks', true)
5265 cur -= 1 // reconsume
5269 tok_cur_tag.name += c
5273 // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
5274 tok_state_after_doctype_name = function () {
5276 c = txt.charAt(cur++)
5277 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5281 tok_state = tok_state_data
5284 if (c === '') { // EOF
5286 tok_state = tok_state_data
5287 tok_cur_tag.flag('force-quirks', true)
5288 cur -= 1 // reconsume
5292 if (txt.substr(cur - 1, 6).toLowerCase() === 'public') {
5294 tok_state = tok_state_after_doctype_public_keyword
5297 if (txt.substr(cur - 1, 6).toLowerCase() === 'system') {
5299 tok_state = tok_state_after_doctype_system_keyword
5303 tok_cur_tag.flag('force-quirks', true)
5304 tok_state = tok_state_bogus_doctype
5308 // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
5309 tok_state_after_doctype_public_keyword = function () {
5311 c = txt.charAt(cur++)
5312 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5313 tok_state = tok_state_before_doctype_public_identifier
5318 tok_cur_tag.public_identifier = ''
5319 tok_state = tok_state_doctype_public_identifier_double_quoted
5324 tok_cur_tag.public_identifier = ''
5325 tok_state = tok_state_doctype_public_identifier_single_quoted
5330 tok_cur_tag.flag('force-quirks', true)
5331 tok_state = tok_state_data
5334 if (c === '') { // EOF
5336 tok_state = tok_state_data
5337 tok_cur_tag.flag('force-quirks', true)
5338 cur -= 1 // reconsume
5343 tok_cur_tag.flag('force-quirks', true)
5344 tok_state = tok_state_bogus_doctype
5348 // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
5349 tok_state_before_doctype_public_identifier = function () {
5351 c = txt.charAt(cur++)
5352 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5357 tok_cur_tag.public_identifier = ''
5358 tok_state = tok_state_doctype_public_identifier_double_quoted
5363 tok_cur_tag.public_identifier = ''
5364 tok_state = tok_state_doctype_public_identifier_single_quoted
5369 tok_cur_tag.flag('force-quirks', true)
5370 tok_state = tok_state_data
5373 if (c === '') { // EOF
5375 tok_state = tok_state_data
5376 tok_cur_tag.flag('force-quirks', true)
5377 cur -= 1 // reconsume
5382 tok_cur_tag.flag('force-quirks', true)
5383 tok_state = tok_state_bogus_doctype
5388 // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
5389 tok_state_doctype_public_identifier_double_quoted = function () {
5391 c = txt.charAt(cur++)
5393 tok_state = tok_state_after_doctype_public_identifier
5396 if (c === "\u0000") {
5398 tok_cur_tag.public_identifier += "\ufffd"
5403 tok_cur_tag.flag('force-quirks', true)
5404 tok_state = tok_state_data
5407 if (c === '') { // EOF
5409 tok_state = tok_state_data
5410 tok_cur_tag.flag('force-quirks', true)
5411 cur -= 1 // reconsume
5415 tok_cur_tag.public_identifier += c
5419 // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
5420 tok_state_doctype_public_identifier_single_quoted = function () {
5422 c = txt.charAt(cur++)
5424 tok_state = tok_state_after_doctype_public_identifier
5427 if (c === "\u0000") {
5429 tok_cur_tag.public_identifier += "\ufffd"
5434 tok_cur_tag.flag('force-quirks', true)
5435 tok_state = tok_state_data
5438 if (c === '') { // EOF
5440 tok_state = tok_state_data
5441 tok_cur_tag.flag('force-quirks', true)
5442 cur -= 1 // reconsume
5446 tok_cur_tag.public_identifier += c
5450 // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
5451 tok_state_after_doctype_public_identifier = function () {
5453 c = txt.charAt(cur++)
5454 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5455 tok_state = tok_state_between_doctype_public_and_system_identifiers
5459 tok_state = tok_state_data
5464 tok_cur_tag.system_identifier = ''
5465 tok_state = tok_state_doctype_system_identifier_double_quoted
5470 tok_cur_tag.system_identifier = ''
5471 tok_state = tok_state_doctype_system_identifier_single_quoted
5474 if (c === '') { // EOF
5476 tok_state = tok_state_data
5477 tok_cur_tag.flag('force-quirks', true)
5478 cur -= 1 // reconsume
5483 tok_cur_tag.flag('force-quirks', true)
5484 tok_state = tok_state_bogus_doctype
5488 // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
5489 tok_state_between_doctype_public_and_system_identifiers = function () {
5491 c = txt.charAt(cur++)
5492 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5496 tok_state = tok_state_data
5501 tok_cur_tag.system_identifier = ''
5502 tok_state = tok_state_doctype_system_identifier_double_quoted
5507 tok_cur_tag.system_identifier = ''
5508 tok_state = tok_state_doctype_system_identifier_single_quoted
5511 if (c === '') { // EOF
5513 tok_state = tok_state_data
5514 tok_cur_tag.flag('force-quirks', true)
5515 cur -= 1 // reconsume
5520 tok_cur_tag.flag('force-quirks', true)
5521 tok_state = tok_state_bogus_doctype
5525 // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
5526 tok_state_after_doctype_system_keyword = function () {
5528 c = txt.charAt(cur++)
5529 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5530 tok_state = tok_state_before_doctype_system_identifier
5535 tok_cur_tag.system_identifier = ''
5536 tok_state = tok_state_doctype_system_identifier_double_quoted
5541 tok_cur_tag.system_identifier = ''
5542 tok_state = tok_state_doctype_system_identifier_single_quoted
5547 tok_cur_tag.flag('force-quirks', true)
5548 tok_state = tok_state_data
5551 if (c === '') { // EOF
5553 tok_state = tok_state_data
5554 tok_cur_tag.flag('force-quirks', true)
5555 cur -= 1 // reconsume
5560 tok_cur_tag.flag('force-quirks', true)
5561 tok_state = tok_state_bogus_doctype
5565 // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
5566 tok_state_before_doctype_system_identifier = function () {
5568 c = txt.charAt(cur++)
5569 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5573 tok_cur_tag.system_identifier = ''
5574 tok_state = tok_state_doctype_system_identifier_double_quoted
5578 tok_cur_tag.system_identifier = ''
5579 tok_state = tok_state_doctype_system_identifier_single_quoted
5584 tok_cur_tag.flag('force-quirks', true)
5585 tok_state = tok_state_data
5588 if (c === '') { // EOF
5590 tok_state = tok_state_data
5591 tok_cur_tag.flag('force-quirks', true)
5592 cur -= 1 // reconsume
5597 tok_cur_tag.flag('force-quirks', true)
5598 tok_state = tok_state_bogus_doctype
5602 // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
5603 tok_state_doctype_system_identifier_double_quoted = function () {
5605 c = txt.charAt(cur++)
5607 tok_state = tok_state_after_doctype_system_identifier
5610 if (c === "\u0000") {
5612 tok_cur_tag.system_identifier += "\ufffd"
5617 tok_cur_tag.flag('force-quirks', true)
5618 tok_state = tok_state_data
5621 if (c === '') { // EOF
5623 tok_state = tok_state_data
5624 tok_cur_tag.flag('force-quirks', true)
5625 cur -= 1 // reconsume
5629 tok_cur_tag.system_identifier += c
5633 // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
5634 tok_state_doctype_system_identifier_single_quoted = function () {
5636 c = txt.charAt(cur++)
5638 tok_state = tok_state_after_doctype_system_identifier
5641 if (c === "\u0000") {
5643 tok_cur_tag.system_identifier += "\ufffd"
5648 tok_cur_tag.flag('force-quirks', true)
5649 tok_state = tok_state_data
5652 if (c === '') { // EOF
5654 tok_state = tok_state_data
5655 tok_cur_tag.flag('force-quirks', true)
5656 cur -= 1 // reconsume
5660 tok_cur_tag.system_identifier += c
5664 // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
5665 tok_state_after_doctype_system_identifier = function () {
5667 c = txt.charAt(cur++)
5668 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5672 tok_state = tok_state_data
5675 if (c === '') { // EOF
5677 tok_state = tok_state_data
5678 tok_cur_tag.flag('force-quirks', true)
5679 cur -= 1 // reconsume
5684 // do _not_ set the 'force-quirks' flag on tok_cur_tag here (the spec does not set it in this case)
5685 tok_state = tok_state_bogus_doctype
5689 // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
5690 tok_state_bogus_doctype = function () {
5692 c = txt.charAt(cur++)
5694 tok_state = tok_state_data
5697 if (c === '') { // EOF
5698 tok_state = tok_state_data
5699 cur -= 1 // reconsume
5706 // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
5707 tok_state_cdata_section = function () {
5709 tok_state = tok_state_data
5710 next_gt = txt.indexOf(']]>', cur)
5711 if (next_gt === -1) {
5712 val = txt.substr(cur)
5715 val = txt.substr(cur, next_gt - cur)
5718 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5719 if (val.length > 0) {
5720 return new_character_token(val) // fixfull split
5725 // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5726 // Don't set this as a state, just call it
5727 // returns a string (NOT a text node)
5728 parse_character_reference = function (allowed_char, in_attr) {
5729 var base, c, charset, code_point, decoded, i, max, start
5730 if (allowed_char == null) {
5733 if (in_attr == null) {
5736 if (cur >= txt.length) {
5739 switch (c = txt.charAt(cur)) {
5748 // explicitly not a parse error
5752 // there has to be "one or more" alnums between & and ; to be a parse error
5756 if (cur + 1 >= txt.length) {
5759 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
5769 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5776 if (txt.charAt(start + i) === ';') {
5781 code_point = txt.substr(start, i)
5782 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5783 code_point = code_point.substr(1)
5785 code_point = parseInt(code_point, base)
5786 if (unicode_fixes[code_point] != null) {
5788 return unicode_fixes[code_point]
5790 if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
5794 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5797 return from_code_point(code_point)
5803 for (i = 0; i < 31; ++i) {
5804 if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5809 // exit early, because parse_error() below needs at least one alnum
5812 if (txt.charAt(cur + i) === ';') {
5813 decoded = decode_named_char_ref(txt.substr(cur, i))
5814 i += 1 // scan past the ';' (done afterwards, so we don't pass it to decode)
5815 if (decoded != null) {
5819 // else FALL THROUGH (check for match without last char(s) or ";")
5821 // no ';' terminator (only legacy char refs)
5823 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5824 c = legacy_char_refs[txt.substr(cur, i)]
5827 if (txt.charAt(cur + i) === '=') {
5828 // "because some legacy user agents will
5829 // misinterpret the markup in those cases"
5833 if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5834 // this makes attributes forgiving about url args
5838 // ok, and besides the weird exceptions for attributes...
5839 // return the matching char
5840 cur += i // consume entity chars
5841 parse_error() // because no terminating ";"
5851 eat_next_token_if_newline = function () {
5858 if (t.type === TYPE_TEXT) {
5859 // definition of a newline depends on whether it was a character ref or not
5860 if (cur - old_cur === 1) {
5861 // not a character reference
5862 if (t.text === "\u000d" || t.text === "\u000a") {
5866 if (t.text === "\u000a") {
5875 // tree constructor initialization
5876 // see comments on TYPE_TAG/etc for the structure of this data
5879 doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5880 doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5881 fragment_root = null // fragment parsing algorithm returns children of this
5883 afe = [] // active formatting elements
5884 template_ins_modes = []
5885 ins_mode = ins_mode_initial
5886 original_ins_mode = ins_mode // TODO check spec
5887 flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5888 flag_frameset_ok = true
5890 flag_foster_parenting = false
5891 form_element_pointer = null
5892 temporary_buffer = null
5893 pending_table_character_tokens = []
5894 head_element_pointer = null
5895 flag_fragment_parsing = false
5896 context_element = null
5897 prev_node_id = 0 // just for debugging
5899 // tokenizer initialization
5900 tok_state = tok_state_data
5902 parse_init = function () {
5903 var el, f, ns, old_doc, t
5904 // fragment parsing (text arg)
5905 if (args.fragment != null) {
5906 // this handles the fragment from the tests in the format described here:
5907 // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
5910 if (f.substr(0, 5) === 'math ') {
5913 } else if (f.substr(0, 4) === 'svg ') {
5918 context_element = token_to_element(t, ns)
5919 context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5920 context_element.document.flag('quirks mode', QUIRKS_NO)
5922 // fragment parsing (Node arg)
5923 if (args.context != null) {
5924 context_element = args.context
5927 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5928 // fragment parsing algorithm
5929 if (context_element != null) {
5930 flag_fragment_parsing = true
5931 doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5932 // search up the tree from context, to try to find its document,
5933 // because this file only puts a "document" property on the root
5936 el = context_element
5938 if (el.document != null) {
5939 old_doc = el.document
5949 doc.flag('quirks mode', old_doc.flag('quirks mode'))
5952 if (context_element.namespace === NS_HTML) {
5953 switch (context_element.name) {
5956 tok_state = tok_state_rcdata
5963 tok_state = tok_state_rawtext
5966 tok_state = tok_state_script_data
5969 if (flag_scripting) {
5970 tok_state = tok_state_rawtext
5974 tok_state = tok_state_plaintext
5977 fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5978 doc.children.push(fragment_root)
5979 fragment_root.document = doc
5980 open_els = [fragment_root]
5981 if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
5982 template_ins_modes.unshift(ins_mode_in_template)
5984 // fixfull create token for context (it should have its original one already)
5986 // set form_element pointer... in the foreign doc?!
5987 el = context_element
5989 if (el.name === 'form' && el.namespace === NS_HTML) {
5990 form_element_pointer = el
6001 // text pre-processing
6002 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
6003 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6004 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6007 // http://www.w3.org/TR/html5/syntax.html#tree-construction
6008 parse_main_loop = function () {
6010 while (flag_parsing) {
6014 // fixfull parse error if the token has the self-closing flag, but it wasn't acknowledged
6021 if (flag_fragment_parsing) {
6022 return fragment_root.children
6032 if (context === 'module') {
6033 module.exports = this_module
6035 window.peach_parser = this_module