1 // todo remove unused variables
2 // todo remove debug log, or make a way to access it
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
21 // This file implements a thorough parser for html5, meant to be used by a
24 // The implementation is a pretty direct implementation of the parsing algorithm
27 // http://www.w3.org/TR/html5/syntax.html
29 // except for some places marked "WHATWG" that are implemented as described here:
31 // https://html.spec.whatwg.org/multipage/syntax.html
33 // This code passes all of the tests in the .dat files at:
35 // https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
42 // See README.md for how to run this file in the browser or in node.js.
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
49 // peach_parser.parse("<p><b>hi</p>")
51 // Or, if you don't want <html><head><body>/etc, do this:
53 // peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
55 // The return value is an array of Nodes. A Node contains:
56 // type: one of: "tag", "text", "comment", "doctype"
57 // text: contents for text/comment nodes
58 // attrs: object of attributes, eg {href: "#main"}
59 // children: array of Nodes
60 // namespace: one of: "html", "mathml", "svg"
61 // parent: another Node or null
63 // This code is a work in progress, e.g. try searching this file for "fixfull",
67 // Notes: stacks/lists
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
73 // stacks grow downward (current element is index=0)
75 // example: open_els = [a, b, c, d, e, f, g]
77 // "grows downwards" means it's visualized like this: (index: el "names")
79 // 6: g "start of the list", "topmost", "first"
81 // 4: e "previous" (to d), "above", "before"
82 // 3: d (previous/next are relative to this element)
83 // 2: c "next", "after", "lower", "below"
85 // 0: a "end of the list", "current node", "bottommost", "last"
88 var NS_HTML, NS_MATHML, NS_SVG, QUIRKS_LIMITED, QUIRKS_NO, QUIRKS_YES, TYPE_AAA_BOOKMARK, TYPE_AFE_MARKER, TYPE_COMMENT, TYPE_DOCTYPE, TYPE_END_TAG, TYPE_EOF, TYPE_START_TAG, TYPE_TAG, TYPE_TEXT, _decode_named_char_ref, adjust_foreign_attributes, adjust_mathml_attributes, adjust_svg_attributes, adp_els, alnum, context, debug_log, debug_log_each, debug_log_reset, decode_named_char_ref, decode_named_char_ref_cache, decode_named_char_ref_el, digits, el_is_special, el_is_special_not_adp, end_tag_implied, exports, foreign_attr_fixes, formatting_elements, foster_parenting_targets, from_code_point, g_debug_log, h_tags, hex_chars, is_html_integration, is_input_hidden_tok, is_lc_alpha, is_mathml_text_integration_point, is_space, is_space_tok, is_uc_alpha, lc_alpha, legacy_char_refs, mathml_elements, mathml_text_integration, new_aaa_bookmark, new_afe_marker, new_character_token, new_comment_token, new_doctype_token, new_element, new_end_tag, new_eof_token, new_open_tag, new_text_node, parse_html, prev_node_id, quirks_yes_pi_prefixes, space_chars, special_elements, svg_attribute_fixes, svg_elements, svg_name_fixes, tag_name_chars, uc_alpha, unicode_fixes, whitespace_chars
// Environment detection: the condition is true when a CommonJS-style `module`
// object with a non-null `exports` exists; the other visible statement creates
// an empty `window.peach_parser` object for use in a browser.
// NOTE(review): the assignments to `context` and `exports` that presumably sit
// between these lines are not visible in this excerpt — confirm before relying on them.
90 if ((typeof module) !== 'undefined' && (module.exports != null)) {
94 window.peach_parser = {}
// from_code_point(x): convert a numeric Unicode code point to a string.
// Uses the native String.fromCodePoint when the runtime provides it; otherwise
// falls back to String.fromCharCode, either with x directly or with a
// surrogate pair built from x (high unit based at 0xd800, low unit at 0xdc00).
// NOTE(review): the condition separating the two fallback returns, and any
// adjustment of x before the surrogate computation, are not visible here —
// presumably "x < 0x10000" and "x -= 0x10000"; confirm.
97 from_code_point = function (x) {
98 if (String.fromCodePoint != null) {
99 return String.fromCodePoint(x)
102 return String.fromCharCode(x)
105 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
109 // Each node is an object of the Node class. Here are the Node types:
// The four string-valued types below are the ones documented in the file
// header as possible values of Node.type in the returned tree.
110 TYPE_TAG = 'tag' // name, {attributes}, [children]
111 TYPE_TEXT = 'text' // "text"
112 TYPE_COMMENT = 'comment'
113 TYPE_DOCTYPE = 'doctype'
114 // the following types are emitted by the tokenizer, but shouldn't end up in the tree:
// (these are small integers, so they can never collide with the string types above)
115 TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
116 TYPE_END_TAG = 5 // name
118 TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
119 TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
121 // namespace constants
// NOTE(review): the NS_HTML / NS_MATHML / NS_SVG assignments are not visible in this excerpt.
126 // quirks mode constants
128 QUIRKS_LIMITED = 'limited'
131 // queue up debug logs, so eg they can be shown only for tests that fail
// debug_log_reset(): body not visible here; presumably empties g_debug_log — confirm.
133 debug_log_reset = function () {
// debug_log(str): append one message to the queued log (g_debug_log).
136 debug_log = function (str) {
137 g_debug_log.push(str)
// debug_log_each(cb): walk the queued messages from oldest to newest.
// NOTE(review): the use of `cb` inside the loop is not visible here;
// presumably it is called once per message.
139 debug_log_each = function (cb) {
141 for (i = 0; i < g_debug_log.length; ++i) {
// Node(type, args): constructor used for tree nodes and for tokenizer tokens.
//   type: one of the TYPE_* constants.
//   args: bag of initial field values. Each field below falls back to a
//         default when the matching entry is null/undefined. Some callers
//         (e.g. new_eof_token) omit args entirely, so a default for args is
//         presumably applied in lines not visible in this excerpt — confirm.
// NOTE(review): attrs_a is initialised from args.attr_k, not args.attrs_a; the
// two names disagree, so a caller passing `attrs_a` would be silently ignored.
147 function Node (type, args) {
151 this.type = type // one of the TYPE_* constants above
152 this.name = args.name != null ? args.name : '' // tag name
153 this.text = args.text != null ? args.text : '' // contents for text/comment nodes
154 this.attrs = args.attrs != null ? args.attrs : {}
155 this.children = args.children != null ? args.children : []
156 this.namespace = args.namespace != null ? args.namespace : NS_HTML
157 this.parent = args.parent != null ? args.parent : null
159 this.attrs_a = args.attr_k != null ? args.attr_k : [] // attrs in progress, TYPE_START_TAG only
160 this.token = args.token != null ? args.token : null
161 this.flags = args.flags != null ? args.flags : {}
// id: when an id is supplied the new node gets that id with "+" appended;
// the other visible assignment takes the next value of the prev_node_id
// counter, converted to a string.
162 if (args.id != null) {
163 this.id = args.id + "+"
165 this.id = "" + (++prev_node_id)
// Mark the self-closing flag as acknowledged by setting the 'did_self_close'
// flag to true: on this.token when a token is attached, and (in the other
// visible statement, presumably the else branch) on this node itself.
169 Node.prototype.acknowledge_self_closing = function () {
170 if (this.token != null) {
171 this.token.flag('did_self_close', true)
173 this.flag('did_self_close', true)
// flag(key, value): combined setter/getter for entries of this.flags.
// One path stores `value` under `key`; the other returns the stored value.
// NOTE(review): the condition choosing between the two paths is not visible
// here — presumably "value was supplied" selects the setter; confirm.
177 Node.prototype.flag = function (key, value) {
179 this.flags[key] = value
181 return this.flags[key]
185 // helpers: (only take args that are normally known when parser creates nodes)
// Each helper wraps `new Node(...)` with a fixed type so call sites stay short.
// start tag token with the given tag name
186 new_open_tag = function (name) {
187 return new Node(TYPE_START_TAG, {name: name})
// end tag token with the given tag name
189 new_end_tag = function (name) {
190 return new Node(TYPE_END_TAG, {name: name})
// element node (TYPE_TAG) with the given tag name
192 new_element = function (name) {
193 return new Node(TYPE_TAG, {name: name})
// text node holding `txt`
195 new_text_node = function (txt) {
196 return new Node(TYPE_TEXT, {text: txt})
// character tokens are represented exactly like text nodes (same function)
198 new_character_token = new_text_node
// comment token/node holding `txt`
199 new_comment_token = function (txt) {
200 return new Node(TYPE_COMMENT, {text: txt})
// doctype token/node with the given name
202 new_doctype_token = function (name) {
203 return new Node(TYPE_DOCTYPE, {name: name})
// end-of-file token (no args passed to Node)
205 new_eof_token = function () {
206 return new Node(TYPE_EOF)
// marker entry for the list of active formatting elements
208 new_afe_marker = function () {
209 return new Node(TYPE_AFE_MARKER)
// bookmark entry used by the adoption agency algorithm
211 new_aaa_bookmark = function () {
212 return new Node(TYPE_AAA_BOOKMARK)
// Character classes, stored as plain strings and tested with indexOf().
215 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
216 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
217 digits = "0123456789"
218 alnum = lc_alpha + uc_alpha + digits
219 hex_chars = digits + "abcdefABCDEF"
// is_uc_alpha(str): true only for a single ASCII upper-case letter.
// The length test matters: indexOf would also match multi-character
// substrings such as "AB" and the empty string.
221 is_uc_alpha = function (str) {
222 return str.length === 1 && uc_alpha.indexOf(str) > -1
// is_lc_alpha(str): true only for a single ASCII lower-case letter.
224 is_lc_alpha = function (str) {
225 return str.length === 1 && lc_alpha.indexOf(str) > -1
228 // some SVG elements have dashes in them
229 tag_name_chars = alnum + "-"
231 // http://www.w3.org/TR/html5/infrastructure.html#space-character
// tab, line feed, form feed, carriage return, space
232 space_chars = "\u0009\u000a\u000c\u000d\u0020"
// is_space(txt): true when txt is exactly one of the space characters above.
233 is_space = function (txt) {
234 return txt.length === 1 && space_chars.indexOf(txt) > -1
// is_space_tok(t): true when t is a text token whose text is exactly one
// space character.
236 is_space_tok = function (t) {
237 return t.type === TYPE_TEXT && t.text.length === 1 && space_chars.indexOf(t.text) > -1
// is_input_hidden_tok(t): inspects a start tag token's in-progress attribute
// list (t.attrs_a, entries are [name, value]) for a `type` attribute whose
// value equals "hidden", compared case-insensitively.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably true on a match and false otherwise (including for tokens that
// are not start tags) — confirm.
240 is_input_hidden_tok = function (t) {
242 if (t.type !== TYPE_START_TAG) {
245 for (i = 0; i < t.attrs_a.length; ++i) {
247 if (a[0] === 'type') {
248 if (a[1].toLowerCase() === 'hidden') {
257 // https://en.wikipedia.org/wiki/Whitespace_character#Unicode
// Wider set than space_chars: in addition to the ASCII controls and space it
// lists U+000B, U+0085, U+00A0 and the Unicode space/separator characters
// written out below.
258 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
// unicode_fixes: numeric code point -> replacement string.
// 0x00 maps to U+FFFD (replacement character); the 0x80-0x9F entries map to
// the characters that Windows-1252 assigns to those byte values.
// NOTE(review): the statement creating the table and the code consuming it
// are not visible here; presumably used when decoding numeric character
// references — confirm.
261 unicode_fixes[0x00] = "\uFFFD"
262 unicode_fixes[0x80] = "\u20AC"
263 unicode_fixes[0x82] = "\u201A"
264 unicode_fixes[0x83] = "\u0192"
265 unicode_fixes[0x84] = "\u201E"
266 unicode_fixes[0x85] = "\u2026"
267 unicode_fixes[0x86] = "\u2020"
268 unicode_fixes[0x87] = "\u2021"
269 unicode_fixes[0x88] = "\u02C6"
270 unicode_fixes[0x89] = "\u2030"
271 unicode_fixes[0x8A] = "\u0160"
272 unicode_fixes[0x8B] = "\u2039"
273 unicode_fixes[0x8C] = "\u0152"
274 unicode_fixes[0x8E] = "\u017D"
275 unicode_fixes[0x91] = "\u2018"
276 unicode_fixes[0x92] = "\u2019"
277 unicode_fixes[0x93] = "\u201C"
278 unicode_fixes[0x94] = "\u201D"
279 unicode_fixes[0x95] = "\u2022"
280 unicode_fixes[0x96] = "\u2013"
281 unicode_fixes[0x97] = "\u2014"
282 unicode_fixes[0x98] = "\u02DC"
283 unicode_fixes[0x99] = "\u2122"
284 unicode_fixes[0x9A] = "\u0161"
285 unicode_fixes[0x9B] = "\u203A"
286 unicode_fixes[0x9C] = "\u0153"
287 unicode_fixes[0x9E] = "\u017E"
288 unicode_fixes[0x9F] = "\u0178"
// quirks_yes_pi_prefixes: list of lower-case DOCTYPE public identifier
// prefixes. Every entry ends in "//", i.e. it is a prefix, not a full id.
// NOTE(review): the consumer is not visible in this excerpt; the name and the
// is_quirks_yes_doctype helper declared in parse_html suggest that a public
// identifier starting with one of these selects full quirks mode — confirm,
// and confirm the identifier is lower-cased before comparison.
290 quirks_yes_pi_prefixes = [
291 "+//silmaril//dtd html pro v0r11 19970101//",
292 "-//as//dtd html 3.0 aswedit + extensions//",
293 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
294 "-//ietf//dtd html 2.0 level 1//",
295 "-//ietf//dtd html 2.0 level 2//",
296 "-//ietf//dtd html 2.0 strict level 1//",
297 "-//ietf//dtd html 2.0 strict level 2//",
298 "-//ietf//dtd html 2.0 strict//",
299 "-//ietf//dtd html 2.0//",
300 "-//ietf//dtd html 2.1e//",
301 "-//ietf//dtd html 3.0//",
302 "-//ietf//dtd html 3.2 final//",
303 "-//ietf//dtd html 3.2//",
304 "-//ietf//dtd html 3//",
305 "-//ietf//dtd html level 0//",
306 "-//ietf//dtd html level 1//",
307 "-//ietf//dtd html level 2//",
308 "-//ietf//dtd html level 3//",
309 "-//ietf//dtd html strict level 0//",
310 "-//ietf//dtd html strict level 1//",
311 "-//ietf//dtd html strict level 2//",
312 "-//ietf//dtd html strict level 3//",
313 "-//ietf//dtd html strict//",
314 "-//ietf//dtd html//",
315 "-//metrius//dtd metrius presentational//",
316 "-//microsoft//dtd internet explorer 2.0 html strict//",
317 "-//microsoft//dtd internet explorer 2.0 html//",
318 "-//microsoft//dtd internet explorer 2.0 tables//",
319 "-//microsoft//dtd internet explorer 3.0 html strict//",
320 "-//microsoft//dtd internet explorer 3.0 html//",
321 "-//microsoft//dtd internet explorer 3.0 tables//",
322 "-//netscape comm. corp.//dtd html//",
323 "-//netscape comm. corp.//dtd strict html//",
324 "-//o'reilly and associates//dtd html 2.0//",
325 "-//o'reilly and associates//dtd html extended 1.0//",
326 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
327 "-//sq//dtd html 2.0 hotmetal + extensions//",
328 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
329 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
330 "-//spyglass//dtd html 2.0 extended//",
331 "-//sun microsystems corp.//dtd hotjava html//",
332 "-//sun microsystems corp.//dtd hotjava strict html//",
333 "-//w3c//dtd html 3 1995-03-24//",
334 "-//w3c//dtd html 3.2 draft//",
335 "-//w3c//dtd html 3.2 final//",
336 "-//w3c//dtd html 3.2//",
337 "-//w3c//dtd html 3.2s draft//",
338 "-//w3c//dtd html 4.0 frameset//",
339 "-//w3c//dtd html 4.0 transitional//",
340 "-//w3c//dtd html experimental 19960712//",
341 "-//w3c//dtd html experimental 970421//",
342 "-//w3c//dtd w3 html//",
343 "-//w3o//dtd w3 html 3.0//",
344 "-//webtechs//dtd mozilla html 2.0//",
345 "-//webtechs//dtd mozilla html//",
348 // These are the character references that don't need a terminating semicolon
349 // min length: 2, max: 6, none are a prefix of any other.
351 Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
352 aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
353 aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
354 Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
355 curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
356 ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
357 euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
358 Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
359 igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
360 lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
361 Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
362 Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
363 Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
364 pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
365 shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
366 times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
367 ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
371 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
372 //raw_text_elements = ['script', 'style']
373 //escapable_raw_text_elements = ['textarea', 'title']
374 // http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
376 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
377 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
378 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
379 'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
380 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
381 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
382 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
383 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
384 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
385 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
386 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
387 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
388 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
389 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
393 // http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
395 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
396 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
397 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
398 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
399 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
400 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
401 'determinant', 'diff', 'divergence', 'divide', 'domain',
402 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
403 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
404 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
405 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
406 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
407 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
408 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
409 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
410 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
411 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
412 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
413 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
414 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
415 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
416 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
417 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
418 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
419 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
420 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
421 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
422 'vectorproduct', 'xor'
424 // foreign_elements = [svg_elements..., mathml_elements...]
425 //normal_elements = All other allowed HTML elements are normal elements.
429 address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
430 aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
431 blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
432 caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
433 details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
434 embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
435 footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
436 h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
437 header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
438 img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
439 listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,
441 menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these
443 meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
444 noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
445 plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
446 select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
447 table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
448 textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
449 tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,
452 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
453 'annotation-xml': NS_MATHML,
456 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
// formatting_elements: set (name -> true) of tag names treated as formatting
// elements. Keys are names only; no namespace is recorded in this table.
// NOTE(review): the end of the table is not visible in this excerpt.
459 formatting_elements = {
460 a: true, b: true, big: true, code: true, em: true, font: true, i: true,
461 nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
// mathml_text_integration: tag name -> namespace for the MathML elements that
// act as text integration points.
465 mathml_text_integration = {
466 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
// is_mathml_text_integration_point(el): true when el's name is in the table
// above AND el is in the namespace recorded there. Names missing from the
// table yield undefined, which never equals a namespace constant.
468 is_mathml_text_integration_point = function (el) {
469 return mathml_text_integration[el.name] === el.namespace
// is_html_integration(el): tests whether an element is an HTML integration
// point. Two families are examined:
//   - MathML `annotation-xml` with an `encoding` attribute equal (ignoring
//     case) to "text/html" or "application/xhtml+xml"
//   - SVG `foreignObject`, `desc` or `title`
// Reads el.attrs (the finished attribute object), which is why a token must
// not be passed.
// NOTE(review): return statements are not visible in this excerpt; presumably
// true inside the matching branches and false otherwise.
471 is_html_integration = function (el) { // DON'T PASS A TOKEN
472 if (el.namespace === NS_MATHML) {
473 if (el.name === 'annotation-xml') {
474 if (el.attrs.encoding != null) {
475 if (el.attrs.encoding.toLowerCase() === 'text/html') {
478 if (el.attrs.encoding.toLowerCase() === 'application/xhtml+xml') {
485 if (el.namespace === NS_SVG) {
486 if (el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title') {
494 h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
497 foster_parenting_targets = {
// el_is_special(e): true when e's name is listed in special_elements with the
// same namespace as e.
518 el_is_special = function (e) {
519 return special_elements[e.name] === e.namespace
// HTML address, div and p: the elements excluded by el_is_special_not_adp.
522 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
// el_is_special_not_adp(el): like el_is_special, but false for the three
// elements in adp_els.
523 el_is_special_not_adp = function (el) {
524 return special_elements[el.name] === el.namespace && adp_els[el.name] !== el.namespace
528 altglyph: 'altGlyph',
529 altglyphdef: 'altGlyphDef',
530 altglyphitem: 'altGlyphItem',
531 animatecolor: 'animateColor',
532 animatemotion: 'animateMotion',
533 animatetransform: 'animateTransform',
534 clippath: 'clipPath',
536 fecolormatrix: 'feColorMatrix',
537 fecomponenttransfer: 'feComponentTransfer',
538 fecomposite: 'feComposite',
539 feconvolvematrix: 'feConvolveMatrix',
540 fediffuselighting: 'feDiffuseLighting',
541 fedisplacementmap: 'feDisplacementMap',
542 fedistantlight: 'feDistantLight',
543 fedropshadow: 'feDropShadow',
549 fegaussianblur: 'feGaussianBlur',
552 femergenode: 'feMergeNode',
553 femorphology: 'feMorphology',
554 feoffset: 'feOffset',
555 fepointlight: 'fePointLight',
556 fespecularlighting: 'feSpecularLighting',
557 fespotlight: 'feSpotLight',
559 feturbulence: 'feTurbulence',
560 foreignobject: 'foreignObject',
561 glyphref: 'glyphRef',
562 lineargradient: 'linearGradient',
563 radialgradient: 'radialGradient',
// svg_attribute_fixes: lower-case attribute name -> mixed-case SVG spelling.
// Looked up by adjust_svg_attributes to rewrite attribute names on SVG
// start tags.
// NOTE(review): some entries of the table are not visible in this excerpt.
566 svg_attribute_fixes = {
567 attributename: 'attributeName',
568 attributetype: 'attributeType',
569 basefrequency: 'baseFrequency',
570 baseprofile: 'baseProfile',
571 calcmode: 'calcMode',
572 clippathunits: 'clipPathUnits',
573 contentscripttype: 'contentScriptType',
574 contentstyletype: 'contentStyleType',
575 diffuseconstant: 'diffuseConstant',
576 edgemode: 'edgeMode',
577 externalresourcesrequired: 'externalResourcesRequired',
578 // WHATWG removes this: filterres: 'filterRes',
579 filterunits: 'filterUnits',
580 glyphref: 'glyphRef',
581 gradienttransform: 'gradientTransform',
582 gradientunits: 'gradientUnits',
583 kernelmatrix: 'kernelMatrix',
584 kernelunitlength: 'kernelUnitLength',
585 keypoints: 'keyPoints',
586 keysplines: 'keySplines',
587 keytimes: 'keyTimes',
588 lengthadjust: 'lengthAdjust',
589 limitingconeangle: 'limitingConeAngle',
590 markerheight: 'markerHeight',
591 markerunits: 'markerUnits',
592 markerwidth: 'markerWidth',
593 maskcontentunits: 'maskContentUnits',
594 maskunits: 'maskUnits',
595 numoctaves: 'numOctaves',
596 pathlength: 'pathLength',
597 patterncontentunits: 'patternContentUnits',
598 patterntransform: 'patternTransform',
599 patternunits: 'patternUnits',
600 pointsatx: 'pointsAtX',
601 pointsaty: 'pointsAtY',
602 pointsatz: 'pointsAtZ',
603 preservealpha: 'preserveAlpha',
604 preserveaspectratio: 'preserveAspectRatio',
605 primitiveunits: 'primitiveUnits',
608 repeatcount: 'repeatCount',
609 repeatdur: 'repeatDur',
610 requiredextensions: 'requiredExtensions',
611 requiredfeatures: 'requiredFeatures',
612 specularconstant: 'specularConstant',
613 specularexponent: 'specularExponent',
614 spreadmethod: 'spreadMethod',
615 startoffset: 'startOffset',
616 stddeviation: 'stdDeviation',
617 stitchtiles: 'stitchTiles',
618 surfacescale: 'surfaceScale',
619 systemlanguage: 'systemLanguage',
620 tablevalues: 'tableValues',
623 textlength: 'textLength',
625 viewtarget: 'viewTarget',
626 xchannelselector: 'xChannelSelector',
627 ychannelselector: 'yChannelSelector',
628 zoomandpan: 'zoomAndPan'
// foreign_attr_fixes: attribute name as written ("prefix:local") -> the name
// this parser stores for it ("prefix local", colon replaced by a space).
// Looked up by adjust_foreign_attributes.
// NOTE(review): at least one entry of the table is not visible in this excerpt.
630 foreign_attr_fixes = {
631 'xlink:actuate': 'xlink actuate',
632 'xlink:arcrole': 'xlink arcrole',
633 'xlink:href': 'xlink href',
634 'xlink:role': 'xlink role',
635 'xlink:show': 'xlink show',
636 'xlink:title': 'xlink title',
637 'xlink:type': 'xlink type',
638 'xml:base': 'xml base',
639 'xml:lang': 'xml lang',
640 'xml:space': 'xml space',
642 'xmlns:xlink': 'xmlns xlink'
// adjust_mathml_attributes(t): walk the in-progress attribute list of start
// tag token t (t.attrs_a, entries are [name, value]) and rename the attribute
// `definitionurl` to `definitionURL` in place.
644 adjust_mathml_attributes = function (t) {
646 for (i = 0; i < t.attrs_a.length; ++i) {
648 if (a[0] === 'definitionurl') {
649 a[0] = 'definitionURL'
// adjust_svg_attributes(t): rewrite, in place, the attribute names of start
// tag token t that have an entry in svg_attribute_fixes, replacing the
// lower-case name with its mixed-case SVG spelling.
//   t: token whose attrs_a is an array of [name, value] pairs
//   returns: nothing (t.attrs_a entries are mutated)
//
// Only the table's OWN properties count as fixes. A plain
// `svg_attribute_fixes[name] != null` test also matches members inherited
// from Object.prototype, so an attribute named "constructor" or "__proto__"
// would have its name replaced by a function/object instead of being kept.
adjust_svg_attributes = function (t) {
  var a, i
  for (i = 0; i < t.attrs_a.length; ++i) {
    a = t.attrs_a[i]
    if (Object.prototype.hasOwnProperty.call(svg_attribute_fixes, a[0])) {
      a[0] = svg_attribute_fixes[a[0]]
    }
  }
}
// adjust_foreign_attributes(t): rewrite, in place, the attribute names of
// start tag token t that have an entry in foreign_attr_fixes (for example
// 'xlink:href' becomes 'xlink href').
//   t: token whose attrs_a is an array of [name, value] pairs
//   returns: nothing (t.attrs_a entries are mutated)
//
// Only the table's OWN properties count as fixes; testing
// `foreign_attr_fixes[name] != null` would also match members inherited from
// Object.prototype (e.g. "constructor", "__proto__") and replace the
// attribute name with a non-string value.
adjust_foreign_attributes = function (t) {
  var a, i
  for (i = 0; i < t.attrs_a.length; ++i) {
    a = t.attrs_a[i]
    if (Object.prototype.hasOwnProperty.call(foreign_attr_fixes, a[0])) {
      a[0] = foreign_attr_fixes[a[0]]
    }
  }
}
673 // decode_named_char_ref()
675 // The list of named character references is _huge_ so if we're running in a
676 // browser, we get the browser to decode them, rather than increasing the code
677 // size to include the table.
// Two implementations of _decode_named_char_ref(txt) are selected here:
//   - context === 'module': loaded from ./parser_no_browser_helper.js
//   - otherwise: a DOM-based version that writes "&name;" into the innerHTML
//     of a detached <textarea> and reads the decoded text back from .value
// NOTE(review): the statements following the `decoded === txt` test are not
// visible in this excerpt; an unchanged value presumably means the browser did
// not recognise the reference and some "not found" result is returned — confirm.
678 if (context === 'module') {
679 _decode_named_char_ref = require('./parser_no_browser_helper.js')
681 decode_named_char_ref_el = document.createElement('textarea')
682 _decode_named_char_ref = function (txt) {
684 txt = "&" + txt + ";"
685 decode_named_char_ref_el.innerHTML = txt
686 decoded = decode_named_char_ref_el.value
687 if (decoded === txt) {
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// decode_named_char_ref(txt): memoising front end for _decode_named_char_ref.
//   txt: entity name without "&" and ";"
//   returns: whatever _decode_named_char_ref(txt) returned the first time it
//            was asked about this name
//
// The cache has no prototype on purpose: with a plain {} a lookup such as
// cache["constructor"] finds Object.prototype.constructor and would be handed
// back as if it were decoded text. The `in` test also lets "not found"
// results be served from the cache instead of being recomputed every time.
decode_named_char_ref_cache = Object.create(null)
decode_named_char_ref = function (txt) {
  var decoded
  if (txt in decode_named_char_ref_cache) {
    return decode_named_char_ref_cache[txt]
  }
  decoded = _decode_named_char_ref(txt)
  return decode_named_char_ref_cache[txt] = decoded
}
707 parse_html = function (args_html, args) {
708 var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, template_tag_is_open, 
temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
// Parser state shared by the closures defined further down in parse_html.
// Everything visible here starts out as null.
// NOTE(review): where these receive their real values is not visible in this
// excerpt (presumably parse_init) — confirm.
713 cur = null // index of next char in txt to be parsed
714 // declare doc and tokenizer variables so they're in scope below
716 open_els = null // stack of open elements
717 afe = null // active formatting elements
718 template_ins_modes = null
720 original_ins_mode = null
722 tok_cur_tag = null // partially parsed tag
723 flag_scripting = null
724 flag_frameset_ok = null
726 flag_foster_parenting = null
727 form_element_pointer = null
728 temporary_buffer = null
729 pending_table_character_tokens = null
730 head_element_pointer = null
731 flag_fragment_parsing = null
732 context_element = null
// stop_parsing(): body not visible in this excerpt.
734 stop_parsing = function () {
// parse_error(): reports a parse error. The caller-supplied args.error_cb is
// optional; it is only consulted when present.
// NOTE(review): what is passed to error_cb, and what happens when it is
// absent, is not visible here.
738 parse_error = function () {
739 if (args.error_cb != null) {
744 // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
745 // "Noah's Ark clause" but with three
// afe_push(new_el): add new_el to the list of active formatting elements (afe).
// Before adding, the list is scanned from index 0 until a marker is reached,
// looking at entries with the same name and namespace as new_el. Attribute
// sets are compared in both directions (every attr of el against new_el, then
// every attr of new_el against el) so that only exact matches count.
// NOTE(review): the counting of matches, the removal of the surplus entry and
// the final insertion are not visible in this excerpt.
746 afe_push = function (new_el) {
747 var attrs_match, el, i, j, k, matches, v
749 for (i = 0; i < afe.length; ++i) {
751 if (el.type === TYPE_AFE_MARKER) {
754 if (el.name === new_el.name && el.namespace === new_el.namespace) {
756 for (k in el.attrs) {
758 if (new_el.attrs[k] !== v) {
764 for (k in new_el.attrs) {
766 if (el.attrs[k] !== v) {
// afe_push_marker(): insert a fresh marker at index 0 of the list of active
// formatting elements (index 0 is the most recent entry, see the stack notes
// at the top of the file).
784 afe_push_marker = function () {
785 afe.unshift(new_afe_marker())
788 // the functions below implement the Tree Construction algorithm
789 // http://www.w3.org/TR/html5/syntax.html#tree-construction
791 // But first... the helpers
// template_tag_is_open(): scan the whole stack of open elements for an HTML
// `template` element.
// NOTE(review): return statements are not visible here; presumably true when
// one is found, false otherwise.
792 template_tag_is_open = function () {
794 for (i = 0; i < open_els.length; ++i) {
796 if (el.name === 'template' && el.namespace === NS_HTML) {
// is_in_scope_x(tag_name, scope, namespace): walk the stack of open elements
// starting at index 0 (the current node). An element named tag_name matches
// when namespace is null (any namespace) or equals the element's namespace.
// An element listed in `scope` (name -> namespace table) with the same
// namespace is a scope boundary.
// NOTE(review): return statements are not visible in this excerpt; presumably
// true on a match and false on reaching a boundary.
802 is_in_scope_x = function (tag_name, scope, namespace) {
804 for (i = 0; i < open_els.length; ++i) {
806 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
809 if (scope[el.name] === el.namespace) {
// is_in_scope_x_y(tag_name, scope, scope2, namespace): same walk, but with
// two boundary tables; an element in either one ends the search.
815 is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
817 for (i = 0; i < open_els.length; ++i) {
819 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
822 if (scope[el.name] === el.namespace) {
825 if (scope2[el.name] === el.namespace) {
832 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
833 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
836 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
837 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
839 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
// Extra scope boundaries (name -> namespace) used by the scope helpers:
// button_scopers and li_scopers are combined with standard_scopers by
// is_in_button_scope / is_in_li_scope; table_scopers is used on its own by
// is_in_table_scope.
841 button_scopers = { button: NS_HTML }
842 li_scopers = { ol: NS_HTML, ul: NS_HTML }
843 table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
844 is_in_scope = function (tag_name, namespace) {
845 if (namespace == null) {
848 return is_in_scope_x(tag_name, standard_scopers, namespace)
850 is_in_button_scope = function (tag_name, namespace) {
851 if (namespace == null) {
854 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
// Scope test for tag_name whose only boundaries are those in table_scopers
// (html, table, template). namespace is optional; a null/undefined value takes
// the branch below (presumably normalised to null -- TODO confirm).
856 is_in_table_scope = function (tag_name, namespace) {
857 if (namespace == null) {
860 return is_in_scope_x(tag_name, table_scopers, namespace)
862 // aka is_in_list_item_scope
// Scope test for tag_name whose boundaries are standard_scopers plus
// li_scopers (ol, ul). namespace is optional; a null/undefined value takes the
// branch below (presumably normalised to null -- TODO confirm).
863 is_in_li_scope = function (tag_name, namespace) {
864 if (namespace == null) {
867 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
// Select-scope test: walks open_els from index 0 looking for an entry whose
// name is tag_name (and whose namespace matches, unless namespace is null).
// Unlike the other scope helpers this one does not use a boundary table; the
// boundary rule is written inline below.
869 is_in_select_scope = function (tag_name, namespace) {
871 if (namespace == null) {
874 for (i = 0; i < open_els.length; ++i) {
876 if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
// NOTE(review): as written this condition is only true for non-HTML elements
// that are not named optgroup/option, so an HTML element such as <div> never
// acts as a boundary. The spec's select scope treats every element other than
// HTML optgroup/option as a boundary (which would need `||` after the
// namespace test) -- confirm which behavior is intended.
879 if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
885 // this checks for a particular element, not by name
886 // this requires a namespace match
// Scope test for one specific element object (needle) rather than a tag name.
// Walks open_els from index 0; an entry whose name maps to its own namespace
// in standard_scopers is a scope boundary.
// NOTE(review): presumably returns true when needle is reached before a
// boundary and false otherwise -- confirm against the return statements.
887 el_is_in_scope = function (needle) {
889 for (i = 0; i < open_els.length; ++i) {
894 if (standard_scopers[el.name] === el.namespace) {
// Lookup table of element names at which clear_stack_to_table_context stops.
901 clear_to_table_stopers = {
// Inspects the current node (open_els[0]) and tests whether its name is a key
// of clear_to_table_stopers.
// NOTE(review): only the name is looked up here (`!= null`); the element's
// namespace is not compared, unlike clear_stack_to_table_body_context.
// Presumably entries are popped until this test passes -- TODO confirm.
906 clear_stack_to_table_context = function () {
908 if (clear_to_table_stopers[open_els[0].name] != null) {
// Lookup table (element name -> namespace) of the elements at which
// clear_stack_to_table_body_context stops.
914 clear_to_table_body_stopers = {
// Inspects the current node (open_els[0]) and tests whether the table maps
// its name to its own namespace.
// Presumably entries are popped until this test passes -- TODO confirm.
921 clear_stack_to_table_body_context = function () {
923 if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
// Lookup table of element names at which clear_stack_to_table_row_context
// stops.
929 clear_to_table_row_stopers = {
// Inspects the current node (open_els[0]) and tests whether its name is a key
// of clear_to_table_row_stopers.
// NOTE(review): only the name is looked up here (`!= null`); the element's
// namespace is not compared, unlike clear_stack_to_table_body_context.
// Presumably entries are popped until this test passes -- TODO confirm.
934 clear_stack_to_table_row_context = function () {
936 if (clear_to_table_row_stopers[open_els[0].name] != null) {
// Clears entries of afe (the list of active formatting elements) up to a
// marker entry (type TYPE_AFE_MARKER).
// NOTE(review): presumably entries are removed from the front of afe until a
// marker has been removed -- confirm against the loop body.
942 clear_afe_to_marker = function () {
// guard: afe can run out of entries before a marker is found
945 if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
949 if (el.type === TYPE_AFE_MARKER) {
956 // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
// Sets ins_mode (the current insertion-mode handler) by walking open_els from
// the current node towards the root and choosing a mode from the first node
// that matches one of the numbered spec steps quoted below. Takes no
// arguments; reads open_els, flag_fragment_parsing, context_element,
// template_ins_modes and head_element_pointer.
// open_els keeps the current node at index 0, so "the first node in the
// stack" in the spec text is index open_els.length - 1 here.
957 reset_ins_mode = function () {
958 var ancestor, ancestor_i, last, node, node_i
959 // 1. Let last be false.
961 // 2. Let node be the last node in the stack of open elements.
963 node = open_els[node_i]
964 // 3. Loop: If node is the first node in the stack of open elements,
965 // then set last to true, and, if the parser was originally created as
966 // part of the HTML fragment parsing algorithm (fragment case) set node
967 // to the context element.
969 if (node_i === open_els.length - 1) {
971 if (flag_fragment_parsing) {
972 node = context_element
975 // 4. If node is a select element, run these substeps:
976 if (node.name === 'select' && node.namespace === NS_HTML) {
977 // 1. If last is true, jump to the step below labeled done.
979 // 2. Let ancestor be node.
982 // 3. Loop: If ancestor is the first node in the stack of
983 // open elements, jump to the step below labeled done.
985 if (ancestor_i === open_els.length - 1) {
988 // 4. Let ancestor be the node before ancestor in the stack
991 ancestor = open_els[ancestor_i]
992 // 5. If ancestor is a template node, jump to the step below
994 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
997 // 6. If ancestor is a table node, switch the insertion mode
998 // to "in select in table" and abort these steps.
999 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
1000 ins_mode = ins_mode_in_select_in_table
1003 // 7. Jump back to the step labeled loop.
1006 // 8. Done: Switch the insertion mode to "in select" and abort
1008 ins_mode = ins_mode_in_select
1011 // 5. If node is a td or th element and last is false, then switch
1012 // the insertion mode to "in cell" and abort these steps.
1013 if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1014 ins_mode = ins_mode_in_cell
1017 // 6. If node is a tr element, then switch the insertion mode to "in
1018 // row" and abort these steps.
1019 if (node.name === 'tr' && node.namespace === NS_HTML) {
1020 ins_mode = ins_mode_in_row
1023 // 7. If node is a tbody, thead, or tfoot element, then switch the
1024 // insertion mode to "in table body" and abort these steps.
1025 if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1026 ins_mode = ins_mode_in_table_body
1029 // 8. If node is a caption element, then switch the insertion mode
1030 // to "in caption" and abort these steps.
1031 if (node.name === 'caption' && node.namespace === NS_HTML) {
1032 ins_mode = ins_mode_in_caption
1035 // 9. If node is a colgroup element, then switch the insertion mode
1036 // to "in column group" and abort these steps.
1037 if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1038 ins_mode = ins_mode_in_column_group
1041 // 10. If node is a table element, then switch the insertion mode to
1042 // "in table" and abort these steps.
1043 if (node.name === 'table' && node.namespace === NS_HTML) {
1044 ins_mode = ins_mode_in_table
1047 // 11. If node is a template element, then switch the insertion mode
1048 // to the current template insertion mode and abort these steps.
1049 if (node.name === 'template' && node.namespace === NS_HTML) {
// the current template insertion mode is kept at index 0 of template_ins_modes
1050 ins_mode = template_ins_modes[0]
1053 // 12. If node is a head element and last is true, then switch the
1054 // insertion mode to "in body" ("in body"! not "in head"!) and abort
1055 // these steps. (fragment case)
1056 if (node.name === 'head' && node.namespace === NS_HTML && last) {
1057 ins_mode = ins_mode_in_body
1060 // 13. If node is a head element and last is false, then switch the
1061 // insertion mode to "in head" and abort these steps.
1062 if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1063 ins_mode = ins_mode_in_head
1066 // 14. If node is a body element, then switch the insertion mode to
1067 // "in body" and abort these steps.
1068 if (node.name === 'body' && node.namespace === NS_HTML) {
1069 ins_mode = ins_mode_in_body
1072 // 15. If node is a frameset element, then switch the insertion mode
1073 // to "in frameset" and abort these steps. (fragment case)
1074 if (node.name === 'frameset' && node.namespace === NS_HTML) {
1075 ins_mode = ins_mode_in_frameset
1078 // 16. If node is an html element, run these substeps:
1079 if (node.name === 'html' && node.namespace === NS_HTML) {
1080 // 1. If the head element pointer is null, switch the insertion
1081 // mode to "before head" and abort these steps. (fragment case)
1082 if (head_element_pointer === null) {
1083 ins_mode = ins_mode_before_head
1085 // 2. Otherwise, the head element pointer is not null,
1086 // switch the insertion mode to "after head" and abort these
1088 ins_mode = ins_mode_after_head
1092 // 17. If last is true, then switch the insertion mode to "in body"
1093 // and abort these steps. (fragment case)
1095 ins_mode = ins_mode_in_body
1098 // 18. Let node now be the node before node in the stack of open
1101 node = open_els[node_i]
1102 // 19. Return to the step labeled loop.
1108 // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
// Returns context_element when parsing a fragment and open_els holds exactly
// one entry. NOTE(review): presumably returns the current node (open_els[0])
// in every other case -- confirm against the fall-through return.
1109 adjusted_current_node = function () {
1110 if (open_els.length === 1 && flag_fragment_parsing) {
1111 return context_element
1116 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1117 // this implementation is structured (mostly) as described at the link above.
1118 // capitalized comments are the "labels" described at the link above.
// Reconstructs the active formatting elements: entries of afe that are
// neither markers nor present in open_els are re-created by passing their
// original token to insert_html_element(). afe[0] is the most recent entry
// and is examined first.
1119 reconstruct_afe = function () {
// nothing to reconstruct when the list is empty
1121 if (afe.length === 0) {
// nothing to do either when the newest entry is a marker or is still open
1124 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
// afe.length - 1 is the oldest entry, i.e. the far end of the list
1130 if (i === afe.length - 1) {
1134 if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
// re-create the element from the token stored on the old entry
1141 el = insert_html_element(afe[i].token)
1150 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1151 // adoption agency algorithm
1153 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1154 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1155 // http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
// Runs the adoption agency algorithm for an end tag.
//   subject: tag name (string) of the end tag being processed
// Works on open_els (current node at index 0) and afe (newest entry at
// index 0); when no matching formatting element is found it delegates to
// in_body_any_other_end_tag(subject). The numbered comments quote the spec
// steps being implemented.
// NOTE(review): the var list below declares many names (aa..af, len..len17,
// single letters, ...) that do not appear to be referenced -- candidates for
// the file-level "remove unused variables" todo; confirm before pruning.
1156 adoption_agency = function (subject) {
1157 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, s, t, u, w, y, z
1158 // this block implements the W3C spec
1159 // # 1. If the current node is an HTML element whose tag name is subject,
1160 // # then run these substeps:
1162 // # 1. Let element be the current node.
1164 // # 2. Pop element off the stack of open elements.
1166 // # 3. If element is also in the list of active formatting elements,
1167 // # remove the element from the list.
1169 // # 4. Abort the adoption agency algorithm.
1170 // if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1171 // el = open_els.shift()
1172 // # remove it from the list of active formatting elements (if found)
1178 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1179 // If the current node is an HTML element whose tag name is subject, and
1180 // the current node is not in the list of active formatting elements,
1181 // then pop the current node off the stack of open elements, and abort
1183 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1184 // remove it from the list of active formatting elements (if found)
1186 for (i = 0; i < afe.length; ++i) {
1188 if (el === open_els[0]) {
1206 // 5. Let formatting element be the last element in the list of
1207 // active formatting elements that: is between the end of the list
1208 // and the last scope marker in the list, if any, or the start of
1209 // the list otherwise, and has the tag name subject.
// fe_of_afe ends up as the index of the formatting element within afe
1211 for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1213 if (t.type === TYPE_AFE_MARKER) {
1216 if (t.name === subject) {
1221 // If there is no such element, then abort these steps and instead
1222 // act as described in the "any other end tag" entry above.
1224 in_body_any_other_end_tag(subject)
1227 // 6. If formatting element is not in the stack of open elements,
1228 // then this is a parse error; remove the element from the list, and
1229 // abort these steps.
// fe_of_open_els ends up as the index of the formatting element in open_els
1231 for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1232 t = open_els[fe_of_open_els]
1240 // "remove it from the list" must mean afe, since it's not in open_els
1241 afe.splice(fe_of_afe, 1)
1244 // 7. If formatting element is in the stack of open elements, but
1245 // the element is not in scope, then this is a parse error; abort
1247 if (!el_is_in_scope(fe)) {
1251 // 8. If formatting element is not the current node, this is a parse
1252 // error. (But do not abort these steps.)
1253 if (open_els[0] !== fe) {
1257 // 9. Let furthest block be the topmost node in the stack of open
1258 // elements that is lower in the stack than formatting element, and
1259 // is an element in the special category. There might not be one.
1261 fb_of_open_els = null
1262 for (i = 0; i < open_els.length; ++i) {
1267 if (el_is_special(t)) {
1270 // and continue, to see if there's one that's more "topmost"
1273 // 10. If there is no furthest block, then the UA must first pop all
1274 // the nodes from the bottom of the stack of open elements, from the
1275 // current node up to and including formatting element, then remove
1276 // formatting element from the list of active formatting elements,
1277 // and finally abort these steps.
1280 t = open_els.shift()
1282 afe.splice(fe_of_afe, 1)
1287 // 11. Let common ancestor be the element immediately above
1288 // formatting element in the stack of open elements.
// "above" in the spec is the next higher index, because index 0 is the
// current node
1289 ca = open_els[fe_of_open_els + 1] // common ancestor
1291 node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1292 // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
// the bookmark is a placeholder entry spliced into afe itself
1293 bookmark = new_aaa_bookmark()
1294 for (i = 0; i < afe.length; ++i) {
1297 afe.splice(i, 0, bookmark)
1301 node = last_node = fb
1305 // 3. Let node be the element immediately above node in the
1306 // stack of open elements, or if node is no longer in the stack
1307 // of open elements (e.g. because it got removed by this
1308 // algorithm), the element that was immediately above node in
1309 // the stack of open elements before node was removed.
1311 for (i = 0; i < open_els.length; ++i) {
1314 node_next = open_els[i + 1]
// fall back to the remembered neighbour when node was not found in open_els
1318 node = node_next != null ? node_next : node_above
1319 // TODO make sure node_above gets re-set if/when node is removed from open_els
1321 // 4. If node is formatting element, then go to the next step in
1322 // the overall algorithm.
1326 // 5. If inner loop counter is greater than three and node is in
1327 // the list of active formatting elements, then remove node from
1328 // the list of active formatting elements.
1330 for (i = 0; i < afe.length; ++i) {
1341 // 6. If node is not in the list of active formatting elements,
1342 // then remove node from the stack of open elements and then go
1343 // back to the step labeled inner loop.
1345 for (i = 0; i < open_els.length; ++i) {
// remember the neighbour before node is removed from open_els
1348 node_above = open_els[i + 1]
1349 open_els.splice(i, 1)
1355 // 7. create an element for the token for which the element node
1356 // was created, in the HTML namespace, with common ancestor as
1357 // the intended parent; replace the entry for node in the list
1358 // of active formatting elements with an entry for the new
1359 // element, replace the entry for node in the stack of open
1360 // elements with an entry for the new element, and let node be
1362 new_node = token_to_element(node.token, NS_HTML, ca)
1363 for (i = 0; i < afe.length; ++i) {
1370 for (i = 0; i < open_els.length; ++i) {
1373 node_above = open_els[i + 1]
1374 open_els[i] = new_node
1379 // 8. If last node is furthest block, then move the
1380 // aforementioned bookmark to be immediately after the new node
1381 // in the list of active formatting elements.
1382 if (last_node === fb) {
1383 for (i = 0; i < afe.length; ++i) {
1385 if (t === bookmark) {
1390 for (i = 0; i < afe.length; ++i) {
1393 // "after" means lower
1394 afe.splice(i, 0, bookmark) // "after as <-
1399 // 9. Insert last node into node, first removing it from its
1400 // previous parent node if any.
1401 if (last_node.parent != null) {
1402 for (i = 0; i < last_node.parent.children.length; ++i) {
1403 c = last_node.parent.children[i]
1404 if (c === last_node) {
1405 last_node.parent.children.splice(i, 1)
1410 node.children.push(last_node)
1411 last_node.parent = node
1412 // 10. Let last node be node.
1414 // 11. Return to the step labeled inner loop.
1416 // 14. Insert whatever last node ended up being in the previous step
1417 // at the appropriate place for inserting a node, but using common
1418 // ancestor as the override target.
1420 // In the case where fe is immediately followed by fb:
1421 // * inner loop exits out early (node==fe)
1422 // * last_node is fb
1423 // * last_node is still in the tree (not a duplicate)
1424 if (last_node.parent != null) {
1425 for (i = 0; i < last_node.parent.children.length; ++i) {
1426 c = last_node.parent.children[i]
1427 if (c === last_node) {
1428 last_node.parent.children.splice(i, 1)
1433 // can't use standard insert token thing, because it's already in
1434 // open_els and must stay at its current position in open_els
// dest is a [parent node, child index] pair
1435 dest = adjusted_insertion_location(ca)
1436 dest[0].children.splice(dest[1], 0, last_node)
1437 last_node.parent = dest[0]
1438 // 15. Create an element for the token for which formatting element
1439 // was created, in the HTML namespace, with furthest block as the
// NOTE(review): new_element is not in this function's var list, so this
// assignment writes to an outer-scope (possibly global) variable -- confirm
// and declare it locally if that is unintended.
1441 new_element = token_to_element(fe.token, NS_HTML, fb)
1442 // 16. Take all of the child nodes of furthest block and append them
1443 // to the element created in the last step.
1444 while (fb.children.length) {
1445 t = fb.children.shift()
1446 t.parent = new_element
1447 new_element.children.push(t)
1449 // 17. Append that new element to furthest block.
1450 new_element.parent = fb
1451 fb.children.push(new_element)
1452 // 18. Remove formatting element from the list of active formatting
1453 // elements, and insert the new element into the list of active
1454 // formatting elements at the position of the aforementioned
1456 for (i = 0; i < afe.length; ++i) {
1463 for (i = 0; i < afe.length; ++i) {
1465 if (t === bookmark) {
// the bookmark placeholder is overwritten by the new element
1466 afe[i] = new_element
1470 // 19. Remove formatting element from the stack of open elements,
1471 // and insert the new element into the stack of open elements
1472 // immediately below the position of furthest block in that stack.
1473 for (i = 0; i < open_els.length; ++i) {
1476 open_els.splice(i, 1)
1480 for (i = 0; i < open_els.length; ++i) {
1483 open_els.splice(i, 0, new_element)
1487 // 20. Jump back to the step labeled outer loop.
1491 // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
// Closes an open HTML <p>: generates implied end tags (except for 'p'), then
// pops entries off the front of open_els until an HTML 'p' has been popped.
1492 close_p_element = function () {
1493 generate_implied_end_tags('p') // arg is exception
// the current node is expected to be the <p> at this point
// (presumably a parse error is reported otherwise -- TODO confirm)
1494 if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
// `length > 1` keeps the last remaining entry of open_els from being popped
1497 while (open_els.length > 1) { // just in case
1498 el = open_els.shift()
1499 if (el.name === 'p' && el.namespace === NS_HTML) {
// Acts only when an HTML <p> is in button scope
// (presumably by calling close_p_element() -- TODO confirm).
1504 close_p_if_in_button_scope = function () {
1505 if (is_in_button_scope('p', NS_HTML)) {
1510 // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1511 // aka insert_a_character = function (t) {
// Inserts the text token t at the adjusted insertion location.
//   t: a text token/node
// If the node just before the insertion point is a text node it is handled
// specially (presumably t's text is merged into it -- TODO confirm);
// otherwise t itself is spliced into the parent's children.
1512 insert_character = function (t) {
// dest is a [parent node, child index] pair
1514 dest = adjusted_insertion_location()
1515 // fixfull check for Document node
1517 prev = dest[0].children[dest[1] - 1]
1518 if (prev.type === TYPE_TEXT) {
1523 dest[0].children.splice(dest[1], 0, t)
1527 // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
// Tree-construction dispatcher for a single token t. The checks below look at
// the adjusted current node (acn) and the token type to choose between the
// current insertion mode and in_foreign_content(t).
// NOTE(review): the bodies of the individual branches presumably call
// ins_mode(t) and return -- confirm.
1528 process_token = function (t) {
1530 acn = adjusted_current_node()
// adjusted current node is an HTML element
1535 if (acn.namespace === NS_HTML) {
// MathML text integration point: start tags other than mglyph/malignmark,
// and text tokens
1539 if (is_mathml_text_integration_point(acn)) {
1540 if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
1544 if (t.type === TYPE_TEXT) {
// <svg> start tag directly inside MathML annotation-xml
1549 if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
// HTML integration point: start tags and text tokens
1553 if (is_html_integration(acn)) {
1554 if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
1559 if (t.type === TYPE_EOF) {
// none of the cases above applied
1563 in_foreign_content(t)
1567 // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1568 // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
// Computes the "appropriate place for inserting a node".
//   override_target: optional node to use instead of the current node
// Returns a two-element array [target, target_i]: the parent node and the
// index within target.children at which the new node should be spliced in.
// When flag_foster_parenting is set and the target is listed in
// foster_parenting_targets, the foster-parenting substeps pick a different
// parent (see the numbered spec comments below).
1569 adjusted_insertion_location = function (override_target) {
1570 var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, target, target_i
1571 // 1. If there was an override target specified, then let target be the
1573 if (override_target != null) {
1574 target = override_target
1575 } else { // Otherwise, let target be the current node.
1576 target = open_els[0]
1578 // 2. Determine the adjusted insertion location using the first matching
1579 // steps from the following list:
1581 // If foster parenting is enabled and target is a table, tbody, tfoot,
1582 // thead, or tr element Foster parenting happens when content is
1583 // misnested in tables.
1584 if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
1585 while (true) { // once. this is here so we can ``break`` to "abort these substeps"
1586 // 1. Let last template be the last template element in the
1587 // stack of open elements, if any.
1588 last_template = null
1589 last_template_i = null
1590 for (i = 0; i < open_els.length; ++i) {
1592 if (el.name === 'template' && el.namespace === NS_HTML) {
1598 // 2. Let last table be the last table element in the stack of
1599 // open elements, if any.
1602 for (i = 0; i < open_els.length; ++i) {
1604 if (el.name === 'table' && el.namespace === NS_HTML) {
1610 // 3. If there is a last template and either there is no last
1611 // table, or there is one, but last template is lower (more
1612 // recently added) than last table in the stack of open
1613 // elements, then: let adjusted insertion location be inside
1614 // last template's template contents, after its last child (if
1615 // any), and abort these substeps.
// a smaller index in open_els means more recently added
1616 if (last_template && (last_table === null || last_template_i < last_table_i)) {
1617 target = last_template // fixfull should be its contents
1618 target_i = target.children.length
1621 // 4. If there is no last table, then let adjusted insertion
1622 // location be inside the first element in the stack of open
1623 // elements (the html element), after its last child (if any),
1624 // and abort these substeps. (fragment case)
1625 if (last_table === null) {
// the spec's "first element in the stack" is the last array index here
1627 target = open_els[open_els.length - 1]
1628 target_i = target.children.length
1631 // 5. If last table has a parent element, then let adjusted
1632 // insertion location be inside last table's parent element,
1633 // immediately before last table, and abort these substeps.
1634 if (last_table.parent != null) {
1635 for (i = 0; i < last_table.parent.children.length; ++i) {
1636 c = last_table.parent.children[i]
1637 if (c === last_table) {
1638 target = last_table.parent
1645 // 6. Let previous element be the element immediately above last
1646 // table in the stack of open elements.
1648 // huh? how could it not have a parent?
1649 previous_element = open_els[last_table_i + 1]
1650 // 7. Let adjusted insertion location be inside previous
1651 // element, after its last child (if any).
1652 target = previous_element
1653 target_i = target.children.length
1654 // Note: These steps are involved in part because it's possible
1655 // for elements, the table element in this case in particular,
1656 // to have been moved by a script around in the DOM, or indeed
1657 // removed from the DOM entirely, after the element was inserted
1659 break // don't really loop
1662 // Otherwise Let adjusted insertion location be inside target, after
1663 // its last child (if any).
1664 target_i = target.children.length
1667 // 3. If the adjusted insertion location is inside a template element,
1668 // let it instead be inside the template element's template contents,
1669 // after its last child (if any).
1670 // fixfull (template)
1672 // 4. Return the adjusted insertion location.
1673 return [target, target_i]
1676 // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1677 // aka create_an_element_for_token
// Builds an element Node for a start-tag token.
//   t:               tag token; reads t.name and t.attrs_a (an array of
//                    [name, value] pairs)
//   namespace:       namespace stored on the new element
//   intended_parent: intended parent from the spec wording
//                    (NOTE(review): not used by any line shown here -- confirm)
// The new Node is created with type TYPE_TAG and keeps a reference to the
// token in its `token` property (presumably the Node is returned -- confirm).
1678 token_to_element = function (t, namespace, intended_parent) {
1680 // convert attributes into a hash
1682 for (i = 0; i < t.attrs_a.length; ++i) {
// as written, a later pair with the same attribute name overwrites an earlier one
1684 attrs[a[0]] = a[1] // TODO check what to do with duplicate attrs
1686 el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
1688 // TODO 2. If the newly created element has an xmlns attribute in the
1689 // XMLNS namespace whose value is not exactly the same as the element's
1690 // namespace, that is a parse error. Similarly, if the newly created
1691 // element has an xmlns:xlink attribute in the XMLNS namespace whose
1692 // value is not the XLink Namespace, that is a parse error.
1694 // fixfull: the spec says stuff about form pointers and ownerDocument
1699 // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
// Creates an element for token in the given namespace, splices it into the
// tree at the adjusted insertion location, and pushes it onto the front of
// open_els so that it becomes the current node.
//   token:     start-tag token to build the element from
//   namespace: namespace for the new element
// NOTE(review): callers use the result as the new element
// (e.g. `el = insert_html_element(t)`), so presumably el is returned -- confirm.
1700 insert_foreign_element = function (token, namespace) {
1701 var ail, ail_el, ail_i, el
// ail is a [parent node, child index] pair
1702 ail = adjusted_insertion_location()
1705 el = token_to_element(token, namespace, ail_el)
1706 // TODO skip this next step if it's broken (eg ail_el is document with child already)
1708 ail_el.children.splice(ail_i, 0, el)
1709 open_els.unshift(el)
1712 // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
// Shorthand for insert_foreign_element(token, NS_HTML); returns whatever that
// call returns.
1713 insert_html_element = function (token) {
1714 return insert_foreign_element(token, NS_HTML)
1717 // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1718 // position should be [node, index_within_children]
// Splices the comment token t into the tree.
//   t:        comment token/node
//   position: optional [node, index_within_children] pair; when null or
//             undefined, the adjusted insertion location is used
1719 insert_comment = function (t, position) {
1720 if (position == null) {
1721 position = adjusted_insertion_location()
1723 position[0].children.splice(position[1], 0, t)
1728 // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Generic raw text element parsing: inserts an HTML element for token t,
// switches the tokenizer to the RAWTEXT state, saves the current insertion
// mode in original_ins_mode, and switches the insertion mode to "text".
1729 parse_generic_raw_text = function (t) {
1730 insert_html_element(t)
1731 tok_state = tok_state_rawtext
1732 original_ins_mode = ins_mode
1733 ins_mode = ins_mode_text
// Generic RCDATA element parsing: inserts an HTML element for token t,
// switches the tokenizer to the RCDATA state, saves the current insertion
// mode in original_ins_mode, and switches the insertion mode to "text".
1735 parse_generic_rcdata_text = function (t) {
1736 insert_html_element(t)
1737 tok_state = tok_state_rcdata
1738 original_ins_mode = ins_mode
1739 ins_mode = ins_mode_text
1742 // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1743 // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
// Generates implied end tags: loops while the current node (open_els[0]) is
// listed in end_tag_implied for its own namespace and is not named `except`
// (presumably popping open_els[0] on each pass -- TODO confirm).
//   except: optional tag name that must not be closed implicitly
1744 generate_implied_end_tags = function (except) {
1745 if (except == null) {
1748 while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
1753 // 8.2.5.4 The rules for parsing tokens in HTML content
1754 // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1756 // 8.2.5.4.1 The "initial" insertion mode
1757 // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
// Tests whether DOCTYPE token t selects full quirks mode.
//   t: doctype token; reads t.flag('force-quirks'), t.name,
//      t.public_identifier and t.system_identifier
// Identifiers are lower-cased before comparison, so all the literals below
// are lower case.
// NOTE(review): each matching branch presumably returns true, with false as
// the fall-through -- confirm against the return statements.
1758 is_quirks_yes_doctype = function (t) {
1760 if (t.flag('force-quirks')) {
1763 if (t.name !== 'html') {
1766 if (t.public_identifier != null) {
1767 pi = t.public_identifier.toLowerCase()
// public identifier starts with any of the known quirks prefixes
1768 for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
1769 p = quirks_yes_pi_prefixes[i]
1770 if (pi.substr(0, p.length) === p) {
// public identifier is exactly one of these
1774 if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
1778 if (t.system_identifier != null) {
1779 if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
// no system identifier, but a public identifier is present
1782 } else if (t.public_identifier != null) {
1783 // already did this: pi = t.public_identifier.toLowerCase()
1784 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
// Tests whether DOCTYPE token t selects limited-quirks mode.
//   t: doctype token; reads t.public_identifier and t.system_identifier
// The public identifier is lower-cased before the prefix comparisons.
// NOTE(review): each matching branch presumably returns true, with false as
// the fall-through -- confirm against the return statements.
1790 is_quirks_limited_doctype = function (t) {
1792 if (t.public_identifier != null) {
1793 pi = t.public_identifier.toLowerCase()
// XHTML 1.0 frameset/transitional
1794 if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
// HTML 4.01 frameset/transitional, only when a system identifier is present
1797 if (t.system_identifier != null) {
1798 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
// Handler for the "initial" insertion mode.
//   t: the token to process
// Comments and doctypes are appended directly to doc.children. A doctype sets
// the document's 'quirks mode' flag when it matches one of the quirks tests
// and switches to "before html". The final branch sets QUIRKS_YES and also
// switches to "before html".
1805 ins_mode_initial = function (t) {
1806 if (is_space_tok(t)) {
1809 if (t.type === TYPE_COMMENT) {
1811 doc.children.push(t)
1814 if (t.type === TYPE_DOCTYPE) {
1815 // fixfull syntax error from first paragraph and following bullets
1816 // fixfull set doc.doctype
1817 // fixfull is the "not an iframe srcdoc" thing relevant?
1818 if (is_quirks_yes_doctype(t)) {
1819 doc.flag('quirks mode', QUIRKS_YES)
1820 } else if (is_quirks_limited_doctype(t)) {
1821 doc.flag('quirks mode', QUIRKS_LIMITED)
1823 doc.children.push(t)
1824 ins_mode = ins_mode_before_html
1828 // fixfull not iframe srcdoc?
// anything else
// (presumably the token is then reprocessed in the new mode -- TODO confirm)
1830 doc.flag('quirks mode', QUIRKS_YES)
1831 ins_mode = ins_mode_before_html
1835 // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
// Handler for the "before html" insertion mode.
//   t: the token to process
// An <html> start tag creates the root element from the token; the final
// branch synthesises one from new_open_tag('html'). In both cases the element
// is appended to doc.children, pushed onto open_els, and the mode becomes
// "before head".
1836 ins_mode_before_html = function (t) {
1837 if (t.type === TYPE_DOCTYPE) {
1841 if (t.type === TYPE_COMMENT) {
1842 doc.children.push(t)
1845 if (is_space_tok(t)) {
1848 if (t.type === TYPE_START_TAG && t.name === 'html') {
1849 el = token_to_element(t, NS_HTML, doc)
1850 doc.children.push(el)
1852 open_els.unshift(el)
1853 // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1854 ins_mode = ins_mode_before_head
1857 if (t.type === TYPE_END_TAG) {
1858 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1859 // fall through to "anything else"
// anything else: create the implied <html> element
1866 el = token_to_element(new_open_tag('html'), NS_HTML, doc)
1867 doc.children.push(el)
1869 open_els.unshift(el)
1870 // ?fixfull browsing context
1871 ins_mode = ins_mode_before_head
1875 // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
// Handler for the "before head" insertion mode.
//   t: the token to process
// A <head> start tag inserts the head element; the final branch inserts an
// implied one built from new_open_tag('head'). In both cases
// head_element_pointer is set to the new element and the mode becomes
// "in head".
1876 ins_mode_before_head = function (t) {
1878 if (is_space_tok(t)) {
1881 if (t.type === TYPE_COMMENT) {
1885 if (t.type === TYPE_DOCTYPE) {
1889 if (t.type === TYPE_START_TAG && t.name === 'html') {
1893 if (t.type === TYPE_START_TAG && t.name === 'head') {
1894 el = insert_html_element(t)
1895 head_element_pointer = el
1896 ins_mode = ins_mode_in_head
1899 if (t.type === TYPE_END_TAG) {
1900 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1901 // fall through to Anything else below
// anything else: insert the implied <head> element
1908 el = insert_html_element(new_open_tag('head'))
1909 head_element_pointer = el
1910 ins_mode = ins_mode_in_head
1914 // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
// "Anything else" branch of the "in head" insertion mode: pops the current
// node off open_els and switches to "after head"
// (presumably t is then reprocessed in the new mode -- TODO confirm).
1915 ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
1916 open_els.shift() // spec says this will be a 'head' node
1917 ins_mode = ins_mode_after_head
// Handler for the "in head" insertion mode.
//   t: the token to process
// Each `if` below corresponds to one entry in the spec's list for this mode;
// tokens that match none of them end up in ins_mode_in_head_else(t).
1920 ins_mode_in_head = function (t) {
// NOTE(review): this tests for a single tab, LF, FF or space character and
// does not call is_space_tok(t), which the neighbouring insertion modes use --
// confirm the difference is intended.
1922 if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
1926 if (t.type === TYPE_COMMENT) {
1930 if (t.type === TYPE_DOCTYPE) {
1934 if (t.type === TYPE_START_TAG && t.name === 'html') {
// void head elements: inserted, then the self-closing flag is acknowledged
1938 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
1939 el = insert_html_element(t)
1941 t.acknowledge_self_closing()
1944 if (t.type === TYPE_START_TAG && t.name === 'meta') {
1945 el = insert_html_element(t)
1947 t.acknowledge_self_closing()
1948 // fixfull encoding stuff
1951 if (t.type === TYPE_START_TAG && t.name === 'title') {
1952 parse_generic_rcdata_text(t)
// noscript is parsed as raw text only when scripting is enabled
1955 if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
1956 parse_generic_raw_text(t)
1959 if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
1960 insert_html_element(t)
1961 ins_mode = ins_mode_in_head_noscript
1964 if (t.type === TYPE_START_TAG && t.name === 'script') {
1965 ail = adjusted_insertion_location()
// NOTE(review): ail is the [node, index] pair, while insert_foreign_element
// passes the node itself as intended_parent -- confirm which is expected.
1966 el = token_to_element(t, NS_HTML, ail)
1967 el.flag('parser-inserted', true)
1968 // fixfull fragment case
1969 ail[0].children.splice(ail[1], 0, el)
1970 open_els.unshift(el)
1971 tok_state = tok_state_script_data
1972 original_ins_mode = ins_mode // make sure orig... is defined
1973 ins_mode = ins_mode_text
1976 if (t.type === TYPE_END_TAG && t.name === 'head') {
1977 open_els.shift() // will be a head element... spec says so
1978 ins_mode = ins_mode_after_head
1981 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
1982 ins_mode_in_head_else(t)
1985 if (t.type === TYPE_START_TAG && t.name === 'template') {
1986 insert_html_element(t)
1988 flag_frameset_ok = false
1989 ins_mode = ins_mode_in_template
// the current template insertion mode lives at index 0 of template_ins_modes
1990 template_ins_modes.unshift(ins_mode_in_template)
1993 if (t.type === TYPE_END_TAG && t.name === 'template') {
1994 if (template_tag_is_open()) {
// NOTE(review): FIXME -- this line only references the function; without
// `()` it is never called, so implied end tags are not generated here.
1995 generate_implied_end_tags
1996 if (open_els[0].name !== 'template') {
// pop entries up to and including the HTML <template> element
2000 el = open_els.shift()
2001 if (el.name === 'template' && el.namespace === NS_HTML) {
2005 clear_afe_to_marker()
2006 template_ins_modes.shift()
// a stray <head> start tag, or any end tag not handled above
2013 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
// anything else
2017 ins_mode_in_head_else(t)
2020 // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
// "Anything else" branch of the "in head noscript" insertion mode: switches
// the insertion mode back to "in head".
// NOTE(review): presumably also pops the noscript element and reprocesses t --
// confirm.
2021 ins_mode_in_head_noscript_else = function (t) {
2024 ins_mode = ins_mode_in_head
// Handler for the "in head noscript" insertion mode.
//   t: the token to process
// Tokens that match none of the listed cases end up in
// ins_mode_in_head_noscript_else(t).
2027 ins_mode_in_head_noscript = function (t) {
2028 if (t.type === TYPE_DOCTYPE) {
2032 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </noscript> switches back to "in head"
2036 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
2038 ins_mode = ins_mode_in_head
// whitespace, comments and these start tags share one branch
// (presumably handed to ins_mode_in_head -- TODO confirm)
2041 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
2045 if (t.type === TYPE_END_TAG && t.name === 'br') {
2046 ins_mode_in_head_noscript_else(t)
// nested <head>/<noscript> start tags and every other end tag
2049 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
// anything else
2054 ins_mode_in_head_noscript_else(t)
2057 // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
2058 ins_mode_after_head_else = function (t) {
2060 body_tok = new_open_tag('body')
2061 insert_html_element(body_tok)
2062 ins_mode = ins_mode_in_body
2065 ins_mode_after_head = function (t) {
2067 if (is_space_tok(t)) {
2071 if (t.type === TYPE_COMMENT) {
2075 if (t.type === TYPE_DOCTYPE) {
2079 if (t.type === TYPE_START_TAG && t.name === 'html') {
2083 if (t.type === TYPE_START_TAG && t.name === 'body') {
2084 insert_html_element(t)
2085 flag_frameset_ok = false
2086 ins_mode = ins_mode_in_body
2089 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2090 insert_html_element(t)
2091 ins_mode = ins_mode_in_frameset
2094 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
2096 open_els.unshift(head_element_pointer)
2098 for (i = 0; i < open_els.length; ++i) {
2100 if (el === head_element_pointer) {
2101 open_els.splice(i, 1)
2107 if (t.type === TYPE_END_TAG && t.name === 'template') {
2111 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2112 ins_mode_after_head_else(t)
2115 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2120 ins_mode_after_head_else(t)
2123 // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
2124 in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
2128 if (node.name === name && node.namespace === NS_HTML) {
2129 generate_implied_end_tags(name) // arg is exception
2130 if (node !== open_els[0]) {
2134 el = open_els.shift()
2140 if (special_elements[node.name] === node.namespace) {
2144 for (i = 0; i < open_els.length; ++i) {
2147 node = open_els[i + 1]
2153 ins_mode_in_body = function (t) {
2154 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, root_attrs, s, second, second_i, u, w, y, z
2155 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2159 if (is_space_tok(t)) {
2164 if (t.type === TYPE_TEXT) {
2167 flag_frameset_ok = false
2170 if (t.type === TYPE_COMMENT) {
2174 if (t.type === TYPE_DOCTYPE) {
2178 if (t.type === TYPE_START_TAG && t.name === 'html') {
2180 if (template_tag_is_open()) {
2183 root_attrs = open_els[open_els.length - 1].attrs
2184 for (i = 0; i < t.attrs_a.length; ++i) {
2186 if (root_attrs[a[0]] == null) {
2187 root_attrs[a[0]] = a[1]
2193 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2197 if (t.type === TYPE_START_TAG && t.name === 'body') {
2199 if (open_els.length < 2) {
2202 second = open_els[open_els.length - 2]
2203 if (second.namespace !== NS_HTML) {
2206 if (second.name !== 'body') {
2209 if (template_tag_is_open()) {
2212 flag_frameset_ok = false
2213 for (i = 0; i < t.attrs_a.length; ++i) {
2215 if (second.attrs[a[0]] == null) {
2216 second.attrs[a[0]] = a[1]
2221 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2223 if (open_els.length < 2) {
2226 second_i = open_els.length - 2
2227 second = open_els[second_i]
2228 if (second.namespace !== NS_HTML) {
2231 if (second.name !== 'body') {
2234 if (flag_frameset_ok === false) {
2237 if (second.parent != null) {
2238 for (i = 0; i < second.parent.children.length; ++i) {
2239 el = second.parent.children[i]
2240 if (el === second) {
2241 second.parent.children.splice(i, 1)
2246 open_els.splice(second_i, 1)
2247 // pop everything except the "root html element"
2248 while (open_els.length > 1) {
2251 insert_html_element(t)
2252 ins_mode = ins_mode_in_frameset
2255 if (t.type === TYPE_EOF) {
2257 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2258 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2259 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2261 for (i = 0; i < open_els.length; ++i) {
2263 if (ok_tags[t.name] !== el.namespace) {
2268 if (template_ins_modes.length > 0) {
2269 ins_mode_in_template(t)
2275 if (t.type === TYPE_END_TAG && t.name === 'body') {
2276 if (!is_in_scope('body', NS_HTML)) {
2281 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2282 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2283 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2284 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2287 for (i = 0; i < open_els.length; ++i) {
2289 if (ok_tags[t.name] !== el.namespace) {
2294 ins_mode = ins_mode_after_body
2297 if (t.type === TYPE_END_TAG && t.name === 'html') {
2298 if (!is_in_scope('body', NS_HTML)) {
2303 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2304 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2305 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2306 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2309 for (i = 0; i < open_els.length; ++i) {
2311 if (ok_tags[t.name] !== el.namespace) {
2316 ins_mode = ins_mode_after_body
2320 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2321 close_p_if_in_button_scope()
2322 insert_html_element(t)
2325 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2326 close_p_if_in_button_scope()
2327 if (h_tags[open_els[0].name] === open_els[0].namespace) {
2331 insert_html_element(t)
2334 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2335 close_p_if_in_button_scope()
2336 insert_html_element(t)
2337 eat_next_token_if_newline()
2338 flag_frameset_ok = false
2341 if (t.type === TYPE_START_TAG && t.name === 'form') {
2342 if (!(form_element_pointer === null || template_tag_is_open())) {
2346 close_p_if_in_button_scope()
2347 el = insert_html_element(t)
2348 if (!template_tag_is_open()) {
2349 form_element_pointer = el
2353 if (t.type === TYPE_START_TAG && t.name === 'li') {
2354 flag_frameset_ok = false
2355 for (i = 0; i < open_els.length; ++i) {
2357 if (node.name === 'li' && node.namespace === NS_HTML) {
2358 generate_implied_end_tags('li') // arg is exception
2359 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2363 el = open_els.shift()
2364 if (el.name === 'li' && el.namespace === NS_HTML) {
2370 if (el_is_special_not_adp(node)) {
2374 close_p_if_in_button_scope()
2375 insert_html_element(t)
2378 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2379 flag_frameset_ok = false
2380 for (i = 0; i < open_els.length; ++i) {
2382 if (node.name === 'dd' && node.namespace === NS_HTML) {
2383 generate_implied_end_tags('dd') // arg is exception
2384 if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2388 el = open_els.shift()
2389 if (el.name === 'dd' && el.namespace === NS_HTML) {
2395 if (node.name === 'dt' && node.namespace === NS_HTML) {
2396 generate_implied_end_tags('dt') // arg is exception
2397 if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2401 el = open_els.shift()
2402 if (el.name === 'dt' && el.namespace === NS_HTML) {
2408 if (el_is_special_not_adp(node)) {
2412 close_p_if_in_button_scope()
2413 insert_html_element(t)
2416 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2417 close_p_if_in_button_scope()
2418 insert_html_element(t)
2419 tok_state = tok_state_plaintext
2422 if (t.type === TYPE_START_TAG && t.name === 'button') {
2423 if (is_in_scope('button', NS_HTML)) {
2425 generate_implied_end_tags()
2427 el = open_els.shift()
2428 if (el.name === 'button' && el.namespace === NS_HTML) {
2434 insert_html_element(t)
2435 flag_frameset_ok = false
2438 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2439 if (!is_in_scope(t.name, NS_HTML)) {
2443 generate_implied_end_tags()
2444 if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2448 el = open_els.shift()
2449 if (el.name === t.name && el.namespace === NS_HTML) {
2455 if (t.type === TYPE_END_TAG && t.name === 'form') {
2456 if (!template_tag_is_open()) {
2457 node = form_element_pointer
2458 form_element_pointer = null
2459 if (node === null || !el_is_in_scope(node)) {
2463 generate_implied_end_tags()
2464 if (open_els[0] !== node) {
2467 for (i = 0; i < open_els.length; ++i) {
2470 open_els.splice(i, 1)
2475 if (!is_in_scope('form', NS_HTML)) {
2479 generate_implied_end_tags()
2480 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2484 el = open_els.shift()
2485 if (el.name === 'form' && el.namespace === NS_HTML) {
2492 if (t.type === TYPE_END_TAG && t.name === 'p') {
2493 if (!is_in_button_scope('p', NS_HTML)) {
2495 insert_html_element(new_open_tag('p'))
2500 if (t.type === TYPE_END_TAG && t.name === 'li') {
2501 if (!is_in_li_scope('li', NS_HTML)) {
2505 generate_implied_end_tags('li') // arg is exception
2506 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2510 el = open_els.shift()
2511 if (el.name === 'li' && el.namespace === NS_HTML) {
2517 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2518 if (!is_in_scope(t.name, NS_HTML)) {
2522 generate_implied_end_tags(t.name) // arg is exception
2523 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2527 el = open_els.shift()
2528 if (el.name === t.name && el.namespace === NS_HTML) {
2534 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2536 for (i = 0; i < open_els.length; ++i) {
2538 if (h_tags[el.name] === el.namespace) {
2542 if (standard_scopers[el.name] === el.namespace) {
2550 generate_implied_end_tags()
2551 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2555 el = open_els.shift()
2556 if (h_tags[el.name] === el.namespace) {
2563 if (t.type === TYPE_START_TAG && t.name === 'a') {
2564 // If the list of active formatting elements contains an a element
2565 // between the end of the list and the last marker on the list (or
2566 // the start of the list if there is no marker on the list), then
2567 // this is a parse error; run the adoption agency algorithm for the
2568 // tag name "a", then remove that element from the list of active
2569 // formatting elements and the stack of open elements if the
2570 // adoption agency algorithm didn't already remove it (it might not
2571 // have if the element is not in table scope).
2573 for (i = 0; i < afe.length; ++i) {
2575 if (el.type === TYPE_AFE_MARKER) {
2578 if (el.name === 'a' && el.namespace === NS_HTML) {
2582 if (found != null) {
2584 adoption_agency('a')
2585 for (i = 0; i < afe.length; ++i) {
2591 for (i = 0; i < open_els.length; ++i) {
2594 open_els.splice(i, 1)
2599 el = insert_html_element(t)
2603 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2605 el = insert_html_element(t)
2609 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2611 if (is_in_scope('nobr', NS_HTML)) {
2613 adoption_agency('nobr')
2616 el = insert_html_element(t)
2620 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2621 adoption_agency(t.name)
2624 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2626 insert_html_element(t)
2628 flag_frameset_ok = false
2631 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2632 if (!is_in_scope(t.name, NS_HTML)) {
2636 generate_implied_end_tags()
2637 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2641 el = open_els.shift()
2642 if (el.name === t.name && el.namespace === NS_HTML) {
2646 clear_afe_to_marker()
2649 if (t.type === TYPE_START_TAG && t.name === 'table') {
2650 if (doc.flag('quirks mode') !== QUIRKS_YES) {
2651 close_p_if_in_button_scope() // test
2653 insert_html_element(t)
2654 flag_frameset_ok = false
2655 ins_mode = ins_mode_in_table
2658 if (t.type === TYPE_END_TAG && t.name === 'br') {
2660 // W3C: t.type = TYPE_START_TAG
2661 t = new_open_tag('br') // WHATWG
2664 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2666 insert_html_element(t)
2668 t.acknowledge_self_closing()
2669 flag_frameset_ok = false
2672 if (t.type === TYPE_START_TAG && t.name === 'input') {
2674 insert_html_element(t)
2676 t.acknowledge_self_closing()
2677 if (!is_input_hidden_tok(t)) {
2678 flag_frameset_ok = false
2682 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2683 // WHATWG adds 'menuitem' for this block
2684 insert_html_element(t)
2686 t.acknowledge_self_closing()
2689 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2690 close_p_if_in_button_scope()
2691 insert_html_element(t)
2693 t.acknowledge_self_closing()
2694 flag_frameset_ok = false
2697 if (t.type === TYPE_START_TAG && t.name === 'image') {
2703 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2705 if (template_tag_is_open() === false && form_element_pointer !== null) {
2708 t.acknowledge_self_closing()
2709 flag_frameset_ok = false
2710 close_p_if_in_button_scope()
2711 el = insert_html_element(new_open_tag('form'))
2712 if (!template_tag_is_open()) {
2713 form_element_pointer = el
2715 for (i = 0; i < t.attrs_a.length; ++i) {
2717 if (a[0] === 'action') {
2718 el.attrs['action'] = a[1]
2722 insert_html_element(new_open_tag('hr'))
2725 insert_html_element(new_open_tag('label'))
2726 // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2727 input_el = new_open_tag('input')
2729 for (i = 0; i < t.attrs_a.length; ++i) {
2731 if (a[0] === 'prompt') {
2734 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2735 input_el.attrs_a.push([a[0], a[1]])
2738 input_el.attrs_a.push(['name', 'isindex'])
2739 // fixfull this next bit is in english... internationalize?
2740 if (prompt == null) {
2741 prompt = "This is a searchable index. Enter search keywords: "
2743 insert_character(new_character_token(prompt)) // fixfull split
2744 // TODO submit typo "balue" in spec
2745 insert_html_element(input_el)
2747 // insert_character('') // you can put chars here if prompt attr missing
2749 insert_html_element(new_open_tag('hr'))
2752 if (!template_tag_is_open()) {
2753 form_element_pointer = null
2757 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2758 insert_html_element(t)
2759 eat_next_token_if_newline()
2760 tok_state = tok_state_rcdata
2761 original_ins_mode = ins_mode
2762 flag_frameset_ok = false
2763 ins_mode = ins_mode_text
2766 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2767 close_p_if_in_button_scope()
2769 flag_frameset_ok = false
2770 parse_generic_raw_text(t)
2773 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2774 flag_frameset_ok = false
2775 parse_generic_raw_text(t)
2778 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2779 parse_generic_raw_text(t)
2782 if (t.type === TYPE_START_TAG && t.name === 'select') {
2784 insert_html_element(t)
2785 flag_frameset_ok = false
2786 if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2787 ins_mode = ins_mode_in_select_in_table
2789 ins_mode = ins_mode_in_select
2793 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2794 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2798 insert_html_element(t)
2801 // this comment block implements the W3C spec
2802 // if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2803 // if is_in_scope 'ruby', NS_HTML
2804 // generate_implied_end_tags()
2805 // unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2807 // insert_html_element t
2809 // if t.type === TYPE_START_TAG && t.name === 'rt'
2810 // if is_in_scope 'ruby', NS_HTML
2811 // generate_implied_end_tags 'rtc' // arg === exception
2812 // unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2814 // insert_html_element t
2816 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2817 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2818 if (is_in_scope('ruby', NS_HTML)) {
2819 generate_implied_end_tags()
2820 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2824 insert_html_element(t)
2827 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2828 if (is_in_scope('ruby', NS_HTML)) {
2829 generate_implied_end_tags('rtc')
2830 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2834 insert_html_element(t)
2838 if (t.type === TYPE_START_TAG && t.name === 'math') {
2840 adjust_mathml_attributes(t)
2841 adjust_foreign_attributes(t)
2842 insert_foreign_element(t, NS_MATHML)
2843 if (t.flag('self-closing')) {
2845 t.acknowledge_self_closing()
2849 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2851 adjust_svg_attributes(t)
2852 adjust_foreign_attributes(t)
2853 insert_foreign_element(t, NS_SVG)
2854 if (t.flag('self-closing')) {
2856 t.acknowledge_self_closing()
2860 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2864 if (t.type === TYPE_START_TAG) { // any other start tag
2866 insert_html_element(t)
2869 if (t.type === TYPE_END_TAG) { // any other end tag
2870 in_body_any_other_end_tag(t.name)
2875 // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2876 ins_mode_text = function (t) {
2877 if (t.type === TYPE_TEXT) {
2881 if (t.type === TYPE_EOF) {
2883 if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
2884 open_els[0].flag('already started', true)
2887 ins_mode = original_ins_mode
2891 if (t.type === TYPE_END_TAG && t.name === 'script') {
2893 ins_mode = original_ins_mode
2894 // fixfull the spec seems to assume that I'm going to run the script
2895 // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2898 if (t.type === TYPE_END_TAG) {
2900 ins_mode = original_ins_mode
// the functions below implement the tokenizer states described here:
// http://www.w3.org/TR/html5/syntax.html#tokenization
2908 // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2909 ins_mode_in_table_else = function (t) {
2911 flag_foster_parenting = true
2913 flag_foster_parenting = false
2915 ins_mode_in_table = function (t) {
2919 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
2920 pending_table_character_tokens = []
2921 original_ins_mode = ins_mode
2922 ins_mode = ins_mode_in_table_text
2925 ins_mode_in_table_else(t)
2934 case TYPE_START_TAG:
2937 clear_stack_to_table_context()
2939 insert_html_element(t)
2940 ins_mode = ins_mode_in_caption
2943 clear_stack_to_table_context()
2944 insert_html_element(t)
2945 ins_mode = ins_mode_in_column_group
2948 clear_stack_to_table_context()
2949 insert_html_element(new_open_tag('colgroup'))
2950 ins_mode = ins_mode_in_column_group
2956 clear_stack_to_table_context()
2957 insert_html_element(t)
2958 ins_mode = ins_mode_in_table_body
2963 clear_stack_to_table_context()
2964 insert_html_element(new_open_tag('tbody'))
2965 ins_mode = ins_mode_in_table_body
2970 if (is_in_table_scope('table', NS_HTML)) {
2972 el = open_els.shift()
2973 if (el.name === 'table' && el.namespace === NS_HTML) {
2987 if (!is_input_hidden_tok(t)) {
2988 ins_mode_in_table_else(t)
2991 el = insert_html_element(t)
2993 t.acknowledge_self_closing()
2998 if (form_element_pointer != null) {
3001 if (template_tag_is_open()) {
3004 form_element_pointer = insert_html_element(t)
3008 ins_mode_in_table_else(t)
3014 if (is_in_table_scope('table', NS_HTML)) {
3016 el = open_els.shift()
3017 if (el.name === 'table' && el.namespace === NS_HTML) {
3043 ins_mode_in_table_else(t)
3050 ins_mode_in_table_else(t)
3054 // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
3055 ins_mode_in_table_text = function (t) {
3056 var all_space, i, l, m, old
3057 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3062 if (t.type === TYPE_TEXT) {
3063 pending_table_character_tokens.push(t)
3068 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3069 old = pending_table_character_tokens[i]
3070 if (!is_space_tok(old)) {
3076 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3077 old = pending_table_character_tokens[i]
3078 insert_character(old)
3081 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3082 old = pending_table_character_tokens[i]
3083 ins_mode_in_table_else(old)
3086 pending_table_character_tokens = []
3087 ins_mode = original_ins_mode
3091 // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
3092 ins_mode_in_caption = function (t) {
3094 if (t.type === TYPE_END_TAG && t.name === 'caption') {
3095 if (is_in_table_scope('caption', NS_HTML)) {
3096 generate_implied_end_tags()
3097 if (open_els[0].name !== 'caption') {
3101 el = open_els.shift()
3102 if (el.name === 'caption' && el.namespace === NS_HTML) {
3106 clear_afe_to_marker()
3107 ins_mode = ins_mode_in_table
3114 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3116 if (is_in_table_scope('caption', NS_HTML)) {
3118 el = open_els.shift()
3119 if (el.name === 'caption' && el.namespace === NS_HTML) {
3123 clear_afe_to_marker()
3124 ins_mode = ins_mode_in_table
3127 // else fragment case
3130 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
// 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
//
// Tree-construction handler for the "in column group" insertion mode.
//   t: the token to process (its type and name are inspected below).
// open_els[0] is the current node (the stack of open elements is popped
// with shift()).
//
// NOTE(review): the short statements of this function (parse_error(),
// return, insert_comment(t), process_token(t), closing braces) were not
// visible in the reviewed excerpt and are restored from the spec steps --
// confirm the helper names against the full file.
ins_mode_in_column_group = function (t) {
	if (is_space_tok(t)) {
		insert_character(t)
		return
	}
	if (t.type === TYPE_COMMENT) {
		insert_comment(t)
		return
	}
	if (t.type === TYPE_DOCTYPE) {
		parse_error()
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'html') {
		ins_mode_in_body(t)
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'col') {
		// <col> is a void element: insert it and immediately pop it again
		insert_html_element(t)
		open_els.shift()
		t.acknowledge_self_closing()
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
		// Fix: the namespace must be read from the current node. The old
		// test read `open_els.namespace` (a property of the array, always
		// undefined), so </colgroup> was always treated as a parse error
		// and the colgroup was never popped here.
		if (open_els[0].name === 'colgroup' && open_els[0].namespace === NS_HTML) {
			open_els.shift()
			ins_mode = ins_mode_in_table
		} else {
			parse_error()
		}
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'col') {
		parse_error()
		return
	}
	if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
		ins_mode_in_head(t)
		return
	}
	if (t.type === TYPE_EOF) {
		ins_mode_in_body(t)
		return
	}
	// Anything else
	if (open_els[0].name !== 'colgroup') {
		parse_error()
		return
	}
	// close the colgroup, then reprocess the token in the "in table" mode
	open_els.shift()
	ins_mode = ins_mode_in_table
	process_token(t)
}
3194 // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
3195 ins_mode_in_table_body = function (t) {
3197 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3198 clear_stack_to_table_body_context()
3199 insert_html_element(t)
3200 ins_mode = ins_mode_in_row
3203 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3205 clear_stack_to_table_body_context()
3206 insert_html_element(new_open_tag('tr'))
3207 ins_mode = ins_mode_in_row
3211 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3212 if (!is_in_table_scope(t.name, NS_HTML)) {
3216 clear_stack_to_table_body_context()
3218 ins_mode = ins_mode_in_table
3221 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) {
3223 for (i = 0; i < open_els.length; ++i) {
3225 if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) {
3229 if (table_scopers[el.name] === el.namespace) {
3237 clear_stack_to_table_body_context()
3239 ins_mode = ins_mode_in_table
3243 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) {
3248 ins_mode_in_table(t)
3251 // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
3252 ins_mode_in_row = function (t) {
3253 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3254 clear_stack_to_table_row_context()
3255 insert_html_element(t)
3256 ins_mode = ins_mode_in_cell
3260 if (t.type === TYPE_END_TAG && t.name === 'tr') {
3261 if (is_in_table_scope('tr', NS_HTML)) {
3262 clear_stack_to_table_row_context()
3264 ins_mode = ins_mode_in_table_body
3270 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3271 if (is_in_table_scope('tr', NS_HTML)) {
3272 clear_stack_to_table_row_context()
3274 ins_mode = ins_mode_in_table_body
3281 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3282 if (is_in_table_scope(t.name, NS_HTML)) {
3283 if (is_in_table_scope('tr', NS_HTML)) {
3284 clear_stack_to_table_row_context()
3286 ins_mode = ins_mode_in_table_body
3294 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th')) {
3299 ins_mode_in_table(t)
// http://www.w3.org/TR/html5/syntax.html#close-the-cell
//
// "Close the cell": generate implied end tags, report a parse error when the
// current node (open_els[0]) is not an HTML td/th, pop the stack of open
// elements up to and including the first HTML td/th, clear the list of
// active formatting elements up to the last marker, and switch the insertion
// mode to "in row".
//
// NOTE(review): `var el`, parse_error() and the loop scaffolding were not
// visible in the reviewed excerpt and are restored from the spec steps --
// confirm against the full file.
close_the_cell = function () {
	var el
	generate_implied_end_tags()
	// Fix: compare the current node's *name* with 'th'. The old test
	// compared the element object itself with the string 'th', which is
	// never true, so closing a <th> always reported a parse error.
	if (!((open_els[0].name === 'td' || open_els[0].name === 'th') && open_els[0].namespace === NS_HTML)) {
		parse_error()
	}
	// pop until a td/th in the HTML namespace has been removed
	while (true) {
		el = open_els.shift()
		if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
			break
		}
	}
	clear_afe_to_marker()
	ins_mode = ins_mode_in_row
}
3319 // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
3320 ins_mode_in_cell = function (t) {
3322 if (t.type === TYPE_END_TAG && (t.name === 'td' || t.name === 'th')) {
3323 if (is_in_table_scope(t.name, NS_HTML)) {
3324 generate_implied_end_tags()
3325 if (!((open_els[0].name === t.name) && open_els[0].namespace === NS_HTML)) {
3329 el = open_els.shift()
3330 if (el.name === t.name && el.namespace === NS_HTML) {
3334 clear_afe_to_marker()
3335 ins_mode = ins_mode_in_row
3341 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3343 for (i = 0; i < open_els.length; ++i) {
3345 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3349 if (table_scopers[el.name] === el.namespace) {
3361 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html')) {
3365 if (t.type === TYPE_END_TAG && (t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) {
3366 if (is_in_table_scope(t.name, NS_HTML)) {
// 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
//
// Tree-construction handler for the "in select" insertion mode.
//   t: the token to process (its type, name and text are inspected below).
// open_els[0] is the current node; open_els[1] is the node just below it on
// the stack of open elements.
//
// NOTE(review): the short statements of this function (parse_error(),
// return, break, open_els.shift(), insert_comment(t), reset_ins_mode(),
// process_token(t), loop scaffolding) were not visible in the reviewed
// excerpt and are restored from the spec steps -- confirm the helper names
// against the full file.
ins_mode_in_select = function (t) {
	var el
	if (t.type === TYPE_TEXT && t.text === "\u0000") {
		parse_error()
		return
	}
	if (t.type === TYPE_TEXT) {
		insert_character(t)
		return
	}
	if (t.type === TYPE_COMMENT) {
		insert_comment(t)
		return
	}
	if (t.type === TYPE_DOCTYPE) {
		parse_error()
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'html') {
		ins_mode_in_body(t)
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'option') {
		// a new <option> implicitly closes an open one
		if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
			open_els.shift()
		}
		insert_html_element(t)
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
		// a new <optgroup> implicitly closes an open option and optgroup
		if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
			open_els.shift()
		}
		if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
			open_els.shift()
		}
		insert_html_element(t)
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
		if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
			// Fix: name and namespace must be tested on the same node (the
			// one immediately below the current node). The old code read
			// the namespace from open_els[0] instead of open_els[1].
			if (open_els[1].name === 'optgroup' && open_els[1].namespace === NS_HTML) {
				open_els.shift()
			}
		}
		if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
			open_els.shift()
		} else {
			parse_error()
		}
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'option') {
		if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
			open_els.shift()
		} else {
			parse_error()
		}
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'select') {
		if (is_in_select_scope('select', NS_HTML)) {
			// pop up to and including the select element
			while (true) {
				el = open_els.shift()
				if (el.name === 'select' && el.namespace === NS_HTML) {
					break
				}
			}
			reset_ins_mode()
		} else {
			parse_error()
		}
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'select') {
		parse_error()
		while (true) {
			el = open_els.shift()
			if (el.name === 'select' && el.namespace === NS_HTML) {
				break
			}
		}
		reset_ins_mode()
		// spec says that this is the same as </select> but it doesn't say
		// to check scope first
		return
	}
	if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
		parse_error()
		if (!is_in_select_scope('select', NS_HTML)) {
			return
		}
		while (true) {
			el = open_els.shift()
			if (el.name === 'select' && el.namespace === NS_HTML) {
				break
			}
		}
		reset_ins_mode()
		// the select is closed; reprocess the token in the new mode
		process_token(t)
		return
	}
	if (t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) {
		ins_mode_in_head(t)
		return
	}
	if (t.type === TYPE_EOF) {
		ins_mode_in_body(t)
		return
	}
	// Anything else
	parse_error()
}
3493 // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// "In select in table" insertion mode for token t. Table-structure start and
// end tags (caption, table, tbody, tfoot, thead, tr, td, th) get dedicated
// handling that shifts entries off open_els and tests each for the HTML
// <select>; the end-tag branch first consults is_in_table_scope(t.name, NS_HTML).
// The ins_mode_in_select(t) call delegates to the plain "in select" mode.
3494 ins_mode_in_select_in_table = function (t) {
3496 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3499 el = open_els.shift()
3500 if (el.name === 'select' && el.namespace === NS_HTML) {
3508 if (t.type === TYPE_END_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3510 if (!is_in_table_scope(t.name, NS_HTML)) {
3514 el = open_els.shift()
3515 if (el.name === 'select' && el.namespace === NS_HTML) {
3524 ins_mode_in_select(t)
3527 // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// "In template" insertion mode for token t. Start tags replace the front entry
// of template_ins_modes (shift, then unshift) and set ins_mode to match:
//   caption/colgroup/tbody/tfoot/thead -> ins_mode_in_table
//   col                                -> ins_mode_in_column_group
//   tr                                 -> ins_mode_in_table_body
//   td/th                              -> ins_mode_in_row
//   any other start tag                -> ins_mode_in_body
// On EOF, template_tag_is_open() is checked; the code after it shifts open_els
// looking for the HTML <template>, calls clear_afe_to_marker() and drops the
// front template_ins_modes entry.
3528 ins_mode_in_template = function (t) {
3530 if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
3534 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
3538 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3539 template_ins_modes.shift()
3540 template_ins_modes.unshift(ins_mode_in_table)
3541 ins_mode = ins_mode_in_table
3545 if (t.type === TYPE_START_TAG && t.name === 'col') {
3546 template_ins_modes.shift()
3547 template_ins_modes.unshift(ins_mode_in_column_group)
3548 ins_mode = ins_mode_in_column_group
3552 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3553 template_ins_modes.shift()
3554 template_ins_modes.unshift(ins_mode_in_table_body)
3555 ins_mode = ins_mode_in_table_body
3559 if (t.type === TYPE_START_TAG && (t.name === 'td' || t.name === 'th')) {
3560 template_ins_modes.shift()
3561 template_ins_modes.unshift(ins_mode_in_row)
3562 ins_mode = ins_mode_in_row
3566 if (t.type === TYPE_START_TAG) {
3567 template_ins_modes.shift()
3568 template_ins_modes.unshift(ins_mode_in_body)
3569 ins_mode = ins_mode_in_body
3573 if (t.type === TYPE_END_TAG) {
3577 if (t.type === TYPE_EOF) {
3578 if (!template_tag_is_open()) {
3584 el = open_els.shift()
3585 if (el.name === 'template' && el.namespace === NS_HTML) {
3589 clear_afe_to_marker()
3590 template_ins_modes.shift()
3596 // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
// "After body" insertion mode for token t. A comment is appended as the last
// child of open_els[open_els.length - 1], i.e. the entry at the far end of the
// open-elements array. For </html>, flag_fragment_parsing is consulted and
// ins_mode moves to ins_mode_after_after_body. The last statement switches
// ins_mode back to ins_mode_in_body.
3597 ins_mode_after_body = function (t) {
3599 if (is_space_tok(t)) {
3603 if (t.type === TYPE_COMMENT) {
3604 first = open_els[open_els.length - 1]
3605 insert_comment(t, [first, first.children.length])
3608 if (t.type === TYPE_DOCTYPE) {
3612 if (t.type === TYPE_START_TAG && t.name === 'html') {
3616 if (t.type === TYPE_END_TAG && t.name === 'html') {
3617 if (flag_fragment_parsing) {
3621 ins_mode = ins_mode_after_after_body
3624 if (t.type === TYPE_EOF) {
3630 ins_mode = ins_mode_in_body
3634 // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
// "In frameset" insertion mode for token t. <frameset> and <frame> start tags
// are inserted with insert_html_element(); <frame> also acknowledges its
// self-closing flag. For </frameset>, open_els.length === 1 is the fragment
// case and returns early; ins_mode becomes ins_mode_after_frameset only when
// not fragment-parsing and open_els[0] is no longer a frameset.
3635 ins_mode_in_frameset = function (t) {
3636 if (is_space_tok(t)) {
3640 if (t.type === TYPE_COMMENT) {
3644 if (t.type === TYPE_DOCTYPE) {
3648 if (t.type === TYPE_START_TAG && t.name === 'html') {
3652 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
3653 insert_html_element(t)
3656 if (t.type === TYPE_END_TAG && t.name === 'frameset') {
3657 if (open_els.length === 1) {
3659 return // fragment case
3662 if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
3663 ins_mode = ins_mode_after_frameset
3667 if (t.type === TYPE_START_TAG && t.name === 'frame') {
3668 insert_html_element(t)
3670 t.acknowledge_self_closing()
3673 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3677 if (t.type === TYPE_EOF) {
3678 if (open_els.length !== 1) {
3688 // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
// "After frameset" insertion mode for token t. Branches on whitespace,
// comment, doctype, <html>, </html>, <noframes> and EOF; </html> switches
// ins_mode to ins_mode_after_after_frameset.
3689 ins_mode_after_frameset = function (t) {
3690 if (is_space_tok(t)) {
3694 if (t.type === TYPE_COMMENT) {
3698 if (t.type === TYPE_DOCTYPE) {
3702 if (t.type === TYPE_START_TAG && t.name === 'html') {
3706 if (t.type === TYPE_END_TAG && t.name === 'html') {
3707 ins_mode = ins_mode_after_after_frameset
3710 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3714 if (t.type === TYPE_EOF) {
3722 // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
// "After after body" insertion mode for token t. Comments are appended to doc
// itself (insert position doc.children.length). Doctype, whitespace and <html>
// share a single branch. The statement after the EOF check switches ins_mode
// back to ins_mode_in_body.
3723 ins_mode_after_after_body = function (t) {
3724 if (t.type === TYPE_COMMENT) {
3725 insert_comment(t, [doc, doc.children.length])
3728 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3732 if (t.type === TYPE_EOF) {
3738 ins_mode = ins_mode_in_body
3742 // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
// "After after frameset" insertion mode for token t. Comments are appended to
// doc itself (insert position doc.children.length). Doctype, whitespace and
// <html> share a single branch; EOF and <noframes> have their own branches.
3743 ins_mode_after_after_frameset = function (t) {
3744 if (t.type === TYPE_COMMENT) {
3745 insert_comment(t, [doc, doc.children.length])
3748 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3752 if (t.type === TYPE_EOF) {
3756 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3765 // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
// Helper for foreign-content parsing: walks t.attrs_a and tests an attribute
// entry's name (a[0]) against 'color', 'face' and 'size'. in_foreign_content
// uses it to decide whether a <font> start tag is treated as an HTML tag.
3766 has_color_face_or_size = function (t) {
3768 for (i = 0; i < t.attrs_a.length; ++i) {
3770 if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
// Hook for the end of a foreign (SVG) script element. Called by
// in_foreign_content_other_start for a self-closing <script>, and by
// in_foreign_content when </script> closes an SVG script that is open_els[0].
3776 in_foreign_content_end_script = function () {
// "Any other start tag" handling for foreign content. The adjusted current
// node's namespace selects the fix-ups applied to token t:
//   MathML -> adjust_mathml_attributes(t)
//   SVG    -> t.name remapped through svg_name_fixes (when an entry exists),
//             then adjust_svg_attributes(t)
// adjust_foreign_attributes(t) runs in every case, and the element is inserted
// in acn.namespace. A self-closing tag is acknowledged; a self-closing script
// additionally runs in_foreign_content_end_script().
3780 in_foreign_content_other_start = function (t) {
3782 acn = adjusted_current_node()
3783 if (acn.namespace === NS_MATHML) {
3784 adjust_mathml_attributes(t)
3786 if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra perens because Coffeescript did
3787 t.name = svg_name_fixes[t.name]
3789 if (acn.namespace === NS_SVG) {
3790 adjust_svg_attributes(t)
3792 adjust_foreign_attributes(t)
3793 insert_foreign_element(t, acn.namespace)
3794 if (t.flag('self-closing')) {
3795 if (t.name === 'script') {
3796 t.acknowledge_self_closing()
3797 in_foreign_content_end_script()
3801 t.acknowledge_self_closing()
// Token dispatcher used while parsing foreign (MathML/SVG) content.
//  - A text token that is exactly NUL is replaced by a U+FFFD character.
//  - Text that is not whitespace clears flag_frameset_ok.
//  - The long list of HTML start tags (plus <font> carrying color/face/size)
//    is the set that breaks out of foreign content. When fragment-parsing they
//    are treated like any other start tag; otherwise a loop runs until
//    open_els[0] is a MathML text integration point, an HTML integration point
//    or an HTML-namespace element.
//  - Remaining start tags go to in_foreign_content_other_start(t).
//  - End tags are compared case-insensitively (node.name.toLowerCase()) while
//    walking open elements; reaching an HTML-namespace node hands the token to
//    the current HTML insertion mode via ins_mode(t).
3805 in_foreign_content = function (t) {
3807 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3809 insert_character(new_character_token("\ufffd"))
3812 if (is_space_tok(t)) {
3816 if (t.type === TYPE_TEXT) {
3817 flag_frameset_ok = false
3821 if (t.type === TYPE_COMMENT) {
3825 if (t.type === TYPE_DOCTYPE) {
3829 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
3831 if (flag_fragment_parsing) {
3832 in_foreign_content_other_start(t)
3835 while (true) { // is this safe?
3837 if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
3844 if (t.type === TYPE_START_TAG) {
3845 in_foreign_content_other_start(t)
3848 if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
3849 in_foreign_content_end_script()
3852 if (t.type === TYPE_END_TAG) {
3855 if (node.name.toLowerCase() !== t.name) {
3859 if (node === open_els[open_els.length - 1]) {
3862 if (node.name.toLowerCase() === t.name) {
3864 el = open_els.shift()
3872 if (node.namespace === NS_HTML) {
3876 ins_mode(t) // explicitly call HTML insertion mode
3881 // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
// Data state: consumes one character from txt (advancing cur). Branches either
// return a text node built from parse_character_reference(), hand off to
// tok_state_tag_open, return the character itself as a text node, or return
// new_eof_token() at end of input.
3882 tok_state_data = function () {
3884 switch (c = txt.charAt(cur++)) {
3886 return new_text_node(parse_character_reference())
3889 tok_state = tok_state_tag_open
3893 return new_text_node(c)
3896 return new_eof_token()
3899 return new_text_node(c)
3904 // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3905 // not needed: tok_state_character_reference_in_data = function () {
3906 // just call parse_character_reference()
3908 // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
// RCDATA state: consumes one character. Branches return a parsed character
// reference as a text node, hand off to tok_state_rcdata_less_than_sign, emit
// U+FFFD, emit EOF, or emit the character itself as a character token.
3909 tok_state_rcdata = function () {
3911 switch (c = txt.charAt(cur++)) {
3913 return new_text_node(parse_character_reference())
3916 tok_state = tok_state_rcdata_less_than_sign
3920 return new_character_token("\ufffd")
3923 return new_eof_token()
3926 return new_character_token(c)
3931 // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3932 // not needed: tok_state_character_reference_in_rcdata = function () {
3933 // just call parse_character_reference()
3935 // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
// RAWTEXT state: consumes one character. Branches hand off to
// tok_state_rawtext_less_than_sign, emit U+FFFD, emit EOF, or emit the
// character itself as a character token.
3936 tok_state_rawtext = function () {
3938 switch (c = txt.charAt(cur++)) {
3940 tok_state = tok_state_rawtext_less_than_sign
3944 return new_character_token("\ufffd")
3947 return new_eof_token()
3950 return new_character_token(c)
3955 // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
// Script data state: consumes one character. Branches hand off to
// tok_state_script_data_less_than_sign, emit U+FFFD, emit EOF, or emit the
// character itself as a character token.
3956 tok_state_script_data = function () {
3958 switch (c = txt.charAt(cur++)) {
3960 tok_state = tok_state_script_data_less_than_sign
3964 return new_character_token("\ufffd")
3967 return new_eof_token()
3970 return new_character_token(c)
3975 // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
// PLAINTEXT state: consumes one character and emits U+FFFD, EOF, or the
// character itself; no branch here assigns tok_state.
3976 tok_state_plaintext = function () {
3978 switch (c = txt.charAt(cur++)) {
3981 return new_character_token("\ufffd")
3984 return new_eof_token()
3987 return new_character_token(c)
3992 // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
// Tag open state (the character after '<'). Branches switch to the markup
// declaration open or end tag open states; an ASCII letter starts a new
// start-tag token in tok_cur_tag (uppercase is lowercased) and moves to the
// tag name state; one branch starts a bogus comment seeded with '?'.
// Otherwise the tokenizer returns to the data state, un-consumes the
// character, and emits '<' as text.
3993 tok_state_tag_open = function () {
3995 c = txt.charAt(cur++)
3997 tok_state = tok_state_markup_declaration_open
4001 tok_state = tok_state_end_tag_open
4004 if (is_uc_alpha(c)) {
4005 tok_cur_tag = new_open_tag(c.toLowerCase())
4006 tok_state = tok_state_tag_name
4009 if (is_lc_alpha(c)) {
4010 tok_cur_tag = new_open_tag(c)
4011 tok_state = tok_state_tag_name
4016 tok_cur_tag = new_comment_token('?') // FIXME right?
4017 tok_state = tok_state_bogus_comment
4022 tok_state = tok_state_data
4023 cur -= 1 // we didn't parse/handle the char after <
4024 return new_text_node('<')
4027 // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
// End tag open state (the character after '</'). An ASCII letter starts a new
// end-tag token in tok_cur_tag (uppercase is lowercased) and moves to the tag
// name state. At EOF the text '</' is emitted and the tokenizer returns to the
// data state. Any other character seeds a comment token and enters the bogus
// comment state.
4028 tok_state_end_tag_open = function () {
4030 c = txt.charAt(cur++)
4031 if (is_uc_alpha(c)) {
4032 tok_cur_tag = new_end_tag(c.toLowerCase())
4033 tok_state = tok_state_tag_name
4036 if (is_lc_alpha(c)) {
4037 tok_cur_tag = new_end_tag(c)
4038 tok_state = tok_state_tag_name
4043 tok_state = tok_state_data
4046 if (c === '') { // EOF
4048 tok_state = tok_state_data
4049 return new_text_node('</')
4053 tok_cur_tag = new_comment_token(c)
4054 tok_state = tok_state_bogus_comment
4058 // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
// Tag name state: consumes one character. Branches move on to the before
// attribute name, self-closing start tag or data states; otherwise the
// character is appended to tok_cur_tag.name (U+FFFD on one branch, uppercase
// letters lowercased).
4059 tok_state_tag_name = function () {
4061 switch (c = txt.charAt(cur++)) {
4066 tok_state = tok_state_before_attribute_name
4069 tok_state = tok_state_self_closing_start_tag
4072 tok_state = tok_state_data
4079 tok_cur_tag.name += "\ufffd"
4083 tok_state = tok_state_data
4086 if (is_uc_alpha(c)) {
4087 tok_cur_tag.name += c.toLowerCase()
4089 tok_cur_tag.name += c
4095 // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
// RCDATA less-than sign state: one branch clears temporary_buffer and moves to
// the RCDATA end tag open state; otherwise the tokenizer returns to RCDATA,
// un-consumes the character and emits '<'.
4096 tok_state_rcdata_less_than_sign = function () {
4098 c = txt.charAt(cur++)
4100 temporary_buffer = ''
4101 tok_state = tok_state_rcdata_end_tag_open
4105 tok_state = tok_state_rcdata
4106 cur -= 1 // reconsume the input character
4107 return new_character_token('<')
4110 // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
// RCDATA end tag open state: an ASCII letter starts an end-tag token (name
// lowercased), records the character as typed in temporary_buffer, and moves
// to the RCDATA end tag name state. Otherwise the tokenizer returns to RCDATA,
// un-consumes the character and emits '</' as one character token.
4111 tok_state_rcdata_end_tag_open = function () {
4113 c = txt.charAt(cur++)
4114 if (is_uc_alpha(c)) {
4115 tok_cur_tag = new_end_tag(c.toLowerCase())
4116 temporary_buffer += c
4117 tok_state = tok_state_rcdata_end_tag_name
4120 if (is_lc_alpha(c)) {
4121 tok_cur_tag = new_end_tag(c)
4122 temporary_buffer += c
4123 tok_state = tok_state_rcdata_end_tag_name
4127 tok_state = tok_state_rcdata
4128 cur -= 1 // reconsume the input character
4129 return new_character_token("</") // fixfull separate these
4132 // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
// Returns true when t is an end tag whose name equals the name of the current
// node (open_els[0]). Namespace is not compared.
4133 is_appropriate_end_tag = function (t) {
4134 // fixfull: this assumes that open_els[0].name is "the tag name of the last
4135 // start tag to have been emitted from this tokenizer"
4136 return t.type === TYPE_END_TAG && t.name === open_els[0].name
4139 // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
// RCDATA end tag name state. Three branches (the first is whitespace) act only
// when tok_cur_tag is an appropriate end tag, moving to the before attribute
// name, self-closing start tag and data states respectively. Letters extend
// tok_cur_tag.name (lowercased) and temporary_buffer (as typed). Anything else
// returns to RCDATA, un-consumes the character and emits '</' plus the
// buffered characters as text.
4140 tok_state_rcdata_end_tag_name = function () {
4142 c = txt.charAt(cur++)
4143 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4144 if (is_appropriate_end_tag(tok_cur_tag)) {
4145 tok_state = tok_state_before_attribute_name
4148 // else fall through to "Anything else"
4151 if (is_appropriate_end_tag(tok_cur_tag)) {
4152 tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
4155 // else fall through to "Anything else"
4158 if (is_appropriate_end_tag(tok_cur_tag)) {
4159 tok_state = tok_state_data
4162 // else fall through to "Anything else"
4164 if (is_uc_alpha(c)) {
4165 tok_cur_tag.name += c.toLowerCase()
4166 temporary_buffer += c
4169 if (is_lc_alpha(c)) {
4170 tok_cur_tag.name += c
4171 temporary_buffer += c
4175 tok_state = tok_state_rcdata
4176 cur -= 1 // reconsume the input character
4177 return new_character_token('</' + temporary_buffer) // fixfull separate these
4180 // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
// RAWTEXT less-than sign state: one branch clears temporary_buffer and moves
// to the RAWTEXT end tag open state; otherwise the tokenizer returns to
// RAWTEXT, un-consumes the character and emits '<'.
4181 tok_state_rawtext_less_than_sign = function () {
4183 c = txt.charAt(cur++)
4185 temporary_buffer = ''
4186 tok_state = tok_state_rawtext_end_tag_open
4190 tok_state = tok_state_rawtext
4191 cur -= 1 // reconsume the input character
4192 return new_character_token('<')
4195 // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
// RAWTEXT end tag open state: an ASCII letter starts an end-tag token (name
// lowercased), records the character as typed in temporary_buffer, and moves
// to the RAWTEXT end tag name state. Otherwise the tokenizer returns to
// RAWTEXT, un-consumes the character and emits '</'.
4196 tok_state_rawtext_end_tag_open = function () {
4197 c = txt.charAt(cur++)
4198 if (is_uc_alpha(c)) {
4199 tok_cur_tag = new_end_tag(c.toLowerCase())
4200 temporary_buffer += c
4201 tok_state = tok_state_rawtext_end_tag_name
4204 if (is_lc_alpha(c)) {
4205 tok_cur_tag = new_end_tag(c)
4206 temporary_buffer += c
4207 tok_state = tok_state_rawtext_end_tag_name
4211 tok_state = tok_state_rawtext
4212 cur -= 1 // reconsume the input character
4213 return new_character_token("</") // fixfull separate these
4216 // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
// RAWTEXT end tag name state. Three branches (the first is whitespace) act
// only when tok_cur_tag is an appropriate end tag, moving to the before
// attribute name, self-closing start tag and data states respectively. Letters
// extend tok_cur_tag.name (lowercased) and temporary_buffer (as typed).
// Anything else returns to RAWTEXT, un-consumes the character and emits '</'
// plus the buffered characters as text.
4217 tok_state_rawtext_end_tag_name = function () {
4219 c = txt.charAt(cur++)
4220 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4221 if (is_appropriate_end_tag(tok_cur_tag)) {
4222 tok_state = tok_state_before_attribute_name
4225 // else fall through to "Anything else"
4228 if (is_appropriate_end_tag(tok_cur_tag)) {
4229 tok_state = tok_state_self_closing_start_tag
4232 // else fall through to "Anything else"
4235 if (is_appropriate_end_tag(tok_cur_tag)) {
4236 tok_state = tok_state_data
4239 // else fall through to "Anything else"
4241 if (is_uc_alpha(c)) {
4242 tok_cur_tag.name += c.toLowerCase()
4243 temporary_buffer += c
4246 if (is_lc_alpha(c)) {
4247 tok_cur_tag.name += c
4248 temporary_buffer += c
4252 tok_state = tok_state_rawtext
4253 cur -= 1 // reconsume the input character
4254 return new_character_token('</' + temporary_buffer) // fixfull separate these
4257 // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
// Script data less-than sign state: one branch clears temporary_buffer and
// moves to the script data end tag open state; another enters the escape start
// state and emits '<!'. Otherwise the tokenizer returns to script data,
// un-consumes the character and emits '<'.
4258 tok_state_script_data_less_than_sign = function () {
4260 c = txt.charAt(cur++)
4262 temporary_buffer = ''
4263 tok_state = tok_state_script_data_end_tag_open
4267 tok_state = tok_state_script_data_escape_start
4268 return new_character_token('<!') // fixfull split
4271 tok_state = tok_state_script_data
4272 cur -= 1 // reconsume
4273 return new_character_token('<')
4276 // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
// Script data end tag open state: an ASCII letter starts an end-tag token
// (name lowercased), records the character as typed in temporary_buffer, and
// moves to the script data end tag name state. Otherwise the tokenizer returns
// to script data, un-consumes the character and emits '</'.
4277 tok_state_script_data_end_tag_open = function () {
4279 c = txt.charAt(cur++)
4280 if (is_uc_alpha(c)) {
4281 tok_cur_tag = new_end_tag(c.toLowerCase())
4282 temporary_buffer += c
4283 tok_state = tok_state_script_data_end_tag_name
4286 if (is_lc_alpha(c)) {
4287 tok_cur_tag = new_end_tag(c)
4288 temporary_buffer += c
4289 tok_state = tok_state_script_data_end_tag_name
4293 tok_state = tok_state_script_data
4294 cur -= 1 // reconsume
4295 return new_character_token('</')
4298 // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
// Script data end tag name state. Three branches (the first is whitespace) act
// only when tok_cur_tag is an appropriate end tag, moving to the before
// attribute name, self-closing start tag and data states respectively. Letters
// extend tok_cur_tag.name (lowercased) and temporary_buffer (as typed).
// Anything else returns to script data, un-consumes the character and emits
// '</' plus the buffered characters as text.
4299 tok_state_script_data_end_tag_name = function () {
4301 c = txt.charAt(cur++)
4302 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4303 if (is_appropriate_end_tag(tok_cur_tag)) {
4304 tok_state = tok_state_before_attribute_name
4310 if (is_appropriate_end_tag(tok_cur_tag)) {
4311 tok_state = tok_state_self_closing_start_tag
4317 if (is_appropriate_end_tag(tok_cur_tag)) {
4318 tok_state = tok_state_data
4323 if (is_uc_alpha(c)) {
4324 tok_cur_tag.name += c.toLowerCase()
4325 temporary_buffer += c
4328 if (is_lc_alpha(c)) {
4329 tok_cur_tag.name += c
4330 temporary_buffer += c
4334 tok_state = tok_state_script_data
4335 cur -= 1 // reconsume
4336 return new_character_token("</" + temporary_buffer) // fixfull split
4339 // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
// Script data escape start state: one branch emits '-' and advances to the
// escape start dash state; otherwise the tokenizer returns to script data and
// un-consumes the character.
4340 tok_state_script_data_escape_start = function () {
4342 c = txt.charAt(cur++)
4344 tok_state = tok_state_script_data_escape_start_dash
4345 return new_character_token('-')
4348 tok_state = tok_state_script_data
4349 cur -= 1 // reconsume
4352 // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
// Script data escape start dash state: one branch emits '-' and enters the
// escaped dash dash state; otherwise the tokenizer returns to script data and
// un-consumes the character.
4353 tok_state_script_data_escape_start_dash = function () {
4355 c = txt.charAt(cur++)
4357 tok_state = tok_state_script_data_escaped_dash_dash
4358 return new_character_token('-')
4361 tok_state = tok_state_script_data
4362 cur -= 1 // reconsume
4365 // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
// Script data escaped state: branches emit '-' and enter the escaped dash
// state, or hand off to the escaped less-than sign state. NUL is emitted as
// U+FFFD; EOF returns to the data state and is reconsumed there; any other
// character is emitted as is.
4366 tok_state_script_data_escaped = function () {
4368 c = txt.charAt(cur++)
4370 tok_state = tok_state_script_data_escaped_dash
4371 return new_character_token('-')
4374 tok_state = tok_state_script_data_escaped_less_than_sign
4377 if (c === "\u0000") {
4379 return new_character_token("\ufffd")
4381 if (c === '') { // EOF
4382 tok_state = tok_state_data
4384 cur -= 1 // reconsume
4388 return new_character_token(c)
4391 // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
// Script data escaped dash state: branches emit '-' and enter the escaped dash
// dash state, or hand off to the escaped less-than sign state. NUL (as U+FFFD)
// and ordinary characters are emitted and return to the escaped state; EOF
// returns to the data state and is reconsumed there.
4392 tok_state_script_data_escaped_dash = function () {
4394 c = txt.charAt(cur++)
4396 tok_state = tok_state_script_data_escaped_dash_dash
4397 return new_character_token('-')
4400 tok_state = tok_state_script_data_escaped_less_than_sign
4403 if (c === "\u0000") {
4405 tok_state = tok_state_script_data_escaped
4406 return new_character_token("\ufffd")
4408 if (c === '') { // EOF
4409 tok_state = tok_state_data
4411 cur -= 1 // reconsume
4415 tok_state = tok_state_script_data_escaped
4416 return new_character_token(c)
4419 // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
// Script data escaped dash dash state: one branch emits '-' without changing
// state, one hands off to the escaped less-than sign state, and one emits '>'
// and returns to plain script data (the escape has ended). NUL (as U+FFFD) and
// ordinary characters are emitted and return to the escaped state; EOF returns
// to the data state and is reconsumed there.
4420 tok_state_script_data_escaped_dash_dash = function () {
4422 c = txt.charAt(cur++)
4424 return new_character_token('-')
4427 tok_state = tok_state_script_data_escaped_less_than_sign
4431 tok_state = tok_state_script_data
4432 return new_character_token('>')
4434 if (c === "\u0000") {
4436 tok_state = tok_state_script_data_escaped
4437 return new_character_token("\ufffd")
4439 if (c === '') { // EOF
4441 tok_state = tok_state_data
4442 cur -= 1 // reconsume
4446 tok_state = tok_state_script_data_escaped
4447 return new_character_token(c)
4450 // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
// Script data escaped less-than sign state: one branch clears temporary_buffer
// and moves to the escaped end tag open state. An ASCII letter seeds
// temporary_buffer with its lowercase form, enters the double escape start
// state and emits '<' plus the letter as typed. Otherwise the tokenizer
// returns to the escaped state, un-consumes the character and emits '<'.
4451 tok_state_script_data_escaped_less_than_sign = function () {
4453 c = txt.charAt(cur++)
4455 temporary_buffer = ''
4456 tok_state = tok_state_script_data_escaped_end_tag_open
4459 if (is_uc_alpha(c)) {
4460 temporary_buffer = c.toLowerCase() // yes, really
4461 tok_state = tok_state_script_data_double_escape_start
4462 return new_character_token("<" + c) // fixfull split
4464 if (is_lc_alpha(c)) {
4465 temporary_buffer = c
4466 tok_state = tok_state_script_data_double_escape_start
4467 return new_character_token("<" + c) // fixfull split
4470 tok_state = tok_state_script_data_escaped
4471 cur -= 1 // reconsume
4472 return new_character_token('<')
4475 // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
// Script data escaped end tag open state: an ASCII letter starts an end-tag
// token (name lowercased), records the character as typed in temporary_buffer,
// and moves to the escaped end tag name state. Otherwise the tokenizer returns
// to the escaped state, un-consumes the character and emits '</'.
4476 tok_state_script_data_escaped_end_tag_open = function () {
4478 c = txt.charAt(cur++)
4479 if (is_uc_alpha(c)) {
4480 tok_cur_tag = new_end_tag(c.toLowerCase())
4481 temporary_buffer += c
4482 tok_state = tok_state_script_data_escaped_end_tag_name
4485 if (is_lc_alpha(c)) {
4486 tok_cur_tag = new_end_tag(c)
4487 temporary_buffer += c
4488 tok_state = tok_state_script_data_escaped_end_tag_name
4492 tok_state = tok_state_script_data_escaped
4493 cur -= 1 // reconsume
4494 return new_character_token('</') // fixfull split
4497 // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
4498 tok_state_script_data_escaped_end_tag_name = function () {
4500 c = txt.charAt(cur++)
4501 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
4502 if (is_appropriate_end_tag(tok_cur_tag)) {
4503 tok_state = tok_state_before_attribute_name
4509 if (is_appropriate_end_tag(tok_cur_tag)) {
4510 tok_state = tok_state_self_closing_start_tag
4516 if (is_appropriate_end_tag(tok_cur_tag)) {
4517 tok_state = tok_state_data
4522 if (is_uc_alpha(c)) {
4523 tok_cur_tag.name += c.toLowerCase()
4524 temporary_buffer += c.toLowerCase()
4527 if (is_lc_alpha(c)) {
4528 tok_cur_tag.name += c
4529 temporary_buffer += c.toLowerCase()
4533 tok_state = tok_state_script_data_escaped
4534 cur -= 1 // reconsume
4535 return new_character_token("</" + temporary_buffer) // fixfull split
4538 // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
// Script data double escape start state. On whitespace, '/' or '>', the state
// becomes double escaped if temporary_buffer spells 'script', otherwise
// escaped; the character is emitted either way. Letters are appended to
// temporary_buffer in lowercase and emitted as typed. Anything else returns to
// the escaped state and un-consumes the character.
4539 tok_state_script_data_double_escape_start = function () {
4541 c = txt.charAt(cur++)
4542 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4543 if (temporary_buffer === 'script') {
4544 tok_state = tok_state_script_data_double_escaped
4546 tok_state = tok_state_script_data_escaped
4548 return new_character_token(c)
4550 if (is_uc_alpha(c)) {
4551 temporary_buffer += c.toLowerCase() // yes, really lowercase
4552 return new_character_token(c)
4554 if (is_lc_alpha(c)) {
4555 temporary_buffer += c
4556 return new_character_token(c)
4559 tok_state = tok_state_script_data_escaped
4560 cur -= 1 // reconsume
4563 // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
// Script data double escaped state: branches emit '-' (entering the double
// escaped dash state) or '<' (entering the double escaped less-than sign
// state). NUL is emitted as U+FFFD; EOF returns to the data state and is
// reconsumed there; any other character is emitted as is.
4564 tok_state_script_data_double_escaped = function () {
4566 c = txt.charAt(cur++)
4568 tok_state = tok_state_script_data_double_escaped_dash
4569 return new_character_token('-')
4572 tok_state = tok_state_script_data_double_escaped_less_than_sign
4573 return new_character_token('<')
4575 if (c === "\u0000") {
4577 return new_character_token("\ufffd")
4579 if (c === '') { // EOF
4581 tok_state = tok_state_data
4582 cur -= 1 // reconsume
4586 return new_character_token(c)
4589 // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
// Script data double escaped dash state: branches emit '-' (entering the dash
// dash state) or '<' (entering the double escaped less-than sign state). NUL
// (as U+FFFD) and ordinary characters are emitted and return to the double
// escaped state; EOF returns to the data state and is reconsumed there.
4590 tok_state_script_data_double_escaped_dash = function () {
4592 c = txt.charAt(cur++)
4594 tok_state = tok_state_script_data_double_escaped_dash_dash
4595 return new_character_token('-')
4598 tok_state = tok_state_script_data_double_escaped_less_than_sign
4599 return new_character_token('<')
4601 if (c === "\u0000") {
4603 tok_state = tok_state_script_data_double_escaped
4604 return new_character_token("\ufffd")
4606 if (c === '') { // EOF
4608 tok_state = tok_state_data
4609 cur -= 1 // reconsume
4613 tok_state = tok_state_script_data_double_escaped
4614 return new_character_token(c)
4617 // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
// Script data double escaped dash dash state: one branch emits '-' without
// changing state, one emits '<' and enters the double escaped less-than sign
// state, and one emits '>' and returns to plain script data. NUL (as U+FFFD)
// and ordinary characters are emitted and return to the double escaped state;
// EOF returns to the data state and is reconsumed there.
4618 tok_state_script_data_double_escaped_dash_dash = function () {
4620 c = txt.charAt(cur++)
4622 return new_character_token('-')
4625 tok_state = tok_state_script_data_double_escaped_less_than_sign
4626 return new_character_token('<')
4629 tok_state = tok_state_script_data
4630 return new_character_token('>')
4632 if (c === "\u0000") {
4634 tok_state = tok_state_script_data_double_escaped
4635 return new_character_token("\ufffd")
4637 if (c === '') { // EOF
4639 tok_state = tok_state_data
4640 cur -= 1 // reconsume
4644 tok_state = tok_state_script_data_double_escaped
4645 return new_character_token(c)
4648 // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
// Script data double escaped less-than sign state: one branch clears
// temporary_buffer, enters the double escape end state and emits '/';
// otherwise the tokenizer returns to the double escaped state and un-consumes
// the character.
4649 tok_state_script_data_double_escaped_less_than_sign = function () {
4651 c = txt.charAt(cur++)
4653 temporary_buffer = ''
4654 tok_state = tok_state_script_data_double_escape_end
4655 return new_character_token('/')
4658 tok_state = tok_state_script_data_double_escaped
4659 cur -= 1 // reconsume
4662 // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
// Script data double escape end state (mirror image of double escape start).
// On whitespace, '/' or '>', the state drops back to escaped if
// temporary_buffer spells 'script', otherwise stays double escaped; the
// character is emitted either way. Letters are appended to temporary_buffer in
// lowercase and emitted as typed. Anything else returns to the double escaped
// state and un-consumes the character.
4663 tok_state_script_data_double_escape_end = function () {
4665 c = txt.charAt(cur++)
4666 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4667 if (temporary_buffer === 'script') {
4668 tok_state = tok_state_script_data_escaped
4670 tok_state = tok_state_script_data_double_escaped
4672 return new_character_token(c)
4674 if (is_uc_alpha(c)) {
4675 temporary_buffer += c.toLowerCase() // yes, really lowercase
4676 return new_character_token(c)
4678 if (is_lc_alpha(c)) {
4679 temporary_buffer += c
4680 return new_character_token(c)
4683 tok_state = tok_state_script_data_double_escaped
4684 cur -= 1 // reconsume
4687 // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
// Before attribute name state: branches move to the self-closing start tag or
// data states, or choose the first character of a new attribute name in
// attr_name (U+FFFD on one branch, uppercase letters lowercased). When
// attr_name was set, a [name, ''] pair is pushed onto the FRONT of
// tok_cur_tag.attrs_a, so attrs_a[0] is always the attribute being built, and
// the attribute name state takes over.
4688 tok_state_before_attribute_name = function () {
4689 var attr_name, c, tmp
4691 switch (c = txt.charAt(cur++)) {
4699 tok_state = tok_state_self_closing_start_tag
4703 tok_state = tok_state_data
4710 attr_name = "\ufffd"
4721 tok_state = tok_state_data
4724 if (is_uc_alpha(c)) {
4725 attr_name = c.toLowerCase()
4730 if (attr_name != null) {
4731 tok_cur_tag.attrs_a.unshift([attr_name, ''])
4732 tok_state = tok_state_attribute_name
4737 // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
// Attribute name state: branches move to the after attribute name,
// self-closing start tag, before attribute value or data states; otherwise the
// character is appended to the name of the attribute under construction,
// tok_cur_tag.attrs_a[0][0] (U+FFFD on one branch, uppercase lowercased).
4738 tok_state_attribute_name = function () {
4740 switch (c = txt.charAt(cur++)) {
4745 tok_state = tok_state_after_attribute_name
4748 tok_state = tok_state_self_closing_start_tag
4751 tok_state = tok_state_before_attribute_value
4754 tok_state = tok_state_data
4761 tok_cur_tag.attrs_a[0][0] += "\ufffd"
4767 tok_cur_tag.attrs_a[0][0] += c
4771 tok_state = tok_state_data
4774 if (is_uc_alpha(c)) {
4775 tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
4777 tok_cur_tag.attrs_a[0][0] += c
4783 // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
// After attribute name state: whitespace has its own branch; other branches
// move to the self-closing start tag, before attribute value or data states.
// An uppercase letter, NUL (as U+FFFD) or any other character starts a NEW
// attribute at the front of tok_cur_tag.attrs_a and returns to the attribute
// name state. EOF returns to the data state and is reconsumed there. Quote,
// apostrophe and '<' are singled out and then handled like any other character.
4784 tok_state_after_attribute_name = function () {
4786 c = txt.charAt(cur++)
4787 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4791 tok_state = tok_state_self_closing_start_tag
4795 tok_state = tok_state_before_attribute_value
4799 tok_state = tok_state_data
4802 if (is_uc_alpha(c)) {
4803 tok_cur_tag.attrs_a.unshift([c.toLowerCase(), ''])
4804 tok_state = tok_state_attribute_name
4807 if (c === "\u0000") {
4809 tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
4810 tok_state = tok_state_attribute_name
4813 if (c === '') { // EOF
4815 tok_state = tok_state_data
4816 cur -= 1 // reconsume
4819 if (c === '"' || c === "'" || c === '<') {
4821 // fall through to Anything else
4824 tok_cur_tag.attrs_a.unshift([c, ''])
4825 tok_state = tok_state_attribute_name
4828 // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Before attribute value state: branches select the double-quoted,
// single-quoted or unquoted value states, or return to the data state. When a
// character itself begins an unquoted value it is appended to
// tok_cur_tag.attrs_a[0][1] (U+FFFD on one branch) before switching.
4829 tok_state_before_attribute_value = function () {
4831 switch (c = txt.charAt(cur++)) {
4839 tok_state = tok_state_attribute_value_double_quoted
4842 tok_state = tok_state_attribute_value_unquoted
4846 tok_state = tok_state_attribute_value_single_quoted
4850 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4851 tok_state = tok_state_attribute_value_unquoted
4855 tok_state = tok_state_data
4862 tok_state = tok_state_data
4865 tok_cur_tag.attrs_a[0][1] += c
4866 tok_state = tok_state_attribute_value_unquoted
4871 // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
// Attribute value (double-quoted) state: one branch ends the value (after
// attribute value quoted state), one returns to the data state; otherwise the
// value tok_cur_tag.attrs_a[0][1] grows by a character reference parsed with
// '"' as the extra allowed character, by U+FFFD, or by the character itself.
4872 tok_state_attribute_value_double_quoted = function () {
4874 switch (c = txt.charAt(cur++)) {
4876 tok_state = tok_state_after_attribute_value_quoted
4879 tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
4883 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4887 tok_state = tok_state_data
4890 tok_cur_tag.attrs_a[0][1] += c
4895 // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Attribute value (single-quoted) state: one branch ends the value (after
// attribute value quoted state), one returns to the data state; otherwise the
// value tok_cur_tag.attrs_a[0][1] grows by a character reference parsed with
// "'" as the extra allowed character, by U+FFFD, or by the character itself.
4896 tok_state_attribute_value_single_quoted = function () {
4898 switch (c = txt.charAt(cur++)) {
4900 tok_state = tok_state_after_attribute_value_quoted
4903 tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
4907 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4911 tok_state = tok_state_data
4914 tok_cur_tag.attrs_a[0][1] += c
4919 // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// Attribute value (unquoted) state: one branch ends the value and returns to
// the before attribute name state, two return to the data state; otherwise the
// value tok_cur_tag.attrs_a[0][1] grows by a character reference parsed with
// '>' as the extra allowed character, by U+FFFD, or by the character itself.
4920 tok_state_attribute_value_unquoted = function () {
4922 switch (c = txt.charAt(cur++)) {
4927 tok_state = tok_state_before_attribute_name
4930 tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
4933 tok_state = tok_state_data
4939 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4943 tok_state = tok_state_data
4946 // Parse Error if ', <, = or ` (backtick)
4947 tok_cur_tag.attrs_a[0][1] += c
4952 // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
// After attribute value (quoted) state: branches move to the before attribute
// name, self-closing start tag or data states. The final branch also goes to
// before attribute name but first un-consumes the character so that state can
// process it.
4953 tok_state_after_attribute_value_quoted = function () {
4955 switch (c = txt.charAt(cur++)) {
4960 tok_state = tok_state_before_attribute_name
4963 tok_state = tok_state_self_closing_start_tag
4966 tok_state = tok_state_data
4973 tok_state = tok_state_data
4977 tok_state = tok_state_before_attribute_name
4978 cur -= 1 // we didn't handle that char
4983 // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
// Self-closing start tag state: one branch sets the 'self-closing' flag on
// tok_cur_tag and returns to the data state; another returns to the data state
// and reconsumes; otherwise the character is reconsumed in the before
// attribute name state.
4984 tok_state_self_closing_start_tag = function () {
4986 c = txt.charAt(cur++)
4988 tok_cur_tag.flag('self-closing', true)
4989 tok_state = tok_state_data
4994 tok_state = tok_state_data
4995 cur -= 1 // reconsume
5000 tok_state = tok_state_before_attribute_name
5001 cur -= 1 // reconsume
5004 // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
5005 // WARNING: put a comment token in tok_cur_tag before setting this state
// Consumes input in bulk rather than one character at a time: finds the next
// '>' at or after cur and takes everything before it (or the rest of txt when
// there is none). NUL characters are replaced by U+FFFD, the text is appended
// to tok_cur_tag.text, and the tokenizer returns to the data state.
5006 tok_state_bogus_comment = function () {
5008 next_gt = txt.indexOf('>', cur)
5009 if (next_gt === -1) {
5010 val = txt.substr(cur)
5013 val = txt.substr(cur, next_gt - cur)
5016 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5017 tok_cur_tag.text += val
5018 tok_state = tok_state_data
5022 // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
// Markup declaration open state (after '<!'). Looks ahead in txt without
// consuming: '--' starts an empty comment token (comment start state); a
// case-insensitive 'doctype' enters the doctype state; '[CDATA[' enters the
// CDATA section state, but only when there is an adjusted current node outside
// the HTML namespace. Anything else becomes a bogus comment.
5023 tok_state_markup_declaration_open = function () {
5025 if (txt.substr(cur, 2) === '--') {
5027 tok_cur_tag = new_comment_token('')
5028 tok_state = tok_state_comment_start
5031 if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
5033 tok_state = tok_state_doctype
5036 acn = adjusted_current_node()
5037 if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
5039 tok_state = tok_state_cdata_section
5044 tok_cur_tag = new_comment_token('')
5045 tok_state = tok_state_bogus_comment
5048 // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
// Tokenizer state: consumes one character and switches on it. Visible
// outcomes: move to the comment-start-dash state, move to the comment state,
// return to the data state (un-consuming the character on one path), or
// append the character to tok_cur_tag.text and continue in the comment state.
5049 tok_state_comment_start = function () {
5051 switch (c = txt.charAt(cur++)) {
5053 tok_state = tok_state_comment_start_dash
5057 tok_state = tok_state_comment
// NOTE(review): this path returns a U+FFFD character token, whereas the other
// comment states append "\ufffd" to tok_cur_tag.text -- confirm against the
// spec whether the replacement character should go into the comment instead.
5058 return new_character_token("\ufffd")
5062 tok_state = tok_state_data
5067 tok_state = tok_state_data
5068 cur -= 1 // reconsume
5072 tok_cur_tag.text += c
5073 tok_state = tok_state_comment
5078 // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
// Tokenizer state: consumes one character and switches on it. Paths that
// continue the comment write a "-" into tok_cur_tag.text first (followed by
// U+FFFD or by the character itself) and go to the comment state. Other
// paths go to the comment-end state or back to the data state (un-consuming
// the character on one of them).
5079 tok_state_comment_start_dash = function () {
5081 switch (c = txt.charAt(cur++)) {
5083 tok_state = tok_state_comment_end
5087 tok_cur_tag.text += "-\ufffd"
5088 tok_state = tok_state_comment
5092 tok_state = tok_state_data
5097 tok_state = tok_state_data
5098 cur -= 1 // reconsume
5102 tok_cur_tag.text += "-" + c
5103 tok_state = tok_state_comment
5108 // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
// Tokenizer state for the body of a comment: consumes one character and
// either moves to the comment-end-dash state, appends U+FFFD or the character
// itself to tok_cur_tag.text, or un-consumes it and returns to the data state.
5109 tok_state_comment = function () {
5111 switch (c = txt.charAt(cur++)) {
5113 tok_state = tok_state_comment_end_dash
5117 tok_cur_tag.text += "\ufffd"
5121 tok_state = tok_state_data
5122 cur -= 1 // reconsume
5126 tok_cur_tag.text += c
5131 // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
// Tokenizer state: consumes one character and switches on it. Paths that
// fall back to the comment state first write a "-" into tok_cur_tag.text
// (followed by U+FFFD or by the character itself). The other paths move to
// the comment-end state, or un-consume the character and return to the data
// state.
5132 tok_state_comment_end_dash = function () {
5134 switch (c = txt.charAt(cur++)) {
5136 tok_state = tok_state_comment_end
5140 tok_cur_tag.text += "-\ufffd"
5141 tok_state = tok_state_comment
5145 tok_state = tok_state_data
5146 cur -= 1 // reconsume
5150 tok_cur_tag.text += "-" + c
5151 tok_state = tok_state_comment
5156 // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
// Tokenizer state: consumes one character and switches on it. Paths that
// fall back to the comment state first write "--" into tok_cur_tag.text
// (followed by U+FFFD or by the character itself); one path appends a single
// '-' without changing state. The other paths move to the comment-end-bang
// state or return to the data state (un-consuming the character on one path).
5157 tok_state_comment_end = function () {
5159 switch (c = txt.charAt(cur++)) {
5161 tok_state = tok_state_data
5166 tok_cur_tag.text += "--\ufffd"
5167 tok_state = tok_state_comment
5171 tok_state = tok_state_comment_end_bang
5175 tok_cur_tag.text += '-'
5179 tok_state = tok_state_data
5180 cur -= 1 // reconsume
5185 tok_cur_tag.text += "--" + c
5186 tok_state = tok_state_comment
5191 // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
// Tokenizer state: consumes one character and switches on it. Paths that
// continue the comment write "--!" into tok_cur_tag.text (followed by U+FFFD
// or by the character itself). The other paths return to the data state
// (un-consuming the character on one of them).
5192 tok_state_comment_end_bang = function () {
5194 switch (c = txt.charAt(cur++)) {
// NOTE(review): this path goes on to the comment-end-dash state, so it is
// presumably the '-' branch; the spec appends only "--!" there, while this
// also appends c -- confirm the extra character is intended.
5196 tok_cur_tag.text += "--!" + c
5197 tok_state = tok_state_comment_end_dash
5200 tok_state = tok_state_data
5205 tok_cur_tag.text += "--!\ufffd"
5206 tok_state = tok_state_comment
5210 tok_state = tok_state_data
5211 cur -= 1 // reconsume
5215 tok_cur_tag.text += "--!" + c
5216 tok_state = tok_state_comment
5221 // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
// Tokenizer state: consumes one character and switches on it. Two paths lead
// to the before-DOCTYPE-name state (one of them un-consumes the character
// first). The remaining path returns to the data state, builds an empty
// doctype token flagged 'force-quirks' in el, and un-consumes the character.
5222 tok_state_doctype = function () {
5224 switch (c = txt.charAt(cur++)) {
5229 tok_state = tok_state_before_doctype_name
5233 tok_state = tok_state_data
5234 el = new_doctype_token('')
5235 el.flag('force-quirks', true)
5236 cur -= 1 // reconsume
5241 tok_state = tok_state_before_doctype_name
5242 cur -= 1 // reconsume
5247 // 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
// Tokenizer state: consumes one character and starts the doctype token.
//   tab / LF / FF / space -> whitespace branch
//   uppercase ASCII letter -> new doctype token named with the lowercased
//                             letter, doctype-name state
//   U+0000                -> new doctype token named U+FFFD, doctype-name state
//   EOF                   -> empty doctype token flagged 'force-quirks', data
//                            state, character un-consumed
//   anything else         -> new doctype token named with the character,
//                            doctype-name state
// One further path builds an empty 'force-quirks' doctype token and returns
// to the data state without un-consuming.
5248 tok_state_before_doctype_name = function () {
5250 c = txt.charAt(cur++)
5251 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5254 if (is_uc_alpha(c)) {
5255 tok_cur_tag = new_doctype_token(c.toLowerCase())
5256 tok_state = tok_state_doctype_name
5259 if (c === "\u0000") {
5261 tok_cur_tag = new_doctype_token("\ufffd")
5262 tok_state = tok_state_doctype_name
5267 el = new_doctype_token('')
5268 el.flag('force-quirks', true)
5269 tok_state = tok_state_data
5272 if (c === '') { // EOF
5274 tok_state = tok_state_data
5275 el = new_doctype_token('')
5276 el.flag('force-quirks', true)
5277 cur -= 1 // reconsume
5281 tok_cur_tag = new_doctype_token(c)
5282 tok_state = tok_state_doctype_name
5286 // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
// Tokenizer state: consumes one character and extends tok_cur_tag.name.
//   tab / LF / FF / space  -> after-DOCTYPE-name state
//   uppercase ASCII letter -> lowercased letter appended to the name
//   U+0000                 -> U+FFFD appended to the name
//   EOF                    -> 'force-quirks' set, data state, character
//                             un-consumed
//   anything else          -> character appended to the name
// One further path returns to the data state without touching the name.
5287 tok_state_doctype_name = function () {
5289 c = txt.charAt(cur++)
5290 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5291 tok_state = tok_state_after_doctype_name
5295 tok_state = tok_state_data
5298 if (is_uc_alpha(c)) {
5299 tok_cur_tag.name += c.toLowerCase()
5302 if (c === "\u0000") {
5304 tok_cur_tag.name += "\ufffd"
5307 if (c === '') { // EOF
5309 tok_state = tok_state_data
5310 tok_cur_tag.flag('force-quirks', true)
5311 cur -= 1 // reconsume
5315 tok_cur_tag.name += c
5319 // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
// Tokenizer state: consumes one character, then looks for the PUBLIC or
// SYSTEM keyword (case-insensitive). EOF sets 'force-quirks', returns to the
// data state and un-consumes. The final two lines set 'force-quirks' and
// enter the bogus-DOCTYPE state (presumably when neither keyword matched).
5320 tok_state_after_doctype_name = function () {
5322 c = txt.charAt(cur++)
5323 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5327 tok_state = tok_state_data
5330 if (c === '') { // EOF
5332 tok_state = tok_state_data
5333 tok_cur_tag.flag('force-quirks', true)
5334 cur -= 1 // reconsume
// c has already been consumed, so a keyword would start at cur - 1
5338 if (txt.substr(cur - 1, 6).toLowerCase() === 'public') {
5340 tok_state = tok_state_after_doctype_public_keyword
5343 if (txt.substr(cur - 1, 6).toLowerCase() === 'system') {
5345 tok_state = tok_state_after_doctype_system_keyword
5349 tok_cur_tag.flag('force-quirks', true)
5350 tok_state = tok_state_bogus_doctype
5354 // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
// Tokenizer state: consumes one character. Whitespace moves to the
// before-DOCTYPE-public-identifier state. Two paths reset
// tok_cur_tag.public_identifier to '' and enter the double- or single-quoted
// identifier state (presumably on the matching quote character). Every other
// visible path sets 'force-quirks' and goes to the data state (un-consuming
// at EOF) or to the bogus-DOCTYPE state.
5355 tok_state_after_doctype_public_keyword = function () {
5357 c = txt.charAt(cur++)
5358 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5359 tok_state = tok_state_before_doctype_public_identifier
5364 tok_cur_tag.public_identifier = ''
5365 tok_state = tok_state_doctype_public_identifier_double_quoted
5370 tok_cur_tag.public_identifier = ''
5371 tok_state = tok_state_doctype_public_identifier_single_quoted
5376 tok_cur_tag.flag('force-quirks', true)
5377 tok_state = tok_state_data
5380 if (c === '') { // EOF
5382 tok_state = tok_state_data
5383 tok_cur_tag.flag('force-quirks', true)
5384 cur -= 1 // reconsume
5389 tok_cur_tag.flag('force-quirks', true)
5390 tok_state = tok_state_bogus_doctype
5394 // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
// Tokenizer state: consumes one character. Two paths reset
// tok_cur_tag.public_identifier to '' and enter the double- or single-quoted
// identifier state (presumably on the matching quote character). Every other
// visible non-whitespace path sets 'force-quirks' and goes to the data state
// (un-consuming at EOF) or to the bogus-DOCTYPE state.
5395 tok_state_before_doctype_public_identifier = function () {
5397 c = txt.charAt(cur++)
5398 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5403 tok_cur_tag.public_identifier = ''
5404 tok_state = tok_state_doctype_public_identifier_double_quoted
5409 tok_cur_tag.public_identifier = ''
5410 tok_state = tok_state_doctype_public_identifier_single_quoted
5415 tok_cur_tag.flag('force-quirks', true)
5416 tok_state = tok_state_data
5419 if (c === '') { // EOF
5421 tok_state = tok_state_data
5422 tok_cur_tag.flag('force-quirks', true)
5423 cur -= 1 // reconsume
5428 tok_cur_tag.flag('force-quirks', true)
5429 tok_state = tok_state_bogus_doctype
5434 // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
// Tokenizer state: consumes one character and accumulates
// tok_cur_tag.public_identifier. U+0000 is stored as U+FFFD and ordinary
// characters are appended as-is. One path moves on to the
// after-DOCTYPE-public-identifier state (presumably on the closing quote).
// Two paths set 'force-quirks' and return to the data state, the EOF one
// un-consuming the character.
5435 tok_state_doctype_public_identifier_double_quoted = function () {
5437 c = txt.charAt(cur++)
5439 tok_state = tok_state_after_doctype_public_identifier
5442 if (c === "\u0000") {
5444 tok_cur_tag.public_identifier += "\ufffd"
5449 tok_cur_tag.flag('force-quirks', true)
5450 tok_state = tok_state_data
5453 if (c === '') { // EOF
5455 tok_state = tok_state_data
5456 tok_cur_tag.flag('force-quirks', true)
5457 cur -= 1 // reconsume
5461 tok_cur_tag.public_identifier += c
5465 // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
// Tokenizer state: consumes one character and accumulates
// tok_cur_tag.public_identifier. U+0000 is stored as U+FFFD and ordinary
// characters are appended as-is. One path moves on to the
// after-DOCTYPE-public-identifier state (presumably on the closing quote).
// Two paths set 'force-quirks' and return to the data state, the EOF one
// un-consuming the character.
5466 tok_state_doctype_public_identifier_single_quoted = function () {
5468 c = txt.charAt(cur++)
5470 tok_state = tok_state_after_doctype_public_identifier
5473 if (c === "\u0000") {
5475 tok_cur_tag.public_identifier += "\ufffd"
5480 tok_cur_tag.flag('force-quirks', true)
5481 tok_state = tok_state_data
5484 if (c === '') { // EOF
5486 tok_state = tok_state_data
5487 tok_cur_tag.flag('force-quirks', true)
5488 cur -= 1 // reconsume
5492 tok_cur_tag.public_identifier += c
5496 // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
// Tokenizer state: consumes one character. Whitespace moves to the
// between-public-and-system-identifiers state. Two paths reset
// tok_cur_tag.system_identifier to '' and enter the double- or single-quoted
// system identifier state (presumably on the matching quote character). One
// path returns to the data state without setting 'force-quirks'. EOF sets
// 'force-quirks', returns to the data state and un-consumes. The final two
// lines set 'force-quirks' and enter the bogus-DOCTYPE state.
5497 tok_state_after_doctype_public_identifier = function () {
5499 c = txt.charAt(cur++)
5500 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5501 tok_state = tok_state_between_doctype_public_and_system_identifiers
5505 tok_state = tok_state_data
5510 tok_cur_tag.system_identifier = ''
5511 tok_state = tok_state_doctype_system_identifier_double_quoted
5516 tok_cur_tag.system_identifier = ''
5517 tok_state = tok_state_doctype_system_identifier_single_quoted
5520 if (c === '') { // EOF
5522 tok_state = tok_state_data
5523 tok_cur_tag.flag('force-quirks', true)
5524 cur -= 1 // reconsume
5529 tok_cur_tag.flag('force-quirks', true)
5530 tok_state = tok_state_bogus_doctype
5534 // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
// Tokenizer state: consumes one character. Two paths reset
// tok_cur_tag.system_identifier to '' and enter the double- or single-quoted
// system identifier state (presumably on the matching quote character). One
// path returns to the data state without setting 'force-quirks'. EOF sets
// 'force-quirks', returns to the data state and un-consumes. The final two
// lines set 'force-quirks' and enter the bogus-DOCTYPE state.
5535 tok_state_between_doctype_public_and_system_identifiers = function () {
5537 c = txt.charAt(cur++)
5538 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5542 tok_state = tok_state_data
5547 tok_cur_tag.system_identifier = ''
5548 tok_state = tok_state_doctype_system_identifier_double_quoted
5553 tok_cur_tag.system_identifier = ''
5554 tok_state = tok_state_doctype_system_identifier_single_quoted
5557 if (c === '') { // EOF
5559 tok_state = tok_state_data
5560 tok_cur_tag.flag('force-quirks', true)
5561 cur -= 1 // reconsume
5566 tok_cur_tag.flag('force-quirks', true)
5567 tok_state = tok_state_bogus_doctype
5571 // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
// Tokenizer state: consumes one character. Whitespace moves to the
// before-DOCTYPE-system-identifier state. Two paths reset
// tok_cur_tag.system_identifier to '' and enter the double- or single-quoted
// identifier state (presumably on the matching quote character). Every other
// visible path sets 'force-quirks' and goes to the data state (un-consuming
// at EOF) or to the bogus-DOCTYPE state.
5572 tok_state_after_doctype_system_keyword = function () {
5574 c = txt.charAt(cur++)
5575 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5576 tok_state = tok_state_before_doctype_system_identifier
5581 tok_cur_tag.system_identifier = ''
5582 tok_state = tok_state_doctype_system_identifier_double_quoted
5587 tok_cur_tag.system_identifier = ''
5588 tok_state = tok_state_doctype_system_identifier_single_quoted
5593 tok_cur_tag.flag('force-quirks', true)
5594 tok_state = tok_state_data
5597 if (c === '') { // EOF
5599 tok_state = tok_state_data
5600 tok_cur_tag.flag('force-quirks', true)
5601 cur -= 1 // reconsume
5606 tok_cur_tag.flag('force-quirks', true)
5607 tok_state = tok_state_bogus_doctype
5611 // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
// Tokenizer state: consumes one character. Two paths reset
// tok_cur_tag.system_identifier to '' and enter the double- or single-quoted
// identifier state (presumably on the matching quote character). Every other
// visible non-whitespace path sets 'force-quirks' and goes to the data state
// (un-consuming at EOF) or to the bogus-DOCTYPE state.
5612 tok_state_before_doctype_system_identifier = function () {
5614 c = txt.charAt(cur++)
5615 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5619 tok_cur_tag.system_identifier = ''
5620 tok_state = tok_state_doctype_system_identifier_double_quoted
5624 tok_cur_tag.system_identifier = ''
5625 tok_state = tok_state_doctype_system_identifier_single_quoted
5630 tok_cur_tag.flag('force-quirks', true)
5631 tok_state = tok_state_data
5634 if (c === '') { // EOF
5636 tok_state = tok_state_data
5637 tok_cur_tag.flag('force-quirks', true)
5638 cur -= 1 // reconsume
5643 tok_cur_tag.flag('force-quirks', true)
5644 tok_state = tok_state_bogus_doctype
5648 // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
// Tokenizer state: consumes one character and accumulates
// tok_cur_tag.system_identifier. U+0000 is stored as U+FFFD and ordinary
// characters are appended as-is. One path moves on to the
// after-DOCTYPE-system-identifier state (presumably on the closing quote).
// Two paths set 'force-quirks' and return to the data state, the EOF one
// un-consuming the character.
5649 tok_state_doctype_system_identifier_double_quoted = function () {
5651 c = txt.charAt(cur++)
5653 tok_state = tok_state_after_doctype_system_identifier
5656 if (c === "\u0000") {
5658 tok_cur_tag.system_identifier += "\ufffd"
5663 tok_cur_tag.flag('force-quirks', true)
5664 tok_state = tok_state_data
5667 if (c === '') { // EOF
5669 tok_state = tok_state_data
5670 tok_cur_tag.flag('force-quirks', true)
5671 cur -= 1 // reconsume
5675 tok_cur_tag.system_identifier += c
5679 // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
// Tokenizer state: consumes one character and accumulates
// tok_cur_tag.system_identifier. U+0000 is stored as U+FFFD and ordinary
// characters are appended as-is. One path moves on to the
// after-DOCTYPE-system-identifier state (presumably on the closing quote).
// Two paths set 'force-quirks' and return to the data state, the EOF one
// un-consuming the character.
5680 tok_state_doctype_system_identifier_single_quoted = function () {
5682 c = txt.charAt(cur++)
5684 tok_state = tok_state_after_doctype_system_identifier
5687 if (c === "\u0000") {
5689 tok_cur_tag.system_identifier += "\ufffd"
5694 tok_cur_tag.flag('force-quirks', true)
5695 tok_state = tok_state_data
5698 if (c === '') { // EOF
5700 tok_state = tok_state_data
5701 tok_cur_tag.flag('force-quirks', true)
5702 cur -= 1 // reconsume
5706 tok_cur_tag.system_identifier += c
5710 // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
// Tokenizer state: consumes one character. One path returns to the data
// state without setting 'force-quirks'. EOF sets 'force-quirks', returns to
// the data state and un-consumes. The last line enters the bogus-DOCTYPE
// state and, unlike the earlier DOCTYPE states, deliberately leaves
// 'force-quirks' unset.
5711 tok_state_after_doctype_system_identifier = function () {
5713 c = txt.charAt(cur++)
5714 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5718 tok_state = tok_state_data
5721 if (c === '') { // EOF
5723 tok_state = tok_state_data
5724 tok_cur_tag.flag('force-quirks', true)
5725 cur -= 1 // reconsume
5730 // do _not_ tok_cur_tag.flag 'force-quirks', true
5731 tok_state = tok_state_bogus_doctype
5735 // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
// Tokenizer state: consumes one character. Both visible paths return to the
// data state; the EOF path also un-consumes the character. Neither visible
// path modifies tok_cur_tag.
5736 tok_state_bogus_doctype = function () {
5738 c = txt.charAt(cur++)
5740 tok_state = tok_state_data
5743 if (c === '') { // EOF
5744 tok_state = tok_state_data
5745 cur -= 1 // reconsume
5752 // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
// Switches straight back to the data state, then takes the text from cur up
// to (not including) the next "]]>" -- or all remaining text when there is
// none -- replaces every U+0000 with U+FFFD, and returns it as a single
// character token when it is non-empty.
5753 tok_state_cdata_section = function () {
5755 tok_state = tok_state_data
// despite its name, next_gt holds the index of the "]]>" terminator here
5756 next_gt = txt.indexOf(']]>', cur)
5757 if (next_gt === -1) {
5758 val = txt.substr(cur)
5761 val = txt.substr(cur, next_gt - cur)
5764 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5765 if (val.length > 0) {
5766 return new_character_token(val) // fixfull split
5771 // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5772 // Don't set this as a state, just call it
5773 // returns a string (NOT a text node)
// Parameters:
//   allowed_char - optional; checked against null/undefined below
//                  (presumably defaulted there)
//   in_attr      - optional; checked against null/undefined below
//                  (presumably true when called for an attribute value --
//                  confirm at call sites)
// Reads txt starting at cur. Handles numeric references (decimal or
// hexadecimal) and named references, including legacy names that lack the
// terminating ';'.
5774 parse_character_reference = function (allowed_char, in_attr) {
5775 var base, c, charset, code_point, decoded, i, max, start
5776 if (allowed_char == null) {
5779 if (in_attr == null) {
5782 if (cur >= txt.length) {
5785 switch (c = txt.charAt(cur)) {
5794 // explicitly not a parse error
5798 // there has to be "one or more" alnums between & and ; to be a parse error
5802 if (cur + 1 >= txt.length) {
// an 'x' or 'X' in the next position selects hexadecimal digits
5805 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
// count how many consecutive characters from start belong to charset
5815 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5822 if (txt.charAt(start + i) === ';') {
5827 code_point = txt.substr(start, i)
// drop leading zeros (keeping at least one digit) before converting
5828 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5829 code_point = code_point.substr(1)
5831 code_point = parseInt(code_point, base)
// code points listed in unicode_fixes are replaced by their mapped string
5832 if (unicode_fixes[code_point] != null) {
5834 return unicode_fixes[code_point]
// surrogates and values beyond U+10FFFF
5836 if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
// control characters and noncharacters
5840 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5843 return from_code_point(code_point)
// named reference: look at up to 31 characters for the first one that is
// not alphanumeric
5849 for (i = 0; i < 31; ++i) {
5850 if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5855 // exit early, because parse_error() below needs at least one alnum
5858 if (txt.charAt(cur + i) === ';') {
5859 decoded = decode_named_char_ref(txt.substr(cur, i))
5860 i += 1 // scan past the ';' (after, so we don't pass it to decode)
5861 if (decoded != null) {
5865 // else FALL THROUGH (check for match without last char(s) or ";")
5867 // no ';' terminator (only legacy char refs)
5869 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5870 c = legacy_char_refs[txt.substr(cur, i)]
5873 if (txt.charAt(cur + i) === '=') {
5874 // "because some legacy user agents will
5875 // misinterpret the markup in those cases"
5879 if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5880 // this makes attributes forgiving about url args
5884 // ok, and besides the weird exceptions for attributes...
5885 // return the matching char
5886 cur += i // consume entity chars
5887 parse_error() // because no terminating ";"
// Inspects a text token t and decides whether it is a newline. A token that
// advanced cur by exactly one character counts as a literal character, and
// either CR or LF matches; otherwise it is treated as a character reference
// and only LF matches.
// NOTE(review): presumably a matching token is dropped and a non-matching one
// is pushed back (per the function name) -- confirm in the full body.
5897 eat_next_token_if_newline = function () {
5904 if (t.type === TYPE_TEXT) {
5905 // definition of a newline depends on whether it was a character ref or not
5906 if (cur - old_cur === 1) {
5907 // not a character reference
5908 if (t.text === "\u000d" || t.text === "\u000a") {
5912 if (t.text === "\u000a") {
5921 // tree constructor initialization
5922 // see comments on TYPE_TAG/etc for the structure of this data
// The root "document" is an ordinary Node of TYPE_TAG named 'document',
// starting out with its 'quirks mode' flag set to QUIRKS_NO.
5925 doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5926 doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5927 fragment_root = null // fragment parsing algorithm returns children of this
5929 afe = [] // active formatting elements
5930 template_ins_modes = []
5931 ins_mode = ins_mode_initial
5932 original_ins_mode = ins_mode // TODO check spec
// scripting defaults to true when args.scripting is null or undefined
5933 flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5934 flag_frameset_ok = true
5936 flag_foster_parenting = false
5937 form_element_pointer = null
5938 temporary_buffer = null
5939 pending_table_character_tokens = []
5940 head_element_pointer = null
5941 flag_fragment_parsing = false
5942 context_element = null
5943 prev_node_id = 0 // just for debugging
5945 // tokenizer initialization
// the tokenizer starts in the data state
5946 tok_state = tok_state_data
// Prepares the parser before tokenizing. Two optional inputs select
// fragment parsing: args.fragment (a context given as text, optionally
// prefixed with 'math ' or 'svg ') and args.context (an existing Node).
// When a context element results, the fragment parsing set-up runs: doc is
// replaced by a fresh 'html' Node that inherits the old document's
// 'quirks mode' flag, the initial tokenizer state is chosen from the context
// element's name, open_els starts with fragment_root, and the form element
// pointer is looked up from the context.
5948 parse_init = function () {
5949 var el, f, ns, old_doc, t
5950 // fragment parsing (text arg)
5951 if (args.fragment != null) {
5952 // this handles the fragment from the tests in the format described here:
5953 // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
5956 if (f.substr(0, 5) === 'math ') {
5959 } else if (f.substr(0, 4) === 'svg ') {
5964 context_element = token_to_element(t, ns)
// give the synthetic context element its own no-quirks document
5965 context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5966 context_element.document.flag('quirks mode', QUIRKS_NO)
5968 // fragment parsing (Node arg)
5969 if (args.context != null) {
5970 context_element = args.context
5973 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5974 // fragment parsing algorithm
5975 if (context_element != null) {
5976 flag_fragment_parsing = true
5977 doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5978 // search up the tree from context, to try to find its document,
5979 // because this file only puts a "document" property on the root
5982 el = context_element
5984 if (el.document != null) {
5985 old_doc = el.document
5995 doc.flag('quirks mode', old_doc.flag('quirks mode'))
// pick the tokenizer's starting state from the (HTML) context element's name
5998 if (context_element.namespace === NS_HTML) {
5999 switch (context_element.name) {
6002 tok_state = tok_state_rcdata
6009 tok_state = tok_state_rawtext
6012 tok_state = tok_state_script_data
6015 if (flag_scripting) {
6016 tok_state = tok_state_rawtext
6020 tok_state = tok_state_plaintext
6023 fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
6024 doc.children.push(fragment_root)
6025 fragment_root.document = doc
6026 open_els = [fragment_root]
6027 if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
6028 template_ins_modes.unshift(ins_mode_in_template)
6030 // fixfull create token for context (it should have its original one already)
6032 // set form_element pointer... in the foreign doc?!
6033 el = context_element
6035 if (el.name === 'form' && el.namespace === NS_HTML) {
6036 form_element_pointer = el
6047 // text pre-processing
6048 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
// normalize line endings: CRLF first, then any remaining lone CR, become LF
6049 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6050 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6053 // http://www.w3.org/TR/html5/syntax.html#tree-construction
// Runs for as long as flag_parsing stays truthy.
// NOTE(review): presumably each iteration pulls a token from the current
// tokenizer state and feeds it to the tree constructor -- confirm in the
// full body.
6054 parse_main_loop = function () {
6056 while (flag_parsing) {
6060 // fixfull parse error if has self-closing flag, but it wasn't acknowledged
// when parsing a fragment, the result is the children of fragment_root
6067 if (flag_fragment_parsing) {
6068 return fragment_root.children
// Export parse_html: as module.exports when context is 'module', and on the
// other visible path as window.peach_parser. The Node constructor is exposed
// as a property of the exported function.
6073 if (context === 'module') {
6074 module.exports = parse_html
6076 window.peach_parser = parse_html
6079 parse_html.Node = Node