1 // todo remove refs and lens, js, ls
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
21 // This file implements a thorough parser for html5, meant to be used by a
24 // The implementation is a pretty direct implementation of the parsing algorithm
27 // http://www.w3.org/TR/html5/syntax.html
29 // except for some places marked "WHATWG" that are implemented as described here:
31 // https://html.spec.whatwg.org/multipage/syntax.html
33 // This code passes all of the tests in the .dat files at:
35 // https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
42 // See README.md for how to run this file in the browser or in node.js.
44 // This file exports a single useful function: parse_html, and some constants
45 // (see the bottom of this file for those.)
49 // peach_parser.parse("<p><b>hi</p>")
51 // Or, if you don't want <html><head><body>/etc, do this:
53 // peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
55 // return value is an array of Nodes, see "class Node" below.
57 // This code is a work in progress, eg try search this file for "fixfull",
61 // Notes: stacks/lists
63 // Jason was frequently confused by the terminology used to refer to different
64 // parts of the stacks and lists in the spec, so he made this chart to help keep
67 // stacks grow downward (current element is index=0)
69 // example: open_els = [a, b, c, d, e, f, g]
71 // "grows downwards" means it's visualized like this: (index: el "names")
73 // 6: g "start of the list", "topmost", "first"
75 // 4: e "previous" (to d), "above", "before"
76 // 3: d (previous/next are relative to this element)
77 // 2: c "next", "after", "lower", "below"
79 // 0: a "end of the list", "current node", "bottommost", "last"
// Choose where the public API gets attached: the CommonJS `module.exports`
// object when `module` exists and has one, otherwise a freshly created
// `window.peach_parser` object.
81 if ((typeof module) !== 'undefined' && (module.exports != null)) {
83 exports = module.exports
86 window.peach_parser = {}
87 exports = window.peach_parser
// Convert a Unicode code point to a JavaScript string.
//
// x: integer code point (0 .. 0x10FFFF)
// returns: a string of one UTF-16 code unit, or two (a surrogate pair) for
//          code points above 0xFFFF
//
// The native String.fromCodePoint is used when the runtime provides it. The
// fallback exists because String.fromCharCode only understands 16-bit code
// units, so anything beyond the Basic Multilingual Plane has to be split into
// a high/low surrogate pair by hand.
from_code_point = function (x) {
  var offset
  if (String.fromCodePoint != null) {
    return String.fromCodePoint(x)
  }
  if (x < 0x10000) {
    return String.fromCharCode(x)
  }
  // 20 bits remain after removing the BMP: top 10 go in the high surrogate,
  // bottom 10 in the low surrogate
  offset = x - 0x10000
  return String.fromCharCode((offset >> 10) + 0xd800, (offset % 0x400) + 0xdc00)
}
102 // Each node is an object of the Node class. Here are the Node types:
// (types that can appear in the finished tree are strings; the token-only and
// bookkeeping types below are small integers)
103 TYPE_TAG = 'tag' // name, {attributes}, [children]
104 TYPE_TEXT = 'text' // "text"
105 TYPE_COMMENT = 'comment'
106 TYPE_DOCTYPE = 'doctype'
107 // the following types are emitted by the tokenizer, but shouldn't end up in the tree:
108 TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
109 TYPE_END_TAG = 5 // name
111 TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
112 TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
114 // namespace constants
119 // quirks mode constants
121 QUIRKS_LIMITED = 'limited'
124 // queue up debug logs, so e.g. they can be shown only for tests that fail
// debug_log_reset: body not visible here -- presumably empties the queue; confirm
126 debug_log_reset = function () {
// debug_log: append one message to the g_debug_log queue
129 debug_log = function (str) {
130 g_debug_log.push(str)
// debug_log_each: walks the queued messages in insertion order; `cb` is
// presumably called once per message -- confirm against the loop body
132 debug_log_each = function (cb) {
134 for (i = 0; i < g_debug_log.length; ++i) {
// Node: constructor for the record type built by the new_* helpers for both
// tokens and tree nodes.
//   type -- one of the TYPE_* constants
//   args -- bag of initial field values; every field that is missing or null
//           gets the default shown on its line below
// NOTE(review): some helpers (e.g. new_eof_token) call `new Node(type)` with
// no args at all, so presumably a missing `args` is replaced by an empty
// object before these reads -- confirm.
140 function Node (type, args) {
144 this.type = type // one of the TYPE_* constants above
145 this.name = args.name != null ? args.name : '' // tag name
146 this.text = args.text != null ? args.text : '' // contents for text/comment nodes
147 this.attrs = args.attrs != null ? args.attrs : {}
// NOTE(review): the value is read from args.attr_k but stored as attrs_a --
// confirm the differing key name is intentional
148 this.attrs_a = args.attr_k != null ? args.attr_k : [] // attrs in progress, TYPE_START_TAG only
149 this.children = args.children != null ? args.children : []
150 this.namespace = args.namespace != null ? args.namespace : NS_HTML
151 this.parent = args.parent != null ? args.parent : null
152 this.token = args.token != null ? args.token : null
153 this.flags = args.flags != null ? args.flags : {}
// id: a caller-supplied id gets "+" appended; the other assignment takes the
// next value of the prev_node_id counter, as a string
154 if (args.id != null) {
155 this.id = args.id + "+"
157 this.id = "" + (++prev_node_id)
// Set the 'did_self_close' flag to true.
//
// When this.token is set, the flag is recorded on that object (via its own
// flag() method); otherwise it is recorded on this node itself.
Node.prototype.acknowledge_self_closing = function () {
  var target = this.token != null ? this.token : this
  target.flag('did_self_close', true)
}
// flag(key, value): combined setter/getter for this.flags. One path stores
// `value` under `key`; the other returns whatever is stored under `key`.
// NOTE(review): the condition choosing between the two paths is not visible
// here -- presumably "set when a value is passed, get otherwise"; confirm.
169 Node.prototype.flag = function (key, value) {
171 this.flags[key] = value
173 return this.flags[key]
// helpers: (only take args that are normally known when parser creates nodes)
// Each helper wraps `new Node(...)` with the matching TYPE_* constant and
// passes along at most one field (name or text).

// start tag token with the given tag name
new_open_tag = function (name) {
  return new Node(TYPE_START_TAG, {name: name})
}

// end tag token with the given tag name
new_end_tag = function (name) {
  return new Node(TYPE_END_TAG, {name: name})
}

// TYPE_TAG node with the given tag name
new_element = function (name) {
  return new Node(TYPE_TAG, {name: name})
}

// text node holding `txt`
new_text_node = function (txt) {
  return new Node(TYPE_TEXT, {text: txt})
}

// character tokens and text nodes are the same kind of Node, so this is an
// alias rather than a separate function
new_character_token = new_text_node

// comment token holding `txt`
new_comment_token = function (txt) {
  return new Node(TYPE_COMMENT, {text: txt})
}

// doctype token with the given name
new_doctype_token = function (name) {
  return new Node(TYPE_DOCTYPE, {name: name})
}

// The remaining helpers pass no args object at all to Node.

new_eof_token = function () {
  return new Node(TYPE_EOF)
}

new_afe_marker = function () {
  return new Node(TYPE_AFE_MARKER)
}

new_aaa_bookmark = function () {
  return new Node(TYPE_AAA_BOOKMARK)
}
// Character-class tables. Each is a plain string, so membership is tested
// with indexOf().
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = lc_alpha + uc_alpha + digits
hex_chars = digits + "abcdefABCDEF"

// true when `str` is exactly one uppercase ASCII letter
is_uc_alpha = function (str) {
  return str.length === 1 && uc_alpha.indexOf(str) > -1
}

// true when `str` is exactly one lowercase ASCII letter
is_lc_alpha = function (str) {
  return str.length === 1 && lc_alpha.indexOf(str) > -1
}

// some SVG elements have dashes in them
tag_name_chars = alnum + "-"

// http://www.w3.org/TR/html5/infrastructure.html#space-character
space_chars = "\u0009\u000a\u000c\u000d\u0020"

// true when `txt` is exactly one of the space characters above. The length
// check matters: indexOf() alone would also accept "" and multi-character
// substrings of the table.
is_space = function (txt) {
  return txt.length === 1 && space_chars.indexOf(txt) > -1
}

// true when `t` is a TYPE_TEXT token whose text is a single space character
is_space_tok = function (t) {
  return t.type === TYPE_TEXT && is_space(t.text)
}
// is_input_hidden_tok(t): inspects a token's in-progress attribute list
// (t.attrs_a, an array of [name, value] pairs) for a `type` attribute whose
// value is "hidden", compared case-insensitively.
// Tokens that are not TYPE_START_TAG take the early branch below.
// NOTE(review): the return statements are not visible here -- presumably
// true only for the hidden-type match and false otherwise; confirm.
232 is_input_hidden_tok = function (t) {
234 if (t.type !== TYPE_START_TAG) {
237 for (i = 0; i < t.attrs_a.length; ++i) {
239 if (a[0] === 'type') {
240 if (a[1].toLowerCase() === 'hidden') {
249 // https://en.wikipedia.org/wiki/Whitespace_character#Unicode
250 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
// unicode_fixes: replacement characters keyed by numeric code point.
// 0x00 maps to U+FFFD (the replacement character); the 0x80-0x9F entries map
// to the characters Windows-1252 places at those byte values.
// NOTE(review): presumably consulted when decoding numeric character
// references -- confirm against the code that reads this table.
253 unicode_fixes[0x00] = "\uFFFD"
254 unicode_fixes[0x80] = "\u20AC"
255 unicode_fixes[0x82] = "\u201A"
256 unicode_fixes[0x83] = "\u0192"
257 unicode_fixes[0x84] = "\u201E"
258 unicode_fixes[0x85] = "\u2026"
259 unicode_fixes[0x86] = "\u2020"
260 unicode_fixes[0x87] = "\u2021"
261 unicode_fixes[0x88] = "\u02C6"
262 unicode_fixes[0x89] = "\u2030"
263 unicode_fixes[0x8A] = "\u0160"
264 unicode_fixes[0x8B] = "\u2039"
265 unicode_fixes[0x8C] = "\u0152"
266 unicode_fixes[0x8E] = "\u017D"
267 unicode_fixes[0x91] = "\u2018"
268 unicode_fixes[0x92] = "\u2019"
269 unicode_fixes[0x93] = "\u201C"
270 unicode_fixes[0x94] = "\u201D"
271 unicode_fixes[0x95] = "\u2022"
272 unicode_fixes[0x96] = "\u2013"
273 unicode_fixes[0x97] = "\u2014"
274 unicode_fixes[0x98] = "\u02DC"
275 unicode_fixes[0x99] = "\u2122"
276 unicode_fixes[0x9A] = "\u0161"
277 unicode_fixes[0x9B] = "\u203A"
278 unicode_fixes[0x9C] = "\u0153"
279 unicode_fixes[0x9E] = "\u017E"
280 unicode_fixes[0x9F] = "\u0178"
282 quirks_yes_pi_prefixes = [
283 "+//silmaril//dtd html pro v0r11 19970101//",
284 "-//as//dtd html 3.0 aswedit + extensions//",
285 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
286 "-//ietf//dtd html 2.0 level 1//",
287 "-//ietf//dtd html 2.0 level 2//",
288 "-//ietf//dtd html 2.0 strict level 1//",
289 "-//ietf//dtd html 2.0 strict level 2//",
290 "-//ietf//dtd html 2.0 strict//",
291 "-//ietf//dtd html 2.0//",
292 "-//ietf//dtd html 2.1e//",
293 "-//ietf//dtd html 3.0//",
294 "-//ietf//dtd html 3.2 final//",
295 "-//ietf//dtd html 3.2//",
296 "-//ietf//dtd html 3//",
297 "-//ietf//dtd html level 0//",
298 "-//ietf//dtd html level 1//",
299 "-//ietf//dtd html level 2//",
300 "-//ietf//dtd html level 3//",
301 "-//ietf//dtd html strict level 0//",
302 "-//ietf//dtd html strict level 1//",
303 "-//ietf//dtd html strict level 2//",
304 "-//ietf//dtd html strict level 3//",
305 "-//ietf//dtd html strict//",
306 "-//ietf//dtd html//",
307 "-//metrius//dtd metrius presentational//",
308 "-//microsoft//dtd internet explorer 2.0 html strict//",
309 "-//microsoft//dtd internet explorer 2.0 html//",
310 "-//microsoft//dtd internet explorer 2.0 tables//",
311 "-//microsoft//dtd internet explorer 3.0 html strict//",
312 "-//microsoft//dtd internet explorer 3.0 html//",
313 "-//microsoft//dtd internet explorer 3.0 tables//",
314 "-//netscape comm. corp.//dtd html//",
315 "-//netscape comm. corp.//dtd strict html//",
316 "-//o'reilly and associates//dtd html 2.0//",
317 "-//o'reilly and associates//dtd html extended 1.0//",
318 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
319 "-//sq//dtd html 2.0 hotmetal + extensions//",
320 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
321 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
322 "-//spyglass//dtd html 2.0 extended//",
323 "-//sun microsystems corp.//dtd hotjava html//",
324 "-//sun microsystems corp.//dtd hotjava strict html//",
325 "-//w3c//dtd html 3 1995-03-24//",
326 "-//w3c//dtd html 3.2 draft//",
327 "-//w3c//dtd html 3.2 final//",
328 "-//w3c//dtd html 3.2//",
329 "-//w3c//dtd html 3.2s draft//",
330 "-//w3c//dtd html 4.0 frameset//",
331 "-//w3c//dtd html 4.0 transitional//",
332 "-//w3c//dtd html experimental 19960712//",
333 "-//w3c//dtd html experimental 970421//",
334 "-//w3c//dtd w3 html//",
335 "-//w3o//dtd w3 html 3.0//",
336 "-//webtechs//dtd mozilla html 2.0//",
337 "-//webtechs//dtd mozilla html//",
340 // These are the character references that don't need a terminating semicolon
341 // min length: 2, max: 6, none are a prefix of any other.
343 Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
344 aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
345 aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
346 Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
347 curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
348 ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
349 euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
350 Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
351 igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
352 lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
353 Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
354 Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
355 Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
356 pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
357 shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
358 times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
359 ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
363 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
364 //raw_text_elements = ['script', 'style']
365 //escapable_raw_text_elements = ['textarea', 'title']
366 // http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
368 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
369 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
370 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
371 'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
372 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
373 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
374 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
375 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
376 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
377 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
378 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
379 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
380 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
381 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
385 // http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
387 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
388 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
389 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
390 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
391 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
392 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
393 'determinant', 'diff', 'divergence', 'divide', 'domain',
394 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
395 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
396 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
397 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
398 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
399 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
400 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
401 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
402 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
403 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
404 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
405 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
406 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
407 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
408 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
409 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
410 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
411 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
412 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
413 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
414 'vectorproduct', 'xor'
416 // foreign_elements = [svg_elements..., mathml_elements...]
417 //normal_elements = All other allowed HTML elements are normal elements.
421 address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
422 aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
423 blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
424 caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
425 details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
426 embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
427 footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
428 h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
429 header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
430 img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
431 listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,
433 menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these
435 meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
436 noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
437 plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
438 select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
439 table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
440 textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
441 tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,
444 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
445 'annotation-xml': NS_MATHML,
448 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
// formatting_elements: lookup table keyed by tag name; every listed name maps
// to true, so membership is a plain property read.
451 formatting_elements = {
452 a: true, b: true, big: true, code: true, em: true, font: true, i: true,
453 nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
// mathml_text_integration: tag name -> NS_MATHML. Storing the namespace as
// the value lets a lookup by name be compared directly against an element's
// own namespace.
457 mathml_text_integration = {
458 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
// True when `el` is listed in mathml_text_integration under its own
// namespace, i.e. both the tag name and the namespace have to match.
//   el -- object with `name` and `namespace` fields
is_mathml_text_integration_point = function (el) {
  return mathml_text_integration[el.name] === el.namespace
}
// is_html_integration(el): tests an element (not a token -- el.attrs is read
// as a name -> value map) against two cases:
//   * MathML <annotation-xml> whose `encoding` attribute is, ignoring case,
//     "text/html" or "application/xhtml+xml"
//   * SVG <foreignObject>, <desc> or <title>
// NOTE(review): the return statements are not visible here -- presumably
// true for the cases above and false otherwise; confirm.
463 is_html_integration = function (el) { // DON'T PASS A TOKEN
464 if (el.namespace === NS_MATHML) {
465 if (el.name === 'annotation-xml') {
// a missing encoding attribute skips both comparisons
466 if (el.attrs.encoding != null) {
467 if (el.attrs.encoding.toLowerCase() === 'text/html') {
470 if (el.attrs.encoding.toLowerCase() === 'application/xhtml+xml') {
477 if (el.namespace === NS_SVG) {
478 if (el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title') {
486 h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
489 foster_parenting_targets = {
// True when `e` is listed in special_elements under its own namespace: the
// table maps tag name -> namespace, so both have to match.
//   e -- object with `name` and `namespace` fields
el_is_special = function (e) {
  return special_elements[e.name] === e.namespace
}
// The three HTML elements (address, div, p) that el_is_special_not_adp
// excludes.
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }

// True when `el` is listed in special_elements under its own namespace and is
// not one of the adp_els entries.
//   el -- object with `name` and `namespace` fields
el_is_special_not_adp = function (el) {
  if (special_elements[el.name] !== el.namespace) {
    return false
  }
  return adp_els[el.name] !== el.namespace
}
520 altglyph: 'altGlyph',
521 altglyphdef: 'altGlyphDef',
522 altglyphitem: 'altGlyphItem',
523 animatecolor: 'animateColor',
524 animatemotion: 'animateMotion',
525 animatetransform: 'animateTransform',
526 clippath: 'clipPath',
528 fecolormatrix: 'feColorMatrix',
529 fecomponenttransfer: 'feComponentTransfer',
530 fecomposite: 'feComposite',
531 feconvolvematrix: 'feConvolveMatrix',
532 fediffuselighting: 'feDiffuseLighting',
533 fedisplacementmap: 'feDisplacementMap',
534 fedistantlight: 'feDistantLight',
535 fedropshadow: 'feDropShadow',
541 fegaussianblur: 'feGaussianBlur',
544 femergenode: 'feMergeNode',
545 femorphology: 'feMorphology',
546 feoffset: 'feOffset',
547 fepointlight: 'fePointLight',
548 fespecularlighting: 'feSpecularLighting',
549 fespotlight: 'feSpotLight',
551 feturbulence: 'feTurbulence',
552 foreignobject: 'foreignObject',
553 glyphref: 'glyphRef',
554 lineargradient: 'linearGradient',
555 radialgradient: 'radialGradient',
558 svg_attribute_fixes = {
559 attributename: 'attributeName',
560 attributetype: 'attributeType',
561 basefrequency: 'baseFrequency',
562 baseprofile: 'baseProfile',
563 calcmode: 'calcMode',
564 clippathunits: 'clipPathUnits',
565 contentscripttype: 'contentScriptType',
566 contentstyletype: 'contentStyleType',
567 diffuseconstant: 'diffuseConstant',
568 edgemode: 'edgeMode',
569 externalresourcesrequired: 'externalResourcesRequired',
570 // WHATWG removes this: filterres: 'filterRes',
571 filterunits: 'filterUnits',
572 glyphref: 'glyphRef',
573 gradienttransform: 'gradientTransform',
574 gradientunits: 'gradientUnits',
575 kernelmatrix: 'kernelMatrix',
576 kernelunitlength: 'kernelUnitLength',
577 keypoints: 'keyPoints',
578 keysplines: 'keySplines',
579 keytimes: 'keyTimes',
580 lengthadjust: 'lengthAdjust',
581 limitingconeangle: 'limitingConeAngle',
582 markerheight: 'markerHeight',
583 markerunits: 'markerUnits',
584 markerwidth: 'markerWidth',
585 maskcontentunits: 'maskContentUnits',
586 maskunits: 'maskUnits',
587 numoctaves: 'numOctaves',
588 pathlength: 'pathLength',
589 patterncontentunits: 'patternContentUnits',
590 patterntransform: 'patternTransform',
591 patternunits: 'patternUnits',
592 pointsatx: 'pointsAtX',
593 pointsaty: 'pointsAtY',
594 pointsatz: 'pointsAtZ',
595 preservealpha: 'preserveAlpha',
596 preserveaspectratio: 'preserveAspectRatio',
597 primitiveunits: 'primitiveUnits',
600 repeatcount: 'repeatCount',
601 repeatdur: 'repeatDur',
602 requiredextensions: 'requiredExtensions',
603 requiredfeatures: 'requiredFeatures',
604 specularconstant: 'specularConstant',
605 specularexponent: 'specularExponent',
606 spreadmethod: 'spreadMethod',
607 startoffset: 'startOffset',
608 stddeviation: 'stdDeviation',
609 stitchtiles: 'stitchTiles',
610 surfacescale: 'surfaceScale',
611 systemlanguage: 'systemLanguage',
612 tablevalues: 'tableValues',
615 textlength: 'textLength',
617 viewtarget: 'viewTarget',
618 xchannelselector: 'xChannelSelector',
619 ychannelselector: 'yChannelSelector',
620 zoomandpan: 'zoomAndPan'
622 foreign_attr_fixes = {
623 'xlink:actuate': 'xlink actuate',
624 'xlink:arcrole': 'xlink arcrole',
625 'xlink:href': 'xlink href',
626 'xlink:role': 'xlink role',
627 'xlink:show': 'xlink show',
628 'xlink:title': 'xlink title',
629 'xlink:type': 'xlink type',
630 'xml:base': 'xml base',
631 'xml:lang': 'xml lang',
632 'xml:space': 'xml space',
634 'xmlns:xlink': 'xmlns xlink'
// MathML attribute-name fixup for a token: an attribute named
// "definitionurl" gets its mixed-case spelling "definitionURL" back.
//   t -- token whose attrs_a is an array of [name, value] pairs; the pairs
//        are rewritten in place
adjust_mathml_attributes = function (t) {
  var i, attr
  for (i = 0; i < t.attrs_a.length; ++i) {
    attr = t.attrs_a[i]
    if (attr[0] === 'definitionurl') {
      attr[0] = 'definitionURL'
    }
  }
}
// SVG attribute-name fixup for a token: every attribute whose name is a key
// of svg_attribute_fixes is renamed to the mapped (mixed-case) spelling.
//   t -- token whose attrs_a is an array of [name, value] pairs; the pairs
//        are rewritten in place
adjust_svg_attributes = function (t) {
  var i, attr
  for (i = 0; i < t.attrs_a.length; ++i) {
    attr = t.attrs_a[i]
    // Own-property test: attribute names come from the document, and a plain
    // `table[name] != null` would also match inherited members such as
    // "constructor" or "toString" and replace the name with a function.
    if (Object.prototype.hasOwnProperty.call(svg_attribute_fixes, attr[0])) {
      attr[0] = svg_attribute_fixes[attr[0]]
    }
  }
}
// Foreign-content attribute-name fixup for a token: every attribute whose
// name is a key of foreign_attr_fixes (the xlink:/xml:/xmlns: names) is
// renamed to the mapped spelling.
//   t -- token whose attrs_a is an array of [name, value] pairs; the pairs
//        are rewritten in place
adjust_foreign_attributes = function (t) {
  var i, attr
  for (i = 0; i < t.attrs_a.length; ++i) {
    attr = t.attrs_a[i]
    // Own-property test: attribute names come from the document, and a plain
    // `table[name] != null` would also match inherited members such as
    // "constructor" or "toString" and replace the name with a function.
    if (Object.prototype.hasOwnProperty.call(foreign_attr_fixes, attr[0])) {
      attr[0] = foreign_attr_fixes[attr[0]]
    }
  }
}
665 // decode_named_char_ref()
667 // The list of named character references is _huge_ so if we're running in a
668 // browser, we get the browser to decode them, rather than increasing the code
669 // size to include the table.
// module context: the decoder is whatever the helper file exports
670 if (context === 'module') {
671 _decode_named_char_ref = require('./parser_no_browser_helper.js')
// otherwise: one shared <textarea> element does the decoding
673 decode_named_char_ref_el = document.createElement('textarea')
674 _decode_named_char_ref = function (txt) {
// wrap the bare name as "&name;", write it as markup, read it back as text
676 txt = "&" + txt + ";"
677 decode_named_char_ref_el.innerHTML = txt
678 decoded = decode_named_char_ref_el.value
// text coming back unchanged means the name was not decoded
// NOTE(review): the value returned in that case is not visible here -- confirm
679 if (decoded === txt) {
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Results are memoized in decode_named_char_ref_cache, so the underlying
// decoder (_decode_named_char_ref) runs at most once per successfully decoded
// name. A null/undefined result is returned as-is and not treated as cached.

// The cache has no prototype on purpose: entity names come straight from the
// document, and with a plain {} a name such as "constructor" or "__proto__"
// would find an inherited member and return it as if it were a decoded string.
decode_named_char_ref_cache = Object.create(null)
decode_named_char_ref = function (txt) {
  var decoded = decode_named_char_ref_cache[txt]
  if (decoded != null) {
    return decoded
  }
  decoded = _decode_named_char_ref(txt)
  decode_named_char_ref_cache[txt] = decoded
  return decoded
}
699 parse_html = function (args_html, args) {
700 var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, ref, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
705 cur = null // index of next char in txt to be parsed
706 // declare doc and tokenizer variables so they're in scope below
708 open_els = null // stack of open elements
709 afe = null // active formatting elements
710 template_ins_modes = null
712 original_ins_mode = null
714 tok_cur_tag = null // partially parsed tag
715 flag_scripting = null
716 flag_frameset_ok = null
718 flag_foster_parenting = null
719 form_element_pointer = null
720 temporary_buffer = null
721 pending_table_character_tokens = null
722 head_element_pointer = null
723 flag_fragment_parsing = null
724 context_element = null
726 stop_parsing = function () {
730 parse_error = function () {
731 if (args.error_cb != null) {
736 // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
737 // "Noah's Ark clause" but with three
738 afe_push = function (new_el) {
739 var attrs_match, el, i, j, k, len, matches, ref, ref1, v
741 for (i = 0; i < afe.length; ++i) {
743 if (el.type === TYPE_AFE_MARKER) {
746 if (el.name === new_el.name && el.namespace === new_el.namespace) {
748 for (k in el.attrs) {
750 if (new_el.attrs[k] !== v) {
756 for (k in new_el.attrs) {
758 if (el.attrs[k] !== v) {
776 afe_push_marker = function () {
777 afe.unshift(new_afe_marker())
780 // the functions below implement the Tree Construction algorithm
781 // http://www.w3.org/TR/html5/syntax.html#tree-construction
783 // But first... the helpers
784 template_tag_is_open = function () {
786 for (i = 0; i < open_els.length; ++i) {
788 if (el.name === 'template' && el.namespace === NS_HTML) {
794 is_in_scope_x = function (tag_name, scope, namespace) {
796 for (i = 0; i < open_els.length; ++i) {
798 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
801 if (scope[el.name] === el.namespace) {
807 is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
809 for (i = 0; i < open_els.length; ++i) {
811 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
814 if (scope[el.name] === el.namespace) {
817 if (scope2[el.name] === el.namespace) {
824 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
825 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
828 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
829 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
831 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
833 button_scopers = { button: NS_HTML }
834 li_scopers = { ol: NS_HTML, ul: NS_HTML }
835 table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
836 is_in_scope = function (tag_name, namespace) {
837 if (namespace == null) {
840 return is_in_scope_x(tag_name, standard_scopers, namespace)
842 is_in_button_scope = function (tag_name, namespace) {
843 if (namespace == null) {
846 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
848 is_in_table_scope = function (tag_name, namespace) {
849 if (namespace == null) {
852 return is_in_scope_x(tag_name, table_scopers, namespace)
854 // aka is_in_list_item_scope
855 is_in_li_scope = function (tag_name, namespace) {
856 if (namespace == null) {
859 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
861 is_in_select_scope = function (tag_name, namespace) {
863 if (namespace == null) {
866 for (i = 0; i < open_els.length; ++i) {
868 if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
871 if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
877 // this checks for a particular element, not by name
878 // this requires a namespace match
879 el_is_in_scope = function (needle) {
881 for (i = 0; i < open_els.length; ++i) {
886 if (standard_scopers[el.name] === el.namespace) {
893 clear_to_table_stopers = {
898 clear_stack_to_table_context = function () {
900 if (clear_to_table_stopers[open_els[0].name] != null) {
906 clear_to_table_body_stopers = {
913 clear_stack_to_table_body_context = function () {
915 if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
921 clear_to_table_row_stopers = {
926 clear_stack_to_table_row_context = function () {
928 if (clear_to_table_row_stopers[open_els[0].name] != null) {
934 clear_afe_to_marker = function () {
937 if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
941 if (el.type === TYPE_AFE_MARKER) {
948 // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
949 reset_ins_mode = function () {
950 var ancestor, ancestor_i, last, node, node_i
951 // 1. Let last be false.
953 // 2. Let node be the last node in the stack of open elements.
955 node = open_els[node_i]
956 // 3. Loop: If node is the first node in the stack of open elements,
957 // then set last to true, and, if the parser was originally created as
958 // part of the HTML fragment parsing algorithm (fragment case) set node
959 // to the context element.
961 if (node_i === open_els.length - 1) {
963 if (flag_fragment_parsing) {
964 node = context_element
967 // 4. If node is a select element, run these substeps:
968 if (node.name === 'select' && node.namespace === NS_HTML) {
969 // 1. If last is true, jump to the step below labeled done.
971 // 2. Let ancestor be node.
974 // 3. Loop: If ancestor is the first node in the stack of
975 // open elements, jump to the step below labeled done.
977 if (ancestor_i === open_els.length - 1) {
980 // 4. Let ancestor be the node before ancestor in the stack
983 ancestor = open_els[ancestor_i]
984 // 5. If ancestor is a template node, jump to the step below
986 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
989 // 6. If ancestor is a table node, switch the insertion mode
990 // to "in select in table" and abort these steps.
991 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
992 ins_mode = ins_mode_in_select_in_table
995 // 7. Jump back to the step labeled loop.
998 // 8. Done: Switch the insertion mode to "in select" and abort
1000 ins_mode = ins_mode_in_select
1003 // 5. If node is a td or th element and last is false, then switch
1004 // the insertion mode to "in cell" and abort these steps.
1005 if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1006 ins_mode = ins_mode_in_cell
1009 // 6. If node is a tr element, then switch the insertion mode to "in
1010 // row" and abort these steps.
1011 if (node.name === 'tr' && node.namespace === NS_HTML) {
1012 ins_mode = ins_mode_in_row
1015 // 7. If node is a tbody, thead, or tfoot element, then switch the
1016 // insertion mode to "in table body" and abort these steps.
1017 if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1018 ins_mode = ins_mode_in_table_body
1021 // 8. If node is a caption element, then switch the insertion mode
1022 // to "in caption" and abort these steps.
1023 if (node.name === 'caption' && node.namespace === NS_HTML) {
1024 ins_mode = ins_mode_in_caption
1027 // 9. If node is a colgroup element, then switch the insertion mode
1028 // to "in column group" and abort these steps.
1029 if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1030 ins_mode = ins_mode_in_column_group
1033 // 10. If node is a table element, then switch the insertion mode to
1034 // "in table" and abort these steps.
1035 if (node.name === 'table' && node.namespace === NS_HTML) {
1036 ins_mode = ins_mode_in_table
1039 // 11. If node is a template element, then switch the insertion mode
1040 // to the current template insertion mode and abort these steps.
1041 if (node.name === 'template' && node.namespace === NS_HTML) {
1042 ins_mode = template_ins_modes[0]
1045 // 12. If node is a head element and last is true, then switch the
1046 // insertion mode to "in body" ("in body"! not "in head"!) and abort
1047 // these steps. (fragment case)
1048 if (node.name === 'head' && node.namespace === NS_HTML && last) {
1049 ins_mode = ins_mode_in_body
1052 // 13. If node is a head element and last is false, then switch the
1053 // insertion mode to "in head" and abort these steps.
1054 if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1055 ins_mode = ins_mode_in_head
1058 // 14. If node is a body element, then switch the insertion mode to
1059 // "in body" and abort these steps.
1060 if (node.name === 'body' && node.namespace === NS_HTML) {
1061 ins_mode = ins_mode_in_body
1064 // 15. If node is a frameset element, then switch the insertion mode
1065 // to "in frameset" and abort these steps. (fragment case)
1066 if (node.name === 'frameset' && node.namespace === NS_HTML) {
1067 ins_mode = ins_mode_in_frameset
1070 // 16. If node is an html element, run these substeps:
1071 if (node.name === 'html' && node.namespace === NS_HTML) {
1072 // 1. If the head element pointer is null, switch the insertion
1073 // mode to "before head" and abort these steps. (fragment case)
1074 if (head_element_pointer === null) {
1075 ins_mode = ins_mode_before_head
1077 // 2. Otherwise, the head element pointer is not null,
1078 // switch the insertion mode to "after head" and abort these
1080 ins_mode = ins_mode_after_head
1084 // 17. If last is true, then switch the insertion mode to "in body"
1085 // and abort these steps. (fragment case)
1087 ins_mode = ins_mode_in_body
1090 // 18. Let node now be the node before node in the stack of open
1093 node = open_els[node_i]
1094 // 19. Return to the step labeled loop.
1100 // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
1101 adjusted_current_node = function () {
1102 if (open_els.length === 1 && flag_fragment_parsing) {
1103 return context_element
1108 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1109 // this implementation is structured (mostly) as described at the link above.
1110 // capitalized comments are the "labels" described at the link above.
1111 reconstruct_afe = function () {
1113 if (afe.length === 0) {
1116 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
1122 if (i === afe.length - 1) {
1126 if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
1133 el = insert_html_element(afe[i].token)
1142 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1143 // adoption agency algorithm
1145 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1146 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1147 // http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1148 adoption_agency = function (subject) {
1149 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, ref, ref1, s, t, u, w, y, z
1150 // this block implements the W3C spec
1151 // # 1. If the current node is an HTML element whose tag name is subject,
1152 // # then run these substeps:
1154 // # 1. Let element be the current node.
1156 // # 2. Pop element off the stack of open elements.
1158 // # 3. If element is also in the list of active formatting elements,
1159 // # remove the element from the list.
1161 // # 4. Abort the adoption agency algorithm.
1162 // if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1163 // el = open_els.shift()
1164 // # remove it from the list of active formatting elements (if found)
1170 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1171 // If the current node is an HTML element whose tag name is subject, and
1172 // the current node is not in the list of active formatting elements,
1173 // then pop the current node off the stack of open elements, and abort
1175 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1176 // remove it from the list of active formatting elements (if found)
1178 for (i = 0; i < afe.length; ++i) {
1180 if (el === open_els[0]) {
1198 // 5. Let formatting element be the last element in the list of
1199 // active formatting elements that: is between the end of the list
1200 // and the last scope marker in the list, if any, or the start of
1201 // the list otherwise, and has the tag name subject.
1203 for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1205 if (t.type === TYPE_AFE_MARKER) {
1208 if (t.name === subject) {
1213 // If there is no such element, then abort these steps and instead
1214 // act as described in the "any other end tag" entry above.
1216 in_body_any_other_end_tag(subject)
1219 // 6. If formatting element is not in the stack of open elements,
1220 // then this is a parse error; remove the element from the list, and
1221 // abort these steps.
1223 for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1224 t = open_els[fe_of_open_els]
1232 // "remove it from the list" must mean afe, since it's not in open_els
1233 afe.splice(fe_of_afe, 1)
1236 // 7. If formatting element is in the stack of open elements, but
1237 // the element is not in scope, then this is a parse error; abort
1239 if (!el_is_in_scope(fe)) {
1243 // 8. If formatting element is not the current node, this is a parse
1244 // error. (But do not abort these steps.)
1245 if (open_els[0] !== fe) {
1249 // 9. Let furthest block be the topmost node in the stack of open
1250 // elements that is lower in the stack than formatting element, and
1251 // is an element in the special category. There might not be one.
1253 fb_of_open_els = null
1254 for (i = 0; i < open_els.length; ++i) {
1259 if (el_is_special(t)) {
1262 // and continue, to see if there's one that's more "topmost"
1265 // 10. If there is no furthest block, then the UA must first pop all
1266 // the nodes from the bottom of the stack of open elements, from the
1267 // current node up to and including formatting element, then remove
1268 // formatting element from the list of active formatting elements,
1269 // and finally abort these steps.
1272 t = open_els.shift()
1274 afe.splice(fe_of_afe, 1)
1279 // 11. Let common ancestor be the element immediately above
1280 // formatting element in the stack of open elements.
1281 ca = open_els[fe_of_open_els + 1] // common ancestor
1283 node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1284 // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1285 bookmark = new_aaa_bookmark()
1286 for (i = 0; i < afe.length; ++i) {
1289 afe.splice(i, 0, bookmark)
1293 node = last_node = fb
1297 // 3. Let node be the element immediately above node in the
1298 // stack of open elements, or if node is no longer in the stack
1299 // of open elements (e.g. because it got removed by this
1300 // algorithm), the element that was immediately above node in
1301 // the stack of open elements before node was removed.
1303 for (i = 0; i < open_els.length; ++i) {
1306 node_next = open_els[i + 1]
1310 node = node_next != null ? node_next : node_above
1311 // TODO make sure node_above gets re-set if/when node is removed from open_els
1313 // 4. If node is formatting element, then go to the next step in
1314 // the overall algorithm.
1318 // 5. If inner loop counter is greater than three and node is in
1319 // the list of active formatting elements, then remove node from
1320 // the list of active formatting elements.
1322 for (i = 0; i < afe.length; ++i) {
1333 // 6. If node is not in the list of active formatting elements,
1334 // then remove node from the stack of open elements and then go
1335 // back to the step labeled inner loop.
1337 for (i = 0; i < open_els.length; ++i) {
1340 node_above = open_els[i + 1]
1341 open_els.splice(i, 1)
1347 // 7. create an element for the token for which the element node
1348 // was created, in the HTML namespace, with common ancestor as
1349 // the intended parent; replace the entry for node in the list
1350 // of active formatting elements with an entry for the new
1351 // element, replace the entry for node in the stack of open
1352 // elements with an entry for the new element, and let node be
1354 new_node = token_to_element(node.token, NS_HTML, ca)
1355 for (i = 0; i < afe.length; ++i) {
1362 for (i = 0; i < open_els.length; ++i) {
1365 node_above = open_els[i + 1]
1366 open_els[i] = new_node
1371 // 8. If last node is furthest block, then move the
1372 // aforementioned bookmark to be immediately after the new node
1373 // in the list of active formatting elements.
1374 if (last_node === fb) {
1375 for (i = 0; i < afe.length; ++i) {
1377 if (t === bookmark) {
1382 for (i = 0; i < afe.length; ++i) {
1385 // "after" means lower
1386 afe.splice(i, 0, bookmark) // "after as <-
1391 // 9. Insert last node into node, first removing it from its
1392 // previous parent node if any.
1393 if (last_node.parent != null) {
1394 for (i = 0; i < last_node.parent.children.length; ++i) {
1395 c = last_node.parent.children[i]
1396 if (c === last_node) {
1397 last_node.parent.children.splice(i, 1)
1402 node.children.push(last_node)
1403 last_node.parent = node
1404 // 10. Let last node be node.
1406 // 11. Return to the step labeled inner loop.
1408 // 14. Insert whatever last node ended up being in the previous step
1409 // at the appropriate place for inserting a node, but using common
1410 // ancestor as the override target.
1412 // In the case where fe is immediately followed by fb:
1413 // * inner loop exits out early (node==fe)
1414 // * last_node is fb
1415 // * last_node is still in the tree (not a duplicate)
1416 if (last_node.parent != null) {
1417 for (i = 0; i < last_node.parent.children.length; ++i) {
1418 c = last_node.parent.children[i]
1419 if (c === last_node) {
1420 last_node.parent.children.splice(i, 1)
1425 // can't use standard insert token thing, because it's already in
1426 // open_els and must stay at its current position in open_els
1427 dest = adjusted_insertion_location(ca)
1428 dest[0].children.splice(dest[1], 0, last_node)
1429 last_node.parent = dest[0]
1430 // 15. Create an element for the token for which formatting element
1431 // was created, in the HTML namespace, with furthest block as the
1433 new_element = token_to_element(fe.token, NS_HTML, fb)
1434 // 16. Take all of the child nodes of furthest block and append them
1435 // to the element created in the last step.
1436 while (fb.children.length) {
1437 t = fb.children.shift()
1438 t.parent = new_element
1439 new_element.children.push(t)
1441 // 17. Append that new element to furthest block.
1442 new_element.parent = fb
1443 fb.children.push(new_element)
1444 // 18. Remove formatting element from the list of active formatting
1445 // elements, and insert the new element into the list of active
1446 // formatting elements at the position of the aforementioned
1448 for (i = 0; i < afe.length; ++i) {
1455 for (i = 0; i < afe.length; ++i) {
1457 if (t === bookmark) {
1458 afe[i] = new_element
1462 // 19. Remove formatting element from the stack of open elements,
1463 // and insert the new element into the stack of open elements
1464 // immediately below the position of furthest block in that stack.
1465 for (i = 0; i < open_els.length; ++i) {
1468 open_els.splice(i, 1)
1472 for (i = 0; i < open_els.length; ++i) {
1475 open_els.splice(i, 0, new_element)
1479 // 20. Jump back to the step labeled outer loop.
1483 // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1484 close_p_element = function () {
1485 generate_implied_end_tags('p') // arg is exception
1486 if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
1489 while (open_els.length > 1) { // just in case
1490 el = open_els.shift()
1491 if (el.name === 'p' && el.namespace === NS_HTML) {
1496 close_p_if_in_button_scope = function () {
1497 if (is_in_button_scope('p', NS_HTML)) {
1502 // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1503 // aka insert_a_character = function (t) {
1504 insert_character = function (t) {
1506 dest = adjusted_insertion_location()
1507 // fixfull check for Document node
1509 prev = dest[0].children[dest[1] - 1]
1510 if (prev.type === TYPE_TEXT) {
1515 dest[0].children.splice(dest[1], 0, t)
1519 // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1520 process_token = function (t) {
1522 acn = adjusted_current_node()
1527 if (acn.namespace === NS_HTML) {
1531 if (is_mathml_text_integration_point(acn)) {
1532 if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
1536 if (t.type === TYPE_TEXT) {
1541 if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
1545 if (is_html_integration(acn)) {
1546 if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
1551 if (t.type === TYPE_EOF) {
1555 in_foreign_content(t)
1559 // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1560 // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1561 adjusted_insertion_location = function (override_target) {
1562 var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, ref, target, target_i
1563 // 1. If there was an override target specified, then let target be the
1565 if (override_target != null) {
1566 target = override_target
1567 } else { // Otherwise, let target be the current node.
1568 target = open_els[0]
1570 // 2. Determine the adjusted insertion location using the first matching
1571 // steps from the following list:
1573 // If foster parenting is enabled and target is a table, tbody, tfoot,
1574 // thead, or tr element. Note: foster parenting happens when content is
1575 // misnested in tables.
1576 if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
1577 while (true) { // once. this is here so we can ``break`` to "abort these substeps"
1578 // 1. Let last template be the last template element in the
1579 // stack of open elements, if any.
1580 last_template = null
1581 last_template_i = null
1582 for (i = 0; i < open_els.length; ++i) {
1584 if (el.name === 'template' && el.namespace === NS_HTML) {
1590 // 2. Let last table be the last table element in the stack of
1591 // open elements, if any.
1594 for (i = 0; i < open_els.length; ++i) {
1596 if (el.name === 'table' && el.namespace === NS_HTML) {
1602 // 3. If there is a last template and either there is no last
1603 // table, or there is one, but last template is lower (more
1604 // recently added) than last table in the stack of open
1605 // elements, then: let adjusted insertion location be inside
1606 // last template's template contents, after its last child (if
1607 // any), and abort these substeps.
1608 if (last_template && (last_table === null || last_template_i < last_table_i)) {
1609 target = last_template // fixfull should be it's contents
1610 target_i = target.children.length
1613 // 4. If there is no last table, then let adjusted insertion
1614 // location be inside the first element in the stack of open
1615 // elements (the html element), after its last child (if any),
1616 // and abort these substeps. (fragment case)
1617 if (last_table === null) {
1619 target = open_els[open_els.length - 1]
1620 target_i = target.children.length
1623 // 5. If last table has a parent element, then let adjusted
1624 // insertion location be inside last table's parent element,
1625 // immediately before last table, and abort these substeps.
1626 if (last_table.parent != null) {
1627 for (i = 0; i < last_table.parent.children.length; ++i) {
1628 c = last_table.parent.children[i]
1629 if (c === last_table) {
1630 target = last_table.parent
1637 // 6. Let previous element be the element immediately above last
1638 // table in the stack of open elements.
1640 // huh? how could it not have a parent?
1641 previous_element = open_els[last_table_i + 1]
1642 // 7. Let adjusted insertion location be inside previous
1643 // element, after its last child (if any).
1644 target = previous_element
1645 target_i = target.children.length
1646 // Note: These steps are involved in part because it's possible
1647 // for elements, the table element in this case in particular,
1648 // to have been moved by a script around in the DOM, or indeed
1649 // removed from the DOM entirely, after the element was inserted
1651 break // don't really loop
1654 // Otherwise: let adjusted insertion location be inside target, after
1655 // its last child (if any).
1656 target_i = target.children.length
1659 // 3. If the adjusted insertion location is inside a template element,
1660 // let it instead be inside the template element's template contents,
1661 // after its last child (if any).
1662 // fixfull (template)
1664 // 4. Return the adjusted insertion location.
1665 return [target, target_i]
1668 // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1669 // aka create_an_element_for_token
1670 token_to_element = function (t, namespace, intended_parent) {
1672 // convert attributes into a hash
1674 for (i = 0; i < t.attrs_a.length; ++i) {
1676 attrs[a[0]] = a[1] // TODO check what to do with dupilcate attrs
1678 el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
1680 // TODO 2. If the newly created element has an xmlns attribute in the
1681 // XMLNS namespace whose value is not exactly the same as the element's
1682 // namespace, that is a parse error. Similarly, if the newly created
1683 // element has an xmlns:xlink attribute in the XMLNS namespace whose
1684 // value is not the XLink Namespace, that is a parse error.
1686 // fixfull: the spec says stuff about form pointers and ownerDocument
1691 // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1692 insert_foreign_element = function (token, namespace) {
1693 var ail, ail_el, ail_i, el
1694 ail = adjusted_insertion_location()
1697 el = token_to_element(token, namespace, ail_el)
1698 // TODO skip this next step if it's broken (eg ail_el is document with child already)
1700 ail_el.children.splice(ail_i, 0, el)
1701 open_els.unshift(el)
1704 // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1705 insert_html_element = function (token) {
1706 return insert_foreign_element(token, NS_HTML)
1709 // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1710 // position should be [node, index_within_children]
1711 insert_comment = function (t, position) {
1712 if (position == null) {
1713 position = adjusted_insertion_location()
1715 position[0].children.splice(position[1], 0, t)
1720 // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1721 parse_generic_raw_text = function (t) {
1722 insert_html_element(t)
1723 tok_state = tok_state_rawtext
1724 original_ins_mode = ins_mode
1725 ins_mode = ins_mode_text
1727 parse_generic_rcdata_text = function (t) {
1728 insert_html_element(t)
1729 tok_state = tok_state_rcdata
1730 original_ins_mode = ins_mode
1731 ins_mode = ins_mode_text
1734 // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1735 // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1736 generate_implied_end_tags = function (except) {
1737 if (except == null) {
1740 while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
1745 // 8.2.5.4 The rules for parsing tokens in HTML content
1746 // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1748 // 8.2.5.4.1 The "initial" insertion mode
1749 // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1750 is_quirks_yes_doctype = function (t) {
1752 if (t.flag('force-quirks')) {
1755 if (t.name !== 'html') {
1758 if (t.public_identifier != null) {
1759 pi = t.public_identifier.toLowerCase()
1760 for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
1761 p = quirks_yes_pi_prefixes[i]
1762 if (pi.substr(0, p.length) === p) {
1766 if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
1770 if (t.system_identifier != null) {
1771 if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
1774 } else if (t.public_identifier != null) {
1775 // already did this: pi = t.public_identifier.toLowerCase()
1776 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
1782 is_quirks_limited_doctype = function (t) {
1784 if (t.public_identifier != null) {
1785 pi = t.public_identifier.toLowerCase()
1786 if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
1789 if (t.system_identifier != null) {
1790 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
// Token handler for the "initial" insertion mode. From the visible lines:
// comment tokens are appended to doc.children; a DOCTYPE token sets the
// document's 'quirks mode' flag according to the two doctype predicates, is
// appended to doc.children and switches to ins_mode_before_html; the final
// visible path sets QUIRKS_YES and also switches to ins_mode_before_html.
// The handling of whitespace tokens is not visible in this listing.
1797 ins_mode_initial = function (t) {
1798 if (is_space_tok(t)) {
1801 if (t.type === TYPE_COMMENT) {
1803 doc.children.push(t)
1806 if (t.type === TYPE_DOCTYPE) {
1807 // fixfull syntax error from first paragraph and following bullets
1808 // fixfull set doc.doctype
1809 // fixfull is the "not an iframe srcdoc" thing relevant?
// full-quirks test goes first; limited quirks is only considered after it
1810 if (is_quirks_yes_doctype(t)) {
1811 doc.flag('quirks mode', QUIRKS_YES)
1812 } else if (is_quirks_limited_doctype(t)) {
1813 doc.flag('quirks mode', QUIRKS_LIMITED)
1815 doc.children.push(t)
1816 ins_mode = ins_mode_before_html
1820 // fixfull not iframe srcdoc?
1822 doc.flag('quirks mode', QUIRKS_YES)
1823 ins_mode = ins_mode_before_html
1827 // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
// Token handler for the "before html" insertion mode. From the visible lines:
// comments are appended to doc.children; an <html> start tag is turned into an
// element, appended to doc.children and put at the front of open_els; the last
// visible path builds an <html> element from a synthesized start tag instead.
// Both paths switch to ins_mode_before_head.
1828 ins_mode_before_html = function (t) {
1829 if (t.type === TYPE_DOCTYPE) {
1833 if (t.type === TYPE_COMMENT) {
1834 doc.children.push(t)
1837 if (is_space_tok(t)) {
1840 if (t.type === TYPE_START_TAG && t.name === 'html') {
1841 el = token_to_element(t, NS_HTML, doc)
1842 doc.children.push(el)
// open_els is used with unshift/shift, so index 0 is the newest element
1844 open_els.unshift(el)
1845 // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1846 ins_mode = ins_mode_before_head
1849 if (t.type === TYPE_END_TAG) {
1850 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1851 // fall through to "anything else"
// "anything else": create the html element from a synthesized start tag
1858 el = token_to_element(new_open_tag('html'), NS_HTML, doc)
1859 doc.children.push(el)
1861 open_els.unshift(el)
1862 // ?fixfull browsing context
1863 ins_mode = ins_mode_before_head
1867 // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
// Token handler for the "before head" insertion mode. From the visible lines:
// a <head> start tag is inserted, remembered in head_element_pointer, and the
// mode switches to ins_mode_in_head; the last visible path does the same with
// a synthesized <head> start tag. The bodies of the whitespace, comment,
// DOCTYPE and <html> branches are not visible in this listing.
1868 ins_mode_before_head = function (t) {
1870 if (is_space_tok(t)) {
1873 if (t.type === TYPE_COMMENT) {
1877 if (t.type === TYPE_DOCTYPE) {
1881 if (t.type === TYPE_START_TAG && t.name === 'html') {
1885 if (t.type === TYPE_START_TAG && t.name === 'head') {
1886 el = insert_html_element(t)
1887 head_element_pointer = el
1888 ins_mode = ins_mode_in_head
1891 if (t.type === TYPE_END_TAG) {
1892 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1893 // fall through to Anything else below
// "anything else": insert a head element from a synthesized start tag
1900 el = insert_html_element(new_open_tag('head'))
1901 head_element_pointer = el
1902 ins_mode = ins_mode_in_head
1906 // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
// "Anything else" branch of the "in head" insertion mode, shared by several
// branches of ins_mode_in_head: removes the front element of open_els and
// switches to ins_mode_after_head.
// NOTE(review): what happens to `t` afterwards is not visible in this listing.
1907 ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
1908 open_els.shift() // spec says this will be a 'head' node
1909 ins_mode = ins_mode_after_head
1912 ins_mode_in_head = function (t) {
1914 if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
1918 if (t.type === TYPE_COMMENT) {
1922 if (t.type === TYPE_DOCTYPE) {
1926 if (t.type === TYPE_START_TAG && t.name === 'html') {
1930 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
1931 el = insert_html_element(t)
1933 t.acknowledge_self_closing()
1936 if (t.type === TYPE_START_TAG && t.name === 'meta') {
1937 el = insert_html_element(t)
1939 t.acknowledge_self_closing()
1940 // fixfull encoding stuff
1943 if (t.type === TYPE_START_TAG && t.name === 'title') {
1944 parse_generic_rcdata_text(t)
1947 if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
1948 parse_generic_raw_text(t)
1951 if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
1952 insert_html_element(t)
1953 ins_mode = ins_mode_in_head_noscript
1956 if (t.type === TYPE_START_TAG && t.name === 'script') {
1957 ail = adjusted_insertion_location()
1958 el = token_to_element(t, NS_HTML, ail)
1959 el.flag('parser-inserted', true)
1960 // fixfull frament case
1961 ail[0].children.splice(ail[1], 0, el)
1962 open_els.unshift(el)
1963 tok_state = tok_state_script_data
1964 original_ins_mode = ins_mode // make sure orig... is defined
1965 ins_mode = ins_mode_text
1968 if (t.type === TYPE_END_TAG && t.name === 'head') {
1969 open_els.shift() // will be a head element... spec says so
1970 ins_mode = ins_mode_after_head
1973 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
1974 ins_mode_in_head_else(t)
1977 if (t.type === TYPE_START_TAG && t.name === 'template') {
1978 insert_html_element(t)
1980 flag_frameset_ok = false
1981 ins_mode = ins_mode_in_template
1982 template_ins_modes.unshift(ins_mode_in_template)
1985 if (t.type === TYPE_END_TAG && t.name === 'template') {
1986 if (template_tag_is_open()) {
1987 generate_implied_end_tags
1988 if (open_els[0].name !== 'template') {
1992 el = open_els.shift()
1993 if (el.name === 'template' && el.namespace === NS_HTML) {
1997 clear_afe_to_marker()
1998 template_ins_modes.shift()
2005 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2009 ins_mode_in_head_else(t)
2012 // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
// "Anything else" branch of the "in head noscript" insertion mode; the only
// statement visible in this listing switches back to ins_mode_in_head.
2013 ins_mode_in_head_noscript_else = function (t) {
2016 ins_mode = ins_mode_in_head
// Token handler for the "in head noscript" insertion mode (entered from
// ins_mode_in_head for <noscript> when flag_scripting is false). From the
// visible lines: </noscript> returns to ins_mode_in_head; </br> and anything
// unmatched go to ins_mode_in_head_noscript_else. The bodies of the remaining
// branches are not visible in this listing.
2019 ins_mode_in_head_noscript = function (t) {
2020 if (t.type === TYPE_DOCTYPE) {
2024 if (t.type === TYPE_START_TAG && t.name === 'html') {
2028 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
2030 ins_mode = ins_mode_in_head
// whitespace, comments and these head-level start tags share one branch
2033 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
2037 if (t.type === TYPE_END_TAG && t.name === 'br') {
2038 ins_mode_in_head_noscript_else(t)
2041 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
2046 ins_mode_in_head_noscript_else(t)
2049 // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
// "Anything else" branch of the "after head" insertion mode: inserts a body
// element built from a synthesized start tag and switches to ins_mode_in_body.
// Unlike the explicit <body> branch of ins_mode_after_head, no visible line
// here touches flag_frameset_ok.
2050 ins_mode_after_head_else = function (t) {
2052 body_tok = new_open_tag('body')
2053 insert_html_element(body_tok)
2054 ins_mode = ins_mode_in_body
// Token handler for the "after head" insertion mode. From the visible lines:
// <body> is inserted, clears flag_frameset_ok and switches to
// ins_mode_in_body; <frameset> is inserted and switches to
// ins_mode_in_frameset; head-only start tags temporarily put
// head_element_pointer back on open_els and later remove it again;
// </body>, </html>, </br> and anything unmatched go to
// ins_mode_after_head_else.
2057 ins_mode_after_head = function (t) {
2059 if (is_space_tok(t)) {
2063 if (t.type === TYPE_COMMENT) {
2067 if (t.type === TYPE_DOCTYPE) {
2071 if (t.type === TYPE_START_TAG && t.name === 'html') {
2075 if (t.type === TYPE_START_TAG && t.name === 'body') {
2076 insert_html_element(t)
2077 flag_frameset_ok = false
2078 ins_mode = ins_mode_in_body
2081 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2082 insert_html_element(t)
2083 ins_mode = ins_mode_in_frameset
2086 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
// re-open the head element so the token can be handled as if still in head
2088 open_els.unshift(head_element_pointer)
// search by identity: the head element need not be at the front any more
2090 for (i = 0; i < open_els.length; ++i) {
2092 if (el === head_element_pointer) {
2093 open_els.splice(i, 1)
2099 if (t.type === TYPE_END_TAG && t.name === 'template') {
2103 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2104 ins_mode_after_head_else(t)
2107 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2112 ins_mode_after_head_else(t)
2115 // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
// "Any other end tag" rule of the "in body" insertion mode, taking the tag
// name rather than a token so other callers can reuse it. From the visible
// lines: `node` is compared with `name` in the HTML namespace; on a match
// implied end tags are generated (except for `name`) and elements are shifted
// off open_els; a node listed in special_elements is tested separately;
// otherwise `node` advances to the entry after it in open_els.
// NOTE(review): the initial value of `node` and the loop exits are not visible
// in this listing.
2116 in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
2120 if (node.name === name && node.namespace === NS_HTML) {
2121 generate_implied_end_tags(name) // arg is exception
2122 if (node !== open_els[0]) {
2126 el = open_els.shift()
// special_elements maps element name -> namespace, so this is a membership test
2132 if (special_elements[node.name] === node.namespace) {
2136 for (i = 0; i < open_els.length; ++i) {
// i + 1 is one step further from the front of open_els (index 0 is newest)
2139 node = open_els[i + 1]
2145 ins_mode_in_body = function (t) {
2146 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, ref, ref1, ref2, ref3, ref4, root_attrs, s, second, second_i, u, w, y, z
2147 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2151 if (is_space_tok(t)) {
2156 if (t.type === TYPE_TEXT) {
2159 flag_frameset_ok = false
2162 if (t.type === TYPE_COMMENT) {
2166 if (t.type === TYPE_DOCTYPE) {
2170 if (t.type === TYPE_START_TAG && t.name === 'html') {
2172 if (template_tag_is_open()) {
2175 root_attrs = open_els[open_els.length - 1].attrs
2176 for (i = 0; i < t.attrs_a.length; ++i) {
2178 if (root_attrs[a[0]] == null) {
2179 root_attrs[a[0]] = a[1]
2185 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2189 if (t.type === TYPE_START_TAG && t.name === 'body') {
2191 if (open_els.length < 2) {
2194 second = open_els[open_els.length - 2]
2195 if (second.namespace !== NS_HTML) {
2198 if (second.name !== 'body') {
2201 if (template_tag_is_open()) {
2204 flag_frameset_ok = false
2205 for (i = 0; i < t.attrs_a.length; ++i) {
2207 if (second.attrs[a[0]] == null) {
2208 second.attrs[a[0]] = a[1]
2213 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2215 if (open_els.length < 2) {
2218 second_i = open_els.length - 2
2219 second = open_els[second_i]
2220 if (second.namespace !== NS_HTML) {
2223 if (second.name !== 'body') {
2226 if (flag_frameset_ok === false) {
2229 if (second.parent != null) {
2230 for (i = 0; i < second.parent.children.length; ++i) {
2231 el = second.parent.children[i]
2232 if (el === second) {
2233 second.parent.children.splice(i, 1)
2238 open_els.splice(second_i, 1)
2239 // pop everything except the "root html element"
2240 while (open_els.length > 1) {
2243 insert_html_element(t)
2244 ins_mode = ins_mode_in_frameset
2247 if (t.type === TYPE_EOF) {
2249 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2250 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2251 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2253 for (i = 0; i < open_els.length; ++i) {
2255 if (ok_tags[t.name] !== el.namespace) {
2260 if (template_ins_modes.length > 0) {
2261 ins_mode_in_template(t)
2267 if (t.type === TYPE_END_TAG && t.name === 'body') {
2268 if (!is_in_scope('body', NS_HTML)) {
2273 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2274 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2275 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2276 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2279 for (i = 0; i < open_els.length; ++i) {
2281 if (ok_tags[t.name] !== el.namespace) {
2286 ins_mode = ins_mode_after_body
2289 if (t.type === TYPE_END_TAG && t.name === 'html') {
2290 if (!is_in_scope('body', NS_HTML)) {
2295 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2296 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2297 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2298 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2301 for (i = 0; i < open_els.length; ++i) {
2303 if (ok_tags[t.name] !== el.namespace) {
2308 ins_mode = ins_mode_after_body
2312 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2313 close_p_if_in_button_scope()
2314 insert_html_element(t)
2317 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2318 close_p_if_in_button_scope()
2319 if (h_tags[open_els[0].name] === open_els[0].namespace) {
2323 insert_html_element(t)
2326 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2327 close_p_if_in_button_scope()
2328 insert_html_element(t)
2329 eat_next_token_if_newline()
2330 flag_frameset_ok = false
2333 if (t.type === TYPE_START_TAG && t.name === 'form') {
2334 if (!(form_element_pointer === null || template_tag_is_open())) {
2338 close_p_if_in_button_scope()
2339 el = insert_html_element(t)
2340 if (!template_tag_is_open()) {
2341 form_element_pointer = el
2345 if (t.type === TYPE_START_TAG && t.name === 'li') {
2346 flag_frameset_ok = false
2347 for (i = 0; i < open_els.length; ++i) {
2349 if (node.name === 'li' && node.namespace === NS_HTML) {
2350 generate_implied_end_tags('li') // arg is exception
2351 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2355 el = open_els.shift()
2356 if (el.name === 'li' && el.namespace === NS_HTML) {
2362 if (el_is_special_not_adp(node)) {
2366 close_p_if_in_button_scope()
2367 insert_html_element(t)
2370 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2371 flag_frameset_ok = false
2372 for (i = 0; i < open_els.length; ++i) {
2374 if (node.name === 'dd' && node.namespace === NS_HTML) {
2375 generate_implied_end_tags('dd') // arg is exception
2376 if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2380 el = open_els.shift()
2381 if (el.name === 'dd' && el.namespace === NS_HTML) {
2387 if (node.name === 'dt' && node.namespace === NS_HTML) {
2388 generate_implied_end_tags('dt') // arg is exception
2389 if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2393 el = open_els.shift()
2394 if (el.name === 'dt' && el.namespace === NS_HTML) {
2400 if (el_is_special_not_adp(node)) {
2404 close_p_if_in_button_scope()
2405 insert_html_element(t)
2408 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2409 close_p_if_in_button_scope()
2410 insert_html_element(t)
2411 tok_state = tok_state_plaintext
2414 if (t.type === TYPE_START_TAG && t.name === 'button') {
2415 if (is_in_scope('button', NS_HTML)) {
2417 generate_implied_end_tags()
2419 el = open_els.shift()
2420 if (el.name === 'button' && el.namespace === NS_HTML) {
2426 insert_html_element(t)
2427 flag_frameset_ok = false
2430 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2431 if (!is_in_scope(t.name, NS_HTML)) {
2435 generate_implied_end_tags()
2436 if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2440 el = open_els.shift()
2441 if (el.name === t.name && el.namespace === NS_HTML) {
2447 if (t.type === TYPE_END_TAG && t.name === 'form') {
2448 if (!template_tag_is_open()) {
2449 node = form_element_pointer
2450 form_element_pointer = null
2451 if (node === null || !el_is_in_scope(node)) {
2455 generate_implied_end_tags()
2456 if (open_els[0] !== node) {
2459 for (i = 0; i < open_els.length; ++i) {
2462 open_els.splice(i, 1)
2467 if (!is_in_scope('form', NS_HTML)) {
2471 generate_implied_end_tags()
2472 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2476 el = open_els.shift()
2477 if (el.name === 'form' && el.namespace === NS_HTML) {
2484 if (t.type === TYPE_END_TAG && t.name === 'p') {
2485 if (!is_in_button_scope('p', NS_HTML)) {
2487 insert_html_element(new_open_tag('p'))
2492 if (t.type === TYPE_END_TAG && t.name === 'li') {
2493 if (!is_in_li_scope('li', NS_HTML)) {
2497 generate_implied_end_tags('li') // arg is exception
2498 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2502 el = open_els.shift()
2503 if (el.name === 'li' && el.namespace === NS_HTML) {
2509 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2510 if (!is_in_scope(t.name, NS_HTML)) {
2514 generate_implied_end_tags(t.name) // arg is exception
2515 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2519 el = open_els.shift()
2520 if (el.name === t.name && el.namespace === NS_HTML) {
2526 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2528 for (i = 0; i < open_els.length; ++i) {
2530 if (h_tags[el.name] === el.namespace) {
2534 if (standard_scopers[el.name] === el.namespace) {
2542 generate_implied_end_tags()
2543 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2547 el = open_els.shift()
2548 if (h_tags[el.name] === el.namespace) {
2555 if (t.type === TYPE_START_TAG && t.name === 'a') {
2556 // If the list of active formatting elements contains an a element
2557 // between the end of the list and the last marker on the list (or
2558 // the start of the list if there is no marker on the list), then
2559 // this is a parse error; run the adoption agency algorithm for the
2560 // tag name "a", then remove that element from the list of active
2561 // formatting elements and the stack of open elements if the
2562 // adoption agency algorithm didn't already remove it (it might not
2563 // have if the element is not in table scope).
2565 for (i = 0; i < afe.length; ++i) {
2567 if (el.type === TYPE_AFE_MARKER) {
2570 if (el.name === 'a' && el.namespace === NS_HTML) {
2574 if (found != null) {
2576 adoption_agency('a')
2577 for (i = 0; i < afe.length; ++i) {
2583 for (i = 0; i < open_els.length; ++i) {
2586 open_els.splice(i, 1)
2591 el = insert_html_element(t)
2595 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2597 el = insert_html_element(t)
2601 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2603 if (is_in_scope('nobr', NS_HTML)) {
2605 adoption_agency('nobr')
2608 el = insert_html_element(t)
2612 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2613 adoption_agency(t.name)
2616 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2618 insert_html_element(t)
2620 flag_frameset_ok = false
2623 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2624 if (!is_in_scope(t.name, NS_HTML)) {
2628 generate_implied_end_tags()
2629 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2633 el = open_els.shift()
2634 if (el.name === t.name && el.namespace === NS_HTML) {
2638 clear_afe_to_marker()
2641 if (t.type === TYPE_START_TAG && t.name === 'table') {
2642 if (doc.flag('quirks mode') !== QUIRKS_YES) {
2643 close_p_if_in_button_scope() // test
2645 insert_html_element(t)
2646 flag_frameset_ok = false
2647 ins_mode = ins_mode_in_table
2650 if (t.type === TYPE_END_TAG && t.name === 'br') {
2652 // W3C: t.type = TYPE_START_TAG
2653 t = new_open_tag('br') // WHATWG
2656 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2658 insert_html_element(t)
2660 t.acknowledge_self_closing()
2661 flag_frameset_ok = false
2664 if (t.type === TYPE_START_TAG && t.name === 'input') {
2666 insert_html_element(t)
2668 t.acknowledge_self_closing()
2669 if (!is_input_hidden_tok(t)) {
2670 flag_frameset_ok = false
2674 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2675 // WHATWG adds 'menuitem' for this block
2676 insert_html_element(t)
2678 t.acknowledge_self_closing()
2681 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2682 close_p_if_in_button_scope()
2683 insert_html_element(t)
2685 t.acknowledge_self_closing()
2686 flag_frameset_ok = false
2689 if (t.type === TYPE_START_TAG && t.name === 'image') {
2695 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2697 if (template_tag_is_open() === false && form_element_pointer !== null) {
2700 t.acknowledge_self_closing()
2701 flag_frameset_ok = false
2702 close_p_if_in_button_scope()
2703 el = insert_html_element(new_open_tag('form'))
2704 if (!template_tag_is_open()) {
2705 form_element_pointer = el
2707 for (i = 0; i < t.attrs_a.length; ++i) {
2709 if (a[0] === 'action') {
2710 el.attrs['action'] = a[1]
2714 insert_html_element(new_open_tag('hr'))
2717 insert_html_element(new_open_tag('label'))
2718 // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2719 input_el = new_open_tag('input')
2721 for (i = 0; i < t.attrs_a.length; ++i) {
2723 if (a[0] === 'prompt') {
2726 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2727 input_el.attrs_a.push([a[0], a[1]])
2730 input_el.attrs_a.push(['name', 'isindex'])
2731 // fixfull this next bit is in english... internationalize?
2732 if (prompt == null) {
2733 prompt = "This is a searchable index. Enter search keywords: "
2735 insert_character(new_character_token(prompt)) // fixfull split
2736 // TODO submit typo "balue" in spec
2737 insert_html_element(input_el)
2739 // insert_character('') // you can put chars here if prompt attr missing
2741 insert_html_element(new_open_tag('hr'))
2744 if (!template_tag_is_open()) {
2745 form_element_pointer = null
2749 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2750 insert_html_element(t)
2751 eat_next_token_if_newline()
2752 tok_state = tok_state_rcdata
2753 original_ins_mode = ins_mode
2754 flag_frameset_ok = false
2755 ins_mode = ins_mode_text
2758 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2759 close_p_if_in_button_scope()
2761 flag_frameset_ok = false
2762 parse_generic_raw_text(t)
2765 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2766 flag_frameset_ok = false
2767 parse_generic_raw_text(t)
2770 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2771 parse_generic_raw_text(t)
2774 if (t.type === TYPE_START_TAG && t.name === 'select') {
2776 insert_html_element(t)
2777 flag_frameset_ok = false
2778 if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2779 ins_mode = ins_mode_in_select_in_table
2781 ins_mode = ins_mode_in_select
2785 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2786 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2790 insert_html_element(t)
2793 // this comment block implements the W3C spec
2794 // if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2795 // if is_in_scope 'ruby', NS_HTML
2796 // generate_implied_end_tags()
2797 // unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2799 // insert_html_element t
2801 // if t.type === TYPE_START_TAG && t.name === 'rt'
2802 // if is_in_scope 'ruby', NS_HTML
2803 // generate_implied_end_tags 'rtc' // arg === exception
2804 // unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2806 // insert_html_element t
2808 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2809 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2810 if (is_in_scope('ruby', NS_HTML)) {
2811 generate_implied_end_tags()
2812 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2816 insert_html_element(t)
2819 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2820 if (is_in_scope('ruby', NS_HTML)) {
2821 generate_implied_end_tags('rtc')
2822 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2826 insert_html_element(t)
2830 if (t.type === TYPE_START_TAG && t.name === 'math') {
2832 adjust_mathml_attributes(t)
2833 adjust_foreign_attributes(t)
2834 insert_foreign_element(t, NS_MATHML)
2835 if (t.flag('self-closing')) {
2837 t.acknowledge_self_closing()
2841 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2843 adjust_svg_attributes(t)
2844 adjust_foreign_attributes(t)
2845 insert_foreign_element(t, NS_SVG)
2846 if (t.flag('self-closing')) {
2848 t.acknowledge_self_closing()
2852 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2856 if (t.type === TYPE_START_TAG) { // any other start tag
2858 insert_html_element(t)
2861 if (t.type === TYPE_END_TAG) { // any other end tag
2862 in_body_any_other_end_tag(t.name)
2867 // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
// Token handler for the "text" insertion mode, entered after the caller has
// saved its own mode in original_ins_mode. From the visible lines: at EOF an
// open script element is flagged 'already started'; EOF, </script> and any
// other end tag all restore ins_mode from original_ins_mode. The handling of
// text tokens is not visible in this listing.
2868 ins_mode_text = function (t) {
2869 if (t.type === TYPE_TEXT) {
2873 if (t.type === TYPE_EOF) {
2875 if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
2876 open_els[0].flag('already started', true)
2879 ins_mode = original_ins_mode
2883 if (t.type === TYPE_END_TAG && t.name === 'script') {
2885 ins_mode = original_ins_mode
2886 // fixfull the spec seems to assume that I'm going to run the script
2887 // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2890 if (t.type === TYPE_END_TAG) {
2892 ins_mode = original_ins_mode
2897 // the functions below implement the tokenizer states described here:
2898 // http://www.w3.org/TR/html5/syntax.html#tokenization
2900 // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
// "Anything else" branch of the "in table" insertion mode: turns
// flag_foster_parenting on, then off again.
// NOTE(review): the statement between the two assignments is not visible in
// this listing; presumably the token is processed there with foster parenting
// enabled — confirm.
2901 ins_mode_in_table_else = function (t) {
2903 flag_foster_parenting = true
2905 flag_foster_parenting = false
// Token handler for the "in table" insertion mode. From the visible lines:
// when the current node is a table, tbody, tfoot, thead or tr element the
// handler starts collecting pending_table_character_tokens and switches to
// ins_mode_in_table_text; several start-tag paths clear the stack back to a
// table context, insert the token's element (or a synthesized colgroup/tbody)
// and switch to the matching table sub-mode; two paths pop open_els down to
// the table element; unmatched tokens go to ins_mode_in_table_else.
// NOTE(review): most case labels and conditions of this function are not
// visible in this listing, so the per-branch comments below are hedged.
2907 ins_mode_in_table = function (t) {
2911 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
// remember the current mode so ins_mode_in_table_text can restore it
2912 pending_table_character_tokens = []
2913 original_ins_mode = ins_mode
2914 ins_mode = ins_mode_in_table_text
2917 ins_mode_in_table_else(t)
2926 case TYPE_START_TAG:
2929 clear_stack_to_table_context()
2931 insert_html_element(t)
2932 ins_mode = ins_mode_in_caption
2935 clear_stack_to_table_context()
2936 insert_html_element(t)
2937 ins_mode = ins_mode_in_column_group
// presumably the <col> case: a colgroup is synthesized first — confirm
2940 clear_stack_to_table_context()
2941 insert_html_element(new_open_tag('colgroup'))
2942 ins_mode = ins_mode_in_column_group
2948 clear_stack_to_table_context()
2949 insert_html_element(t)
2950 ins_mode = ins_mode_in_table_body
// presumably the td/th/tr case: a tbody is synthesized first — confirm
2955 clear_stack_to_table_context()
2956 insert_html_element(new_open_tag('tbody'))
2957 ins_mode = ins_mode_in_table_body
2962 if (is_in_table_scope('table', NS_HTML)) {
2964 el = open_els.shift()
2965 if (el.name === 'table' && el.namespace === NS_HTML) {
// only inputs that pass is_input_hidden_tok are inserted directly below
2979 if (!is_input_hidden_tok(t)) {
2980 ins_mode_in_table_else(t)
2983 el = insert_html_element(t)
2985 t.acknowledge_self_closing()
2990 if (form_element_pointer != null) {
2993 if (template_tag_is_open()) {
2996 form_element_pointer = insert_html_element(t)
3000 ins_mode_in_table_else(t)
3006 if (is_in_table_scope('table', NS_HTML)) {
3008 el = open_els.shift()
3009 if (el.name === 'table' && el.namespace === NS_HTML) {
3035 ins_mode_in_table_else(t)
3042 ins_mode_in_table_else(t)
3046 // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
// Token handler for the "in table text" insertion mode. Text tokens are
// buffered in pending_table_character_tokens. From the visible lines, on a
// later path the buffer is scanned for any non-whitespace token, then its
// tokens are either inserted as characters or each passed to
// ins_mode_in_table_else; finally the buffer is reset and ins_mode is restored
// from original_ins_mode.
// NOTE(review): the condition choosing between the two flush loops is not
// visible in this listing; presumably it is the all-whitespace result of the
// scan — confirm.
3047 ins_mode_in_table_text = function (t) {
3048 var all_space, i, l, m, old
3049 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3054 if (t.type === TYPE_TEXT) {
3055 pending_table_character_tokens.push(t)
3060 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3061 old = pending_table_character_tokens[i]
3062 if (!is_space_tok(old)) {
3068 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3069 old = pending_table_character_tokens[i]
3070 insert_character(old)
3073 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3074 old = pending_table_character_tokens[i]
3075 ins_mode_in_table_else(old)
3078 pending_table_character_tokens = []
3079 ins_mode = original_ins_mode
3083 // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
// Token handler for the "in caption" insertion mode. From the visible lines:
// </caption>, and also table-structure start tags or </table>, close the open
// caption when one is in table scope: elements are shifted off open_els until
// the caption itself is removed, the active formatting elements are cleared to
// the last marker, and the mode returns to ins_mode_in_table.
3084 ins_mode_in_caption = function (t) {
3086 if (t.type === TYPE_END_TAG && t.name === 'caption') {
3087 if (is_in_table_scope('caption', NS_HTML)) {
3088 generate_implied_end_tags()
// NOTE(review): unlike the sibling handlers this test does not compare the
// namespace — confirm whether that is intended
3089 if (open_els[0].name !== 'caption') {
3093 el = open_els.shift()
3094 if (el.name === 'caption' && el.namespace === NS_HTML) {
3098 clear_afe_to_marker()
3099 ins_mode = ins_mode_in_table
3106 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3108 if (is_in_table_scope('caption', NS_HTML)) {
3110 el = open_els.shift()
3111 if (el.name === 'caption' && el.namespace === NS_HTML) {
3115 clear_afe_to_marker()
3116 ins_mode = ins_mode_in_table
3119 // else fragment case
3122 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3130 // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
3131 ins_mode_in_column_group = function (t) {
3133 if (is_space_tok(t)) {
3137 if (t.type === TYPE_COMMENT) {
3141 if (t.type === TYPE_DOCTYPE) {
3145 if (t.type === TYPE_START_TAG && t.name === 'html') {
3149 if (t.type === TYPE_START_TAG && t.name === 'col') {
3150 el = insert_html_element(t)
3152 t.acknowledge_self_closing()
3155 if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
3156 if (open_els[0].name === 'colgroup' && open_els.namespace === NS_HTML) {
3158 ins_mode = ins_mode_in_table
3164 if (t.type === TYPE_END_TAG && t.name === 'col') {
3168 if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
3172 if (t.type === TYPE_EOF) {
3177 if (open_els[0].name !== 'colgroup') {
3182 ins_mode = ins_mode_in_table
3186 // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
// Token handler for the "in table body" insertion mode. From the visible
// lines: <tr> clears the stack to a table body context, is inserted and
// switches to ins_mode_in_row; <th>/<td> do the same with a synthesized <tr>;
// </tbody>, </tfoot> and </thead> (when in table scope) and the
// table-structure start tags / </table> clear the stack to a table body
// context and return to ins_mode_in_table; the last visible line delegates to
// ins_mode_in_table.
3187 ins_mode_in_table_body = function (t) {
3189 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3190 clear_stack_to_table_body_context()
3191 insert_html_element(t)
3192 ins_mode = ins_mode_in_row
3195 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3197 clear_stack_to_table_body_context()
// a cell directly inside a table body gets a synthesized row first
3198 insert_html_element(new_open_tag('tr'))
3199 ins_mode = ins_mode_in_row
3203 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3204 if (!is_in_table_scope(t.name, NS_HTML)) {
3208 clear_stack_to_table_body_context()
3210 ins_mode = ins_mode_in_table
3213 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) {
// hand-rolled scope scan: find a tbody/tfoot/thead before a table_scopers entry
3215 for (i = 0; i < open_els.length; ++i) {
3217 if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) {
3221 if (table_scopers[el.name] === el.namespace) {
3229 clear_stack_to_table_body_context()
3231 ins_mode = ins_mode_in_table
3235 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) {
// anything else is handled with the "in table" rules
3240 ins_mode_in_table(t)
3243 // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
// Token handler for the "in row" insertion mode. From the visible lines:
// <th>/<td> clear the stack to a table row context, are inserted and switch to
// ins_mode_in_cell; </tr>, the table-structure start tags / </table>, and
// </tbody>/</tfoot>/</thead> all close the row when a tr is in table scope and
// return to ins_mode_in_table_body; the last visible line delegates to
// ins_mode_in_table.
3244 ins_mode_in_row = function (t) {
3245 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3246 clear_stack_to_table_row_context()
3247 insert_html_element(t)
3248 ins_mode = ins_mode_in_cell
3252 if (t.type === TYPE_END_TAG && t.name === 'tr') {
3253 if (is_in_table_scope('tr', NS_HTML)) {
3254 clear_stack_to_table_row_context()
3256 ins_mode = ins_mode_in_table_body
3262 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3263 if (is_in_table_scope('tr', NS_HTML)) {
3264 clear_stack_to_table_row_context()
3266 ins_mode = ins_mode_in_table_body
3273 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
// the named section must itself be in table scope before the row is closed
3274 if (is_in_table_scope(t.name, NS_HTML)) {
3275 if (is_in_table_scope('tr', NS_HTML)) {
3276 clear_stack_to_table_row_context()
3278 ins_mode = ins_mode_in_table_body
3286 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th')) {
// anything else is handled with the "in table" rules
3291 ins_mode_in_table(t)
3294 // http://www.w3.org/TR/html5/syntax.html#close-the-cell
3295 close_the_cell = function () {
3297 generate_implied_end_tags()
3298 if (!((open_els[0].name === 'td' || open_els[0] === 'th') && open_els[0].namespace === NS_HTML)) {
3302 el = open_els.shift()
3303 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3307 clear_afe_to_marker()
3308 ins_mode = ins_mode_in_row
3311 // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
// Token handler for the "in cell" insertion mode.
// t: the token being processed; every branch dispatches on t.type / t.name.
3312 ins_mode_in_cell = function (t) {
// </td> or </th>: only acted on when an element of that name is in table scope
3314 if (t.type === TYPE_END_TAG && (t.name === 'td' || t.name === 'th')) {
3315 if (is_in_table_scope(t.name, NS_HTML)) {
3316 generate_implied_end_tags()
// the current node is expected to be the cell named by the end tag
3317 if (!((open_els[0].name === t.name) && open_els[0].namespace === NS_HTML)) {
// pop from the stack, testing each popped element against the end tag's name
3321 el = open_els.shift()
3322 if (el.name === t.name && el.namespace === NS_HTML) {
3326 clear_afe_to_marker()
3327 ins_mode = ins_mode_in_row
// table-structure start tags arriving while a cell is open
3333 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
// scan the stack for an HTML td/th, also testing each entry against table_scopers
3335 for (i = 0; i < open_els.length; ++i) {
3337 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3341 if (table_scopers[el.name] === el.namespace) {
// end tags that get their own branch
3353 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html')) {
// table-structure end tags: checked against table scope by the tag's own name
3357 if (t.type === TYPE_END_TAG && (t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) {
3358 if (is_in_table_scope(t.name, NS_HTML)) {
3370 // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
3371 ins_mode_in_select = function (t) {
3373 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3377 if (t.type === TYPE_TEXT) {
3381 if (t.type === TYPE_COMMENT) {
3385 if (t.type === TYPE_DOCTYPE) {
3389 if (t.type === TYPE_START_TAG && t.name === 'html') {
3393 if (t.type === TYPE_START_TAG && t.name === 'option') {
3394 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3397 insert_html_element(t)
3400 if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
3401 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3404 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3407 insert_html_element(t)
3410 if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
3411 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3412 if (open_els[1].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3416 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3423 if (t.type === TYPE_END_TAG && t.name === 'option') {
3424 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3431 if (t.type === TYPE_END_TAG && t.name === 'select') {
3432 if (is_in_select_scope('select', NS_HTML)) {
3434 el = open_els.shift()
3435 if (el.name === 'select' && el.namespace === NS_HTML) {
3445 if (t.type === TYPE_START_TAG && t.name === 'select') {
3448 el = open_els.shift()
3449 if (el.name === 'select' && el.namespace === NS_HTML) {
3454 // spec says that this is the same as </select> but it doesn't say
3455 // to check scope first
3458 if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
3460 if (!is_in_select_scope('select', NS_HTML)) {
3464 el = open_els.shift()
3465 if (el.name === 'select' && el.namespace === NS_HTML) {
3473 if (t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) {
3477 if (t.type === TYPE_EOF) {
3485 // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// Token handler for the "in select in table" insertion mode.
// t: the token being processed. Table-structure start/end tags are handled
// here; everything else is delegated to ins_mode_in_select.
3486 ins_mode_in_select_in_table = function (t) {
// table-structure start tag: pop the stack, testing each popped element for the HTML select
3488 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3491 el = open_els.shift()
3492 if (el.name === 'select' && el.namespace === NS_HTML) {
// table-structure end tag: first requires the named element to be in table scope
3500 if (t.type === TYPE_END_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3502 if (!is_in_table_scope(t.name, NS_HTML)) {
3506 el = open_els.shift()
3507 if (el.name === 'select' && el.namespace === NS_HTML) {
// fallback: hand the token to the "in select" handler
3516 ins_mode_in_select(t)
3519 // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// Token handler for the "in template" insertion mode.
// t: the token being processed. For start tags, the front entry of
// template_ins_modes is replaced (shift + unshift) with the mode suited to
// the tag, and ins_mode is set to that same mode.
3520 ins_mode_in_template = function (t) {
3522 if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
// head-level start tags, or </template>
3526 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
// caption/colgroup/tbody/tfoot/thead -> "in table"
3530 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3531 template_ins_modes.shift()
3532 template_ins_modes.unshift(ins_mode_in_table)
3533 ins_mode = ins_mode_in_table
// col -> "in column group"
3537 if (t.type === TYPE_START_TAG && t.name === 'col') {
3538 template_ins_modes.shift()
3539 template_ins_modes.unshift(ins_mode_in_column_group)
3540 ins_mode = ins_mode_in_column_group
// tr -> "in table body"
3544 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3545 template_ins_modes.shift()
3546 template_ins_modes.unshift(ins_mode_in_table_body)
3547 ins_mode = ins_mode_in_table_body
// td/th -> "in row"
3551 if (t.type === TYPE_START_TAG && (t.name === 'td' || t.name === 'th')) {
3552 template_ins_modes.shift()
3553 template_ins_modes.unshift(ins_mode_in_row)
3554 ins_mode = ins_mode_in_row
// any other start tag -> "in body"
3558 if (t.type === TYPE_START_TAG) {
3559 template_ins_modes.shift()
3560 template_ins_modes.unshift(ins_mode_in_body)
3561 ins_mode = ins_mode_in_body
3565 if (t.type === TYPE_END_TAG) {
// EOF: checks whether a template is still open, pops the stack testing for
// the HTML template element, then drops the front template insertion mode
3569 if (t.type === TYPE_EOF) {
3570 if (!template_tag_is_open()) {
3576 el = open_els.shift()
3577 if (el.name === 'template' && el.namespace === NS_HTML) {
3581 clear_afe_to_marker()
3582 template_ins_modes.shift()
3588 // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
// Token handler for the "after body" insertion mode.
// t: the token being processed.
3589 ins_mode_after_body = function (t) {
3591 if (is_space_tok(t)) {
3595 if (t.type === TYPE_COMMENT) {
// The last entry of open_els is the oldest element on the stack; the comment
// is inserted at the end of that element's children.
3596 first = open_els[open_els.length - 1]
3597 insert_comment(t, [first, first.children.length])
3600 if (t.type === TYPE_DOCTYPE) {
3604 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </html>: fragment parsing is tested first, otherwise move to "after after body"
3608 if (t.type === TYPE_END_TAG && t.name === 'html') {
3609 if (flag_fragment_parsing) {
3613 ins_mode = ins_mode_after_after_body
3616 if (t.type === TYPE_EOF) {
// anything else: switch back to "in body"
3622 ins_mode = ins_mode_in_body
3626 // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
// Token handler for the "in frameset" insertion mode.
// t: the token being processed.
3627 ins_mode_in_frameset = function (t) {
3628 if (is_space_tok(t)) {
3632 if (t.type === TYPE_COMMENT) {
3636 if (t.type === TYPE_DOCTYPE) {
3640 if (t.type === TYPE_START_TAG && t.name === 'html') {
3644 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
3645 insert_html_element(t)
// </frameset>
3648 if (t.type === TYPE_END_TAG && t.name === 'frameset') {
// a one-element stack is treated as the fragment case and the handler returns early
3649 if (open_els.length === 1) {
3651 return // fragment case
// leave frameset mode only when not fragment parsing and the current node is no longer a frameset
3654 if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
3655 ins_mode = ins_mode_after_frameset
// <frame>: inserted, and the token's self-closing flag is acknowledged
3659 if (t.type === TYPE_START_TAG && t.name === 'frame') {
3660 insert_html_element(t)
3662 t.acknowledge_self_closing()
3665 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
// EOF: a stack holding more than one element is tested for
3669 if (t.type === TYPE_EOF) {
3670 if (open_els.length !== 1) {
3680 // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
// Token handler for the "after frameset" insertion mode.
// t: the token being processed; one branch per token kind.
3681 ins_mode_after_frameset = function (t) {
3682 if (is_space_tok(t)) {
3686 if (t.type === TYPE_COMMENT) {
3690 if (t.type === TYPE_DOCTYPE) {
3694 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </html> moves the parser to "after after frameset"
3698 if (t.type === TYPE_END_TAG && t.name === 'html') {
3699 ins_mode = ins_mode_after_after_frameset
3702 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3706 if (t.type === TYPE_EOF) {
3714 // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
// Token handler for the "after after body" insertion mode.
// t: the token being processed.
3715 ins_mode_after_after_body = function (t) {
3716 if (t.type === TYPE_COMMENT) {
// comments are inserted at the end of doc's children
3717 insert_comment(t, [doc, doc.children.length])
// doctype, whitespace and <html> share one branch
3720 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3724 if (t.type === TYPE_EOF) {
// anything else: switch back to "in body"
3730 ins_mode = ins_mode_in_body
3734 // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
// Token handler for the "after after frameset" insertion mode.
// t: the token being processed.
3735 ins_mode_after_after_frameset = function (t) {
3736 if (t.type === TYPE_COMMENT) {
// comments are inserted at the end of doc's children
3737 insert_comment(t, [doc, doc.children.length])
// doctype, whitespace and <html> share one branch
3740 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3744 if (t.type === TYPE_EOF) {
3748 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3757 // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
// Scans the attributes of tag token t (t.attrs_a) for one named "color",
// "face" or "size"; index 0 of each entry is compared against those names.
// Used by in_foreign_content as part of its <font> start-tag test.
// NOTE(review): expected to yield a boolean, since the caller uses the result
// directly in a condition.
3758 has_color_face_or_size = function (t) {
3760 for (i = 0; i < t.attrs_a.length; ++i) {
3762 if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
// Shared handler for the end of a script element in foreign content. Takes
// no arguments. Called by in_foreign_content_other_start for a self-closing
// <script>, and by in_foreign_content for </script> when the current node is
// an SVG script element.
3768 in_foreign_content_end_script = function () {
3772 in_foreign_content_other_start = function (t) {
3774 acn = adjusted_current_node()
3775 if (acn.namespace === NS_MATHML) {
3776 adjust_mathml_attributes(t)
3778 if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra perens because Coffeescript did
3779 t.name = svg_name_fixes[t.name]
3781 if (acn.namespace === NS_SVG) {
3782 adjust_svg_attributes(t)
3784 adjust_foreign_attributes(t)
3785 insert_foreign_element(t, acn.namespace)
3786 if (t.flag('self-closing')) {
3787 if (t.name === 'script') {
3788 t.acknowledge_self_closing()
3789 in_foreign_content_end_script()
3793 t.acknowledge_self_closing()
// Token handler for tokens processed under the "in foreign content" rules
// (MathML/SVG subtrees). t: the token being processed.
3797 in_foreign_content = function (t) {
// U+0000 text is replaced with U+FFFD
3799 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3801 insert_character(new_character_token("\ufffd"))
3804 if (is_space_tok(t)) {
// non-whitespace text clears the frameset-ok flag
3808 if (t.type === TYPE_TEXT) {
3809 flag_frameset_ok = false
3813 if (t.type === TYPE_COMMENT) {
3817 if (t.type === TYPE_DOCTYPE) {
// HTML-only start tags (plus <font> carrying color/face/size) break out of foreign content
3821 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
// when fragment parsing, the tag is treated like any other foreign start tag
3823 if (flag_fragment_parsing) {
3824 in_foreign_content_other_start(t)
3827 while (true) { // is this safe?
// loop exit test: current node is a MathML text integration point, an HTML
// integration point, or an HTML-namespace element
3829 if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
3836 if (t.type === TYPE_START_TAG) {
3837 in_foreign_content_other_start(t)
// </script> while the current node is an SVG script element
3840 if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
3841 in_foreign_content_end_script()
// any other end tag: node names are compared case-insensitively (lower-cased)
// against the token name
3844 if (t.type === TYPE_END_TAG) {
3847 if (node.name.toLowerCase() !== t.name) {
// the last entry of open_els is the oldest element on the stack
3851 if (node === open_els[open_els.length - 1]) {
3854 if (node.name.toLowerCase() === t.name) {
3856 el = open_els.shift()
3864 if (node.namespace === NS_HTML) {
3868 ins_mode(t) // explicitly call HTML insertion mode
3873 // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
// Tokenizer "data" state. Reads one character from txt at cur (advancing
// cur) and switches on it. Visible outcomes: a text node built from a parsed
// character reference, a switch to the tag-open state, a text node for the
// character itself, or an EOF token.
3874 tok_state_data = function () {
3876 switch (c = txt.charAt(cur++)) {
3878 return new_text_node(parse_character_reference())
3881 tok_state = tok_state_tag_open
3885 return new_text_node(c)
3888 return new_eof_token()
3891 return new_text_node(c)
3896 // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3897 // not needed: tok_state_character_reference_in_data = function () {
3898 // just call parse_character_reference()
3900 // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
// Tokenizer "RCDATA" state. Reads one character from txt at cur (advancing
// cur) and switches on it. Visible outcomes: a text node from a parsed
// character reference, a switch to the RCDATA less-than-sign state, a U+FFFD
// replacement character token, an EOF token, or a character token for c.
3901 tok_state_rcdata = function () {
3903 switch (c = txt.charAt(cur++)) {
3905 return new_text_node(parse_character_reference())
3908 tok_state = tok_state_rcdata_less_than_sign
3912 return new_character_token("\ufffd")
3915 return new_eof_token()
3918 return new_character_token(c)
3923 // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3924 // not needed: tok_state_character_reference_in_rcdata = function () {
3925 // just call parse_character_reference()
3927 // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
// Tokenizer "RAWTEXT" state. Reads one character from txt at cur (advancing
// cur) and switches on it. Visible outcomes: a switch to the RAWTEXT
// less-than-sign state, a U+FFFD replacement character token, an EOF token,
// or a character token for c. No character-reference parsing here.
3928 tok_state_rawtext = function () {
3930 switch (c = txt.charAt(cur++)) {
3932 tok_state = tok_state_rawtext_less_than_sign
3936 return new_character_token("\ufffd")
3939 return new_eof_token()
3942 return new_character_token(c)
3947 // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
// Tokenizer "script data" state. Reads one character from txt at cur
// (advancing cur) and switches on it. Visible outcomes: a switch to the
// script-data less-than-sign state, a U+FFFD replacement character token, an
// EOF token, or a character token for c.
3948 tok_state_script_data = function () {
3950 switch (c = txt.charAt(cur++)) {
3952 tok_state = tok_state_script_data_less_than_sign
3956 return new_character_token("\ufffd")
3959 return new_eof_token()
3962 return new_character_token(c)
3967 // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
// Tokenizer "PLAINTEXT" state. Reads one character from txt at cur
// (advancing cur) and switches on it. No state transition is visible here;
// each visible outcome returns a token: U+FFFD, EOF, or the character itself.
3968 tok_state_plaintext = function () {
3970 switch (c = txt.charAt(cur++)) {
3973 return new_character_token("\ufffd")
3976 return new_eof_token()
3979 return new_character_token(c)
3984 // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
// Tokenizer "tag open" state (the character after "<"). Reads one character
// from txt at cur (advancing cur) and decides what kind of construct starts.
3985 tok_state_tag_open = function () {
3987 c = txt.charAt(cur++)
3989 tok_state = tok_state_markup_declaration_open
3993 tok_state = tok_state_end_tag_open
// a letter begins a start tag; tag names are stored lower-cased
3996 if (is_uc_alpha(c)) {
3997 tok_cur_tag = new_open_tag(c.toLowerCase())
3998 tok_state = tok_state_tag_name
4001 if (is_lc_alpha(c)) {
4002 tok_cur_tag = new_open_tag(c)
4003 tok_state = tok_state_tag_name
// a comment token seeded with "?" is handed to the bogus-comment state
4008 tok_cur_tag = new_comment_token('?') // FIXME right?
4009 tok_state = tok_state_bogus_comment
// anything else: the "<" was literal text; back up so the character is re-read in the data state
4014 tok_state = tok_state_data
4015 cur -= 1 // we didn't parse/handle the char after <
4016 return new_text_node('<')
4019 // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
// Tokenizer "end tag open" state (the character after "</"). Reads one
// character from txt at cur (advancing cur).
4020 tok_state_end_tag_open = function () {
4022 c = txt.charAt(cur++)
// a letter begins an end tag; tag names are stored lower-cased
4023 if (is_uc_alpha(c)) {
4024 tok_cur_tag = new_end_tag(c.toLowerCase())
4025 tok_state = tok_state_tag_name
4028 if (is_lc_alpha(c)) {
4029 tok_cur_tag = new_end_tag(c)
4030 tok_state = tok_state_tag_name
4035 tok_state = tok_state_data
// charAt() yields '' past the end of txt; the pending "</" is returned as text
4038 if (c === '') { // EOF
4040 tok_state = tok_state_data
4041 return new_text_node('</')
// anything else: a comment token seeded with c is handed to the bogus-comment state
4045 tok_cur_tag = new_comment_token(c)
4046 tok_state = tok_state_bogus_comment
4050 // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
// Tokenizer "tag name" state. Reads one character from txt at cur
// (advancing cur) and either leaves the tag name (to before-attribute-name,
// self-closing-start-tag or data) or extends tok_cur_tag.name.
4051 tok_state_tag_name = function () {
4053 switch (c = txt.charAt(cur++)) {
4058 tok_state = tok_state_before_attribute_name
4061 tok_state = tok_state_self_closing_start_tag
4064 tok_state = tok_state_data
// U+FFFD replacement character is appended to the name
4071 tok_cur_tag.name += "\ufffd"
4075 tok_state = tok_state_data
// uppercase letters are folded to lowercase; other characters are appended as-is
4078 if (is_uc_alpha(c)) {
4079 tok_cur_tag.name += c.toLowerCase()
4081 tok_cur_tag.name += c
4087 // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
// Tokenizer "RCDATA less-than sign" state. Reads one character from txt at
// cur (advancing cur).
4088 tok_state_rcdata_less_than_sign = function () {
4090 c = txt.charAt(cur++)
// possible end tag: reset the temporary buffer and go to RCDATA end-tag-open
4092 temporary_buffer = ''
4093 tok_state = tok_state_rcdata_end_tag_open
// otherwise the "<" was literal: return to RCDATA, re-read this character, return "<"
4097 tok_state = tok_state_rcdata
4098 cur -= 1 // reconsume the input character
4099 return new_character_token('<')
4102 // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
// Tokenizer "RCDATA end tag open" state. Reads one character from txt at
// cur (advancing cur).
4103 tok_state_rcdata_end_tag_open = function () {
4105 c = txt.charAt(cur++)
// a letter starts a candidate end tag: the tag name is lower-cased, while the
// temporary buffer keeps the character exactly as typed
4106 if (is_uc_alpha(c)) {
4107 tok_cur_tag = new_end_tag(c.toLowerCase())
4108 temporary_buffer += c
4109 tok_state = tok_state_rcdata_end_tag_name
4112 if (is_lc_alpha(c)) {
4113 tok_cur_tag = new_end_tag(c)
4114 temporary_buffer += c
4115 tok_state = tok_state_rcdata_end_tag_name
// otherwise "</" was literal text: return to RCDATA and re-read this character
4119 tok_state = tok_state_rcdata
4120 cur -= 1 // reconsume the input character
4121 return new_character_token("</") // fixfull separate these
4124 // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
// Returns true when t is an end tag token whose name equals the name of the
// current node (open_els[0]); false otherwise.
// t: the token under construction in the tokenizer.
4125 is_appropriate_end_tag = function (t) {
4126 // fixfull: this assumes that open_els[0].name is "the tag name of the last
4127 // start tag to have been emitted from this tokenizer"
4128 return t.type === TYPE_END_TAG && t.name === open_els[0].name
4131 // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
// Tokenizer "RCDATA end tag name" state. Reads one character from txt at
// cur (advancing cur). A terminator only ends the tag when the tag built so
// far is an appropriate end tag; otherwise processing falls through to the
// final "anything else" path.
4132 tok_state_rcdata_end_tag_name = function () {
4134 c = txt.charAt(cur++)
// whitespace (tab, LF, FF, space)
4135 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4136 if (is_appropriate_end_tag(tok_cur_tag)) {
4137 tok_state = tok_state_before_attribute_name
4140 // else fall through to "Anything else"
4143 if (is_appropriate_end_tag(tok_cur_tag)) {
4144 tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
4147 // else fall through to "Anything else"
4150 if (is_appropriate_end_tag(tok_cur_tag)) {
4151 tok_state = tok_state_data
4154 // else fall through to "Anything else"
// letters extend the tag name (lower-cased) and the temporary buffer (as typed)
4156 if (is_uc_alpha(c)) {
4157 tok_cur_tag.name += c.toLowerCase()
4158 temporary_buffer += c
4161 if (is_lc_alpha(c)) {
4162 tok_cur_tag.name += c
4163 temporary_buffer += c
// anything else: not an end tag after all; give back "</" plus the buffered text
4167 tok_state = tok_state_rcdata
4168 cur -= 1 // reconsume the input character
4169 return new_character_token('</' + temporary_buffer) // fixfull separate these
4172 // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
// Tokenizer "RAWTEXT less-than sign" state. Reads one character from txt at
// cur (advancing cur).
4173 tok_state_rawtext_less_than_sign = function () {
4175 c = txt.charAt(cur++)
// possible end tag: reset the temporary buffer and go to RAWTEXT end-tag-open
4177 temporary_buffer = ''
4178 tok_state = tok_state_rawtext_end_tag_open
// otherwise the "<" was literal: return to RAWTEXT, re-read this character, return "<"
4182 tok_state = tok_state_rawtext
4183 cur -= 1 // reconsume the input character
4184 return new_character_token('<')
4187 // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
// Tokenizer "RAWTEXT end tag open" state. Reads one character from txt at
// cur (advancing cur).
4188 tok_state_rawtext_end_tag_open = function () {
4189 c = txt.charAt(cur++)
// a letter starts a candidate end tag: the tag name is lower-cased, while the
// temporary buffer keeps the character exactly as typed
4190 if (is_uc_alpha(c)) {
4191 tok_cur_tag = new_end_tag(c.toLowerCase())
4192 temporary_buffer += c
4193 tok_state = tok_state_rawtext_end_tag_name
4196 if (is_lc_alpha(c)) {
4197 tok_cur_tag = new_end_tag(c)
4198 temporary_buffer += c
4199 tok_state = tok_state_rawtext_end_tag_name
// otherwise "</" was literal text: return to RAWTEXT and re-read this character
4203 tok_state = tok_state_rawtext
4204 cur -= 1 // reconsume the input character
4205 return new_character_token("</") // fixfull separate these
4208 // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
// Tokenizer "RAWTEXT end tag name" state. Reads one character from txt at
// cur (advancing cur). A terminator only ends the tag when the tag built so
// far is an appropriate end tag; otherwise processing falls through to the
// final "anything else" path.
4209 tok_state_rawtext_end_tag_name = function () {
4211 c = txt.charAt(cur++)
// whitespace (tab, LF, FF, space)
4212 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4213 if (is_appropriate_end_tag(tok_cur_tag)) {
4214 tok_state = tok_state_before_attribute_name
4217 // else fall through to "Anything else"
4220 if (is_appropriate_end_tag(tok_cur_tag)) {
4221 tok_state = tok_state_self_closing_start_tag
4224 // else fall through to "Anything else"
4227 if (is_appropriate_end_tag(tok_cur_tag)) {
4228 tok_state = tok_state_data
4231 // else fall through to "Anything else"
// letters extend the tag name (lower-cased) and the temporary buffer (as typed)
4233 if (is_uc_alpha(c)) {
4234 tok_cur_tag.name += c.toLowerCase()
4235 temporary_buffer += c
4238 if (is_lc_alpha(c)) {
4239 tok_cur_tag.name += c
4240 temporary_buffer += c
// anything else: not an end tag after all; give back "</" plus the buffered text
4244 tok_state = tok_state_rawtext
4245 cur -= 1 // reconsume the input character
4246 return new_character_token('</' + temporary_buffer) // fixfull separate these
4249 // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
// Tokenizer "script data less-than sign" state. Reads one character from
// txt at cur (advancing cur).
4250 tok_state_script_data_less_than_sign = function () {
4252 c = txt.charAt(cur++)
// possible end tag: reset the temporary buffer and go to script-data end-tag-open
4254 temporary_buffer = ''
4255 tok_state = tok_state_script_data_end_tag_open
// "<!" begins a possible escape sequence; both characters are returned as text
4259 tok_state = tok_state_script_data_escape_start
4260 return new_character_token('<!') // fixfull split
// otherwise the "<" was literal: return to script data, re-read this character, return "<"
4263 tok_state = tok_state_script_data
4264 cur -= 1 // reconsume
4265 return new_character_token('<')
4268 // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
// Tokenizer "script data end tag open" state. Reads one character from txt
// at cur (advancing cur).
4269 tok_state_script_data_end_tag_open = function () {
4271 c = txt.charAt(cur++)
// a letter starts a candidate end tag: the tag name is lower-cased, while the
// temporary buffer keeps the character exactly as typed
4272 if (is_uc_alpha(c)) {
4273 tok_cur_tag = new_end_tag(c.toLowerCase())
4274 temporary_buffer += c
4275 tok_state = tok_state_script_data_end_tag_name
4278 if (is_lc_alpha(c)) {
4279 tok_cur_tag = new_end_tag(c)
4280 temporary_buffer += c
4281 tok_state = tok_state_script_data_end_tag_name
// otherwise "</" was literal text: return to script data and re-read this character
4285 tok_state = tok_state_script_data
4286 cur -= 1 // reconsume
4287 return new_character_token('</')
4290 // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
// Tokenizer "script data end tag name" state. Reads one character from txt
// at cur (advancing cur). A terminator only ends the tag when the tag built
// so far is an appropriate end tag.
4291 tok_state_script_data_end_tag_name = function () {
4293 c = txt.charAt(cur++)
// whitespace (tab, LF, FF, space)
4294 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4295 if (is_appropriate_end_tag(tok_cur_tag)) {
4296 tok_state = tok_state_before_attribute_name
4302 if (is_appropriate_end_tag(tok_cur_tag)) {
4303 tok_state = tok_state_self_closing_start_tag
4309 if (is_appropriate_end_tag(tok_cur_tag)) {
4310 tok_state = tok_state_data
// letters extend the tag name (lower-cased) and the temporary buffer (as typed)
4315 if (is_uc_alpha(c)) {
4316 tok_cur_tag.name += c.toLowerCase()
4317 temporary_buffer += c
4320 if (is_lc_alpha(c)) {
4321 tok_cur_tag.name += c
4322 temporary_buffer += c
// anything else: not an end tag after all; give back "</" plus the buffered text
4326 tok_state = tok_state_script_data
4327 cur -= 1 // reconsume
4328 return new_character_token("</" + temporary_buffer) // fixfull split
4331 // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
// Tokenizer "script data escape start" state (entered after "<!" in script
// data). Reads one character from txt at cur (advancing cur).
4332 tok_state_script_data_escape_start = function () {
4334 c = txt.charAt(cur++)
// first dash of a possible "<!--": returned as text, move to escape-start-dash
4336 tok_state = tok_state_script_data_escape_start_dash
4337 return new_character_token('-')
// otherwise go back to plain script data and re-read this character
4340 tok_state = tok_state_script_data
4341 cur -= 1 // reconsume
4344 // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
// Tokenizer "script data escape start dash" state. Reads one character from
// txt at cur (advancing cur).
4345 tok_state_script_data_escape_start_dash = function () {
4347 c = txt.charAt(cur++)
// second dash: returned as text, move to the escaped dash-dash state
4349 tok_state = tok_state_script_data_escaped_dash_dash
4350 return new_character_token('-')
// otherwise go back to plain script data and re-read this character
4353 tok_state = tok_state_script_data
4354 cur -= 1 // reconsume
4357 // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
// Tokenizer "script data escaped" state. Reads one character from txt at
// cur (advancing cur).
4358 tok_state_script_data_escaped = function () {
4360 c = txt.charAt(cur++)
4362 tok_state = tok_state_script_data_escaped_dash
4363 return new_character_token('-')
4366 tok_state = tok_state_script_data_escaped_less_than_sign
// NUL is replaced with U+FFFD
4369 if (c === "\u0000") {
4371 return new_character_token("\ufffd")
// charAt() yields '' past the end of txt: switch to the data state and back
// up so the end of input is seen again there
4373 if (c === '') { // EOF
4374 tok_state = tok_state_data
4376 cur -= 1 // reconsume
// anything else is returned as text without changing state
4380 return new_character_token(c)
4383 // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
// Tokenizer "script data escaped dash" state (one "-" seen while escaped).
// Reads one character from txt at cur (advancing cur).
4384 tok_state_script_data_escaped_dash = function () {
4386 c = txt.charAt(cur++)
4388 tok_state = tok_state_script_data_escaped_dash_dash
4389 return new_character_token('-')
4392 tok_state = tok_state_script_data_escaped_less_than_sign
// NUL: back to the escaped state, returning U+FFFD
4395 if (c === "\u0000") {
4397 tok_state = tok_state_script_data_escaped
4398 return new_character_token("\ufffd")
// end of input: switch to the data state and back up so it is seen again there
4400 if (c === '') { // EOF
4401 tok_state = tok_state_data
4403 cur -= 1 // reconsume
// anything else: back to the escaped state, returning the character as text
4407 tok_state = tok_state_script_data_escaped
4408 return new_character_token(c)
4411 // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
// Tokenizer "script data escaped dash dash" state ("--" seen while escaped).
// Reads one character from txt at cur (advancing cur).
4412 tok_state_script_data_escaped_dash_dash = function () {
4414 c = txt.charAt(cur++)
// a further dash is returned as text with no state change visible
4416 return new_character_token('-')
4419 tok_state = tok_state_script_data_escaped_less_than_sign
// ">" ends the escaped section: back to plain script data
4423 tok_state = tok_state_script_data
4424 return new_character_token('>')
// NUL: back to the escaped state, returning U+FFFD
4426 if (c === "\u0000") {
4428 tok_state = tok_state_script_data_escaped
4429 return new_character_token("\ufffd")
// end of input: switch to the data state and back up so it is seen again there
4431 if (c === '') { // EOF
4433 tok_state = tok_state_data
4434 cur -= 1 // reconsume
// anything else: back to the escaped state, returning the character as text
4438 tok_state = tok_state_script_data_escaped
4439 return new_character_token(c)
4442 // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
// Tokenizer "script data escaped less-than sign" state. Reads one character
// from txt at cur (advancing cur).
4443 tok_state_script_data_escaped_less_than_sign = function () {
4445 c = txt.charAt(cur++)
// possible end tag: reset the temporary buffer and go to escaped end-tag-open
4447 temporary_buffer = ''
4448 tok_state = tok_state_script_data_escaped_end_tag_open
// a letter may begin a nested "<script": the buffer is seeded with the
// lower-cased letter, while the returned text keeps the letter as typed
4451 if (is_uc_alpha(c)) {
4452 temporary_buffer = c.toLowerCase() // yes, really
4453 tok_state = tok_state_script_data_double_escape_start
4454 return new_character_token("<" + c) // fixfull split
4456 if (is_lc_alpha(c)) {
4457 temporary_buffer = c
4458 tok_state = tok_state_script_data_double_escape_start
4459 return new_character_token("<" + c) // fixfull split
// otherwise the "<" was literal: back to escaped, re-read this character, return "<"
4462 tok_state = tok_state_script_data_escaped
4463 cur -= 1 // reconsume
4464 return new_character_token('<')
4467 // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
// Tokenizer "script data escaped end tag open" state. Reads one character
// from txt at cur (advancing cur).
4468 tok_state_script_data_escaped_end_tag_open = function () {
4470 c = txt.charAt(cur++)
// a letter starts a candidate end tag: the tag name is lower-cased, while the
// temporary buffer keeps the character exactly as typed
4471 if (is_uc_alpha(c)) {
4472 tok_cur_tag = new_end_tag(c.toLowerCase())
4473 temporary_buffer += c
4474 tok_state = tok_state_script_data_escaped_end_tag_name
4477 if (is_lc_alpha(c)) {
4478 tok_cur_tag = new_end_tag(c)
4479 temporary_buffer += c
4480 tok_state = tok_state_script_data_escaped_end_tag_name
// otherwise "</" was literal text: back to escaped and re-read this character
4484 tok_state = tok_state_script_data_escaped
4485 cur -= 1 // reconsume
4486 return new_character_token('</') // fixfull split
4489 // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
4490 tok_state_script_data_escaped_end_tag_name = function () {
4492 c = txt.charAt(cur++)
4493 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
4494 if (is_appropriate_end_tag(tok_cur_tag)) {
4495 tok_state = tok_state_before_attribute_name
4501 if (is_appropriate_end_tag(tok_cur_tag)) {
4502 tok_state = tok_state_self_closing_start_tag
4508 if (is_appropriate_end_tag(tok_cur_tag)) {
4509 tok_state = tok_state_data
4514 if (is_uc_alpha(c)) {
4515 tok_cur_tag.name += c.toLowerCase()
4516 temporary_buffer += c.toLowerCase()
4519 if (is_lc_alpha(c)) {
4520 tok_cur_tag.name += c
4521 temporary_buffer += c.toLowerCase()
4525 tok_state = tok_state_script_data_escaped
4526 cur -= 1 // reconsume
4527 return new_character_token("</" + temporary_buffer) // fixfull split
4530 // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
// Tokenizer "script data double escape start" state. Reads one character
// from txt at cur (advancing cur) while a tag-like name accumulates in
// temporary_buffer (lower-cased).
4531 tok_state_script_data_double_escape_start = function () {
4533 c = txt.charAt(cur++)
// a terminator decides the outcome: buffer "script" enters double-escaped,
// anything else returns to escaped; the terminator is returned as text either way
4534 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4535 if (temporary_buffer === 'script') {
4536 tok_state = tok_state_script_data_double_escaped
4538 tok_state = tok_state_script_data_escaped
4540 return new_character_token(c)
// letters: the buffer gets the lower-cased form (it is compared with 'script'
// above); the returned text keeps the original case
4542 if (is_uc_alpha(c)) {
4543 temporary_buffer += c.toLowerCase() // yes, really lowercase
4544 return new_character_token(c)
4546 if (is_lc_alpha(c)) {
4547 temporary_buffer += c
4548 return new_character_token(c)
// anything else: back to escaped and re-read this character
4551 tok_state = tok_state_script_data_escaped
4552 cur -= 1 // reconsume
4555 // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
// Tokenizer "script data double escaped" state. Reads one character from
// txt at cur (advancing cur). Unlike the single-escaped state, "<" is
// returned as text here as well as changing state.
4556 tok_state_script_data_double_escaped = function () {
4558 c = txt.charAt(cur++)
4560 tok_state = tok_state_script_data_double_escaped_dash
4561 return new_character_token('-')
4564 tok_state = tok_state_script_data_double_escaped_less_than_sign
4565 return new_character_token('<')
// NUL is replaced with U+FFFD
4567 if (c === "\u0000") {
4569 return new_character_token("\ufffd")
// end of input: switch to the data state and back up so it is seen again there
4571 if (c === '') { // EOF
4573 tok_state = tok_state_data
4574 cur -= 1 // reconsume
// anything else is returned as text without changing state
4578 return new_character_token(c)
4581 // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
// Tokenizer "script data double escaped dash" state (one "-" seen while
// double-escaped). Reads one character from txt at cur (advancing cur).
4582 tok_state_script_data_double_escaped_dash = function () {
4584 c = txt.charAt(cur++)
4586 tok_state = tok_state_script_data_double_escaped_dash_dash
4587 return new_character_token('-')
4590 tok_state = tok_state_script_data_double_escaped_less_than_sign
4591 return new_character_token('<')
// NUL: back to double-escaped, returning U+FFFD
4593 if (c === "\u0000") {
4595 tok_state = tok_state_script_data_double_escaped
4596 return new_character_token("\ufffd")
// end of input: switch to the data state and back up so it is seen again there
4598 if (c === '') { // EOF
4600 tok_state = tok_state_data
4601 cur -= 1 // reconsume
// anything else: back to double-escaped, returning the character as text
4605 tok_state = tok_state_script_data_double_escaped
4606 return new_character_token(c)
4609 // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
// Tokenizer "script data double escaped dash dash" state ("--" seen while
// double-escaped). Reads one character from txt at cur (advancing cur).
4610 tok_state_script_data_double_escaped_dash_dash = function () {
4612 c = txt.charAt(cur++)
// a further dash is returned as text with no state change visible
4614 return new_character_token('-')
4617 tok_state = tok_state_script_data_double_escaped_less_than_sign
4618 return new_character_token('<')
// ">" ends the escaped section: back to plain script data
4621 tok_state = tok_state_script_data
4622 return new_character_token('>')
// NUL: back to double-escaped, returning U+FFFD
4624 if (c === "\u0000") {
4626 tok_state = tok_state_script_data_double_escaped
4627 return new_character_token("\ufffd")
// end of input: switch to the data state and back up so it is seen again there
4629 if (c === '') { // EOF
4631 tok_state = tok_state_data
4632 cur -= 1 // reconsume
// anything else: back to double-escaped, returning the character as text
4636 tok_state = tok_state_script_data_double_escaped
4637 return new_character_token(c)
4640 // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
// Tokenizer "script data double escaped less-than sign" state. Reads one
// character from txt at cur (advancing cur).
4641 tok_state_script_data_double_escaped_less_than_sign = function () {
4643 c = txt.charAt(cur++)
// "/" begins a possible closing name: reset the buffer, go to double-escape-end,
// and return the "/" as text
4645 temporary_buffer = ''
4646 tok_state = tok_state_script_data_double_escape_end
4647 return new_character_token('/')
// otherwise stay double-escaped and re-read this character
4650 tok_state = tok_state_script_data_double_escaped
4651 cur -= 1 // reconsume
4654 // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
// Tokenizer "script data double escape end" state. Reads one character from
// txt at cur (advancing cur) while a tag-like name accumulates in
// temporary_buffer (lower-cased). Mirror image of the double-escape-start
// state: a buffered "script" drops back to the single-escaped state.
4655 tok_state_script_data_double_escape_end = function () {
4657 c = txt.charAt(cur++)
// a terminator decides the outcome; it is returned as text either way
4658 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4659 if (temporary_buffer === 'script') {
4660 tok_state = tok_state_script_data_escaped
4662 tok_state = tok_state_script_data_double_escaped
4664 return new_character_token(c)
// letters: the buffer gets the lower-cased form (it is compared with 'script'
// above); the returned text keeps the original case
4666 if (is_uc_alpha(c)) {
4667 temporary_buffer += c.toLowerCase() // yes, really lowercase
4668 return new_character_token(c)
4670 if (is_lc_alpha(c)) {
4671 temporary_buffer += c
4672 return new_character_token(c)
// anything else: stay double-escaped and re-read this character
4675 tok_state = tok_state_script_data_double_escaped
4676 cur -= 1 // reconsume
4679 // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
// Tokenizer "before attribute name" state. Reads one character from txt at
// cur (advancing cur). When the character starts an attribute, attr_name is
// set and a new [name, value] pair is pushed onto the FRONT of
// tok_cur_tag.attrs_a, so attrs_a[0] is always the attribute being built.
4680 tok_state_before_attribute_name = function () {
4681 var attr_name, c, tmp
4683 switch (c = txt.charAt(cur++)) {
4691 tok_state = tok_state_self_closing_start_tag
4695 tok_state = tok_state_data
// U+FFFD replacement character becomes the first character of the name
4702 attr_name = "\ufffd"
4713 tok_state = tok_state_data
// attribute names are stored lower-cased
4716 if (is_uc_alpha(c)) {
4717 attr_name = c.toLowerCase()
// only start a new attribute when one of the branches above chose a name
4722 if (attr_name != null) {
4723 tok_cur_tag.attrs_a.unshift([attr_name, ''])
4724 tok_state = tok_state_attribute_name
4729 // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
// Tokenizer state handler. Extends the name of the attribute being built
// (tok_cur_tag.attrs_a[0][0]) by the consumed character, storing upper-case
// letters lowercased, or leaves this state for one of the states assigned below.
4730 tok_state_attribute_name = function () {
4732 switch (c = txt.charAt(cur++)) {
4737 tok_state = tok_state_after_attribute_name
4740 tok_state = tok_state_self_closing_start_tag
4743 tok_state = tok_state_before_attribute_value
4746 tok_state = tok_state_data
4753 tok_cur_tag.attrs_a[0][0] += "\ufffd"
4759 tok_cur_tag.attrs_a[0][0] += c
4763 tok_state = tok_state_data
4766 if (is_uc_alpha(c)) {
4767 tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
4769 tok_cur_tag.attrs_a[0][0] += c
4775 // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
// Tokenizer state handler. Starts a new attribute (unshift of a [name, ''] pair
// onto tok_cur_tag.attrs_a) from an upper-case letter (lowercased), from NUL
// (stored as U+FFFD) or from the raw character; on EOF it returns to the data
// state and un-reads.
4776 tok_state_after_attribute_name = function () {
4778 c = txt.charAt(cur++)
4779 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4783 tok_state = tok_state_self_closing_start_tag
4787 tok_state = tok_state_before_attribute_value
4791 tok_state = tok_state_data
4794 if (is_uc_alpha(c)) {
4795 tok_cur_tag.attrs_a.unshift([c.toLowerCase(), ''])
4796 tok_state = tok_state_attribute_name
4799 if (c === "\u0000") {
4801 tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
4802 tok_state = tok_state_attribute_name
4805 if (c === '') { // EOF
4807 tok_state = tok_state_data
4808 cur -= 1 // reconsume
4811 if (c === '"' || c === "'" || c === '<') {
4813 // fall through to Anything else
// the character still becomes the first character of a new attribute name
4816 tok_cur_tag.attrs_a.unshift([c, ''])
4817 tok_state = tok_state_attribute_name
4820 // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Tokenizer state handler. Selects the double-quoted, single-quoted or unquoted
// attribute-value state. On two branches the value of the current attribute
// (tok_cur_tag.attrs_a[0][1]) already receives its first character (c or
// U+FFFD) before the unquoted state is entered.
4821 tok_state_before_attribute_value = function () {
4823 switch (c = txt.charAt(cur++)) {
4831 tok_state = tok_state_attribute_value_double_quoted
4834 tok_state = tok_state_attribute_value_unquoted
4838 tok_state = tok_state_attribute_value_single_quoted
4842 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4843 tok_state = tok_state_attribute_value_unquoted
4847 tok_state = tok_state_data
4854 tok_state = tok_state_data
4857 tok_cur_tag.attrs_a[0][1] += c
4858 tok_state = tok_state_attribute_value_unquoted
4863 // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
// Tokenizer state handler. Accumulates the value of the current attribute in
// tok_cur_tag.attrs_a[0][1] until the after-attribute-value (quoted) state or
// the data state is entered.
4864 tok_state_attribute_value_double_quoted = function () {
4866 switch (c = txt.charAt(cur++)) {
4868 tok_state = tok_state_after_attribute_value_quoted
// character references are expanded in place; '"' is passed as allowed_char
// and true as in_attr
4871 tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
4875 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4879 tok_state = tok_state_data
4882 tok_cur_tag.attrs_a[0][1] += c
4887 // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Tokenizer state handler. Accumulates the value of the current attribute in
// tok_cur_tag.attrs_a[0][1] until the after-attribute-value (quoted) state or
// the data state is entered.
4888 tok_state_attribute_value_single_quoted = function () {
4890 switch (c = txt.charAt(cur++)) {
4892 tok_state = tok_state_after_attribute_value_quoted
// character references are expanded in place; "'" is passed as allowed_char
// and true as in_attr
4895 tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
4899 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4903 tok_state = tok_state_data
4906 tok_cur_tag.attrs_a[0][1] += c
4911 // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// Tokenizer state handler. Accumulates the value of the current attribute in
// tok_cur_tag.attrs_a[0][1] until the before-attribute-name state or the data
// state is entered.
4912 tok_state_attribute_value_unquoted = function () {
4914 switch (c = txt.charAt(cur++)) {
4919 tok_state = tok_state_before_attribute_name
// character references are expanded in place; '>' is passed as allowed_char
// and true as in_attr
4922 tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
4925 tok_state = tok_state_data
4931 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4935 tok_state = tok_state_data
4938 // Parse Error if ', <, = or ` (backtick)
4939 tok_cur_tag.attrs_a[0][1] += c
4944 // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
// Tokenizer state handler. Runs right after a closing quote and picks the next
// state from the consumed character.
4945 tok_state_after_attribute_value_quoted = function () {
4947 switch (c = txt.charAt(cur++)) {
4952 tok_state = tok_state_before_attribute_name
4955 tok_state = tok_state_self_closing_start_tag
4958 tok_state = tok_state_data
4965 tok_state = tok_state_data
// hand the unhandled character over to the before-attribute-name state
4969 tok_state = tok_state_before_attribute_name
4970 cur -= 1 // we didn't handle that char
4975 // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
// Tokenizer state handler. One branch sets the 'self-closing' flag on
// tok_cur_tag and returns to the data state; the other two un-read the
// character and continue in the data state or the before-attribute-name state.
4976 tok_state_self_closing_start_tag = function () {
4978 c = txt.charAt(cur++)
4980 tok_cur_tag.flag('self-closing', true)
4981 tok_state = tok_state_data
4986 tok_state = tok_state_data
4987 cur -= 1 // reconsume
4992 tok_state = tok_state_before_attribute_name
4993 cur -= 1 // reconsume
4996 // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
4997 // WARNING: put a comment token in tok_cur_tag before setting this state
// Tokenizer state handler. Instead of stepping character by character it takes
// the whole run from cur up to the next '>' (or to the end of txt when there is
// none), appends it to tok_cur_tag.text and returns to the data state.
4998 tok_state_bogus_comment = function () {
5000 next_gt = txt.indexOf('>', cur)
5001 if (next_gt === -1) {
5002 val = txt.substr(cur)
5005 val = txt.substr(cur, next_gt - cur)
// every NUL in the run becomes U+FFFD
5008 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5009 tok_cur_tag.text += val
5010 tok_state = tok_state_data
5014 // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
// Tokenizer state handler. Looks ahead in txt from cur: '--' opens a comment,
// 'doctype' (any letter case) opens a doctype, and '[CDATA[' (exact case) opens
// a CDATA section. The last two assignments fall back to an empty comment token
// in the bogus-comment state.
5015 tok_state_markup_declaration_open = function () {
5017 if (txt.substr(cur, 2) === '--') {
5019 tok_cur_tag = new_comment_token('')
5020 tok_state = tok_state_comment_start
5023 if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
5025 tok_state = tok_state_doctype
// CDATA is only honoured when an adjusted current node exists and it is not in
// the HTML namespace
5028 acn = adjusted_current_node()
5029 if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
5031 tok_state = tok_state_cdata_section
5036 tok_cur_tag = new_comment_token('')
5037 tok_state = tok_state_bogus_comment
5040 // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
// Tokenizer state handler for the first character after "<!--". Comment text is
// accumulated in tok_cur_tag.text.
5041 tok_state_comment_start = function () {
5043 switch (c = txt.charAt(cur++)) {
5045 tok_state = tok_state_comment_start_dash
5049 tok_state = tok_state_comment
// NOTE(review): the other comment states append U+FFFD to tok_cur_tag.text;
// this branch returns a character token instead. Confirm against the spec's
// comment start state, which appends to the comment token's data.
5050 return new_character_token("\ufffd")
5054 tok_state = tok_state_data
5059 tok_state = tok_state_data
5060 cur -= 1 // reconsume
5064 tok_cur_tag.text += c
5065 tok_state = tok_state_comment
5070 // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
// Tokenizer state handler. A single '-' is pending when this state runs, which
// is why the branches that continue the comment prepend "-" to what they append
// to tok_cur_tag.text.
5071 tok_state_comment_start_dash = function () {
5073 switch (c = txt.charAt(cur++)) {
5075 tok_state = tok_state_comment_end
5079 tok_cur_tag.text += "-\ufffd"
5080 tok_state = tok_state_comment
5084 tok_state = tok_state_data
5089 tok_state = tok_state_data
5090 cur -= 1 // reconsume
5094 tok_cur_tag.text += "-" + c
5095 tok_state = tok_state_comment
5100 // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
// Tokenizer state handler for the body of a comment: appends the consumed
// character (or U+FFFD) to tok_cur_tag.text, or leaves for the
// comment-end-dash state or the data state.
5101 tok_state_comment = function () {
5103 switch (c = txt.charAt(cur++)) {
5105 tok_state = tok_state_comment_end_dash
5109 tok_cur_tag.text += "\ufffd"
5113 tok_state = tok_state_data
5114 cur -= 1 // reconsume
5118 tok_cur_tag.text += c
5123 // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
// Tokenizer state handler. One '-' is pending; branches that return to the
// comment state put it back into tok_cur_tag.text together with the new
// character.
5124 tok_state_comment_end_dash = function () {
5126 switch (c = txt.charAt(cur++)) {
5128 tok_state = tok_state_comment_end
5132 tok_cur_tag.text += "-\ufffd"
5133 tok_state = tok_state_comment
5137 tok_state = tok_state_data
5138 cur -= 1 // reconsume
5142 tok_cur_tag.text += "-" + c
5143 tok_state = tok_state_comment
5148 // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
// Tokenizer state handler. Two '-' are pending; branches that return to the
// comment state put "--" back into tok_cur_tag.text together with the new
// character.
5149 tok_state_comment_end = function () {
5151 switch (c = txt.charAt(cur++)) {
5153 tok_state = tok_state_data
5158 tok_cur_tag.text += "--\ufffd"
5159 tok_state = tok_state_comment
5163 tok_state = tok_state_comment_end_bang
// only one '-' is appended here and tok_state is left unchanged
5167 tok_cur_tag.text += '-'
5171 tok_state = tok_state_data
5172 cur -= 1 // reconsume
5177 tok_cur_tag.text += "--" + c
5178 tok_state = tok_state_comment
5183 // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
// Tokenizer state handler. "--!" is pending; branches that continue the comment
// put it back into tok_cur_tag.text together with the new character.
5184 tok_state_comment_end_bang = function () {
5186 switch (c = txt.charAt(cur++)) {
5188 tok_cur_tag.text += "--!" + c
5189 tok_state = tok_state_comment_end_dash
5192 tok_state = tok_state_data
5197 tok_cur_tag.text += "--!\ufffd"
5198 tok_state = tok_state_comment
5202 tok_state = tok_state_data
5203 cur -= 1 // reconsume
5207 tok_cur_tag.text += "--!" + c
5208 tok_state = tok_state_comment
5213 // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
// Tokenizer state handler for the character right after "<!doctype". It either
// moves on to the before-doctype-name state (un-reading the character on one
// branch) or gives up and returns to the data state.
5214 tok_state_doctype = function () {
5216 switch (c = txt.charAt(cur++)) {
5221 tok_state = tok_state_before_doctype_name
5225 tok_state = tok_state_data
// giving up: an empty-named doctype token is created with 'force-quirks' set
5226 el = new_doctype_token('')
5227 el.flag('force-quirks', true)
5228 cur -= 1 // reconsume
5233 tok_state = tok_state_before_doctype_name
5234 cur -= 1 // reconsume
5239 // 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
// Tokenizer state handler. Creates the doctype token in tok_cur_tag from the
// first character of its name (upper-case letters lowercased, NUL stored as
// U+FFFD) and moves to the doctype-name state. Branches that end the doctype
// early create an empty-named token with 'force-quirks' set instead.
5240 tok_state_before_doctype_name = function () {
5242 c = txt.charAt(cur++)
5243 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5246 if (is_uc_alpha(c)) {
5247 tok_cur_tag = new_doctype_token(c.toLowerCase())
5248 tok_state = tok_state_doctype_name
5251 if (c === "\u0000") {
5253 tok_cur_tag = new_doctype_token("\ufffd")
5254 tok_state = tok_state_doctype_name
5259 el = new_doctype_token('')
5260 el.flag('force-quirks', true)
5261 tok_state = tok_state_data
5264 if (c === '') { // EOF
5266 tok_state = tok_state_data
5267 el = new_doctype_token('')
5268 el.flag('force-quirks', true)
5269 cur -= 1 // reconsume
5273 tok_cur_tag = new_doctype_token(c)
5274 tok_state = tok_state_doctype_name
5278 // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
// Tokenizer state handler. Appends to tok_cur_tag.name (upper-case letters
// lowercased, NUL stored as U+FFFD). Whitespace moves on to the
// after-doctype-name state; EOF sets 'force-quirks', returns to the data state
// and un-reads.
5279 tok_state_doctype_name = function () {
5281 c = txt.charAt(cur++)
5282 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5283 tok_state = tok_state_after_doctype_name
5287 tok_state = tok_state_data
5290 if (is_uc_alpha(c)) {
5291 tok_cur_tag.name += c.toLowerCase()
5294 if (c === "\u0000") {
5296 tok_cur_tag.name += "\ufffd"
5299 if (c === '') { // EOF
5301 tok_state = tok_state_data
5302 tok_cur_tag.flag('force-quirks', true)
5303 cur -= 1 // reconsume
5307 tok_cur_tag.name += c
5311 // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
// Tokenizer state handler. After the doctype name it looks for the keywords
// "public" / "system" (any letter case); anything else marks the doctype
// 'force-quirks' and switches to the bogus-doctype state.
5312 tok_state_after_doctype_name = function () {
5314 c = txt.charAt(cur++)
5315 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5319 tok_state = tok_state_data
5322 if (c === '') { // EOF
5324 tok_state = tok_state_data
5325 tok_cur_tag.flag('force-quirks', true)
5326 cur -= 1 // reconsume
// cur - 1 because c was already consumed above: the keyword starts at c itself
5330 if (txt.substr(cur - 1, 6).toLowerCase() === 'public') {
5332 tok_state = tok_state_after_doctype_public_keyword
5335 if (txt.substr(cur - 1, 6).toLowerCase() === 'system') {
5337 tok_state = tok_state_after_doctype_system_keyword
5341 tok_cur_tag.flag('force-quirks', true)
5342 tok_state = tok_state_bogus_doctype
5346 // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
// Tokenizer state handler. Either starts the public identifier (initialising
// tok_cur_tag.public_identifier to '' and entering the matching quoted state),
// or marks the doctype 'force-quirks' and leaves for the data or bogus-doctype
// state.
5347 tok_state_after_doctype_public_keyword = function () {
5349 c = txt.charAt(cur++)
5350 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5351 tok_state = tok_state_before_doctype_public_identifier
5356 tok_cur_tag.public_identifier = ''
5357 tok_state = tok_state_doctype_public_identifier_double_quoted
5362 tok_cur_tag.public_identifier = ''
5363 tok_state = tok_state_doctype_public_identifier_single_quoted
5368 tok_cur_tag.flag('force-quirks', true)
5369 tok_state = tok_state_data
5372 if (c === '') { // EOF
5374 tok_state = tok_state_data
5375 tok_cur_tag.flag('force-quirks', true)
5376 cur -= 1 // reconsume
5381 tok_cur_tag.flag('force-quirks', true)
5382 tok_state = tok_state_bogus_doctype
5386 // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
// Tokenizer state handler. Either starts the public identifier (initialising
// tok_cur_tag.public_identifier to '' and entering the matching quoted state),
// or marks the doctype 'force-quirks' and leaves for the data or bogus-doctype
// state.
5387 tok_state_before_doctype_public_identifier = function () {
5389 c = txt.charAt(cur++)
5390 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5395 tok_cur_tag.public_identifier = ''
5396 tok_state = tok_state_doctype_public_identifier_double_quoted
5401 tok_cur_tag.public_identifier = ''
5402 tok_state = tok_state_doctype_public_identifier_single_quoted
5407 tok_cur_tag.flag('force-quirks', true)
5408 tok_state = tok_state_data
5411 if (c === '') { // EOF
5413 tok_state = tok_state_data
5414 tok_cur_tag.flag('force-quirks', true)
5415 cur -= 1 // reconsume
5420 tok_cur_tag.flag('force-quirks', true)
5421 tok_state = tok_state_bogus_doctype
5426 // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
// Tokenizer state handler. Accumulates tok_cur_tag.public_identifier (NUL is
// stored as U+FFFD) until the after-doctype-public-identifier state is
// entered; early termination sets 'force-quirks' and returns to the data state.
5427 tok_state_doctype_public_identifier_double_quoted = function () {
5429 c = txt.charAt(cur++)
5431 tok_state = tok_state_after_doctype_public_identifier
5434 if (c === "\u0000") {
5436 tok_cur_tag.public_identifier += "\ufffd"
5441 tok_cur_tag.flag('force-quirks', true)
5442 tok_state = tok_state_data
5445 if (c === '') { // EOF
5447 tok_state = tok_state_data
5448 tok_cur_tag.flag('force-quirks', true)
5449 cur -= 1 // reconsume
5453 tok_cur_tag.public_identifier += c
5457 // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
// Tokenizer state handler. Accumulates tok_cur_tag.public_identifier (NUL is
// stored as U+FFFD) until the after-doctype-public-identifier state is
// entered; early termination sets 'force-quirks' and returns to the data state.
5458 tok_state_doctype_public_identifier_single_quoted = function () {
5460 c = txt.charAt(cur++)
5462 tok_state = tok_state_after_doctype_public_identifier
5465 if (c === "\u0000") {
5467 tok_cur_tag.public_identifier += "\ufffd"
5472 tok_cur_tag.flag('force-quirks', true)
5473 tok_state = tok_state_data
5476 if (c === '') { // EOF
5478 tok_state = tok_state_data
5479 tok_cur_tag.flag('force-quirks', true)
5480 cur -= 1 // reconsume
5484 tok_cur_tag.public_identifier += c
5488 // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
// Tokenizer state handler. After the public identifier it may start the system
// identifier (initialising tok_cur_tag.system_identifier to ''), finish in the
// data state, or mark the doctype 'force-quirks' and go bogus.
5489 tok_state_after_doctype_public_identifier = function () {
5491 c = txt.charAt(cur++)
5492 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5493 tok_state = tok_state_between_doctype_public_and_system_identifiers
5497 tok_state = tok_state_data
5502 tok_cur_tag.system_identifier = ''
5503 tok_state = tok_state_doctype_system_identifier_double_quoted
5508 tok_cur_tag.system_identifier = ''
5509 tok_state = tok_state_doctype_system_identifier_single_quoted
5512 if (c === '') { // EOF
5514 tok_state = tok_state_data
5515 tok_cur_tag.flag('force-quirks', true)
5516 cur -= 1 // reconsume
5521 tok_cur_tag.flag('force-quirks', true)
5522 tok_state = tok_state_bogus_doctype
5526 // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
// Tokenizer state handler. Between the two identifiers it may start the system
// identifier (initialising tok_cur_tag.system_identifier to ''), finish in the
// data state, or mark the doctype 'force-quirks' and go bogus.
5527 tok_state_between_doctype_public_and_system_identifiers = function () {
5529 c = txt.charAt(cur++)
5530 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5534 tok_state = tok_state_data
5539 tok_cur_tag.system_identifier = ''
5540 tok_state = tok_state_doctype_system_identifier_double_quoted
5545 tok_cur_tag.system_identifier = ''
5546 tok_state = tok_state_doctype_system_identifier_single_quoted
5549 if (c === '') { // EOF
5551 tok_state = tok_state_data
5552 tok_cur_tag.flag('force-quirks', true)
5553 cur -= 1 // reconsume
5558 tok_cur_tag.flag('force-quirks', true)
5559 tok_state = tok_state_bogus_doctype
5563 // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
// Tokenizer state handler. Either starts the system identifier (initialising
// tok_cur_tag.system_identifier to '' and entering the matching quoted state),
// or marks the doctype 'force-quirks' and leaves for the data or bogus-doctype
// state.
5564 tok_state_after_doctype_system_keyword = function () {
5566 c = txt.charAt(cur++)
5567 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5568 tok_state = tok_state_before_doctype_system_identifier
5573 tok_cur_tag.system_identifier = ''
5574 tok_state = tok_state_doctype_system_identifier_double_quoted
5579 tok_cur_tag.system_identifier = ''
5580 tok_state = tok_state_doctype_system_identifier_single_quoted
5585 tok_cur_tag.flag('force-quirks', true)
5586 tok_state = tok_state_data
5589 if (c === '') { // EOF
5591 tok_state = tok_state_data
5592 tok_cur_tag.flag('force-quirks', true)
5593 cur -= 1 // reconsume
5598 tok_cur_tag.flag('force-quirks', true)
5599 tok_state = tok_state_bogus_doctype
5603 // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
// Tokenizer state handler. Either starts the system identifier (initialising
// tok_cur_tag.system_identifier to '' and entering the matching quoted state),
// or marks the doctype 'force-quirks' and leaves for the data or bogus-doctype
// state.
5604 tok_state_before_doctype_system_identifier = function () {
5606 c = txt.charAt(cur++)
5607 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5611 tok_cur_tag.system_identifier = ''
5612 tok_state = tok_state_doctype_system_identifier_double_quoted
5616 tok_cur_tag.system_identifier = ''
5617 tok_state = tok_state_doctype_system_identifier_single_quoted
5622 tok_cur_tag.flag('force-quirks', true)
5623 tok_state = tok_state_data
5626 if (c === '') { // EOF
5628 tok_state = tok_state_data
5629 tok_cur_tag.flag('force-quirks', true)
5630 cur -= 1 // reconsume
5635 tok_cur_tag.flag('force-quirks', true)
5636 tok_state = tok_state_bogus_doctype
5640 // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
// Tokenizer state handler. Accumulates tok_cur_tag.system_identifier (NUL is
// stored as U+FFFD) until the after-doctype-system-identifier state is
// entered; early termination sets 'force-quirks' and returns to the data state.
5641 tok_state_doctype_system_identifier_double_quoted = function () {
5643 c = txt.charAt(cur++)
5645 tok_state = tok_state_after_doctype_system_identifier
5648 if (c === "\u0000") {
5650 tok_cur_tag.system_identifier += "\ufffd"
5655 tok_cur_tag.flag('force-quirks', true)
5656 tok_state = tok_state_data
5659 if (c === '') { // EOF
5661 tok_state = tok_state_data
5662 tok_cur_tag.flag('force-quirks', true)
5663 cur -= 1 // reconsume
5667 tok_cur_tag.system_identifier += c
5671 // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
// Tokenizer state handler. Accumulates tok_cur_tag.system_identifier (NUL is
// stored as U+FFFD) until the after-doctype-system-identifier state is
// entered; early termination sets 'force-quirks' and returns to the data state.
5672 tok_state_doctype_system_identifier_single_quoted = function () {
5674 c = txt.charAt(cur++)
5676 tok_state = tok_state_after_doctype_system_identifier
5679 if (c === "\u0000") {
5681 tok_cur_tag.system_identifier += "\ufffd"
5686 tok_cur_tag.flag('force-quirks', true)
5687 tok_state = tok_state_data
5690 if (c === '') { // EOF
5692 tok_state = tok_state_data
5693 tok_cur_tag.flag('force-quirks', true)
5694 cur -= 1 // reconsume
5698 tok_cur_tag.system_identifier += c
5702 // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
// Tokenizer state handler. Finishes the doctype in the data state; EOF also
// sets 'force-quirks'. Unlike the earlier doctype states, the final fallback to
// the bogus-doctype state deliberately leaves 'force-quirks' untouched.
5703 tok_state_after_doctype_system_identifier = function () {
5705 c = txt.charAt(cur++)
5706 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5710 tok_state = tok_state_data
5713 if (c === '') { // EOF
5715 tok_state = tok_state_data
5716 tok_cur_tag.flag('force-quirks', true)
5717 cur -= 1 // reconsume
5722 // do _not_ tok_cur_tag.flag 'force-quirks', true
5723 tok_state = tok_state_bogus_doctype
5727 // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
// Tokenizer state handler. Consumes characters of a malformed doctype; the two
// visible exits both return to the data state, and the EOF exit additionally
// un-reads so the data state sees the end of input itself.
5728 tok_state_bogus_doctype = function () {
5730 c = txt.charAt(cur++)
5732 tok_state = tok_state_data
5735 if (c === '') { // EOF
5736 tok_state = tok_state_data
5737 cur -= 1 // reconsume
5744 // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
// Tokenizer state handler. Switches straight back to the data state and takes
// the whole CDATA payload at once: everything from cur up to the next "]]>"
// (or to the end of txt when there is none). A non-empty payload is returned
// as one character token.
5745 tok_state_cdata_section = function () {
5747 tok_state = tok_state_data
// despite its name, next_gt holds the index of the "]]>" terminator here
5748 next_gt = txt.indexOf(']]>', cur)
5749 if (next_gt === -1) {
5750 val = txt.substr(cur)
5753 val = txt.substr(cur, next_gt - cur)
// every NUL in the payload becomes U+FFFD
5756 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5757 if (val.length > 0) {
5758 return new_character_token(val) // fixfull split
5763 // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5764 // Don't set this as a state, just call it
5765 // returns a string (NOT a text node)
//
// Parameters:
//   allowed_char - may be omitted (it is checked against null); the attribute
//                  value states pass their terminating character: '"', "'" or
//                  '>'
//   in_attr      - may be omitted (it is checked against null); the attribute
//                  value states pass true
//
// Two paths are visible below. The numeric path collects digits from charset,
// converts them with parseInt(code_point, base) and maps the result through
// unicode_fixes / from_code_point. The named path tries a ';'-terminated name
// via decode_named_char_ref and otherwise falls back to the legacy_char_refs
// table.
5766 parse_character_reference = function (allowed_char, in_attr) {
5767 var base, c, charset, code_point, decoded, i, max, start
5768 if (allowed_char == null) {
5771 if (in_attr == null) {
5774 if (cur >= txt.length) {
// the character is only peeked at here: cur is not advanced
5777 switch (c = txt.charAt(cur)) {
5786 // explicitly not a parse error
5790 // there has to be "one or more" alnums between & and ; to be a parse error
5794 if (cur + 1 >= txt.length) {
5797 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
// count how many characters starting at start belong to charset
5807 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5814 if (txt.charAt(start + i) === ';') {
5819 code_point = txt.substr(start, i)
// strip leading zeros but keep at least one digit
5820 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5821 code_point = code_point.substr(1)
5823 code_point = parseInt(code_point, base)
// code points listed in unicode_fixes are replaced by their table entry
5824 if (unicode_fixes[code_point] != null) {
5826 return unicode_fixes[code_point]
// surrogate range, or beyond the last Unicode code point
5828 if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
// C0/C1 control ranges, U+FDD0..U+FDEF, and the xFFFE/xFFFF pair of every plane
5832 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5835 return from_code_point(code_point)
// named reference: scan at most 31 characters for the first non-alphanumeric
5841 for (i = 0; i < 31; ++i) {
5842 if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5847 // exit early, because parse_error() below needs at least one alnum
5850 if (txt.charAt(cur + i) === ';') {
5851 decoded = decode_named_char_ref(txt.substr(cur, i))
5852 i += 1 // scan past the ';' (after, so we don't pass it to decode)
5853 if (decoded != null) {
5857 // else FALL THROUGH (check for match without last char(s) or ";")
5859 // no ';' terminator (only legacy char refs)
5861 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5862 c = legacy_char_refs[txt.substr(cur, i)]
5865 if (txt.charAt(cur + i) === '=') {
5866 // "because some legacy user agents will
5867 // misinterpret the markup in those cases"
5871 if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5872 // this makes attributes forgiving about url args
5876 // ok, and besides the weird exceptions for attributes...
5877 // return the matching char
5878 cur += i // consume entity chars
5879 parse_error() // because no terminating ";"
// Inspects a token t and, for text tokens, tests whether its text is a
// newline. cur - old_cur tells how much input the token consumed: exactly one
// character means it was not a character reference, in which case both CR and
// LF are tested; the remaining check accepts LF only.
// NOTE(review): where t and old_cur are assigned, and what happens on a match,
// is on lines not shown in this listing.
5889 eat_next_token_if_newline = function () {
5896 if (t.type === TYPE_TEXT) {
5897 // definition of a newline depends on whether it was a character ref or not
5898 if (cur - old_cur === 1) {
5899 // not a character reference
5900 if (t.text === "\u000d" || t.text === "\u000a") {
5904 if (t.text === "\u000a") {
5913 // tree constructor initialization
5914 // see comments on TYPE_TAG/etc for the structure of this data
// doc is the root 'document' node; it starts out with quirks mode off
5917 doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5918 doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5919 fragment_root = null // fragment parsing algorithm returns children of this
5921 afe = [] // active formatting elements
5922 template_ins_modes = []
5923 ins_mode = ins_mode_initial
5924 original_ins_mode = ins_mode // TODO check spec
// scripting defaults to true unless the caller passes args.scripting
5925 flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5926 flag_frameset_ok = true
5928 flag_foster_parenting = false
5929 form_element_pointer = null
5930 temporary_buffer = null
5931 pending_table_character_tokens = []
5932 head_element_pointer = null
// fragment mode is off by default; parse_init turns it on when a context
// element is supplied
5933 flag_fragment_parsing = false
5934 context_element = null
5935 prev_node_id = 0 // just for debugging
5937 // tokenizer initialization
5938 tok_state = tok_state_data
// Prepares the parser for a run. A context element comes either from
// args.fragment (a string, turned into an element via token_to_element and
// given a fresh 'document' node) or from args.context (used as is). When a
// context element exists, fragment parsing is set up: doc is replaced by an
// 'html' node, the quirks-mode flag is copied over from the context's
// document, the tokenizer's start state is chosen from the context element's
// name, and fragment_root becomes the only open element.
5940 parse_init = function () {
5941 var el, f, ns, old_doc, t
5942 // fragment parsing (text arg)
5943 if (args.fragment != null) {
5944 // this handles the fragment from the tests in the format described here:
5945 // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
// a leading "math " or "svg " prefix on the fragment string is tested first
5948 if (f.substr(0, 5) === 'math ') {
5951 } else if (f.substr(0, 4) === 'svg ') {
5956 context_element = token_to_element(t, ns)
5957 context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5958 context_element.document.flag('quirks mode', QUIRKS_NO)
5960 // fragment parsing (Node arg)
5961 if (args.context != null) {
5962 context_element = args.context
5965 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5966 // fragment parsing algorithm
5967 if (context_element != null) {
5968 flag_fragment_parsing = true
5969 doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5970 // search up the tree from context, to try to find its document,
5971 // because this file only puts a "document" property on the root
5974 el = context_element
5976 if (el.document != null) {
5977 old_doc = el.document
// the fragment's document inherits the quirks mode of the context's document
5987 doc.flag('quirks mode', old_doc.flag('quirks mode'))
// the tokenizer start state depends on the (HTML) context element's name
5990 if (context_element.namespace === NS_HTML) {
5991 switch (context_element.name) {
5994 tok_state = tok_state_rcdata
6001 tok_state = tok_state_rawtext
6004 tok_state = tok_state_script_data
6007 if (flag_scripting) {
6008 tok_state = tok_state_rawtext
6012 tok_state = tok_state_plaintext
// a synthetic <html> root receives the parsed nodes; parse_main_loop returns
// its children in fragment mode
6015 fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
6016 doc.children.push(fragment_root)
6017 fragment_root.document = doc
6018 open_els = [fragment_root]
6019 if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
6020 template_ins_modes.unshift(ins_mode_in_template)
6022 // fixfull create token for context (it should have its original one already)
6024 // set form_element pointer... in the foreign doc?!
6025 el = context_element
6027 if (el.name === 'form' && el.namespace === NS_HTML) {
6028 form_element_pointer = el
6039 // text pre-processing
6040 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
// Normalize line endings to LF. CRLF pairs are replaced first so that the
// second pass only sees lone CRs.
6041 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6042 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6045 // http://www.w3.org/TR/html5/syntax.html#tree-construction
// Main loop: keeps running while flag_parsing is set. In fragment mode the
// result is the list of children of fragment_root.
6046 parse_main_loop = function () {
6048 while (flag_parsing) {
6052 // fixfull parse error if has self-closing flag, but it wasn't acknowledged
6059 if (flag_fragment_parsing) {
6060 return fragment_root.children
// Public API. The parser entry point parse_html is exported under the name
// `parse`; the rest are debug helpers and the constants for node types,
// namespaces and quirks modes.
6065 exports.parse = parse_html
6067 exports.debug_log_reset = debug_log_reset
6068 exports.debug_log_each = debug_log_each
// node type constants
6069 exports.TYPE_TAG = TYPE_TAG
6070 exports.TYPE_TEXT = TYPE_TEXT
6071 exports.TYPE_COMMENT = TYPE_COMMENT
6072 exports.TYPE_DOCTYPE = TYPE_DOCTYPE
// namespace constants
6073 exports.NS_HTML = NS_HTML
6074 exports.NS_MATHML = NS_MATHML
6075 exports.NS_SVG = NS_SVG
// quirks-mode constants
6076 exports.QUIRKS_NO = QUIRKS_NO
6077 exports.QUIRKS_LIMITED = QUIRKS_LIMITED
6078 exports.QUIRKS_YES = QUIRKS_YES