1 // todo remove refs and lens, js, ls
4 // Copyright 2015 Jason Woofenden
5 // This file implements an HTML5 parser
7 // This program is free software: you can redistribute it and/or modify it under
8 // the terms of the GNU Affero General Public License as published by the Free
9 // Software Foundation, either version 3 of the License, or (at your option) any
12 // This program is distributed in the hope that it will be useful, but WITHOUT
13 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
21 // This file implements a thorough parser for html5, meant to be used by a
24 // The implementation is a pretty direct implementation of the parsing algorithm
27 // http://www.w3.org/TR/html5/syntax.html
29 // except for some places marked "WHATWG" that are implemented as described here:
31 // https://html.spec.whatwg.org/multipage/syntax.html
33 // This code passes all of the tests in the .dat files at:
35 // https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
38 //////////////////////////
39 // how to use this code //
40 //////////////////////////
42 // See README.md for how to run this file in the browser or in node.js.
44 // This file exports a single useful function: parse, and some constants
45 // (see the bottom of this file for those.)
49 // peach_parser.parse("<p><b>hi</p>")
51 // Or, if you don't want <html><head><body>/etc, do this:
53 // peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
55 // return value is an array of Nodes, A Node contains:
56 // type: one of: "tag", "text", "comment", "doctype"
57 // text: contents for text/comment nodes
58 // attrs: object of attributes, eg {href: "#main"}
59 // children: array of Nodes
60 // namespace: one of: "html", "mathml", "svg"
61 // parent: another Node or null
63 // This code is a work in progress, eg try search this file for "fixfull",
67 // Notes: stacks/lists
69 // Jason was frequently confused by the terminology used to refer to different
70 // parts of the stacks and lists in the spec, so he made this chart to help keep
73 // stacks grow downward (current element is index=0)
75 // example: open_els = [a, b, c, d, e, f, g]
77 // "grows downwards" means it's visualized like this: (index: el "names")
79 // 6: g "start of the list", "topmost", "first"
81 // 4: e "previous" (to d), "above", "before"
82 // 3: d (previous/next are relative to this element)
83 // 2: c "next", "after", "lower", "below"
85 // 0: a "end of the list", "current node", "bottommost", "last"
// Detect the host environment once.
//   context: 'module' when loaded through a CommonJS loader (node.js),
//            otherwise 'browser'. Code further down only tests for 'module'.
//   exports: the object the public API is attached to.
var context, exports
if ((typeof module) !== 'undefined' && (module.exports != null)) {
	context = 'module'
	exports = module.exports
} else {
	context = 'browser'
	// in a browser the API is published as window.peach_parser
	window.peach_parser = {}
	exports = window.peach_parser
}
// Convert a Unicode code point to a JavaScript (UTF-16) string.
//
// x: integer code point
// returns: a string of one UTF-16 unit, or two (a surrogate pair) for code
//          points above 0xFFFF
//
// String.fromCodePoint is used when the runtime provides it; the manual
// surrogate-pair encoding below is the fallback for older engines.
var from_code_point = function (x) {
	if (String.fromCodePoint != null) {
		return String.fromCodePoint(x)
	}
	if (x < 0x10000) {
		return String.fromCharCode(x)
	}
	x -= 0x10000
	return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
}
// Each node is an object of the Node class. Here are the Node types:
var TYPE_TAG = 'tag' // name, {attributes}, [children]
var TYPE_TEXT = 'text' // "text"
var TYPE_COMMENT = 'comment'
var TYPE_DOCTYPE = 'doctype'
// the following types are emitted by the tokenizer, but shouldn't end up in the tree:
var TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
var TYPE_END_TAG = 5 // name
// NOTE(review): the value 6 is inferred from the gap between 5 and 7 - confirm.
var TYPE_EOF = 6 // end of input, see new_eof_token()
var TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
var TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
120 // namespace constants
125 // quirks mode constants
127 QUIRKS_LIMITED = 'limited'
// queue up debug logs, so eg they can be shown only for tests that fail
var g_debug_log = []

// Throw away everything queued so far.
var debug_log_reset = function () {
	g_debug_log = []
}

// Append one message to the queue.
var debug_log = function (str) {
	g_debug_log.push(str)
}

// Call cb(message) once per queued message, oldest first.
var debug_log_each = function (cb) {
	var i
	for (i = 0; i < g_debug_log.length; ++i) {
		cb(g_debug_log[i])
	}
}
// counter behind the ids handed out by the Node constructor
var prev_node_id = 0

// Node: used both for tokens coming out of the tokenizer and for elements in
// the finished tree.
//
// type: one of the TYPE_* constants
// args: optional object; every key is optional:
//   name, text, attrs, children, namespace, parent, token, flags, id
//   attrs_a: [key, value] pairs of a start tag that is still being tokenized.
//            The historical key "attr_k" is still accepted.
//
// A node created with args.id gets that id with "+" appended; otherwise it
// gets the next value of the prev_node_id counter (as a string).
function Node (type, args) {
	if (args == null) {
		args = {}
	}
	this.type = type // one of the TYPE_* constants above
	this.name = args.name != null ? args.name : '' // tag name
	this.text = args.text != null ? args.text : '' // contents for text/comment nodes
	this.attrs = args.attrs != null ? args.attrs : {}
	this.children = args.children != null ? args.children : []
	this.namespace = args.namespace != null ? args.namespace : NS_HTML
	this.parent = args.parent != null ? args.parent : null
	// attrs in progress, TYPE_START_TAG only. The property is called attrs_a
	// but the constructor used to read only args.attr_k, so passing
	// {attrs_a: ...} was silently ignored; both spellings work now.
	if (args.attrs_a != null) {
		this.attrs_a = args.attrs_a
	} else {
		this.attrs_a = args.attr_k != null ? args.attr_k : []
	}
	this.token = args.token != null ? args.token : null
	this.flags = args.flags != null ? args.flags : {}
	if (args.id != null) {
		this.id = args.id + "+"
	} else {
		this.id = "" + (++prev_node_id)
	}
}

// Record that the self-closing flag was acknowledged: on the token this node
// was built from when there is one, otherwise on the node itself.
Node.prototype.acknowledge_self_closing = function () {
	if (this.token != null) {
		this.token.flag('did_self_close', true)
	} else {
		this.flag('did_self_close', true)
	}
}

// flag(key, value) stores a flag; flag(key) returns the stored value
// (undefined when it was never set). A null/undefined value means "read".
Node.prototype.flag = function (key, value) {
	if (value != null) {
		this.flags[key] = value
	} else {
		return this.flags[key]
	}
}
// helpers: (only take args that are normally known when parser creates nodes)
// Each one wraps `new Node(<type>, ...)` for a single node/token type.
var new_open_tag = function (name) {
	return new Node(TYPE_START_TAG, {name: name})
}
var new_end_tag = function (name) {
	return new Node(TYPE_END_TAG, {name: name})
}
var new_element = function (name) {
	return new Node(TYPE_TAG, {name: name})
}
var new_text_node = function (txt) {
	return new Node(TYPE_TEXT, {text: txt})
}
// character tokens and text nodes are the same kind of Node
var new_character_token = new_text_node
var new_comment_token = function (txt) {
	return new Node(TYPE_COMMENT, {text: txt})
}
var new_doctype_token = function (name) {
	return new Node(TYPE_DOCTYPE, {name: name})
}
// the remaining three carry no data, so no args object is passed
var new_eof_token = function () {
	return new Node(TYPE_EOF)
}
var new_afe_marker = function () {
	return new Node(TYPE_AFE_MARKER)
}
var new_aaa_bookmark = function () {
	return new Node(TYPE_AAA_BOOKMARK)
}
// ASCII character classes used by the tokenizer
var lc_alpha = "abcdefghijklmnopqrstuvwxyz"
var uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
var digits = "0123456789"
var alnum = lc_alpha + uc_alpha + digits
var hex_chars = digits + "abcdefABCDEF"

// true when str is exactly one ASCII upper-case letter
var is_uc_alpha = function (str) {
	return str.length === 1 && uc_alpha.indexOf(str) > -1
}
// true when str is exactly one ASCII lower-case letter
var is_lc_alpha = function (str) {
	return str.length === 1 && lc_alpha.indexOf(str) > -1
}

// some SVG elements have dashes in them
var tag_name_chars = alnum + "-"
// http://www.w3.org/TR/html5/infrastructure.html#space-character
var space_chars = "\u0009\u000a\u000c\u000d\u0020"
// true when txt is exactly one of the space characters above
var is_space = function (txt) {
	return txt.length === 1 && space_chars.indexOf(txt) > -1
}
// true when t is a text token holding exactly one space character
var is_space_tok = function (t) {
	return t.type === TYPE_TEXT && t.text.length === 1 && space_chars.indexOf(t.text) > -1
}
// Does start-tag token t carry a type attribute whose value is "hidden"
// (compared case-insensitively)?
//
// t: a token; anything that is not a TYPE_START_TAG yields false
// Only t.attrs_a (the [key, value] pairs collected by the tokenizer) is
// inspected, and the first "type" pair found decides the answer.
var is_input_hidden_tok = function (t) {
	var i, a
	if (t.type !== TYPE_START_TAG) {
		return false
	}
	for (i = 0; i < t.attrs_a.length; ++i) {
		a = t.attrs_a[i]
		if (a[0] === 'type') {
			return a[1].toLowerCase() === 'hidden'
		}
	}
	return false
}
// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
var whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"

// Replacement characters keyed by code point: 0x00 maps to U+FFFD and the
// 0x80-0x9F entries map to other characters. Code points without an entry
// are simply absent from the table.
var unicode_fixes = {
	0x00: "\uFFFD",
	0x80: "\u20AC",
	0x82: "\u201A",
	0x83: "\u0192",
	0x84: "\u201E",
	0x85: "\u2026",
	0x86: "\u2020",
	0x87: "\u2021",
	0x88: "\u02C6",
	0x89: "\u2030",
	0x8A: "\u0160",
	0x8B: "\u2039",
	0x8C: "\u0152",
	0x8E: "\u017D",
	0x91: "\u2018",
	0x92: "\u2019",
	0x93: "\u201C",
	0x94: "\u201D",
	0x95: "\u2022",
	0x96: "\u2013",
	0x97: "\u2014",
	0x98: "\u02DC",
	0x99: "\u2122",
	0x9A: "\u0161",
	0x9B: "\u203A",
	0x9C: "\u0153",
	0x9E: "\u017E",
	0x9F: "\u0178"
}
// Lower-case doctype public-identifier prefixes. Going by the name, a
// doctype whose public identifier starts with one of these selects quirks
// mode; the code that consumes the list is not part of this section.
var quirks_yes_pi_prefixes = [
	"+//silmaril//dtd html pro v0r11 19970101//",
	"-//as//dtd html 3.0 aswedit + extensions//",
	"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
	"-//ietf//dtd html 2.0 level 1//",
	"-//ietf//dtd html 2.0 level 2//",
	"-//ietf//dtd html 2.0 strict level 1//",
	"-//ietf//dtd html 2.0 strict level 2//",
	"-//ietf//dtd html 2.0 strict//",
	"-//ietf//dtd html 2.0//",
	"-//ietf//dtd html 2.1e//",
	"-//ietf//dtd html 3.0//",
	"-//ietf//dtd html 3.2 final//",
	"-//ietf//dtd html 3.2//",
	"-//ietf//dtd html 3//",
	"-//ietf//dtd html level 0//",
	"-//ietf//dtd html level 1//",
	"-//ietf//dtd html level 2//",
	"-//ietf//dtd html level 3//",
	"-//ietf//dtd html strict level 0//",
	"-//ietf//dtd html strict level 1//",
	"-//ietf//dtd html strict level 2//",
	"-//ietf//dtd html strict level 3//",
	"-//ietf//dtd html strict//",
	"-//ietf//dtd html//",
	"-//metrius//dtd metrius presentational//",
	"-//microsoft//dtd internet explorer 2.0 html strict//",
	"-//microsoft//dtd internet explorer 2.0 html//",
	"-//microsoft//dtd internet explorer 2.0 tables//",
	"-//microsoft//dtd internet explorer 3.0 html strict//",
	"-//microsoft//dtd internet explorer 3.0 html//",
	"-//microsoft//dtd internet explorer 3.0 tables//",
	"-//netscape comm. corp.//dtd html//",
	"-//netscape comm. corp.//dtd strict html//",
	"-//o'reilly and associates//dtd html 2.0//",
	"-//o'reilly and associates//dtd html extended 1.0//",
	"-//o'reilly and associates//dtd html extended relaxed 1.0//",
	"-//sq//dtd html 2.0 hotmetal + extensions//",
	"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
	"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
	"-//spyglass//dtd html 2.0 extended//",
	"-//sun microsystems corp.//dtd hotjava html//",
	"-//sun microsystems corp.//dtd hotjava strict html//",
	"-//w3c//dtd html 3 1995-03-24//",
	"-//w3c//dtd html 3.2 draft//",
	"-//w3c//dtd html 3.2 final//",
	"-//w3c//dtd html 3.2//",
	"-//w3c//dtd html 3.2s draft//",
	"-//w3c//dtd html 4.0 frameset//",
	"-//w3c//dtd html 4.0 transitional//",
	"-//w3c//dtd html experimental 19960712//",
	"-//w3c//dtd html experimental 970421//",
	"-//w3c//dtd w3 html//",
	"-//w3o//dtd w3 html 3.0//",
	"-//webtechs//dtd mozilla html 2.0//",
	"-//webtechs//dtd mozilla html//"
]
// These are the character references that don't need a terminating semicolon
// min length: 2, max: 6, none are a prefix of any other.
// Invisible characters (nbsp, shy) are written as escapes so they cannot be
// lost or mistaken for an empty string when the file is edited.
var legacy_char_refs = {
	Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
	aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
	aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
	Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
	curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
	ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
	euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
	Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
	igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
	lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
	Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
	Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
	Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
	pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
	shy: "\u00ad", sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
	times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
	ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
	yen: '¥', yuml: 'ÿ'
}
370 //void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
371 //raw_text_elements = ['script', 'style']
372 //escapable_raw_text_elements = ['textarea', 'title']
// http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
// Element names, in their canonical mixed case.
var svg_elements = [
	'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
	'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
	'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
	'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
	'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
	'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
	'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
	'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
	'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
	'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
	'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
	'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
	'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
	'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
	'view', 'vkern'
]
// http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
// Element names. ('mi' used to be listed twice; the duplicate is dropped.)
var mathml_elements = [
	'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
	'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
	'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
	'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
	'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
	'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
	'determinant', 'diff', 'divergence', 'divide', 'domain',
	'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
	'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
	'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
	'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
	'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
	'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
	'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
	'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
	'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
	'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
	'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
	'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
	'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
	'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
	'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
	'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
	'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
	'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
	'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
	'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
	'vectorproduct', 'xor'
]
423 // foreign_elements = [svg_elements..., mathml_elements...]
424 //normal_elements = All other allowed HTML elements are normal elements.
// Elements in the "special" category, as a map of tag name -> namespace.
// Tested elsewhere in this file with special_elements[el.name] === el.namespace.
//
// NOTE: `title` is special in both the HTML and the SVG namespace, but a
// name -> namespace map can hold only one namespace per name. The literal
// used to list `title` twice and the later (SVG) entry silently won; the
// dead HTML entry is left out here so the table says what it really does.
// HTML <title> therefore has to be handled by whoever consults this table.
var special_elements = {
	// HTML
	address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
	aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
	blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
	caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
	details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
	embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
	footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
	h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
	header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
	img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
	listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,

	menu: NS_HTML, menuitem: NS_HTML, // WHATWG adds these

	meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
	noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
	plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
	select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
	table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
	textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
	tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,

	// MathML
	mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
	'annotation-xml': NS_MATHML,

	// SVG
	foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
}
// Tag names of the HTML "formatting" elements (a, b, big, code, em, font, i,
// nobr, s, small, strike, strong, tt, u), stored as name -> true.
var formatting_elements = {
	a: true, b: true, big: true, code: true, em: true, font: true, i: true,
	nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
	u: true
}
// MathML text integration points: tag name -> namespace
var mathml_text_integration = {
	mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
}
// true when el is one of the elements above, in the MathML namespace
var is_mathml_text_integration_point = function (el) {
	return mathml_text_integration[el.name] === el.namespace
}
// Is el an HTML integration point?
//   - a MathML annotation-xml element whose encoding attribute is
//     "text/html" or "application/xhtml+xml" (case-insensitive), or
//   - an SVG foreignObject, desc or title element.
//
// el must be an element: el.attrs is read as an object, which tokens do not
// fill in (they collect attributes in attrs_a).
var is_html_integration = function (el) { // DON'T PASS A TOKEN
	var encoding
	if (el.namespace === NS_MATHML) {
		if (el.name === 'annotation-xml' && el.attrs.encoding != null) {
			encoding = el.attrs.encoding.toLowerCase()
			return encoding === 'text/html' || encoding === 'application/xhtml+xml'
		}
		return false
	}
	if (el.namespace === NS_SVG) {
		return el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title'
	}
	return false
}
493 h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
496 foster_parenting_targets = {
// Is element e in the "special" category?
//
// special_elements maps a tag name to a single namespace, and `title` is
// special in both HTML and SVG. The table can only remember one of them (the
// SVG entry), so HTML <title> is recognised here explicitly.
var el_is_special = function (e) {
	if (e.name === 'title' && e.namespace === NS_HTML) {
		return true
	}
	return special_elements[e.name] === e.namespace
}
// the three special elements (address, div, p) that el_is_special_not_adp excludes
var adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
// Like el_is_special(), but false for HTML address, div and p elements.
// Delegates to el_is_special so the two predicates cannot drift apart.
var el_is_special_not_adp = function (el) {
	return el_is_special(el) && adp_els[el.name] !== el.namespace
}
// Lower-cased SVG tag name -> its canonical mixed-case spelling.
// NOTE(review): the line naming this table and ten of its shorter entries
// (feblend, feflood, fefunca/b/g/r, feimage, femerge, fetile, textpath) were
// missing; the name follows svg_attribute_fixes and the entries follow the
// HTML spec's SVG tag-name table - confirm both against the consumers.
var svg_name_fixes = {
	altglyph: 'altGlyph',
	altglyphdef: 'altGlyphDef',
	altglyphitem: 'altGlyphItem',
	animatecolor: 'animateColor',
	animatemotion: 'animateMotion',
	animatetransform: 'animateTransform',
	clippath: 'clipPath',
	feblend: 'feBlend',
	fecolormatrix: 'feColorMatrix',
	fecomponenttransfer: 'feComponentTransfer',
	fecomposite: 'feComposite',
	feconvolvematrix: 'feConvolveMatrix',
	fediffuselighting: 'feDiffuseLighting',
	fedisplacementmap: 'feDisplacementMap',
	fedistantlight: 'feDistantLight',
	fedropshadow: 'feDropShadow',
	feflood: 'feFlood',
	fefunca: 'feFuncA',
	fefuncb: 'feFuncB',
	fefuncg: 'feFuncG',
	fefuncr: 'feFuncR',
	fegaussianblur: 'feGaussianBlur',
	feimage: 'feImage',
	femerge: 'feMerge',
	femergenode: 'feMergeNode',
	femorphology: 'feMorphology',
	feoffset: 'feOffset',
	fepointlight: 'fePointLight',
	fespecularlighting: 'feSpecularLighting',
	fespotlight: 'feSpotLight',
	fetile: 'feTile',
	feturbulence: 'feTurbulence',
	foreignobject: 'foreignObject',
	glyphref: 'glyphRef',
	lineargradient: 'linearGradient',
	radialgradient: 'radialGradient',
	textpath: 'textPath'
}
565 svg_attribute_fixes = {
566 attributename: 'attributeName',
567 attributetype: 'attributeType',
568 basefrequency: 'baseFrequency',
569 baseprofile: 'baseProfile',
570 calcmode: 'calcMode',
571 clippathunits: 'clipPathUnits',
572 contentscripttype: 'contentScriptType',
573 contentstyletype: 'contentStyleType',
574 diffuseconstant: 'diffuseConstant',
575 edgemode: 'edgeMode',
576 externalresourcesrequired: 'externalResourcesRequired',
577 // WHATWG removes this: filterres: 'filterRes',
578 filterunits: 'filterUnits',
579 glyphref: 'glyphRef',
580 gradienttransform: 'gradientTransform',
581 gradientunits: 'gradientUnits',
582 kernelmatrix: 'kernelMatrix',
583 kernelunitlength: 'kernelUnitLength',
584 keypoints: 'keyPoints',
585 keysplines: 'keySplines',
586 keytimes: 'keyTimes',
587 lengthadjust: 'lengthAdjust',
588 limitingconeangle: 'limitingConeAngle',
589 markerheight: 'markerHeight',
590 markerunits: 'markerUnits',
591 markerwidth: 'markerWidth',
592 maskcontentunits: 'maskContentUnits',
593 maskunits: 'maskUnits',
594 numoctaves: 'numOctaves',
595 pathlength: 'pathLength',
596 patterncontentunits: 'patternContentUnits',
597 patterntransform: 'patternTransform',
598 patternunits: 'patternUnits',
599 pointsatx: 'pointsAtX',
600 pointsaty: 'pointsAtY',
601 pointsatz: 'pointsAtZ',
602 preservealpha: 'preserveAlpha',
603 preserveaspectratio: 'preserveAspectRatio',
604 primitiveunits: 'primitiveUnits',
607 repeatcount: 'repeatCount',
608 repeatdur: 'repeatDur',
609 requiredextensions: 'requiredExtensions',
610 requiredfeatures: 'requiredFeatures',
611 specularconstant: 'specularConstant',
612 specularexponent: 'specularExponent',
613 spreadmethod: 'spreadMethod',
614 startoffset: 'startOffset',
615 stddeviation: 'stdDeviation',
616 stitchtiles: 'stitchTiles',
617 surfacescale: 'surfaceScale',
618 systemlanguage: 'systemLanguage',
619 tablevalues: 'tableValues',
622 textlength: 'textLength',
624 viewtarget: 'viewTarget',
625 xchannelselector: 'xChannelSelector',
626 ychannelselector: 'yChannelSelector',
627 zoomandpan: 'zoomAndPan'
// Attribute names on foreign (SVG / MathML) elements that get renamed:
// "prefix:local" as written in the markup -> "prefix local".
// Consulted by adjust_foreign_attributes().
var foreign_attr_fixes = {
	'xlink:actuate': 'xlink actuate',
	'xlink:arcrole': 'xlink arcrole',
	'xlink:href': 'xlink href',
	'xlink:role': 'xlink role',
	'xlink:show': 'xlink show',
	'xlink:title': 'xlink title',
	'xlink:type': 'xlink type',
	'xml:base': 'xml base',
	'xml:lang': 'xml lang',
	'xml:space': 'xml space',
	// NOTE(review): one short entry was missing here, presumably the plain
	// `xmlns` attribute. It is restored as an identity mapping, which cannot
	// change any attribute name - confirm against the original table.
	xmlns: 'xmlns',
	'xmlns:xlink': 'xmlns xlink'
}
// Adjust MathML attributes on start-tag token t: rename a `definitionurl`
// attribute to `definitionURL`. The [key, value] pairs in t.attrs_a are
// edited in place; nothing is returned.
var adjust_mathml_attributes = function (t) {
	var i, a
	for (i = 0; i < t.attrs_a.length; ++i) {
		a = t.attrs_a[i]
		if (a[0] === 'definitionurl') {
			a[0] = 'definitionURL'
		}
	}
}
// Adjust SVG attributes on start-tag token t: every attribute name listed in
// svg_attribute_fixes is replaced by its mixed-case spelling. The
// [key, value] pairs in t.attrs_a are edited in place; nothing is returned.
var adjust_svg_attributes = function (t) {
	var i, a
	for (i = 0; i < t.attrs_a.length; ++i) {
		a = t.attrs_a[i]
		// own-property test: attribute names come from the document, and a
		// plain lookup would also hit inherited members such as
		// "constructor" or "toString" and replace the name with a function
		if (Object.prototype.hasOwnProperty.call(svg_attribute_fixes, a[0])) {
			a[0] = svg_attribute_fixes[a[0]]
		}
	}
}
// Adjust foreign attributes on start-tag token t: every attribute name listed
// in foreign_attr_fixes (xlink:*, xml:*, xmlns*) is replaced by the name
// stored there. The [key, value] pairs in t.attrs_a are edited in place;
// nothing is returned.
var adjust_foreign_attributes = function (t) {
	var i, a
	for (i = 0; i < t.attrs_a.length; ++i) {
		a = t.attrs_a[i]
		// own-property test: a plain lookup would also match inherited
		// members ("constructor", "toString", ...) for document-supplied names
		if (Object.prototype.hasOwnProperty.call(foreign_attr_fixes, a[0])) {
			a[0] = foreign_attr_fixes[a[0]]
		}
	}
}
// decode_named_char_ref()
//
// The list of named character references is _huge_ so if we're running in a
// browser, we get the browser to decode them, rather than increasing the code
// size to include the table.
//
// _decode_named_char_ref(name) takes the entity name without "&" and ";".
var _decode_named_char_ref, decode_named_char_ref_el
if (context === 'module') {
	_decode_named_char_ref = require('./parser_no_browser_helper.js')
} else {
	// one detached <textarea> is reused for every lookup
	decode_named_char_ref_el = document.createElement('textarea')
	_decode_named_char_ref = function (txt) {
		var decoded
		txt = "&" + txt + ";"
		decode_named_char_ref_el.innerHTML = txt
		decoded = decode_named_char_ref_el.value
		if (decoded === txt) {
			// the browser left the text untouched, so the name is unknown
			// NOTE(review): the two return statements were missing;
			// null-for-unknown matches the `!= null` test done by
			// decode_named_char_ref() - confirm.
			return null
		}
		return decoded
	}
}
// Pass the name of a named entity _that has a terminating semicolon_
// Entities without terminating semicolons should use legacy_char_refs[]
// Do not include the "&" or ";" in your argument, eg pass "alpha"
//
// Returns whatever _decode_named_char_ref() returns for that name. Non-null
// results are remembered, so each name is decoded at most once.
//
// The cache has no prototype: names come from the document being parsed, and
// with a plain {} a name such as "constructor" or "toString" would be
// "found" in the cache as an inherited function and returned as the decoded
// text.
var decode_named_char_ref_cache = Object.create(null)
var decode_named_char_ref = function (txt) {
	var decoded
	decoded = decode_named_char_ref_cache[txt]
	if (decoded != null) {
		return decoded
	}
	decoded = _decode_named_char_ref(txt)
	return decode_named_char_ref_cache[txt] = decoded
}
706 parse_html = function (args_html, args) {
707 var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, ref, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, 
template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
712 cur = null // index of next char in txt to be parsed
713 // declare doc and tokenizer variables so they're in scope below
715 open_els = null // stack of open elements
716 afe = null // active formatting elements
717 template_ins_modes = null
719 original_ins_mode = null
721 tok_cur_tag = null // partially parsed tag
722 flag_scripting = null
723 flag_frameset_ok = null
725 flag_foster_parenting = null
726 form_element_pointer = null
727 temporary_buffer = null
728 pending_table_character_tokens = null
729 head_element_pointer = null
730 flag_fragment_parsing = null
731 context_element = null
733 stop_parsing = function () {
737 parse_error = function () {
738 if (args.error_cb != null) {
743 // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
744 // "Noah's Ark clause" but with three
745 afe_push = function (new_el) {
746 var attrs_match, el, i, j, k, len, matches, ref, ref1, v
748 for (i = 0; i < afe.length; ++i) {
750 if (el.type === TYPE_AFE_MARKER) {
753 if (el.name === new_el.name && el.namespace === new_el.namespace) {
755 for (k in el.attrs) {
757 if (new_el.attrs[k] !== v) {
763 for (k in new_el.attrs) {
765 if (el.attrs[k] !== v) {
783 afe_push_marker = function () {
784 afe.unshift(new_afe_marker())
787 // the functions below implement the Tree Construction algorithm
788 // http://www.w3.org/TR/html5/syntax.html#tree-construction
790 // But first... the helpers
791 template_tag_is_open = function () {
793 for (i = 0; i < open_els.length; ++i) {
795 if (el.name === 'template' && el.namespace === NS_HTML) {
801 is_in_scope_x = function (tag_name, scope, namespace) {
803 for (i = 0; i < open_els.length; ++i) {
805 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
808 if (scope[el.name] === el.namespace) {
814 is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
816 for (i = 0; i < open_els.length; ++i) {
818 if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
821 if (scope[el.name] === el.namespace) {
824 if (scope2[el.name] === el.namespace) {
831 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
832 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
835 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
836 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
838 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
840 button_scopers = { button: NS_HTML }
841 li_scopers = { ol: NS_HTML, ul: NS_HTML }
842 table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }
843 is_in_scope = function (tag_name, namespace) {
844 if (namespace == null) {
847 return is_in_scope_x(tag_name, standard_scopers, namespace)
849 is_in_button_scope = function (tag_name, namespace) {
850 if (namespace == null) {
853 return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
855 is_in_table_scope = function (tag_name, namespace) {
856 if (namespace == null) {
859 return is_in_scope_x(tag_name, table_scopers, namespace)
861 // aka is_in_list_item_scope
862 is_in_li_scope = function (tag_name, namespace) {
863 if (namespace == null) {
866 return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
// Select-scope test: walks open_els starting at index 0 looking for an
// element named tag_name. A null/undefined `namespace` matches any namespace;
// otherwise the element's namespace must equal it.
// NOTE(review): the statements inside each `if` are not visible in this
// excerpt, so the exact return values should be confirmed against the full file.
868 is_in_select_scope = function (tag_name, namespace) {
870 if (namespace == null) {
873 for (i = 0; i < open_els.length; ++i) {
875 if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
// NOTE(review): the HTML spec's select scope treats every element type except
// HTML optgroup/option as a scope boundary. This && chain is only true for
// non-HTML elements whose name is neither optgroup nor option, so an HTML
// element such as div never matches it. The spec-equivalent test would be
// `t.namespace !== NS_HTML || (t.name !== 'optgroup' && t.name !== 'option')`
// -- confirm which behaviour is intended.
878 if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
884 // this checks for a particular element, not by name
885 // this requires a namespace match
886 el_is_in_scope = function (needle) {
888 for (i = 0; i < open_els.length; ++i) {
893 if (standard_scopers[el.name] === el.namespace) {
900 clear_to_table_stopers = {
// Tests the current node (open_els[0]) against clear_to_table_stopers.
// NOTE(review): this matches on tag name alone (`!= null`), whereas
// clear_stack_to_table_body_context compares the table entry with the
// element's namespace. A same-named element from another namespace would
// therefore also match here -- confirm whether a namespace comparison was
// intended.
905 clear_stack_to_table_context = function () {
907 if (clear_to_table_stopers[open_els[0].name] != null) {
913 clear_to_table_body_stopers = {
920 clear_stack_to_table_body_context = function () {
922 if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
928 clear_to_table_row_stopers = {
// Tests the current node (open_els[0]) against clear_to_table_row_stopers.
// NOTE(review): this matches on tag name alone (`!= null`), whereas
// clear_stack_to_table_body_context compares the table entry with the
// element's namespace. A same-named element from another namespace would
// therefore also match here -- confirm whether a namespace comparison was
// intended.
933 clear_stack_to_table_row_context = function () {
935 if (clear_to_table_row_stopers[open_els[0].name] != null) {
941 clear_afe_to_marker = function () {
944 if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
948 if (el.type === TYPE_AFE_MARKER) {
955 // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
956 reset_ins_mode = function () {
957 var ancestor, ancestor_i, last, node, node_i
958 // 1. Let last be false.
960 // 2. Let node be the last node in the stack of open elements.
962 node = open_els[node_i]
963 // 3. Loop: If node is the first node in the stack of open elements,
964 // then set last to true, and, if the parser was originally created as
965 // part of the HTML fragment parsing algorithm (fragment case) set node
966 // to the context element.
968 if (node_i === open_els.length - 1) {
970 if (flag_fragment_parsing) {
971 node = context_element
974 // 4. If node is a select element, run these substeps:
975 if (node.name === 'select' && node.namespace === NS_HTML) {
976 // 1. If last is true, jump to the step below labeled done.
978 // 2. Let ancestor be node.
981 // 3. Loop: If ancestor is the first node in the stack of
982 // open elements, jump to the step below labeled done.
984 if (ancestor_i === open_els.length - 1) {
987 // 4. Let ancestor be the node before ancestor in the stack
990 ancestor = open_els[ancestor_i]
991 // 5. If ancestor is a template node, jump to the step below
993 if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
996 // 6. If ancestor is a table node, switch the insertion mode
997 // to "in select in table" and abort these steps.
998 if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
999 ins_mode = ins_mode_in_select_in_table
1002 // 7. Jump back to the step labeled loop.
1005 // 8. Done: Switch the insertion mode to "in select" and abort
1007 ins_mode = ins_mode_in_select
1010 // 5. If node is a td or th element and last is false, then switch
1011 // the insertion mode to "in cell" and abort these steps.
1012 if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
1013 ins_mode = ins_mode_in_cell
1016 // 6. If node is a tr element, then switch the insertion mode to "in
1017 // row" and abort these steps.
1018 if (node.name === 'tr' && node.namespace === NS_HTML) {
1019 ins_mode = ins_mode_in_row
1022 // 7. If node is a tbody, thead, or tfoot element, then switch the
1023 // insertion mode to "in table body" and abort these steps.
1024 if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
1025 ins_mode = ins_mode_in_table_body
1028 // 8. If node is a caption element, then switch the insertion mode
1029 // to "in caption" and abort these steps.
1030 if (node.name === 'caption' && node.namespace === NS_HTML) {
1031 ins_mode = ins_mode_in_caption
1034 // 9. If node is a colgroup element, then switch the insertion mode
1035 // to "in column group" and abort these steps.
1036 if (node.name === 'colgroup' && node.namespace === NS_HTML) {
1037 ins_mode = ins_mode_in_column_group
1040 // 10. If node is a table element, then switch the insertion mode to
1041 // "in table" and abort these steps.
1042 if (node.name === 'table' && node.namespace === NS_HTML) {
1043 ins_mode = ins_mode_in_table
1046 // 11. If node is a template element, then switch the insertion mode
1047 // to the current template insertion mode and abort these steps.
1048 if (node.name === 'template' && node.namespace === NS_HTML) {
1049 ins_mode = template_ins_modes[0]
1052 // 12. If node is a head element and last is true, then switch the
1053 // insertion mode to "in body" ("in body"! not "in head"!) and abort
1054 // these steps. (fragment case)
1055 if (node.name === 'head' && node.namespace === NS_HTML && last) {
1056 ins_mode = ins_mode_in_body
1059 // 13. If node is a head element and last is false, then switch the
1060 // insertion mode to "in head" and abort these steps.
1061 if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
1062 ins_mode = ins_mode_in_head
1065 // 14. If node is a body element, then switch the insertion mode to
1066 // "in body" and abort these steps.
1067 if (node.name === 'body' && node.namespace === NS_HTML) {
1068 ins_mode = ins_mode_in_body
1071 // 15. If node is a frameset element, then switch the insertion mode
1072 // to "in frameset" and abort these steps. (fragment case)
1073 if (node.name === 'frameset' && node.namespace === NS_HTML) {
1074 ins_mode = ins_mode_in_frameset
1077 // 16. If node is an html element, run these substeps:
1078 if (node.name === 'html' && node.namespace === NS_HTML) {
1079 // 1. If the head element pointer is null, switch the insertion
1080 // mode to "before head" and abort these steps. (fragment case)
1081 if (head_element_pointer === null) {
1082 ins_mode = ins_mode_before_head
1084 // 2. Otherwise, the head element pointer is not null,
1085 // switch the insertion mode to "after head" and abort these
1087 ins_mode = ins_mode_after_head
1091 // 17. If last is true, then switch the insertion mode to "in body"
1092 // and abort these steps. (fragment case)
1094 ins_mode = ins_mode_in_body
1097 // 18. Let node now be the node before node in the stack of open
1100 node = open_els[node_i]
1101 // 19. Return to the step labeled loop.
1107 // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
1108 adjusted_current_node = function () {
1109 if (open_els.length === 1 && flag_fragment_parsing) {
1110 return context_element
1115 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
1116 // this implementation is structured (mostly) as described at the link above.
1117 // capitalized comments are the "labels" described at the link above.
1118 reconstruct_afe = function () {
1120 if (afe.length === 0) {
1123 if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
1129 if (i === afe.length - 1) {
1133 if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
1140 el = insert_html_element(afe[i].token)
1149 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
1150 // adoption agency algorithm
1152 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
1153 // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
1154 // http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
1155 adoption_agency = function (subject) {
1156 var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, ref, ref1, s, t, u, w, y, z
1157 // this block implements the W3C spec
1158 // # 1. If the current node is an HTML element whose tag name is subject,
1159 // # then run these substeps:
1161 // # 1. Let element be the current node.
1163 // # 2. Pop element off the stack of open elements.
1165 // # 3. If element is also in the list of active formatting elements,
1166 // # remove the element from the list.
1168 // # 4. Abort the adoption agency algorithm.
1169 // if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1170 // el = open_els.shift()
1171 // # remove it from the list of active formatting elements (if found)
1177 // WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
1178 // If the current node is an HTML element whose tag name is subject, and
1179 // the current node is not in the list of active formatting elements,
1180 // then pop the current node off the stack of open elements, and abort
1182 if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
1183 // remove it from the list of active formatting elements (if found)
1185 for (i = 0; i < afe.length; ++i) {
1187 if (el === open_els[0]) {
1205 // 5. Let formatting element be the last element in the list of
1206 // active formatting elements that: is between the end of the list
1207 // and the last scope marker in the list, if any, or the start of
1208 // the list otherwise, and has the tag name subject.
1210 for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
1212 if (t.type === TYPE_AFE_MARKER) {
1215 if (t.name === subject) {
1220 // If there is no such element, then abort these steps and instead
1221 // act as described in the "any other end tag" entry above.
1223 in_body_any_other_end_tag(subject)
1226 // 6. If formatting element is not in the stack of open elements,
1227 // then this is a parse error; remove the element from the list, and
1228 // abort these steps.
1230 for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
1231 t = open_els[fe_of_open_els]
1239 // "remove it from the list" must mean afe, since it's not in open_els
1240 afe.splice(fe_of_afe, 1)
1243 // 7. If formatting element is in the stack of open elements, but
1244 // the element is not in scope, then this is a parse error; abort
1246 if (!el_is_in_scope(fe)) {
1250 // 8. If formatting element is not the current node, this is a parse
1251 // error. (But do not abort these steps.)
1252 if (open_els[0] !== fe) {
1256 // 9. Let furthest block be the topmost node in the stack of open
1257 // elements that is lower in the stack than formatting element, and
1258 // is an element in the special category. There might not be one.
1260 fb_of_open_els = null
1261 for (i = 0; i < open_els.length; ++i) {
1266 if (el_is_special(t)) {
1269 // and continue, to see if there's one that's more "topmost"
1272 // 10. If there is no furthest block, then the UA must first pop all
1273 // the nodes from the bottom of the stack of open elements, from the
1274 // current node up to and including formatting element, then remove
1275 // formatting element from the list of active formatting elements,
1276 // and finally abort these steps.
1279 t = open_els.shift()
1281 afe.splice(fe_of_afe, 1)
1286 // 11. Let common ancestor be the element immediately above
1287 // formatting element in the stack of open elements.
1288 ca = open_els[fe_of_open_els + 1] // common ancestor
1290 node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
1291 // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1292 bookmark = new_aaa_bookmark()
1293 for (i = 0; i < afe.length; ++i) {
1296 afe.splice(i, 0, bookmark)
1300 node = last_node = fb
1304 // 3. Let node be the element immediately above node in the
1305 // stack of open elements, or if node is no longer in the stack
1306 // of open elements (e.g. because it got removed by this
1307 // algorithm), the element that was immediately above node in
1308 // the stack of open elements before node was removed.
1310 for (i = 0; i < open_els.length; ++i) {
1313 node_next = open_els[i + 1]
1317 node = node_next != null ? node_next : node_above
1318 // TODO make sure node_above gets re-set if/when node is removed from open_els
1320 // 4. If node is formatting element, then go to the next step in
1321 // the overall algorithm.
1325 // 5. If inner loop counter is greater than three and node is in
1326 // the list of active formatting elements, then remove node from
1327 // the list of active formatting elements.
1329 for (i = 0; i < afe.length; ++i) {
1340 // 6. If node is not in the list of active formatting elements,
1341 // then remove node from the stack of open elements and then go
1342 // back to the step labeled inner loop.
1344 for (i = 0; i < open_els.length; ++i) {
1347 node_above = open_els[i + 1]
1348 open_els.splice(i, 1)
1354 // 7. create an element for the token for which the element node
1355 // was created, in the HTML namespace, with common ancestor as
1356 // the intended parent; replace the entry for node in the list
1357 // of active formatting elements with an entry for the new
1358 // element, replace the entry for node in the stack of open
1359 // elements with an entry for the new element, and let node be
1361 new_node = token_to_element(node.token, NS_HTML, ca)
1362 for (i = 0; i < afe.length; ++i) {
1369 for (i = 0; i < open_els.length; ++i) {
1372 node_above = open_els[i + 1]
1373 open_els[i] = new_node
1378 // 8. If last node is furthest block, then move the
1379 // aforementioned bookmark to be immediately after the new node
1380 // in the list of active formatting elements.
1381 if (last_node === fb) {
1382 for (i = 0; i < afe.length; ++i) {
1384 if (t === bookmark) {
1389 for (i = 0; i < afe.length; ++i) {
1392 // "after" means lower
1393 afe.splice(i, 0, bookmark) // "after as <-
1398 // 9. Insert last node into node, first removing it from its
1399 // previous parent node if any.
1400 if (last_node.parent != null) {
1401 for (i = 0; i < last_node.parent.children.length; ++i) {
1402 c = last_node.parent.children[i]
1403 if (c === last_node) {
1404 last_node.parent.children.splice(i, 1)
1409 node.children.push(last_node)
1410 last_node.parent = node
1411 // 10. Let last node be node.
1413 // 11. Return to the step labeled inner loop.
1415 // 14. Insert whatever last node ended up being in the previous step
1416 // at the appropriate place for inserting a node, but using common
1417 // ancestor as the override target.
1419 // In the case where fe is immediately followed by fb:
1420 // * inner loop exits out early (node==fe)
1421 // * last_node is fb
1422 // * last_node is still in the tree (not a duplicate)
1423 if (last_node.parent != null) {
1424 for (i = 0; i < last_node.parent.children.length; ++i) {
1425 c = last_node.parent.children[i]
1426 if (c === last_node) {
1427 last_node.parent.children.splice(i, 1)
1432 // can't use standard insert token thing, because it's already in
1433 // open_els and must stay at its current position in open_els
1434 dest = adjusted_insertion_location(ca)
1435 dest[0].children.splice(dest[1], 0, last_node)
1436 last_node.parent = dest[0]
1437 // 15. Create an element for the token for which formatting element
1438 // was created, in the HTML namespace, with furthest block as the intended parent.
// NOTE(review): new_element does not appear in adoption_agency's `var` list
// nor in parse_html's. Unless it is declared at module scope (not visible in
// this excerpt), this assignment creates an implicit global and would throw a
// ReferenceError in strict mode -- confirm, and add it to the `var` list if so.
1440 new_element = token_to_element(fe.token, NS_HTML, fb)
1441 // 16. Take all of the child nodes of furthest block and append them
1442 // to the element created in the last step.
1443 while (fb.children.length) {
1444 t = fb.children.shift()
1445 t.parent = new_element
1446 new_element.children.push(t)
1448 // 17. Append that new element to furthest block.
1449 new_element.parent = fb
1450 fb.children.push(new_element)
1451 // 18. Remove formatting element from the list of active formatting
1452 // elements, and insert the new element into the list of active
1453 // formatting elements at the position of the aforementioned
1455 for (i = 0; i < afe.length; ++i) {
1462 for (i = 0; i < afe.length; ++i) {
1464 if (t === bookmark) {
1465 afe[i] = new_element
1469 // 19. Remove formatting element from the stack of open elements,
1470 // and insert the new element into the stack of open elements
1471 // immediately below the position of furthest block in that stack.
1472 for (i = 0; i < open_els.length; ++i) {
1475 open_els.splice(i, 1)
1479 for (i = 0; i < open_els.length; ++i) {
1482 open_els.splice(i, 0, new_element)
1486 // 20. Jump back to the step labeled outer loop.
1490 // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1491 close_p_element = function () {
1492 generate_implied_end_tags('p') // arg is exception
1493 if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
1496 while (open_els.length > 1) { // just in case
1497 el = open_els.shift()
1498 if (el.name === 'p' && el.namespace === NS_HTML) {
1503 close_p_if_in_button_scope = function () {
1504 if (is_in_button_scope('p', NS_HTML)) {
1509 // http://www.w3.org/TR/html5/syntax.html#insert-a-character
1510 // aka insert_a_character = function (t) {
1511 insert_character = function (t) {
1513 dest = adjusted_insertion_location()
1514 // fixfull check for Document node
1516 prev = dest[0].children[dest[1] - 1]
1517 if (prev.type === TYPE_TEXT) {
1522 dest[0].children.splice(dest[1], 0, t)
1526 // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1527 process_token = function (t) {
1529 acn = adjusted_current_node()
1534 if (acn.namespace === NS_HTML) {
1538 if (is_mathml_text_integration_point(acn)) {
1539 if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
1543 if (t.type === TYPE_TEXT) {
1548 if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
1552 if (is_html_integration(acn)) {
1553 if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
1558 if (t.type === TYPE_EOF) {
1562 in_foreign_content(t)
1566 // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1567 // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1568 adjusted_insertion_location = function (override_target) {
1569 var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, ref, target, target_i
1570 // 1. If there was an override target specified, then let target be the
1572 if (override_target != null) {
1573 target = override_target
1574 } else { // Otherwise, let target be the current node.
1575 target = open_els[0]
1577 // 2. Determine the adjusted insertion location using the first matching
1578 // steps from the following list:
1580 // If foster parenting is enabled and target is a table, tbody, tfoot,
1581 // thead, or tr element: Foster parenting happens when content is
1582 // misnested in tables.
1583 if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
1584 while (true) { // once. this is here so we can ``break`` to "abort these substeps"
1585 // 1. Let last template be the last template element in the
1586 // stack of open elements, if any.
1587 last_template = null
1588 last_template_i = null
1589 for (i = 0; i < open_els.length; ++i) {
1591 if (el.name === 'template' && el.namespace === NS_HTML) {
1597 // 2. Let last table be the last table element in the stack of
1598 // open elements, if any.
1601 for (i = 0; i < open_els.length; ++i) {
1603 if (el.name === 'table' && el.namespace === NS_HTML) {
1609 // 3. If there is a last template and either there is no last
1610 // table, or there is one, but last template is lower (more
1611 // recently added) than last table in the stack of open
1612 // elements, then: let adjusted insertion location be inside
1613 // last template's template contents, after its last child (if
1614 // any), and abort these substeps.
1615 if (last_template && (last_table === null || last_template_i < last_table_i)) {
1616 target = last_template // fixfull should be it's contents
1617 target_i = target.children.length
1620 // 4. If there is no last table, then let adjusted insertion
1621 // location be inside the first element in the stack of open
1622 // elements (the html element), after its last child (if any),
1623 // and abort these substeps. (fragment case)
1624 if (last_table === null) {
1626 target = open_els[open_els.length - 1]
1627 target_i = target.children.length
1630 // 5. If last table has a parent element, then let adjusted
1631 // insertion location be inside last table's parent element,
1632 // immediately before last table, and abort these substeps.
1633 if (last_table.parent != null) {
1634 for (i = 0; i < last_table.parent.children.length; ++i) {
1635 c = last_table.parent.children[i]
1636 if (c === last_table) {
1637 target = last_table.parent
1644 // 6. Let previous element be the element immediately above last
1645 // table in the stack of open elements.
1647 // huh? how could it not have a parent?
1648 previous_element = open_els[last_table_i + 1]
1649 // 7. Let adjusted insertion location be inside previous
1650 // element, after its last child (if any).
1651 target = previous_element
1652 target_i = target.children.length
1653 // Note: These steps are involved in part because it's possible
1654 // for elements, the table element in this case in particular,
1655 // to have been moved by a script around in the DOM, or indeed
1656 // removed from the DOM entirely, after the element was inserted
1658 break // don't really loop
1661 // Otherwise: let adjusted insertion location be inside target, after
1662 // its last child (if any).
1663 target_i = target.children.length
1666 // 3. If the adjusted insertion location is inside a template element,
1667 // let it instead be inside the template element's template contents,
1668 // after its last child (if any).
1669 // fixfull (template)
1671 // 4. Return the adjusted insertion location.
1672 return [target, target_i]
1675 // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1676 // aka create_an_element_for_token
//
// Builds a TYPE_TAG Node from tag token `t`. The node is given the token's
// name, the supplied namespace, an `attrs` object filled from t.attrs_a, and a
// reference back to the token itself (callers such as adoption_agency and
// reconstruct_afe later read it via `.token`).
//   t               - tag token; t.name and t.attrs_a are read
//   namespace       - stored as the new node's namespace
//   intended_parent - not referenced in the lines visible in this excerpt
// Callers use the result as the new element (e.g. insert_foreign_element
// splices it into the parent's children and pushes it onto open_els).
1677 token_to_element = function (t, namespace, intended_parent) {
1679 // convert attributes into a hash
1681 for (i = 0; i < t.attrs_a.length; ++i) {
// presumably `a` is t.attrs_a[i], a [name, value] pair -- its assignment is not
// visible in this excerpt
1683 attrs[a[0]] = a[1] // TODO check what to do with duplicate attrs (as written, a later value overwrites an earlier one)
1685 el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
1687 // TODO 2. If the newly created element has an xmlns attribute in the
1688 // XMLNS namespace whose value is not exactly the same as the element's
1689 // namespace, that is a parse error. Similarly, if the newly created
1690 // element has an xmlns:xlink attribute in the XMLNS namespace whose
1691 // value is not the XLink Namespace, that is a parse error.
1693 // fixfull: the spec says stuff about form pointers and ownerDocument
1698 // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1699 insert_foreign_element = function (token, namespace) {
1700 var ail, ail_el, ail_i, el
1701 ail = adjusted_insertion_location()
1704 el = token_to_element(token, namespace, ail_el)
1705 // TODO skip this next step if it's broken (eg ail_el is document with child already)
1707 ail_el.children.splice(ail_i, 0, el)
1708 open_els.unshift(el)
1711 // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1712 insert_html_element = function (token) {
1713 return insert_foreign_element(token, NS_HTML)
1716 // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1717 // position should be [node, index_within_children]
1718 insert_comment = function (t, position) {
1719 if (position == null) {
1720 position = adjusted_insertion_location()
1722 position[0].children.splice(position[1], 0, t)
1727 // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1728 parse_generic_raw_text = function (t) {
1729 insert_html_element(t)
1730 tok_state = tok_state_rawtext
1731 original_ins_mode = ins_mode
1732 ins_mode = ins_mode_text
1734 parse_generic_rcdata_text = function (t) {
1735 insert_html_element(t)
1736 tok_state = tok_state_rcdata
1737 original_ins_mode = ins_mode
1738 ins_mode = ins_mode_text
1741 // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1742 // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1743 generate_implied_end_tags = function (except) {
1744 if (except == null) {
1747 while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
1752 // 8.2.5.4 The rules for parsing tokens in HTML content
1753 // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1755 // 8.2.5.4.1 The "initial" insertion mode
1756 // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1757 is_quirks_yes_doctype = function (t) {
1759 if (t.flag('force-quirks')) {
1762 if (t.name !== 'html') {
1765 if (t.public_identifier != null) {
1766 pi = t.public_identifier.toLowerCase()
1767 for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
1768 p = quirks_yes_pi_prefixes[i]
1769 if (pi.substr(0, p.length) === p) {
1773 if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
1777 if (t.system_identifier != null) {
1778 if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
1781 } else if (t.public_identifier != null) {
1782 // already did this: pi = t.public_identifier.toLowerCase()
1783 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
1789 is_quirks_limited_doctype = function (t) {
1791 if (t.public_identifier != null) {
1792 pi = t.public_identifier.toLowerCase()
1793 if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
1796 if (t.system_identifier != null) {
1797 if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
1804 ins_mode_initial = function (t) {
1805 if (is_space_tok(t)) {
1808 if (t.type === TYPE_COMMENT) {
1810 doc.children.push(t)
1813 if (t.type === TYPE_DOCTYPE) {
1814 // fixfull syntax error from first paragraph and following bullets
1815 // fixfull set doc.doctype
1816 // fixfull is the "not an iframe srcdoc" thing relevant?
1817 if (is_quirks_yes_doctype(t)) {
1818 doc.flag('quirks mode', QUIRKS_YES)
1819 } else if (is_quirks_limited_doctype(t)) {
1820 doc.flag('quirks mode', QUIRKS_LIMITED)
1822 doc.children.push(t)
1823 ins_mode = ins_mode_before_html
1827 // fixfull not iframe srcdoc?
1829 doc.flag('quirks mode', QUIRKS_YES)
1830 ins_mode = ins_mode_before_html
1834 // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1835 ins_mode_before_html = function (t) {
1836 if (t.type === TYPE_DOCTYPE) {
1840 if (t.type === TYPE_COMMENT) {
1841 doc.children.push(t)
1844 if (is_space_tok(t)) {
1847 if (t.type === TYPE_START_TAG && t.name === 'html') {
1848 el = token_to_element(t, NS_HTML, doc)
1849 doc.children.push(el)
1851 open_els.unshift(el)
1852 // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1853 ins_mode = ins_mode_before_head
1856 if (t.type === TYPE_END_TAG) {
1857 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1858 // fall through to "anything else"
1865 el = token_to_element(new_open_tag('html'), NS_HTML, doc)
1866 doc.children.push(el)
1868 open_els.unshift(el)
1869 // ?fixfull browsing context
1870 ins_mode = ins_mode_before_head
1874 // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1875 ins_mode_before_head = function (t) {
1877 if (is_space_tok(t)) {
1880 if (t.type === TYPE_COMMENT) {
1884 if (t.type === TYPE_DOCTYPE) {
1888 if (t.type === TYPE_START_TAG && t.name === 'html') {
1892 if (t.type === TYPE_START_TAG && t.name === 'head') {
1893 el = insert_html_element(t)
1894 head_element_pointer = el
1895 ins_mode = ins_mode_in_head
1898 if (t.type === TYPE_END_TAG) {
1899 if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
1900 // fall through to Anything else below
1907 el = insert_html_element(new_open_tag('head'))
1908 head_element_pointer = el
1909 ins_mode = ins_mode_in_head
1913 // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1914 ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
1915 open_els.shift() // spec says this will be a 'head' node
1916 ins_mode = ins_mode_after_head
// Handler for the "in head" insertion mode: receives one token `t` and applies
// the matching rule from
// http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
// Every branch returns nothing; effects are on the parser state (open_els,
// ins_mode, tok_state, template_ins_modes, ...).
//
// NOTE(review): the short statements in this function (insert_character,
// insert_comment, parse_error, ins_mode_in_body, open_els.shift,
// afe_push_marker, reset_ins_mode) were not visible in the reviewed listing
// and were restored from the spec steps -- confirm the helper names against
// the rest of the file.
ins_mode_in_head = function (t) {
	var ail, el
	// whitespace character tokens are inserted as-is
	if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
		insert_character(t)
		return
	}
	if (t.type === TYPE_COMMENT) {
		insert_comment(t)
		return
	}
	if (t.type === TYPE_DOCTYPE) {
		parse_error()
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'html') {
		ins_mode_in_body(t)
		return
	}
	if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
		insert_html_element(t)
		open_els.shift() // void element: pop it right away
		t.acknowledge_self_closing()
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'meta') {
		insert_html_element(t)
		open_els.shift() // void element: pop it right away
		t.acknowledge_self_closing()
		// fixfull encoding stuff
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'title') {
		parse_generic_rcdata_text(t)
		return
	}
	if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
		parse_generic_raw_text(t)
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
		insert_html_element(t)
		ins_mode = ins_mode_in_head_noscript
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'script') {
		ail = adjusted_insertion_location()
		el = token_to_element(t, NS_HTML, ail)
		el.flag('parser-inserted', true)
		// fixfull fragment case
		ail[0].children.splice(ail[1], 0, el)
		open_els.unshift(el)
		tok_state = tok_state_script_data
		original_ins_mode = ins_mode // make sure orig... is defined
		ins_mode = ins_mode_text
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'head') {
		open_els.shift() // will be a head element... spec says so
		ins_mode = ins_mode_after_head
		return
	}
	if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
		ins_mode_in_head_else(t)
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'template') {
		insert_html_element(t)
		afe_push_marker()
		flag_frameset_ok = false
		ins_mode = ins_mode_in_template
		template_ins_modes.unshift(ins_mode_in_template)
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'template') {
		if (template_tag_is_open()) {
			// Fix: this used to be the bare identifier `generate_implied_end_tags`
			// (no call), so implied end tags were never generated here and the
			// check below reported a parse error for well-formed input.
			generate_implied_end_tags()
			if (open_els[0].name !== 'template') {
				parse_error()
			}
			// pop elements up to and including the template element
			while (true) {
				el = open_els.shift()
				if (el.name === 'template' && el.namespace === NS_HTML) {
					break
				}
			}
			clear_afe_to_marker()
			template_ins_modes.shift()
			reset_ins_mode()
		} else {
			parse_error()
		}
		return
	}
	if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
		parse_error()
		return
	}
	// Anything else
	ins_mode_in_head_else(t)
}
2019 // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
2020 ins_mode_in_head_noscript_else = function (t) {
2023 ins_mode = ins_mode_in_head
2026 ins_mode_in_head_noscript = function (t) {
2027 if (t.type === TYPE_DOCTYPE) {
2031 if (t.type === TYPE_START_TAG && t.name === 'html') {
2035 if (t.type === TYPE_END_TAG && t.name === 'noscript') {
2037 ins_mode = ins_mode_in_head
2040 if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
2044 if (t.type === TYPE_END_TAG && t.name === 'br') {
2045 ins_mode_in_head_noscript_else(t)
2048 if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
2053 ins_mode_in_head_noscript_else(t)
2056 // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
2057 ins_mode_after_head_else = function (t) {
2059 body_tok = new_open_tag('body')
2060 insert_html_element(body_tok)
2061 ins_mode = ins_mode_in_body
2064 ins_mode_after_head = function (t) {
2066 if (is_space_tok(t)) {
2070 if (t.type === TYPE_COMMENT) {
2074 if (t.type === TYPE_DOCTYPE) {
2078 if (t.type === TYPE_START_TAG && t.name === 'html') {
2082 if (t.type === TYPE_START_TAG && t.name === 'body') {
2083 insert_html_element(t)
2084 flag_frameset_ok = false
2085 ins_mode = ins_mode_in_body
2088 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2089 insert_html_element(t)
2090 ins_mode = ins_mode_in_frameset
2093 if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
2095 open_els.unshift(head_element_pointer)
2097 for (i = 0; i < open_els.length; ++i) {
2099 if (el === head_element_pointer) {
2100 open_els.splice(i, 1)
2106 if (t.type === TYPE_END_TAG && t.name === 'template') {
2110 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
2111 ins_mode_after_head_else(t)
2114 if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
2119 ins_mode_after_head_else(t)
2122 // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
2123 in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
2127 if (node.name === name && node.namespace === NS_HTML) {
2128 generate_implied_end_tags(name) // arg is exception
2129 if (node !== open_els[0]) {
2133 el = open_els.shift()
2139 if (special_elements[node.name] === node.namespace) {
2143 for (i = 0; i < open_els.length; ++i) {
2146 node = open_els[i + 1]
2152 ins_mode_in_body = function (t) {
2153 var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, ref, ref1, ref2, ref3, ref4, root_attrs, s, second, second_i, u, w, y, z
2154 if (t.type === TYPE_TEXT && t.text === "\u0000") {
2158 if (is_space_tok(t)) {
2163 if (t.type === TYPE_TEXT) {
2166 flag_frameset_ok = false
2169 if (t.type === TYPE_COMMENT) {
2173 if (t.type === TYPE_DOCTYPE) {
2177 if (t.type === TYPE_START_TAG && t.name === 'html') {
2179 if (template_tag_is_open()) {
2182 root_attrs = open_els[open_els.length - 1].attrs
2183 for (i = 0; i < t.attrs_a.length; ++i) {
2185 if (root_attrs[a[0]] == null) {
2186 root_attrs[a[0]] = a[1]
2192 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
2196 if (t.type === TYPE_START_TAG && t.name === 'body') {
2198 if (open_els.length < 2) {
2201 second = open_els[open_els.length - 2]
2202 if (second.namespace !== NS_HTML) {
2205 if (second.name !== 'body') {
2208 if (template_tag_is_open()) {
2211 flag_frameset_ok = false
2212 for (i = 0; i < t.attrs_a.length; ++i) {
2214 if (second.attrs[a[0]] == null) {
2215 second.attrs[a[0]] = a[1]
2220 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
2222 if (open_els.length < 2) {
2225 second_i = open_els.length - 2
2226 second = open_els[second_i]
2227 if (second.namespace !== NS_HTML) {
2230 if (second.name !== 'body') {
2233 if (flag_frameset_ok === false) {
2236 if (second.parent != null) {
2237 for (i = 0; i < second.parent.children.length; ++i) {
2238 el = second.parent.children[i]
2239 if (el === second) {
2240 second.parent.children.splice(i, 1)
2245 open_els.splice(second_i, 1)
2246 // pop everything except the "root html element"
2247 while (open_els.length > 1) {
2250 insert_html_element(t)
2251 ins_mode = ins_mode_in_frameset
2254 if (t.type === TYPE_EOF) {
2256 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
2257 td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
2258 tr: NS_HTML, body: NS_HTML, html: NS_HTML
2260 for (i = 0; i < open_els.length; ++i) {
2262 if (ok_tags[t.name] !== el.namespace) {
2267 if (template_ins_modes.length > 0) {
2268 ins_mode_in_template(t)
2274 if (t.type === TYPE_END_TAG && t.name === 'body') {
2275 if (!is_in_scope('body', NS_HTML)) {
2280 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2281 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2282 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2283 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2286 for (i = 0; i < open_els.length; ++i) {
2288 if (ok_tags[t.name] !== el.namespace) {
2293 ins_mode = ins_mode_after_body
2296 if (t.type === TYPE_END_TAG && t.name === 'html') {
2297 if (!is_in_scope('body', NS_HTML)) {
2302 dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
2303 option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
2304 rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
2305 th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
2308 for (i = 0; i < open_els.length; ++i) {
2310 if (ok_tags[t.name] !== el.namespace) {
2315 ins_mode = ins_mode_after_body
2319 if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2320 close_p_if_in_button_scope()
2321 insert_html_element(t)
2324 if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
2325 close_p_if_in_button_scope()
2326 if (h_tags[open_els[0].name] === open_els[0].namespace) {
2330 insert_html_element(t)
2333 if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
2334 close_p_if_in_button_scope()
2335 insert_html_element(t)
2336 eat_next_token_if_newline()
2337 flag_frameset_ok = false
2340 if (t.type === TYPE_START_TAG && t.name === 'form') {
2341 if (!(form_element_pointer === null || template_tag_is_open())) {
2345 close_p_if_in_button_scope()
2346 el = insert_html_element(t)
2347 if (!template_tag_is_open()) {
2348 form_element_pointer = el
2352 if (t.type === TYPE_START_TAG && t.name === 'li') {
2353 flag_frameset_ok = false
2354 for (i = 0; i < open_els.length; ++i) {
2356 if (node.name === 'li' && node.namespace === NS_HTML) {
2357 generate_implied_end_tags('li') // arg is exception
2358 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2362 el = open_els.shift()
2363 if (el.name === 'li' && el.namespace === NS_HTML) {
2369 if (el_is_special_not_adp(node)) {
2373 close_p_if_in_button_scope()
2374 insert_html_element(t)
2377 if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
2378 flag_frameset_ok = false
2379 for (i = 0; i < open_els.length; ++i) {
2381 if (node.name === 'dd' && node.namespace === NS_HTML) {
2382 generate_implied_end_tags('dd') // arg is exception
2383 if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
2387 el = open_els.shift()
2388 if (el.name === 'dd' && el.namespace === NS_HTML) {
2394 if (node.name === 'dt' && node.namespace === NS_HTML) {
2395 generate_implied_end_tags('dt') // arg is exception
2396 if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
2400 el = open_els.shift()
2401 if (el.name === 'dt' && el.namespace === NS_HTML) {
2407 if (el_is_special_not_adp(node)) {
2411 close_p_if_in_button_scope()
2412 insert_html_element(t)
2415 if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
2416 close_p_if_in_button_scope()
2417 insert_html_element(t)
2418 tok_state = tok_state_plaintext
2421 if (t.type === TYPE_START_TAG && t.name === 'button') {
2422 if (is_in_scope('button', NS_HTML)) {
2424 generate_implied_end_tags()
2426 el = open_els.shift()
2427 if (el.name === 'button' && el.namespace === NS_HTML) {
2433 insert_html_element(t)
2434 flag_frameset_ok = false
2437 if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
2438 if (!is_in_scope(t.name, NS_HTML)) {
2442 generate_implied_end_tags()
2443 if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
2447 el = open_els.shift()
2448 if (el.name === t.name && el.namespace === NS_HTML) {
2454 if (t.type === TYPE_END_TAG && t.name === 'form') {
2455 if (!template_tag_is_open()) {
2456 node = form_element_pointer
2457 form_element_pointer = null
2458 if (node === null || !el_is_in_scope(node)) {
2462 generate_implied_end_tags()
2463 if (open_els[0] !== node) {
2466 for (i = 0; i < open_els.length; ++i) {
2469 open_els.splice(i, 1)
2474 if (!is_in_scope('form', NS_HTML)) {
2478 generate_implied_end_tags()
2479 if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
2483 el = open_els.shift()
2484 if (el.name === 'form' && el.namespace === NS_HTML) {
2491 if (t.type === TYPE_END_TAG && t.name === 'p') {
2492 if (!is_in_button_scope('p', NS_HTML)) {
2494 insert_html_element(new_open_tag('p'))
2499 if (t.type === TYPE_END_TAG && t.name === 'li') {
2500 if (!is_in_li_scope('li', NS_HTML)) {
2504 generate_implied_end_tags('li') // arg is exception
2505 if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
2509 el = open_els.shift()
2510 if (el.name === 'li' && el.namespace === NS_HTML) {
2516 if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
2517 if (!is_in_scope(t.name, NS_HTML)) {
2521 generate_implied_end_tags(t.name) // arg is exception
2522 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2526 el = open_els.shift()
2527 if (el.name === t.name && el.namespace === NS_HTML) {
2533 if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
2535 for (i = 0; i < open_els.length; ++i) {
2537 if (h_tags[el.name] === el.namespace) {
2541 if (standard_scopers[el.name] === el.namespace) {
2549 generate_implied_end_tags()
2550 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2554 el = open_els.shift()
2555 if (h_tags[el.name] === el.namespace) {
2562 if (t.type === TYPE_START_TAG && t.name === 'a') {
2563 // If the list of active formatting elements contains an a element
2564 // between the end of the list and the last marker on the list (or
2565 // the start of the list if there is no marker on the list), then
2566 // this is a parse error; run the adoption agency algorithm for the
2567 // tag name "a", then remove that element from the list of active
2568 // formatting elements and the stack of open elements if the
2569 // adoption agency algorithm didn't already remove it (it might not
2570 // have if the element is not in table scope).
2572 for (i = 0; i < afe.length; ++i) {
2574 if (el.type === TYPE_AFE_MARKER) {
2577 if (el.name === 'a' && el.namespace === NS_HTML) {
2581 if (found != null) {
2583 adoption_agency('a')
2584 for (i = 0; i < afe.length; ++i) {
2590 for (i = 0; i < open_els.length; ++i) {
2593 open_els.splice(i, 1)
2598 el = insert_html_element(t)
2602 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2604 el = insert_html_element(t)
2608 if (t.type === TYPE_START_TAG && t.name === 'nobr') {
2610 if (is_in_scope('nobr', NS_HTML)) {
2612 adoption_agency('nobr')
2615 el = insert_html_element(t)
2619 if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
2620 adoption_agency(t.name)
2623 if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2625 insert_html_element(t)
2627 flag_frameset_ok = false
2630 if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
2631 if (!is_in_scope(t.name, NS_HTML)) {
2635 generate_implied_end_tags()
2636 if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
2640 el = open_els.shift()
2641 if (el.name === t.name && el.namespace === NS_HTML) {
2645 clear_afe_to_marker()
2648 if (t.type === TYPE_START_TAG && t.name === 'table') {
2649 if (doc.flag('quirks mode') !== QUIRKS_YES) {
2650 close_p_if_in_button_scope() // test
2652 insert_html_element(t)
2653 flag_frameset_ok = false
2654 ins_mode = ins_mode_in_table
2657 if (t.type === TYPE_END_TAG && t.name === 'br') {
2659 // W3C: t.type = TYPE_START_TAG
2660 t = new_open_tag('br') // WHATWG
2663 if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
2665 insert_html_element(t)
2667 t.acknowledge_self_closing()
2668 flag_frameset_ok = false
2671 if (t.type === TYPE_START_TAG && t.name === 'input') {
2673 insert_html_element(t)
2675 t.acknowledge_self_closing()
2676 if (!is_input_hidden_tok(t)) {
2677 flag_frameset_ok = false
2681 if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
2682 // WHATWG adds 'menuitem' for this block
2683 insert_html_element(t)
2685 t.acknowledge_self_closing()
2688 if (t.type === TYPE_START_TAG && t.name === 'hr') {
2689 close_p_if_in_button_scope()
2690 insert_html_element(t)
2692 t.acknowledge_self_closing()
2693 flag_frameset_ok = false
2696 if (t.type === TYPE_START_TAG && t.name === 'image') {
2702 if (t.type === TYPE_START_TAG && t.name === 'isindex') {
2704 if (template_tag_is_open() === false && form_element_pointer !== null) {
2707 t.acknowledge_self_closing()
2708 flag_frameset_ok = false
2709 close_p_if_in_button_scope()
2710 el = insert_html_element(new_open_tag('form'))
2711 if (!template_tag_is_open()) {
2712 form_element_pointer = el
2714 for (i = 0; i < t.attrs_a.length; ++i) {
2716 if (a[0] === 'action') {
2717 el.attrs['action'] = a[1]
2721 insert_html_element(new_open_tag('hr'))
2724 insert_html_element(new_open_tag('label'))
2725 // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2726 input_el = new_open_tag('input')
2728 for (i = 0; i < t.attrs_a.length; ++i) {
2730 if (a[0] === 'prompt') {
2733 if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
2734 input_el.attrs_a.push([a[0], a[1]])
2737 input_el.attrs_a.push(['name', 'isindex'])
2738 // fixfull this next bit is in english... internationalize?
2739 if (prompt == null) {
2740 prompt = "This is a searchable index. Enter search keywords: "
2742 insert_character(new_character_token(prompt)) // fixfull split
2743 // TODO submit typo "balue" in spec
2744 insert_html_element(input_el)
2746 // insert_character('') // you can put chars here if prompt attr missing
2748 insert_html_element(new_open_tag('hr'))
2751 if (!template_tag_is_open()) {
2752 form_element_pointer = null
2756 if (t.type === TYPE_START_TAG && t.name === 'textarea') {
2757 insert_html_element(t)
2758 eat_next_token_if_newline()
2759 tok_state = tok_state_rcdata
2760 original_ins_mode = ins_mode
2761 flag_frameset_ok = false
2762 ins_mode = ins_mode_text
2765 if (t.type === TYPE_START_TAG && t.name === 'xmp') {
2766 close_p_if_in_button_scope()
2768 flag_frameset_ok = false
2769 parse_generic_raw_text(t)
2772 if (t.type === TYPE_START_TAG && t.name === 'iframe') {
2773 flag_frameset_ok = false
2774 parse_generic_raw_text(t)
2777 if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
2778 parse_generic_raw_text(t)
2781 if (t.type === TYPE_START_TAG && t.name === 'select') {
2783 insert_html_element(t)
2784 flag_frameset_ok = false
2785 if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
2786 ins_mode = ins_mode_in_select_in_table
2788 ins_mode = ins_mode_in_select
2792 if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
2793 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
2797 insert_html_element(t)
2800 // this comment block implements the W3C spec
2801 // if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
2802 // if is_in_scope 'ruby', NS_HTML
2803 // generate_implied_end_tags()
2804 // unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
2806 // insert_html_element t
2808 // if t.type === TYPE_START_TAG && t.name === 'rt'
2809 // if is_in_scope 'ruby', NS_HTML
2810 // generate_implied_end_tags 'rtc' // arg === exception
2811 // unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
2813 // insert_html_element t
2815 // below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2816 if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
2817 if (is_in_scope('ruby', NS_HTML)) {
2818 generate_implied_end_tags()
2819 if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
2823 insert_html_element(t)
2826 if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
2827 if (is_in_scope('ruby', NS_HTML)) {
2828 generate_implied_end_tags('rtc')
2829 if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
2833 insert_html_element(t)
2837 if (t.type === TYPE_START_TAG && t.name === 'math') {
2839 adjust_mathml_attributes(t)
2840 adjust_foreign_attributes(t)
2841 insert_foreign_element(t, NS_MATHML)
2842 if (t.flag('self-closing')) {
2844 t.acknowledge_self_closing()
2848 if (t.type === TYPE_START_TAG && t.name === 'svg') {
2850 adjust_svg_attributes(t)
2851 adjust_foreign_attributes(t)
2852 insert_foreign_element(t, NS_SVG)
2853 if (t.flag('self-closing')) {
2855 t.acknowledge_self_closing()
2859 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
2863 if (t.type === TYPE_START_TAG) { // any other start tag
2865 insert_html_element(t)
2868 if (t.type === TYPE_END_TAG) { // any other end tag
2869 in_body_any_other_end_tag(t.name)
2874 // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2875 ins_mode_text = function (t) {
2876 if (t.type === TYPE_TEXT) {
2880 if (t.type === TYPE_EOF) {
2882 if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
2883 open_els[0].flag('already started', true)
2886 ins_mode = original_ins_mode
2890 if (t.type === TYPE_END_TAG && t.name === 'script') {
2892 ins_mode = original_ins_mode
2893 // fixfull the spec seems to assume that I'm going to run the script
2894 // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2897 if (t.type === TYPE_END_TAG) {
2899 ins_mode = original_ins_mode
// the functions below implement the tokenizer states described here:
2905 // http://www.w3.org/TR/html5/syntax.html#tokenization
2907 // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2908 ins_mode_in_table_else = function (t) {
2910 flag_foster_parenting = true
2912 flag_foster_parenting = false
2914 ins_mode_in_table = function (t) {
2918 if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
2919 pending_table_character_tokens = []
2920 original_ins_mode = ins_mode
2921 ins_mode = ins_mode_in_table_text
2924 ins_mode_in_table_else(t)
2933 case TYPE_START_TAG:
2936 clear_stack_to_table_context()
2938 insert_html_element(t)
2939 ins_mode = ins_mode_in_caption
2942 clear_stack_to_table_context()
2943 insert_html_element(t)
2944 ins_mode = ins_mode_in_column_group
2947 clear_stack_to_table_context()
2948 insert_html_element(new_open_tag('colgroup'))
2949 ins_mode = ins_mode_in_column_group
2955 clear_stack_to_table_context()
2956 insert_html_element(t)
2957 ins_mode = ins_mode_in_table_body
2962 clear_stack_to_table_context()
2963 insert_html_element(new_open_tag('tbody'))
2964 ins_mode = ins_mode_in_table_body
2969 if (is_in_table_scope('table', NS_HTML)) {
2971 el = open_els.shift()
2972 if (el.name === 'table' && el.namespace === NS_HTML) {
2986 if (!is_input_hidden_tok(t)) {
2987 ins_mode_in_table_else(t)
2990 el = insert_html_element(t)
2992 t.acknowledge_self_closing()
2997 if (form_element_pointer != null) {
3000 if (template_tag_is_open()) {
3003 form_element_pointer = insert_html_element(t)
3007 ins_mode_in_table_else(t)
3013 if (is_in_table_scope('table', NS_HTML)) {
3015 el = open_els.shift()
3016 if (el.name === 'table' && el.namespace === NS_HTML) {
3042 ins_mode_in_table_else(t)
3049 ins_mode_in_table_else(t)
3053 // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
3054 ins_mode_in_table_text = function (t) {
3055 var all_space, i, l, m, old
3056 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3061 if (t.type === TYPE_TEXT) {
3062 pending_table_character_tokens.push(t)
3067 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3068 old = pending_table_character_tokens[i]
3069 if (!is_space_tok(old)) {
3075 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3076 old = pending_table_character_tokens[i]
3077 insert_character(old)
3080 for (i = 0; i < pending_table_character_tokens.length; ++i) {
3081 old = pending_table_character_tokens[i]
3082 ins_mode_in_table_else(old)
3085 pending_table_character_tokens = []
3086 ins_mode = original_ins_mode
3090 // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
3091 ins_mode_in_caption = function (t) {
3093 if (t.type === TYPE_END_TAG && t.name === 'caption') {
3094 if (is_in_table_scope('caption', NS_HTML)) {
3095 generate_implied_end_tags()
3096 if (open_els[0].name !== 'caption') {
3100 el = open_els.shift()
3101 if (el.name === 'caption' && el.namespace === NS_HTML) {
3105 clear_afe_to_marker()
3106 ins_mode = ins_mode_in_table
3113 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3115 if (is_in_table_scope('caption', NS_HTML)) {
3117 el = open_els.shift()
3118 if (el.name === 'caption' && el.namespace === NS_HTML) {
3122 clear_afe_to_marker()
3123 ins_mode = ins_mode_in_table
3126 // else fragment case
3129 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
3137 // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
// Handler for the "in column group" insertion mode: receives one token `t`
// and applies the matching rule from
// http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
//
// NOTE(review): the short statements (insert_character, insert_comment,
// parse_error, ins_mode_in_body, ins_mode_in_head, open_els.shift,
// process_token) were not visible in the reviewed listing and were restored
// from the spec steps -- confirm the helper names against the rest of the file.
ins_mode_in_column_group = function (t) {
	if (is_space_tok(t)) {
		insert_character(t)
		return
	}
	if (t.type === TYPE_COMMENT) {
		insert_comment(t)
		return
	}
	if (t.type === TYPE_DOCTYPE) {
		parse_error()
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'html') {
		ins_mode_in_body(t)
		return
	}
	if (t.type === TYPE_START_TAG && t.name === 'col') {
		insert_html_element(t)
		open_els.shift() // void element: pop it right away
		t.acknowledge_self_closing()
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
		// Fix: the namespace must be read from the current node (open_els[0]).
		// The old code read `open_els.namespace` (a property of the array,
		// always undefined), so this branch never popped the colgroup.
		if (open_els[0].name === 'colgroup' && open_els[0].namespace === NS_HTML) {
			open_els.shift()
			ins_mode = ins_mode_in_table
		} else {
			parse_error()
		}
		return
	}
	if (t.type === TYPE_END_TAG && t.name === 'col') {
		parse_error()
		return
	}
	if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
		ins_mode_in_head(t)
		return
	}
	if (t.type === TYPE_EOF) {
		ins_mode_in_body(t)
		return
	}
	// Anything else
	if (open_els[0].name !== 'colgroup') {
		parse_error()
		return
	}
	// close the colgroup and let the "in table" mode handle the token
	open_els.shift()
	ins_mode = ins_mode_in_table
	process_token(t)
}
3193 // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
3194 ins_mode_in_table_body = function (t) {
3196 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3197 clear_stack_to_table_body_context()
3198 insert_html_element(t)
3199 ins_mode = ins_mode_in_row
3202 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3204 clear_stack_to_table_body_context()
3205 insert_html_element(new_open_tag('tr'))
3206 ins_mode = ins_mode_in_row
3210 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3211 if (!is_in_table_scope(t.name, NS_HTML)) {
3215 clear_stack_to_table_body_context()
3217 ins_mode = ins_mode_in_table
3220 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) {
3222 for (i = 0; i < open_els.length; ++i) {
3224 if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) {
3228 if (table_scopers[el.name] === el.namespace) {
3236 clear_stack_to_table_body_context()
3238 ins_mode = ins_mode_in_table
3242 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) {
3247 ins_mode_in_table(t)
3250 // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
// Tree-construction handler for the "in row" insertion mode.
// t: the token being processed; branches are chosen by t.type and t.name.
// Cell start tags move to "in cell"; row/section boundaries move back to
// "in table body"; the function ends with a call to ins_mode_in_table(t).
3251 ins_mode_in_row = function (t) {
// <td>/<th>: clear the stack to a table row context, insert the cell, go to "in cell"
3252 if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
3253 clear_stack_to_table_row_context()
3254 insert_html_element(t)
3255 ins_mode = ins_mode_in_cell
// </tr>: only acted on when a tr is in table scope
3259 if (t.type === TYPE_END_TAG && t.name === 'tr') {
3260 if (is_in_table_scope('tr', NS_HTML)) {
3261 clear_stack_to_table_row_context()
3263 ins_mode = ins_mode_in_table_body
// table-structure start tags (including a new <tr>), or </table>
3269 if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
3270 if (is_in_table_scope('tr', NS_HTML)) {
3271 clear_stack_to_table_row_context()
3273 ins_mode = ins_mode_in_table_body
// </tbody>, </tfoot>, </thead>: requires both that section and a tr in table scope
3280 if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3281 if (is_in_table_scope(t.name, NS_HTML)) {
3282 if (is_in_table_scope('tr', NS_HTML)) {
3283 clear_stack_to_table_row_context()
3285 ins_mode = ins_mode_in_table_body
// end tags that have their own branch in this mode
3293 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th')) {
// hand the token to the "in table" rules
3298 ins_mode_in_table(t)
3301 // http://www.w3.org/TR/html5/syntax.html#close-the-cell
3302 close_the_cell = function () {
3304 generate_implied_end_tags()
3305 if (!((open_els[0].name === 'td' || open_els[0] === 'th') && open_els[0].namespace === NS_HTML)) {
3309 el = open_els.shift()
3310 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3314 clear_afe_to_marker()
3315 ins_mode = ins_mode_in_row
3318 // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
// Tree-construction handler for the "in cell" insertion mode.
// t: the token being processed; branches are chosen by t.type and t.name.
3319 ins_mode_in_cell = function (t) {
// </td> or </th>: only acted on when a cell of that name is in table scope
3321 if (t.type === TYPE_END_TAG && (t.name === 'td' || t.name === 'th')) {
3322 if (is_in_table_scope(t.name, NS_HTML)) {
3323 generate_implied_end_tags()
// check whether the current node is the HTML element named by the end tag
3324 if (!((open_els[0].name === t.name) && open_els[0].namespace === NS_HTML)) {
// pop from open_els, testing each popped element against the end tag's name
3328 el = open_els.shift()
3329 if (el.name === t.name && el.namespace === NS_HTML) {
3333 clear_afe_to_marker()
3334 ins_mode = ins_mode_in_row
// table-structure start tags arriving while a cell is open
3340 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
// walk open_els looking for an HTML td/th; table_scopers entries are tested as scope boundaries
3342 for (i = 0; i < open_els.length; ++i) {
3344 if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
3348 if (table_scopers[el.name] === el.namespace) {
// end tags that have their own branch in this mode
3360 if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html')) {
// table-structure end tags: guarded by a table-scope check on the same name
3364 if (t.type === TYPE_END_TAG && (t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) {
3365 if (is_in_table_scope(t.name, NS_HTML)) {
3377 // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
3378 ins_mode_in_select = function (t) {
3380 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3384 if (t.type === TYPE_TEXT) {
3388 if (t.type === TYPE_COMMENT) {
3392 if (t.type === TYPE_DOCTYPE) {
3396 if (t.type === TYPE_START_TAG && t.name === 'html') {
3400 if (t.type === TYPE_START_TAG && t.name === 'option') {
3401 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3404 insert_html_element(t)
3407 if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
3408 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3411 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3414 insert_html_element(t)
3417 if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
3418 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3419 if (open_els[1].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3423 if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
3430 if (t.type === TYPE_END_TAG && t.name === 'option') {
3431 if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
3438 if (t.type === TYPE_END_TAG && t.name === 'select') {
3439 if (is_in_select_scope('select', NS_HTML)) {
3441 el = open_els.shift()
3442 if (el.name === 'select' && el.namespace === NS_HTML) {
3452 if (t.type === TYPE_START_TAG && t.name === 'select') {
3455 el = open_els.shift()
3456 if (el.name === 'select' && el.namespace === NS_HTML) {
3461 // spec says that this is the same as </select> but it doesn't say
3462 // to check scope first
3465 if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
3467 if (!is_in_select_scope('select', NS_HTML)) {
3471 el = open_els.shift()
3472 if (el.name === 'select' && el.namespace === NS_HTML) {
3480 if (t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) {
3484 if (t.type === TYPE_EOF) {
3492 // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
// Tree-construction handler for the "in select in table" insertion mode.
// t: the token being processed. Table-related start/end tags get their own
// branches here; the function ends with a call to ins_mode_in_select(t).
3493 ins_mode_in_select_in_table = function (t) {
// table-related start tag: pops open_els testing for the HTML select
3495 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3498 el = open_els.shift()
3499 if (el.name === 'select' && el.namespace === NS_HTML) {
// table-related end tag: first checks that an element of that name is in table scope
3507 if (t.type === TYPE_END_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
3509 if (!is_in_table_scope(t.name, NS_HTML)) {
3513 el = open_els.shift()
3514 if (el.name === 'select' && el.namespace === NS_HTML) {
// hand the token to the "in select" rules
3523 ins_mode_in_select(t)
3526 // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
// Tree-construction handler for the "in template" insertion mode.
// t: the token being processed. For start tags, the front entry of
// template_ins_modes is replaced (shift + unshift) with the mode that fits
// the tag, and ins_mode is set to that same mode.
3527 ins_mode_in_template = function (t) {
3529 if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
// head-level start tags, or </template>
3533 if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
// table sections and captions -> "in table"
3537 if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
3538 template_ins_modes.shift()
3539 template_ins_modes.unshift(ins_mode_in_table)
3540 ins_mode = ins_mode_in_table
// <col> -> "in column group"
3544 if (t.type === TYPE_START_TAG && t.name === 'col') {
3545 template_ins_modes.shift()
3546 template_ins_modes.unshift(ins_mode_in_column_group)
3547 ins_mode = ins_mode_in_column_group
// <tr> -> "in table body"
3551 if (t.type === TYPE_START_TAG && t.name === 'tr') {
3552 template_ins_modes.shift()
3553 template_ins_modes.unshift(ins_mode_in_table_body)
3554 ins_mode = ins_mode_in_table_body
// <td>/<th> -> "in row"
3558 if (t.type === TYPE_START_TAG && (t.name === 'td' || t.name === 'th')) {
3559 template_ins_modes.shift()
3560 template_ins_modes.unshift(ins_mode_in_row)
3561 ins_mode = ins_mode_in_row
// any other start tag -> "in body"
3565 if (t.type === TYPE_START_TAG) {
3566 template_ins_modes.shift()
3567 template_ins_modes.unshift(ins_mode_in_body)
3568 ins_mode = ins_mode_in_body
3572 if (t.type === TYPE_END_TAG) {
// EOF: checks for an open template, pops open_els testing for the HTML
// template element, then clears formatting entries and drops the front template mode
3576 if (t.type === TYPE_EOF) {
3577 if (!template_tag_is_open()) {
3583 el = open_els.shift()
3584 if (el.name === 'template' && el.namespace === NS_HTML) {
3588 clear_afe_to_marker()
3589 template_ins_modes.shift()
3595 // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
// Tree-construction handler for the "after body" insertion mode.
// t: the token being processed; branches are chosen by token type/name.
3596 ins_mode_after_body = function (t) {
3598 if (is_space_tok(t)) {
// comments are appended as the last child of the element in the last slot of open_els
3602 if (t.type === TYPE_COMMENT) {
3603 first = open_els[open_els.length - 1]
3604 insert_comment(t, [first, first.children.length])
3607 if (t.type === TYPE_DOCTYPE) {
3611 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </html>: fragment parsing is tested first; otherwise move to "after after body"
3615 if (t.type === TYPE_END_TAG && t.name === 'html') {
3616 if (flag_fragment_parsing) {
3620 ins_mode = ins_mode_after_after_body
3623 if (t.type === TYPE_EOF) {
// the handler also contains a switch back to "in body"
3629 ins_mode = ins_mode_in_body
3633 // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
// Tree-construction handler for the "in frameset" insertion mode.
// t: the token being processed; branches are chosen by token type/name.
3634 ins_mode_in_frameset = function (t) {
3635 if (is_space_tok(t)) {
3639 if (t.type === TYPE_COMMENT) {
3643 if (t.type === TYPE_DOCTYPE) {
3647 if (t.type === TYPE_START_TAG && t.name === 'html') {
// nested <frameset>: inserted as an HTML element
3651 if (t.type === TYPE_START_TAG && t.name === 'frameset') {
3652 insert_html_element(t)
3655 if (t.type === TYPE_END_TAG && t.name === 'frameset') {
// a stack holding a single element is the fragment case
3656 if (open_els.length === 1) {
3658 return // fragment case
// when not parsing a fragment and the current node is not a frameset, move to "after frameset"
3661 if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
3662 ins_mode = ins_mode_after_frameset
// <frame>: inserted, and its self-closing flag acknowledged
3666 if (t.type === TYPE_START_TAG && t.name === 'frame') {
3667 insert_html_element(t)
3669 t.acknowledge_self_closing()
3672 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
// EOF: tests whether more than one element is still open
3676 if (t.type === TYPE_EOF) {
3677 if (open_els.length !== 1) {
3687 // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
// Tree-construction handler for the "after frameset" insertion mode.
// t: the token being processed; branches are chosen by token type/name.
3688 ins_mode_after_frameset = function (t) {
3689 if (is_space_tok(t)) {
3693 if (t.type === TYPE_COMMENT) {
3697 if (t.type === TYPE_DOCTYPE) {
3701 if (t.type === TYPE_START_TAG && t.name === 'html') {
// </html> moves the parser to "after after frameset"
3705 if (t.type === TYPE_END_TAG && t.name === 'html') {
3706 ins_mode = ins_mode_after_after_frameset
3709 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3713 if (t.type === TYPE_EOF) {
3721 // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
// Tree-construction handler for the "after after body" insertion mode.
// t: the token being processed; branches are chosen by token type/name.
3722 ins_mode_after_after_body = function (t) {
// comments are appended as the last child of doc
3723 if (t.type === TYPE_COMMENT) {
3724 insert_comment(t, [doc, doc.children.length])
// doctype, whitespace and <html> share one branch
3727 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3731 if (t.type === TYPE_EOF) {
// the handler also contains a switch back to "in body"
3737 ins_mode = ins_mode_in_body
3741 // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
// Tree-construction handler for the "after after frameset" insertion mode.
// t: the token being processed; branches are chosen by token type/name.
3742 ins_mode_after_after_frameset = function (t) {
// comments are appended as the last child of doc
3743 if (t.type === TYPE_COMMENT) {
3744 insert_comment(t, [doc, doc.children.length])
// doctype, whitespace and <html> share one branch
3747 if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
3751 if (t.type === TYPE_EOF) {
3755 if (t.type === TYPE_START_TAG && t.name === 'noframes') {
3764 // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
// Helper for the foreign-content rules: scans the token's attribute list
// (t.attrs_a) for an attribute named color, face or size.
// t: a tag token carrying attrs_a; the attribute name is read from index 0 of each entry.
3765 has_color_face_or_size = function (t) {
3767 for (i = 0; i < t.attrs_a.length; ++i) {
3769 if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
// Called by the foreign-content rules below for a self-closing <script> start
// tag and for a </script> end tag whose current node is an SVG script element.
// Takes no arguments.
3775 in_foreign_content_end_script = function () {
// Handles a start tag inside foreign (MathML/SVG) content: adjusts the token's
// name and attributes according to the namespace of the adjusted current node,
// inserts it as a foreign element in that namespace, and acknowledges a
// self-closing flag.
// t: the start tag token; t.name may be rewritten in place.
3779 in_foreign_content_other_start = function (t) {
3781 acn = adjusted_current_node()
3782 if (acn.namespace === NS_MATHML) {
3783 adjust_mathml_attributes(t)
// SVG tag names with an entry in svg_name_fixes are replaced by the mapped name
3785 if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra parens because Coffeescript did
3786 t.name = svg_name_fixes[t.name]
3788 if (acn.namespace === NS_SVG) {
3789 adjust_svg_attributes(t)
3791 adjust_foreign_attributes(t)
3792 insert_foreign_element(t, acn.namespace)
// self-closing tags: a script additionally runs the end-script handling
3793 if (t.flag('self-closing')) {
3794 if (t.name === 'script') {
3795 t.acknowledge_self_closing()
3796 in_foreign_content_end_script()
3800 t.acknowledge_self_closing()
// Tree-construction rules for tokens in foreign content.
// t: the token being processed; branches are chosen by token type/name.
3804 in_foreign_content = function (t) {
// a U+0000 text token is replaced by U+FFFD
3806 if (t.type === TYPE_TEXT && t.text === "\u0000") {
3808 insert_character(new_character_token("\ufffd"))
3811 if (is_space_tok(t)) {
// non-whitespace text clears the frameset-ok flag
3815 if (t.type === TYPE_TEXT) {
3816 flag_frameset_ok = false
3820 if (t.type === TYPE_COMMENT) {
3824 if (t.type === TYPE_DOCTYPE) {
// HTML-only start tags (and <font> carrying color/face/size) get their own branch
3828 if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
// when parsing a fragment the tag is treated like any other foreign start tag
3830 if (flag_fragment_parsing) {
3831 in_foreign_content_other_start(t)
3834 while (true) { // is this safe?
// loop test: current node is a MathML text integration point, an HTML integration point, or an HTML element
3836 if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
3843 if (t.type === TYPE_START_TAG) {
3844 in_foreign_content_other_start(t)
// </script> while the current node is an SVG script element
3847 if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
3848 in_foreign_content_end_script()
// any other end tag: node names are compared lowercased against the token name
3851 if (t.type === TYPE_END_TAG) {
3854 if (node.name.toLowerCase() !== t.name) {
3858 if (node === open_els[open_els.length - 1]) {
3861 if (node.name.toLowerCase() === t.name) {
3863 el = open_els.shift()
3871 if (node.namespace === NS_HTML) {
3875 ins_mode(t) // explicitly call HTML insertion mode
3880 // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
// Tokenizer handler for the data state. Consumes one character from txt at
// cur (advancing cur) and, depending on it, returns a text node (built from
// parse_character_reference() or from the character itself), switches to the
// tag open state, or returns an EOF token.
3881 tok_state_data = function () {
3883 switch (c = txt.charAt(cur++)) {
3885 return new_text_node(parse_character_reference())
3888 tok_state = tok_state_tag_open
3892 return new_text_node(c)
3895 return new_eof_token()
3898 return new_text_node(c)
3903 // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3904 // not needed: tok_state_character_reference_in_data = function () {
3905 // just call parse_character_reference()
3907 // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
// Tokenizer handler for the RCDATA state. Consumes one character and,
// depending on it, returns a text node from parse_character_reference(),
// switches to the RCDATA less-than sign state, returns a U+FFFD character
// token, returns an EOF token, or returns the character as a token.
3908 tok_state_rcdata = function () {
3910 switch (c = txt.charAt(cur++)) {
3912 return new_text_node(parse_character_reference())
3915 tok_state = tok_state_rcdata_less_than_sign
3919 return new_character_token("\ufffd")
3922 return new_eof_token()
3925 return new_character_token(c)
3930 // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3931 // not needed: tok_state_character_reference_in_rcdata = function () {
3932 // just call parse_character_reference()
3934 // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
// Tokenizer handler for the RAWTEXT state. Consumes one character and,
// depending on it, switches to the RAWTEXT less-than sign state, returns a
// U+FFFD character token, returns an EOF token, or returns the character as a token.
3935 tok_state_rawtext = function () {
3937 switch (c = txt.charAt(cur++)) {
3939 tok_state = tok_state_rawtext_less_than_sign
3943 return new_character_token("\ufffd")
3946 return new_eof_token()
3949 return new_character_token(c)
3954 // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
// Tokenizer handler for the script data state. Consumes one character and,
// depending on it, switches to the script data less-than sign state, returns
// a U+FFFD character token, returns an EOF token, or returns the character as a token.
3955 tok_state_script_data = function () {
3957 switch (c = txt.charAt(cur++)) {
3959 tok_state = tok_state_script_data_less_than_sign
3963 return new_character_token("\ufffd")
3966 return new_eof_token()
3969 return new_character_token(c)
3974 // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
// Tokenizer handler for the PLAINTEXT state. Consumes one character and
// returns a U+FFFD character token, an EOF token, or the character as a
// token; no visible branch changes tok_state.
3975 tok_state_plaintext = function () {
3977 switch (c = txt.charAt(cur++)) {
3980 return new_character_token("\ufffd")
3983 return new_eof_token()
3986 return new_character_token(c)
3991 // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
// Tokenizer handler for the tag open state (entered from the data state).
// Consumes one character and starts a markup declaration, an end tag, a start
// tag (tok_cur_tag), or a bogus comment; otherwise it un-reads the character
// and returns a literal '<' text node.
3992 tok_state_tag_open = function () {
3994 c = txt.charAt(cur++)
3996 tok_state = tok_state_markup_declaration_open
4000 tok_state = tok_state_end_tag_open
// letters start a new start tag; tag names are stored lowercased
4003 if (is_uc_alpha(c)) {
4004 tok_cur_tag = new_open_tag(c.toLowerCase())
4005 tok_state = tok_state_tag_name
4008 if (is_lc_alpha(c)) {
4009 tok_cur_tag = new_open_tag(c)
4010 tok_state = tok_state_tag_name
// bogus comment whose text starts with '?'
4015 tok_cur_tag = new_comment_token('?') // FIXME right?
4016 tok_state = tok_state_bogus_comment
4021 tok_state = tok_state_data
4022 cur -= 1 // we didn't parse/handle the char after <
4023 return new_text_node('<')
4026 // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
// Tokenizer handler for the end tag open state. Consumes one character:
// a letter starts an end tag token (name lowercased), end of input yields the
// text "</", and other characters either return to the data state or begin a
// bogus comment.
4027 tok_state_end_tag_open = function () {
4029 c = txt.charAt(cur++)
4030 if (is_uc_alpha(c)) {
4031 tok_cur_tag = new_end_tag(c.toLowerCase())
4032 tok_state = tok_state_tag_name
4035 if (is_lc_alpha(c)) {
4036 tok_cur_tag = new_end_tag(c)
4037 tok_state = tok_state_tag_name
4042 tok_state = tok_state_data
// charAt() yields '' once cur is past the end of txt
4045 if (c === '') { // EOF
4047 tok_state = tok_state_data
4048 return new_text_node('</')
// bogus comment whose text starts with the consumed character
4052 tok_cur_tag = new_comment_token(c)
4053 tok_state = tok_state_bogus_comment
4057 // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
// Tokenizer handler for the tag name state. Consumes one character and either
// leaves the state (before attribute name, self-closing start tag, or data)
// or appends to tok_cur_tag.name (U+FFFD, the lowercased letter, or the
// character unchanged).
4058 tok_state_tag_name = function () {
4060 switch (c = txt.charAt(cur++)) {
4065 tok_state = tok_state_before_attribute_name
4068 tok_state = tok_state_self_closing_start_tag
4071 tok_state = tok_state_data
4078 tok_cur_tag.name += "\ufffd"
4082 tok_state = tok_state_data
// uppercase letters are folded to lowercase in the tag name
4085 if (is_uc_alpha(c)) {
4086 tok_cur_tag.name += c.toLowerCase()
4088 tok_cur_tag.name += c
4094 // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
// Tokenizer handler for the RCDATA less-than sign state. Consumes one
// character: either resets temporary_buffer and moves to the RCDATA end tag
// open state, or un-reads the character, returns to RCDATA and returns '<'.
4095 tok_state_rcdata_less_than_sign = function () {
4097 c = txt.charAt(cur++)
4099 temporary_buffer = ''
4100 tok_state = tok_state_rcdata_end_tag_open
4104 tok_state = tok_state_rcdata
4105 cur -= 1 // reconsume the input character
4106 return new_character_token('<')
4109 // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
// Tokenizer handler for the RCDATA end tag open state. A letter starts an end
// tag token (name lowercased) and is appended, in its original case, to
// temporary_buffer; any other character is un-read and "</" is returned as
// character data.
4110 tok_state_rcdata_end_tag_open = function () {
4112 c = txt.charAt(cur++)
4113 if (is_uc_alpha(c)) {
4114 tok_cur_tag = new_end_tag(c.toLowerCase())
4115 temporary_buffer += c
4116 tok_state = tok_state_rcdata_end_tag_name
4119 if (is_lc_alpha(c)) {
4120 tok_cur_tag = new_end_tag(c)
4121 temporary_buffer += c
4122 tok_state = tok_state_rcdata_end_tag_name
4126 tok_state = tok_state_rcdata
4127 cur -= 1 // reconsume the input character
4128 return new_character_token("</") // fixfull separate these
4131 // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
// Returns true when t is an end tag token whose name equals the name of the
// current node (open_els[0]).
// t: the token to test.
4132 is_appropriate_end_tag = function (t) {
4133 // fixfull: this assumes that open_els[0].name is "the tag name of the last
4134 // start tag to have been emitted from this tokenizer"
4135 return t.type === TYPE_END_TAG && t.name === open_els[0].name
4138 // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
// Tokenizer handler for the RCDATA end tag name state. While letters arrive
// they extend tok_cur_tag.name (lowercased) and temporary_buffer (original
// case). A terminating character only ends the tag when tok_cur_tag is an
// appropriate end tag; otherwise "</" plus the buffered text is returned as
// character data and the tokenizer goes back to RCDATA.
4139 tok_state_rcdata_end_tag_name = function () {
4141 c = txt.charAt(cur++)
// whitespace after the name
4142 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4143 if (is_appropriate_end_tag(tok_cur_tag)) {
4144 tok_state = tok_state_before_attribute_name
4147 // else fall through to "Anything else"
4150 if (is_appropriate_end_tag(tok_cur_tag)) {
4151 tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
4154 // else fall through to "Anything else"
4157 if (is_appropriate_end_tag(tok_cur_tag)) {
4158 tok_state = tok_state_data
4161 // else fall through to "Anything else"
4163 if (is_uc_alpha(c)) {
4164 tok_cur_tag.name += c.toLowerCase()
4165 temporary_buffer += c
4168 if (is_lc_alpha(c)) {
4169 tok_cur_tag.name += c
4170 temporary_buffer += c
// "Anything else": replay the buffered text as character data
4174 tok_state = tok_state_rcdata
4175 cur -= 1 // reconsume the input character
4176 return new_character_token('</' + temporary_buffer) // fixfull separate these
4179 // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
// Tokenizer handler for the RAWTEXT less-than sign state. Consumes one
// character: either resets temporary_buffer and moves to the RAWTEXT end tag
// open state, or un-reads the character, returns to RAWTEXT and returns '<'.
4180 tok_state_rawtext_less_than_sign = function () {
4182 c = txt.charAt(cur++)
4184 temporary_buffer = ''
4185 tok_state = tok_state_rawtext_end_tag_open
4189 tok_state = tok_state_rawtext
4190 cur -= 1 // reconsume the input character
4191 return new_character_token('<')
4194 // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
// Tokenizer handler for the RAWTEXT end tag open state. A letter starts an
// end tag token (name lowercased) and is appended, in its original case, to
// temporary_buffer; any other character is un-read and "</" is returned as
// character data.
4195 tok_state_rawtext_end_tag_open = function () {
4196 c = txt.charAt(cur++)
4197 if (is_uc_alpha(c)) {
4198 tok_cur_tag = new_end_tag(c.toLowerCase())
4199 temporary_buffer += c
4200 tok_state = tok_state_rawtext_end_tag_name
4203 if (is_lc_alpha(c)) {
4204 tok_cur_tag = new_end_tag(c)
4205 temporary_buffer += c
4206 tok_state = tok_state_rawtext_end_tag_name
4210 tok_state = tok_state_rawtext
4211 cur -= 1 // reconsume the input character
4212 return new_character_token("</") // fixfull separate these
4215 // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
// Tokenizer handler for the RAWTEXT end tag name state. While letters arrive
// they extend tok_cur_tag.name (lowercased) and temporary_buffer (original
// case). A terminating character only ends the tag when tok_cur_tag is an
// appropriate end tag; otherwise "</" plus the buffered text is returned as
// character data and the tokenizer goes back to RAWTEXT.
4216 tok_state_rawtext_end_tag_name = function () {
4218 c = txt.charAt(cur++)
// whitespace after the name
4219 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4220 if (is_appropriate_end_tag(tok_cur_tag)) {
4221 tok_state = tok_state_before_attribute_name
4224 // else fall through to "Anything else"
4227 if (is_appropriate_end_tag(tok_cur_tag)) {
4228 tok_state = tok_state_self_closing_start_tag
4231 // else fall through to "Anything else"
4234 if (is_appropriate_end_tag(tok_cur_tag)) {
4235 tok_state = tok_state_data
4238 // else fall through to "Anything else"
4240 if (is_uc_alpha(c)) {
4241 tok_cur_tag.name += c.toLowerCase()
4242 temporary_buffer += c
4245 if (is_lc_alpha(c)) {
4246 tok_cur_tag.name += c
4247 temporary_buffer += c
// "Anything else": replay the buffered text as character data
4251 tok_state = tok_state_rawtext
4252 cur -= 1 // reconsume the input character
4253 return new_character_token('</' + temporary_buffer) // fixfull separate these
4256 // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
// Tokenizer handler for the script data less-than sign state. Consumes one
// character and either starts an end tag (resetting temporary_buffer), starts
// an escape sequence (returning "<!"), or un-reads the character and returns '<'.
4257 tok_state_script_data_less_than_sign = function () {
4259 c = txt.charAt(cur++)
4261 temporary_buffer = ''
4262 tok_state = tok_state_script_data_end_tag_open
4266 tok_state = tok_state_script_data_escape_start
4267 return new_character_token('<!') // fixfull split
4270 tok_state = tok_state_script_data
4271 cur -= 1 // reconsume
4272 return new_character_token('<')
4275 // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
// Tokenizer handler for the script data end tag open state. A letter starts
// an end tag token (name lowercased) and is appended, in its original case,
// to temporary_buffer; any other character is un-read and "</" is returned as
// character data.
4276 tok_state_script_data_end_tag_open = function () {
4278 c = txt.charAt(cur++)
4279 if (is_uc_alpha(c)) {
4280 tok_cur_tag = new_end_tag(c.toLowerCase())
4281 temporary_buffer += c
4282 tok_state = tok_state_script_data_end_tag_name
4285 if (is_lc_alpha(c)) {
4286 tok_cur_tag = new_end_tag(c)
4287 temporary_buffer += c
4288 tok_state = tok_state_script_data_end_tag_name
4292 tok_state = tok_state_script_data
4293 cur -= 1 // reconsume
4294 return new_character_token('</')
4297 // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
// Tokenizer handler for the script data end tag name state. While letters
// arrive they extend tok_cur_tag.name (lowercased) and temporary_buffer
// (original case). A terminating character only ends the tag when tok_cur_tag
// is an appropriate end tag; otherwise "</" plus the buffered text is returned
// as character data and the tokenizer goes back to script data.
4298 tok_state_script_data_end_tag_name = function () {
4300 c = txt.charAt(cur++)
// whitespace after the name
4301 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4302 if (is_appropriate_end_tag(tok_cur_tag)) {
4303 tok_state = tok_state_before_attribute_name
4309 if (is_appropriate_end_tag(tok_cur_tag)) {
4310 tok_state = tok_state_self_closing_start_tag
4316 if (is_appropriate_end_tag(tok_cur_tag)) {
4317 tok_state = tok_state_data
4322 if (is_uc_alpha(c)) {
4323 tok_cur_tag.name += c.toLowerCase()
4324 temporary_buffer += c
4327 if (is_lc_alpha(c)) {
4328 tok_cur_tag.name += c
4329 temporary_buffer += c
// not a usable end tag: replay the buffered text as character data
4333 tok_state = tok_state_script_data
4334 cur -= 1 // reconsume
4335 return new_character_token("</" + temporary_buffer) // fixfull split
4338 // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
// Tokenizer handler for the script data escape start state. Consumes one
// character: either advances to the escape start dash state and returns '-',
// or un-reads the character and returns to the script data state.
4339 tok_state_script_data_escape_start = function () {
4341 c = txt.charAt(cur++)
4343 tok_state = tok_state_script_data_escape_start_dash
4344 return new_character_token('-')
4347 tok_state = tok_state_script_data
4348 cur -= 1 // reconsume
4351 // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
// Tokenizer handler for the script data escape start dash state. Consumes one
// character: either advances to the escaped dash dash state and returns '-',
// or un-reads the character and returns to the script data state.
4352 tok_state_script_data_escape_start_dash = function () {
4354 c = txt.charAt(cur++)
4356 tok_state = tok_state_script_data_escaped_dash_dash
4357 return new_character_token('-')
4360 tok_state = tok_state_script_data
4361 cur -= 1 // reconsume
4364 // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
// Tokenizer handler for the script data escaped state. Consumes one character
// and, depending on it, moves to the escaped dash state (returning '-'),
// moves to the escaped less-than sign state, returns U+FFFD for U+0000,
// falls back to the data state at end of input, or returns the character.
4365 tok_state_script_data_escaped = function () {
4367 c = txt.charAt(cur++)
4369 tok_state = tok_state_script_data_escaped_dash
4370 return new_character_token('-')
4373 tok_state = tok_state_script_data_escaped_less_than_sign
4376 if (c === "\u0000") {
4378 return new_character_token("\ufffd")
// charAt() yields '' once cur is past the end of txt
4380 if (c === '') { // EOF
4381 tok_state = tok_state_data
4383 cur -= 1 // reconsume
4387 return new_character_token(c)
4390 // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
// Tokenizer handler for the script data escaped dash state. Consumes one
// character and, depending on it, moves to the escaped dash dash state
// (returning '-'), moves to the escaped less-than sign state, returns U+FFFD
// for U+0000, falls back to the data state at end of input, or returns the
// character; the latter cases go back to the escaped state.
4391 tok_state_script_data_escaped_dash = function () {
4393 c = txt.charAt(cur++)
4395 tok_state = tok_state_script_data_escaped_dash_dash
4396 return new_character_token('-')
4399 tok_state = tok_state_script_data_escaped_less_than_sign
4402 if (c === "\u0000") {
4404 tok_state = tok_state_script_data_escaped
4405 return new_character_token("\ufffd")
// charAt() yields '' once cur is past the end of txt
4407 if (c === '') { // EOF
4408 tok_state = tok_state_data
4410 cur -= 1 // reconsume
4414 tok_state = tok_state_script_data_escaped
4415 return new_character_token(c)
4418 // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
// Tokenizer handler for the script data escaped dash dash state. Consumes one
// character and, depending on it, returns '-', moves to the escaped less-than
// sign state, returns '>' while going back to plain script data, returns
// U+FFFD for U+0000, falls back to the data state at end of input, or returns
// the character while going back to the escaped state.
4419 tok_state_script_data_escaped_dash_dash = function () {
4421 c = txt.charAt(cur++)
4423 return new_character_token('-')
4426 tok_state = tok_state_script_data_escaped_less_than_sign
// '>' ends the escaped section
4430 tok_state = tok_state_script_data
4431 return new_character_token('>')
4433 if (c === "\u0000") {
4435 tok_state = tok_state_script_data_escaped
4436 return new_character_token("\ufffd")
// charAt() yields '' once cur is past the end of txt
4438 if (c === '') { // EOF
4440 tok_state = tok_state_data
4441 cur -= 1 // reconsume
4445 tok_state = tok_state_script_data_escaped
4446 return new_character_token(c)
4449 // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
// Tokenizer handler for the script data escaped less-than sign state.
// Consumes one character: it may start an escaped end tag (resetting
// temporary_buffer); a letter seeds temporary_buffer with its lowercase form,
// enters the double escape start state and returns '<' plus the letter;
// anything else is un-read and '<' is returned.
4450 tok_state_script_data_escaped_less_than_sign = function () {
4452 c = txt.charAt(cur++)
4454 temporary_buffer = ''
4455 tok_state = tok_state_script_data_escaped_end_tag_open
4458 if (is_uc_alpha(c)) {
4459 temporary_buffer = c.toLowerCase() // yes, really
4460 tok_state = tok_state_script_data_double_escape_start
4461 return new_character_token("<" + c) // fixfull split
4463 if (is_lc_alpha(c)) {
4464 temporary_buffer = c
4465 tok_state = tok_state_script_data_double_escape_start
4466 return new_character_token("<" + c) // fixfull split
4469 tok_state = tok_state_script_data_escaped
4470 cur -= 1 // reconsume
4471 return new_character_token('<')
4474 // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
// Tokenizer handler for the script data escaped end tag open state. A letter
// starts an end tag token (name lowercased) and is appended, in its original
// case, to temporary_buffer; any other character is un-read and "</" is
// returned as character data.
4475 tok_state_script_data_escaped_end_tag_open = function () {
4477 c = txt.charAt(cur++)
4478 if (is_uc_alpha(c)) {
4479 tok_cur_tag = new_end_tag(c.toLowerCase())
4480 temporary_buffer += c
4481 tok_state = tok_state_script_data_escaped_end_tag_name
4484 if (is_lc_alpha(c)) {
4485 tok_cur_tag = new_end_tag(c)
4486 temporary_buffer += c
4487 tok_state = tok_state_script_data_escaped_end_tag_name
4491 tok_state = tok_state_script_data_escaped
4492 cur -= 1 // reconsume
4493 return new_character_token('</') // fixfull split
4496 // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
4497 tok_state_script_data_escaped_end_tag_name = function () {
4499 c = txt.charAt(cur++)
4500 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
4501 if (is_appropriate_end_tag(tok_cur_tag)) {
4502 tok_state = tok_state_before_attribute_name
4508 if (is_appropriate_end_tag(tok_cur_tag)) {
4509 tok_state = tok_state_self_closing_start_tag
4515 if (is_appropriate_end_tag(tok_cur_tag)) {
4516 tok_state = tok_state_data
4521 if (is_uc_alpha(c)) {
4522 tok_cur_tag.name += c.toLowerCase()
4523 temporary_buffer += c.toLowerCase()
4526 if (is_lc_alpha(c)) {
4527 tok_cur_tag.name += c
4528 temporary_buffer += c.toLowerCase()
4532 tok_state = tok_state_script_data_escaped
4533 cur -= 1 // reconsume
4534 return new_character_token("</" + temporary_buffer) // fixfull split
4537 // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
// Tokenizer handler for the script data double escape start state. Letters
// are collected (lowercased) in temporary_buffer and echoed as character
// tokens. On whitespace, '/' or '>' the buffer decides the next state:
// "script" enters the double escaped state, anything else goes back to the
// escaped state. Other characters are un-read.
4538 tok_state_script_data_double_escape_start = function () {
4540 c = txt.charAt(cur++)
4541 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4542 if (temporary_buffer === 'script') {
4543 tok_state = tok_state_script_data_double_escaped
4545 tok_state = tok_state_script_data_escaped
4547 return new_character_token(c)
4549 if (is_uc_alpha(c)) {
4550 temporary_buffer += c.toLowerCase() // yes, really lowercase
4551 return new_character_token(c)
4553 if (is_lc_alpha(c)) {
4554 temporary_buffer += c
4555 return new_character_token(c)
4558 tok_state = tok_state_script_data_escaped
4559 cur -= 1 // reconsume
4562 // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
// Tokenizer handler for the script data double escaped state. Consumes one
// character and, depending on it, moves to the double escaped dash state
// (returning '-'), moves to the double escaped less-than sign state
// (returning '<'), returns U+FFFD for U+0000, falls back to the data state at
// end of input, or returns the character.
4563 tok_state_script_data_double_escaped = function () {
4565 c = txt.charAt(cur++)
4567 tok_state = tok_state_script_data_double_escaped_dash
4568 return new_character_token('-')
4571 tok_state = tok_state_script_data_double_escaped_less_than_sign
4572 return new_character_token('<')
4574 if (c === "\u0000") {
4576 return new_character_token("\ufffd")
// charAt() yields '' once cur is past the end of txt
4578 if (c === '') { // EOF
4580 tok_state = tok_state_data
4581 cur -= 1 // reconsume
4585 return new_character_token(c)
4588 // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
// Tokenizer handler for the script data double escaped dash state. Consumes
// one character and, depending on it, moves to the double escaped dash dash
// state (returning '-'), moves to the double escaped less-than sign state
// (returning '<'), returns U+FFFD for U+0000, falls back to the data state at
// end of input, or returns the character while going back to the double
// escaped state.
4589 tok_state_script_data_double_escaped_dash = function () {
4591 c = txt.charAt(cur++)
4593 tok_state = tok_state_script_data_double_escaped_dash_dash
4594 return new_character_token('-')
4597 tok_state = tok_state_script_data_double_escaped_less_than_sign
4598 return new_character_token('<')
4600 if (c === "\u0000") {
4602 tok_state = tok_state_script_data_double_escaped
4603 return new_character_token("\ufffd")
// charAt() yields '' once cur is past the end of txt
4605 if (c === '') { // EOF
4607 tok_state = tok_state_data
4608 cur -= 1 // reconsume
4612 tok_state = tok_state_script_data_double_escaped
4613 return new_character_token(c)
4616 // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
// Tokenizer handler for the script data double escaped dash dash state.
// Consumes one character and, depending on it, returns '-', moves to the
// double escaped less-than sign state (returning '<'), returns '>' while
// going back to plain script data, returns U+FFFD for U+0000, falls back to
// the data state at end of input, or returns the character while going back
// to the double escaped state.
4617 tok_state_script_data_double_escaped_dash_dash = function () {
4619 c = txt.charAt(cur++)
4621 return new_character_token('-')
4624 tok_state = tok_state_script_data_double_escaped_less_than_sign
4625 return new_character_token('<')
// '>' ends the escaped section
4628 tok_state = tok_state_script_data
4629 return new_character_token('>')
4631 if (c === "\u0000") {
4633 tok_state = tok_state_script_data_double_escaped
4634 return new_character_token("\ufffd")
// charAt() yields '' once cur is past the end of txt
4636 if (c === '') { // EOF
4638 tok_state = tok_state_data
4639 cur -= 1 // reconsume
4643 tok_state = tok_state_script_data_double_escaped
4644 return new_character_token(c)
4647 // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
// Tokenizer handler for the script data double escaped less-than sign state.
// Consumes one character: either resets temporary_buffer, enters the double
// escape end state and returns '/', or un-reads the character and goes back
// to the double escaped state.
4648 tok_state_script_data_double_escaped_less_than_sign = function () {
4650 c = txt.charAt(cur++)
4652 temporary_buffer = ''
4653 tok_state = tok_state_script_data_double_escape_end
4654 return new_character_token('/')
4657 tok_state = tok_state_script_data_double_escaped
4658 cur -= 1 // reconsume
4661 // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
// Tokenizer handler for the script data double escape end state. Letters are
// collected (lowercased) in temporary_buffer and echoed as character tokens.
// On whitespace, '/' or '>' the buffer decides the next state: "script" drops
// back to the (single) escaped state, anything else stays double escaped.
// Other characters are un-read.
4662 tok_state_script_data_double_escape_end = function () {
4664 c = txt.charAt(cur++)
4665 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
4666 if (temporary_buffer === 'script') {
4667 tok_state = tok_state_script_data_escaped
4669 tok_state = tok_state_script_data_double_escaped
4671 return new_character_token(c)
4673 if (is_uc_alpha(c)) {
4674 temporary_buffer += c.toLowerCase() // yes, really lowercase
4675 return new_character_token(c)
4677 if (is_lc_alpha(c)) {
4678 temporary_buffer += c
4679 return new_character_token(c)
4682 tok_state = tok_state_script_data_double_escaped
4683 cur -= 1 // reconsume
4686 // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
// Tokenizer handler for the before attribute name state. Consumes one
// character and either leaves the state (self-closing start tag or data) or
// picks the first character of a new attribute name (U+FFFD, or the
// lowercased letter). When a name was chosen, a new [name, ''] pair is pushed
// onto the front of tok_cur_tag.attrs_a and the attribute name state begins.
4687 tok_state_before_attribute_name = function () {
4688 var attr_name, c, tmp
4690 switch (c = txt.charAt(cur++)) {
4698 tok_state = tok_state_self_closing_start_tag
4702 tok_state = tok_state_data
4709 attr_name = "\ufffd"
4720 tok_state = tok_state_data
4723 if (is_uc_alpha(c)) {
4724 attr_name = c.toLowerCase()
// attr_name stays unset when the character did not start an attribute
4729 if (attr_name != null) {
4730 tok_cur_tag.attrs_a.unshift([attr_name, ''])
4731 tok_state = tok_state_attribute_name
4736 // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
// Tokenizer handler for the attribute name state. Consumes one character and
// either leaves the state (after attribute name, self-closing start tag,
// before attribute value, or data) or appends to the name of the attribute
// being built, which is the first entry of tok_cur_tag.attrs_a.
4737 tok_state_attribute_name = function () {
4739 switch (c = txt.charAt(cur++)) {
4744 tok_state = tok_state_after_attribute_name
4747 tok_state = tok_state_self_closing_start_tag
4750 tok_state = tok_state_before_attribute_value
4753 tok_state = tok_state_data
4760 tok_cur_tag.attrs_a[0][0] += "\ufffd"
4766 tok_cur_tag.attrs_a[0][0] += c
4770 tok_state = tok_state_data
// uppercase letters are folded to lowercase in the attribute name
4773 if (is_uc_alpha(c)) {
4774 tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
4776 tok_cur_tag.attrs_a[0][0] += c
4782 // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
// Tokenizer state function: reads one character after an attribute name. It
// either changes state or starts a NEW attribute by unshifting a [name, '']
// pair onto tok_cur_tag.attrs_a and switching to the attribute-name state.
// Several branch conditions are missing from this listing.
4783 tok_state_after_attribute_name = function () {
4785 c = txt.charAt(cur++)
4786 if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
4790 tok_state = tok_state_self_closing_start_tag
4794 tok_state = tok_state_before_attribute_value
4798 tok_state = tok_state_data
4801 if (is_uc_alpha(c)) {
4802 tok_cur_tag.attrs_a.unshift([c.toLowerCase(), '']) // new attribute, lowercased first letter
4803 tok_state = tok_state_attribute_name
4806 if (c === "\u0000") {
4808 tok_cur_tag.attrs_a.unshift(["\ufffd", '']) // NUL becomes U+FFFD in the new name
4809 tok_state = tok_state_attribute_name
4812 if (c === '') { // EOF
4814 tok_state = tok_state_data
4815 cur -= 1 // reconsume
4818 if (c === '"' || c === "'" || c === '<') {
4820 // fall through to Anything else
// anything else: the character itself starts a new attribute name
4823 tok_cur_tag.attrs_a.unshift([c, ''])
4824 tok_state = tok_state_attribute_name
4827 // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
// Tokenizer state function: switches on the next character to choose which
// attribute-value state follows (double-quoted, single-quoted or unquoted).
// tok_cur_tag.attrs_a[0][1] is the value of the attribute currently being
// built. The case labels are missing from this listing.
4828 tok_state_before_attribute_value = function () {
4830 switch (c = txt.charAt(cur++)) {
4838 tok_state = tok_state_attribute_value_double_quoted
4841 tok_state = tok_state_attribute_value_unquoted
4845 tok_state = tok_state_attribute_value_single_quoted
4849 tok_cur_tag.attrs_a[0][1] += "\ufffd" // replacement character starts an unquoted value
4850 tok_state = tok_state_attribute_value_unquoted
4854 tok_state = tok_state_data
4861 tok_state = tok_state_data
// the character read becomes the first character of an unquoted value
4864 tok_cur_tag.attrs_a[0][1] += c
4865 tok_state = tok_state_attribute_value_unquoted
4870 // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
// Tokenizer state function: switches on the next character and appends to the
// current attribute's value (tok_cur_tag.attrs_a[0][1]) or changes state.
// The case labels are missing from this listing.
4871 tok_state_attribute_value_double_quoted = function () {
4873 switch (c = txt.charAt(cur++)) {
4875 tok_state = tok_state_after_attribute_value_quoted
// character reference inside the value: '"' is passed as allowed_char and
// true as in_attr
4878 tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
4882 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4886 tok_state = tok_state_data
4889 tok_cur_tag.attrs_a[0][1] += c
4894 // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
// Tokenizer state function: same shape as the double-quoted variant, but "'"
// is the allowed_char handed to parse_character_reference. Appends to the
// current attribute's value (tok_cur_tag.attrs_a[0][1]) or changes state.
// The case labels are missing from this listing.
4895 tok_state_attribute_value_single_quoted = function () {
4897 switch (c = txt.charAt(cur++)) {
4899 tok_state = tok_state_after_attribute_value_quoted
4902 tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true) // second argument is in_attr
4906 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4910 tok_state = tok_state_data
4913 tok_cur_tag.attrs_a[0][1] += c
4918 // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// Tokenizer state function: switches on the next character of an unquoted
// attribute value, appending to tok_cur_tag.attrs_a[0][1] or changing state.
// The case labels are missing from this listing.
4919 tok_state_attribute_value_unquoted = function () {
4921 switch (c = txt.charAt(cur++)) {
4926 tok_state = tok_state_before_attribute_name
// character reference inside the value: '>' is passed as allowed_char and
// true as in_attr
4929 tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
4932 tok_state = tok_state_data
4938 tok_cur_tag.attrs_a[0][1] += "\ufffd"
4942 tok_state = tok_state_data
4945 // Parse Error if ', <, = or ` (backtick)
4946 tok_cur_tag.attrs_a[0][1] += c
4951 // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
// Tokenizer state function: switches on the character that follows a quoted
// attribute value and picks the next state. The case labels are missing from
// this listing.
4952 tok_state_after_attribute_value_quoted = function () {
4954 switch (c = txt.charAt(cur++)) {
4959 tok_state = tok_state_before_attribute_name
4962 tok_state = tok_state_self_closing_start_tag
4965 tok_state = tok_state_data
4972 tok_state = tok_state_data
// last visible branch: hand the same character to the before-attribute-name state
4976 tok_state = tok_state_before_attribute_name
4977 cur -= 1 // we didn't handle that char
4982 // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
// Tokenizer state function: reads one character. One branch sets the
// 'self-closing' flag on tok_cur_tag and returns to the data state; the other
// two back up one character and switch state. The branch conditions are
// missing from this listing.
4983 tok_state_self_closing_start_tag = function () {
4985 c = txt.charAt(cur++)
4987 tok_cur_tag.flag('self-closing', true)
4988 tok_state = tok_state_data
4993 tok_state = tok_state_data
4994 cur -= 1 // reconsume
4999 tok_state = tok_state_before_attribute_name
5000 cur -= 1 // reconsume
5003 // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
5004 // WARNING: put a comment token in tok_cur_tag before setting this state
// Tokenizer state function: takes everything from cur up to (not including) the
// next ">", or the rest of txt when there is no ">", replaces NUL characters
// with U+FFFD, appends the result to tok_cur_tag.text and returns to the data
// state. The lines that advance cur and return are missing from this listing.
5005 tok_state_bogus_comment = function () {
5007 next_gt = txt.indexOf('>', cur)
5008 if (next_gt === -1) {
5009 val = txt.substr(cur) // no ">" left: take the remainder of the input
5012 val = txt.substr(cur, next_gt - cur)
5015 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5016 tok_cur_tag.text += val
5017 tok_state = tok_state_data
5021 // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
// Tokenizer state function: looks ahead from cur (without a per-character
// switch) to decide what kind of markup declaration this is:
//   "--"                             -> new comment token, comment-start state
//   "doctype" (case-insensitive)     -> doctype state
//   "[CDATA[" outside HTML namespace -> CDATA section state
//   anything else                    -> new comment token, bogus-comment state
// The lines that advance cur past the matched text are missing from this listing.
5022 tok_state_markup_declaration_open = function () {
5024 if (txt.substr(cur, 2) === '--') {
5026 tok_cur_tag = new_comment_token('')
5027 tok_state = tok_state_comment_start
5030 if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
5032 tok_state = tok_state_doctype
// CDATA is only recognised when there is an adjusted current node and it is
// not in the HTML namespace; the match is case-sensitive
5035 acn = adjusted_current_node()
5036 if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
5038 tok_state = tok_state_cdata_section
5043 tok_cur_tag = new_comment_token('')
5044 tok_state = tok_state_bogus_comment
5047 // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
// Tokenizer state function: switches on the first character after "<!--".
// The comment being built is tok_cur_tag; its text accumulates in
// tok_cur_tag.text. The case labels are missing from this listing.
5048 tok_state_comment_start = function () {
5050 switch (c = txt.charAt(cur++)) {
5052 tok_state = tok_state_comment_start_dash
// NOTE(review): the sibling comment states append "\ufffd" to tok_cur_tag.text,
// but this branch returns a U+FFFD character token instead. Verify against the
// spec section linked above whether it should append to the comment text.
5056 tok_state = tok_state_comment
5057 return new_character_token("\ufffd")
5061 tok_state = tok_state_data
5066 tok_state = tok_state_data
5067 cur -= 1 // reconsume
// remaining visible branch: the character becomes comment text
5071 tok_cur_tag.text += c
5072 tok_state = tok_state_comment
5077 // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
// Tokenizer state function: switches on the next character. The branches that
// append to tok_cur_tag.text prefix a "-", which was not added to the text
// before this state was entered. The case labels are missing from this listing.
5078 tok_state_comment_start_dash = function () {
5080 switch (c = txt.charAt(cur++)) {
5082 tok_state = tok_state_comment_end
5086 tok_cur_tag.text += "-\ufffd" // pending dash plus replacement character
5087 tok_state = tok_state_comment
5091 tok_state = tok_state_data
5096 tok_state = tok_state_data
5097 cur -= 1 // reconsume
5101 tok_cur_tag.text += "-" + c // pending dash plus the character just read
5102 tok_state = tok_state_comment
5107 // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
// Tokenizer state function: switches on the next character inside a comment,
// appending to tok_cur_tag.text or changing state. The case labels are missing
// from this listing.
5108 tok_state_comment = function () {
5110 switch (c = txt.charAt(cur++)) {
5112 tok_state = tok_state_comment_end_dash
5116 tok_cur_tag.text += "\ufffd"
5120 tok_state = tok_state_data
5121 cur -= 1 // reconsume
5125 tok_cur_tag.text += c // ordinary comment character
5130 // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
// Tokenizer state function: switches on the next character. The branches that
// return to the comment state append a "-" before the new text, as the pending
// dash had not been added to tok_cur_tag.text. The case labels are missing
// from this listing.
5131 tok_state_comment_end_dash = function () {
5133 switch (c = txt.charAt(cur++)) {
5135 tok_state = tok_state_comment_end
5139 tok_cur_tag.text += "-\ufffd"
5140 tok_state = tok_state_comment
5144 tok_state = tok_state_data
5145 cur -= 1 // reconsume
5149 tok_cur_tag.text += "-" + c
5150 tok_state = tok_state_comment
5155 // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
// Tokenizer state function: switches on the next character. The branches that
// return to the comment state append "--" before the new text, as the two
// pending dashes had not been added to tok_cur_tag.text. The case labels are
// missing from this listing.
5156 tok_state_comment_end = function () {
5158 switch (c = txt.charAt(cur++)) {
5160 tok_state = tok_state_data
5165 tok_cur_tag.text += "--\ufffd"
5166 tok_state = tok_state_comment
5170 tok_state = tok_state_comment_end_bang
// a single dash is added to the text and the state is left unchanged
5174 tok_cur_tag.text += '-'
5178 tok_state = tok_state_data
5179 cur -= 1 // reconsume
5184 tok_cur_tag.text += "--" + c
5185 tok_state = tok_state_comment
5190 // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
// Tokenizer state function: switches on the next character. The pending "--!"
// is added to tok_cur_tag.text by the branches that keep the comment open.
// The case labels are missing from this listing.
5191 tok_state_comment_end_bang = function () {
5193 switch (c = txt.charAt(cur++)) {
// NOTE(review): this branch moves to the comment-end-dash state, so it is
// presumably the "-" case. If so it appends "--!-", and the comment-end-dash
// state will prepend another "-" later; the spec section linked above appears
// to append only "--!" here. Verify.
5195 tok_cur_tag.text += "--!" + c
5196 tok_state = tok_state_comment_end_dash
5199 tok_state = tok_state_data
5204 tok_cur_tag.text += "--!\ufffd"
5205 tok_state = tok_state_comment
5209 tok_state = tok_state_data
5210 cur -= 1 // reconsume
5214 tok_cur_tag.text += "--!" + c
5215 tok_state = tok_state_comment
5220 // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
// Tokenizer state function: switches on the first character after the doctype
// keyword. The case labels are missing from this listing.
5221 tok_state_doctype = function () {
5223 switch (c = txt.charAt(cur++)) {
5228 tok_state = tok_state_before_doctype_name
// this branch builds an empty-named doctype token flagged force-quirks and
// backs up one character (NOTE(review): presumably the EOF case, as in the
// sibling states; the line that returns el is missing from this listing)
5232 tok_state = tok_state_data
5233 el = new_doctype_token('')
5234 el.flag('force-quirks', true)
5235 cur -= 1 // reconsume
// other branch: let the before-doctype-name state look at this character
5240 tok_state = tok_state_before_doctype_name
5241 cur -= 1 // reconsume
5246 // 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
// Tokenizer state function: reads one character and, for anything that can
// start a name, creates the doctype token in tok_cur_tag with that first
// character. Some branch conditions are missing from this listing.
5247 tok_state_before_doctype_name = function () {
5249 c = txt.charAt(cur++)
5250 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5253 if (is_uc_alpha(c)) {
5254 tok_cur_tag = new_doctype_token(c.toLowerCase()) // doctype names are stored lowercased
5255 tok_state = tok_state_doctype_name
5258 if (c === "\u0000") {
5260 tok_cur_tag = new_doctype_token("\ufffd")
5261 tok_state = tok_state_doctype_name
// branch with a missing condition: an empty-named, force-quirks doctype token
// is built in the local el rather than in tok_cur_tag
5266 el = new_doctype_token('')
5267 el.flag('force-quirks', true)
5268 tok_state = tok_state_data
5271 if (c === '') { // EOF
5273 tok_state = tok_state_data
5274 el = new_doctype_token('')
5275 el.flag('force-quirks', true)
5276 cur -= 1 // reconsume
// anything else: the character starts the doctype name as-is
5280 tok_cur_tag = new_doctype_token(c)
5281 tok_state = tok_state_doctype_name
5285 // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
// Tokenizer state function: reads one character and either appends it to
// tok_cur_tag.name or leaves the name. One branch condition is missing from
// this listing.
5286 tok_state_doctype_name = function () {
5288 c = txt.charAt(cur++)
5289 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5290 tok_state = tok_state_after_doctype_name
5294 tok_state = tok_state_data
5297 if (is_uc_alpha(c)) {
5298 tok_cur_tag.name += c.toLowerCase() // name is accumulated in lowercase
5301 if (c === "\u0000") {
5303 tok_cur_tag.name += "\ufffd"
5306 if (c === '') { // EOF
5308 tok_state = tok_state_data
5309 tok_cur_tag.flag('force-quirks', true)
5310 cur -= 1 // reconsume
5314 tok_cur_tag.name += c
5318 // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
// Tokenizer state function: after the doctype name, looks for the "public" or
// "system" keyword (case-insensitive). Anything else marks the doctype
// force-quirks and moves to the bogus-doctype state. Some branch conditions
// and the lines that advance cur past a keyword are missing from this listing.
5319 tok_state_after_doctype_name = function () {
5321 c = txt.charAt(cur++)
5322 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5326 tok_state = tok_state_data
5329 if (c === '') { // EOF
5331 tok_state = tok_state_data
5332 tok_cur_tag.flag('force-quirks', true)
5333 cur -= 1 // reconsume
// cur was already advanced past c above, so the keyword starts at cur - 1
5337 if (txt.substr(cur - 1, 6).toLowerCase() === 'public') {
5339 tok_state = tok_state_after_doctype_public_keyword
5342 if (txt.substr(cur - 1, 6).toLowerCase() === 'system') {
5344 tok_state = tok_state_after_doctype_system_keyword
5348 tok_cur_tag.flag('force-quirks', true)
5349 tok_state = tok_state_bogus_doctype
5353 // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
// Tokenizer state function: reads the character after the "public" keyword.
// It either starts an empty public identifier (double- or single-quoted) or
// flags the doctype force-quirks. Several branch conditions are missing from
// this listing.
5354 tok_state_after_doctype_public_keyword = function () {
5356 c = txt.charAt(cur++)
5357 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5358 tok_state = tok_state_before_doctype_public_identifier
// identifier starts empty; the quoted-identifier state appends to it
5363 tok_cur_tag.public_identifier = ''
5364 tok_state = tok_state_doctype_public_identifier_double_quoted
5369 tok_cur_tag.public_identifier = ''
5370 tok_state = tok_state_doctype_public_identifier_single_quoted
5375 tok_cur_tag.flag('force-quirks', true)
5376 tok_state = tok_state_data
5379 if (c === '') { // EOF
5381 tok_state = tok_state_data
5382 tok_cur_tag.flag('force-quirks', true)
5383 cur -= 1 // reconsume
// anything else: force-quirks and skip the rest as a bogus doctype
5388 tok_cur_tag.flag('force-quirks', true)
5389 tok_state = tok_state_bogus_doctype
5393 // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
// Tokenizer state function: reads one character before the public identifier.
// It either starts an empty public identifier (double- or single-quoted) or
// flags the doctype force-quirks. Several branch conditions are missing from
// this listing.
5394 tok_state_before_doctype_public_identifier = function () {
5396 c = txt.charAt(cur++)
5397 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5402 tok_cur_tag.public_identifier = ''
5403 tok_state = tok_state_doctype_public_identifier_double_quoted
5408 tok_cur_tag.public_identifier = ''
5409 tok_state = tok_state_doctype_public_identifier_single_quoted
5414 tok_cur_tag.flag('force-quirks', true)
5415 tok_state = tok_state_data
5418 if (c === '') { // EOF
5420 tok_state = tok_state_data
5421 tok_cur_tag.flag('force-quirks', true)
5422 cur -= 1 // reconsume
// anything else: force-quirks and skip the rest as a bogus doctype
5427 tok_cur_tag.flag('force-quirks', true)
5428 tok_state = tok_state_bogus_doctype
5433 // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
// Tokenizer state function: reads one character of a double-quoted public
// identifier, appending to tok_cur_tag.public_identifier until the identifier
// ends. Two branch conditions are missing from this listing.
5434 tok_state_doctype_public_identifier_double_quoted = function () {
5436 c = txt.charAt(cur++)
5438 tok_state = tok_state_after_doctype_public_identifier
5441 if (c === "\u0000") {
5443 tok_cur_tag.public_identifier += "\ufffd"
5448 tok_cur_tag.flag('force-quirks', true)
5449 tok_state = tok_state_data
5452 if (c === '') { // EOF
5454 tok_state = tok_state_data
5455 tok_cur_tag.flag('force-quirks', true)
5456 cur -= 1 // reconsume
5460 tok_cur_tag.public_identifier += c // ordinary identifier character
5464 // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
// Tokenizer state function: reads one character of a single-quoted public
// identifier, appending to tok_cur_tag.public_identifier until the identifier
// ends. Two branch conditions are missing from this listing.
5465 tok_state_doctype_public_identifier_single_quoted = function () {
5467 c = txt.charAt(cur++)
5469 tok_state = tok_state_after_doctype_public_identifier
5472 if (c === "\u0000") {
5474 tok_cur_tag.public_identifier += "\ufffd"
5479 tok_cur_tag.flag('force-quirks', true)
5480 tok_state = tok_state_data
5483 if (c === '') { // EOF
5485 tok_state = tok_state_data
5486 tok_cur_tag.flag('force-quirks', true)
5487 cur -= 1 // reconsume
5491 tok_cur_tag.public_identifier += c // ordinary identifier character
5495 // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
// Tokenizer state function: reads the character after a public identifier.
// It may start an empty system identifier (double- or single-quoted), return
// to the data state, or flag force-quirks. Several branch conditions are
// missing from this listing.
5496 tok_state_after_doctype_public_identifier = function () {
5498 c = txt.charAt(cur++)
5499 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5500 tok_state = tok_state_between_doctype_public_and_system_identifiers
5504 tok_state = tok_state_data
5509 tok_cur_tag.system_identifier = ''
5510 tok_state = tok_state_doctype_system_identifier_double_quoted
5515 tok_cur_tag.system_identifier = ''
5516 tok_state = tok_state_doctype_system_identifier_single_quoted
5519 if (c === '') { // EOF
5521 tok_state = tok_state_data
5522 tok_cur_tag.flag('force-quirks', true)
5523 cur -= 1 // reconsume
// anything else: force-quirks and skip the rest as a bogus doctype
5528 tok_cur_tag.flag('force-quirks', true)
5529 tok_state = tok_state_bogus_doctype
5533 // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
// Tokenizer state function: reads one character between the public and system
// identifiers. It may start an empty system identifier (double- or
// single-quoted), return to the data state, or flag force-quirks. Several
// branch conditions are missing from this listing.
5534 tok_state_between_doctype_public_and_system_identifiers = function () {
5536 c = txt.charAt(cur++)
5537 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5541 tok_state = tok_state_data
5546 tok_cur_tag.system_identifier = ''
5547 tok_state = tok_state_doctype_system_identifier_double_quoted
5552 tok_cur_tag.system_identifier = ''
5553 tok_state = tok_state_doctype_system_identifier_single_quoted
5556 if (c === '') { // EOF
5558 tok_state = tok_state_data
5559 tok_cur_tag.flag('force-quirks', true)
5560 cur -= 1 // reconsume
// anything else: force-quirks and skip the rest as a bogus doctype
5565 tok_cur_tag.flag('force-quirks', true)
5566 tok_state = tok_state_bogus_doctype
5570 // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
// Tokenizer state function: reads the character after the "system" keyword.
// It either starts an empty system identifier (double- or single-quoted) or
// flags the doctype force-quirks. Several branch conditions are missing from
// this listing.
5571 tok_state_after_doctype_system_keyword = function () {
5573 c = txt.charAt(cur++)
5574 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5575 tok_state = tok_state_before_doctype_system_identifier
// identifier starts empty; the quoted-identifier state appends to it
5580 tok_cur_tag.system_identifier = ''
5581 tok_state = tok_state_doctype_system_identifier_double_quoted
5586 tok_cur_tag.system_identifier = ''
5587 tok_state = tok_state_doctype_system_identifier_single_quoted
5592 tok_cur_tag.flag('force-quirks', true)
5593 tok_state = tok_state_data
5596 if (c === '') { // EOF
5598 tok_state = tok_state_data
5599 tok_cur_tag.flag('force-quirks', true)
5600 cur -= 1 // reconsume
// anything else: force-quirks and skip the rest as a bogus doctype
5605 tok_cur_tag.flag('force-quirks', true)
5606 tok_state = tok_state_bogus_doctype
5610 // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
// Tokenizer state function: reads one character before the system identifier.
// It either starts an empty system identifier (double- or single-quoted) or
// flags the doctype force-quirks. Several branch conditions are missing from
// this listing.
5611 tok_state_before_doctype_system_identifier = function () {
5613 c = txt.charAt(cur++)
5614 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5618 tok_cur_tag.system_identifier = ''
5619 tok_state = tok_state_doctype_system_identifier_double_quoted
5623 tok_cur_tag.system_identifier = ''
5624 tok_state = tok_state_doctype_system_identifier_single_quoted
5629 tok_cur_tag.flag('force-quirks', true)
5630 tok_state = tok_state_data
5633 if (c === '') { // EOF
5635 tok_state = tok_state_data
5636 tok_cur_tag.flag('force-quirks', true)
5637 cur -= 1 // reconsume
// anything else: force-quirks and skip the rest as a bogus doctype
5642 tok_cur_tag.flag('force-quirks', true)
5643 tok_state = tok_state_bogus_doctype
5647 // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
// Tokenizer state function: reads one character of a double-quoted system
// identifier, appending to tok_cur_tag.system_identifier until the identifier
// ends. Two branch conditions are missing from this listing.
5648 tok_state_doctype_system_identifier_double_quoted = function () {
5650 c = txt.charAt(cur++)
5652 tok_state = tok_state_after_doctype_system_identifier
5655 if (c === "\u0000") {
5657 tok_cur_tag.system_identifier += "\ufffd"
5662 tok_cur_tag.flag('force-quirks', true)
5663 tok_state = tok_state_data
5666 if (c === '') { // EOF
5668 tok_state = tok_state_data
5669 tok_cur_tag.flag('force-quirks', true)
5670 cur -= 1 // reconsume
5674 tok_cur_tag.system_identifier += c // ordinary identifier character
5678 // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
// Tokenizer state function: reads one character of a single-quoted system
// identifier, appending to tok_cur_tag.system_identifier until the identifier
// ends. Two branch conditions are missing from this listing.
5679 tok_state_doctype_system_identifier_single_quoted = function () {
5681 c = txt.charAt(cur++)
5683 tok_state = tok_state_after_doctype_system_identifier
5686 if (c === "\u0000") {
5688 tok_cur_tag.system_identifier += "\ufffd"
5693 tok_cur_tag.flag('force-quirks', true)
5694 tok_state = tok_state_data
5697 if (c === '') { // EOF
5699 tok_state = tok_state_data
5700 tok_cur_tag.flag('force-quirks', true)
5701 cur -= 1 // reconsume
5705 tok_cur_tag.system_identifier += c // ordinary identifier character
5709 // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
// Tokenizer state function: reads the character after the system identifier.
// Unlike the other doctype states, the "anything else" path goes to the
// bogus-doctype state WITHOUT setting force-quirks; only EOF sets the flag.
// One branch condition is missing from this listing.
5710 tok_state_after_doctype_system_identifier = function () {
5712 c = txt.charAt(cur++)
5713 if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
5717 tok_state = tok_state_data
5720 if (c === '') { // EOF
5722 tok_state = tok_state_data
5723 tok_cur_tag.flag('force-quirks', true)
5724 cur -= 1 // reconsume
5729 // do _not_ tok_cur_tag.flag 'force-quirks', true
5730 tok_state = tok_state_bogus_doctype
5734 // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
// Tokenizer state function: reads one character of a malformed doctype. Both
// visible branches return to the data state; the EOF branch also backs up so
// the data state sees EOF again. The first branch's condition is missing from
// this listing.
5735 tok_state_bogus_doctype = function () {
5737 c = txt.charAt(cur++)
5739 tok_state = tok_state_data
5742 if (c === '') { // EOF
5743 tok_state = tok_state_data
5744 cur -= 1 // reconsume
5751 // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
// Tokenizer state function: takes the text from cur up to the next "]]>", or
// the rest of txt when there is none, replaces NUL characters with U+FFFD and
// returns it as ONE character token when non-empty. The tokenizer goes back to
// the data state straight away. The lines that advance cur are missing from
// this listing.
5752 tok_state_cdata_section = function () {
5754 tok_state = tok_state_data
5755 next_gt = txt.indexOf(']]>', cur) // index of the closing "]]>" despite the name
5756 if (next_gt === -1) {
5757 val = txt.substr(cur) // unterminated section: take the remainder of the input
5760 val = txt.substr(cur, next_gt - cur)
5763 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
5764 if (val.length > 0) {
5765 return new_character_token(val) // fixfull split
5770 // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
5771 // Don't set this as a state, just call it
5772 // returns a string (NOT a text node)
// Parameters:
//   allowed_char - may be omitted; the attribute-value states pass the quote
//                  character or '>' (default assignment is missing from this listing)
//   in_attr      - may be omitted; the attribute-value states pass true
// Handles numeric references (decimal or hex) and named references, including
// legacy names that may appear without a terminating ";". Many branch bodies
// and return statements are missing from this listing.
5773 parse_character_reference = function (allowed_char, in_attr) {
5774 var base, c, charset, code_point, decoded, i, max, start
5775 if (allowed_char == null) {
5778 if (in_attr == null) {
5781 if (cur >= txt.length) {
5784 switch (c = txt.charAt(cur)) {
5793 // explicitly not a parse error
5797 // there has to be "one or more" alnums between & and ; to be a parse error
5801 if (cur + 1 >= txt.length) {
// an "x" or "X" after the current character selects the hexadecimal form
5804 if (txt.charAt(cur + 1).toLowerCase() === 'x') {
// count how many characters after start belong to the digit set for this base
5814 while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
5821 if (txt.charAt(start + i) === ';') {
5826 code_point = txt.substr(start, i)
// strip leading zeros but keep at least one digit
5827 while (code_point.charAt(0) === '0' && code_point.length > 1) {
5828 code_point = code_point.substr(1)
5830 code_point = parseInt(code_point, base)
// code points with an entry in unicode_fixes are returned as the mapped value
5831 if (unicode_fixes[code_point] != null) {
5833 return unicode_fixes[code_point]
// surrogate range or beyond the last Unicode code point
5835 if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
// control characters and noncharacters; the branch body is missing from this
// listing, but the code point is still converted and returned below
5839 if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
5842 return from_code_point(code_point)
// named reference: scan at most 31 characters for the first non-alphanumeric one
5848 for (i = 0; i < 31; ++i) {
5849 if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
5854 // exit early, because parse_error() below needs at least one alnum
5857 if (txt.charAt(cur + i) === ';') {
5858 decoded = decode_named_char_ref(txt.substr(cur, i))
5859 i += 1 // scan past the ';' (after, so we don't pass it to decode)
5860 if (decoded != null) {
5864 // else FALL THROUGH (check for match without last char(s) or ";")
5866 // no ';' terminator (only legacy char refs)
5868 for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
5869 c = legacy_char_refs[txt.substr(cur, i)]
5872 if (txt.charAt(cur + i) === '=') {
5873 // "because some legacy user agents will
5874 // misinterpret the markup in those cases"
5878 if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
5879 // this makes attributes forgiving about url args
5883 // ok, and besides the weird exceptions for attributes...
5884 // return the matching char
5885 cur += i // consume entity chars
5886 parse_error() // because no terminating ";"
// Inspects a token t and, when it is a text token, tests whether its text is a
// newline. How t and old_cur are obtained, and what happens on a match, are on
// lines missing from this listing. NOTE(review): presumably the token is
// dropped when it is a newline, as the name suggests -- confirm.
5896 eat_next_token_if_newline = function () {
5903 if (t.type === TYPE_TEXT) {
5904 // definition of a newline depends on whether it was a character ref or not
// exactly one input character was consumed since old_cur
5905 if (cur - old_cur === 1) {
5906 // not a character reference
5907 if (t.text === "\u000d" || t.text === "\u000a") {
// otherwise only a line feed counts as a newline
5911 if (t.text === "\u000a") {
5920 // tree constructor initialization
5921 // see comments on TYPE_TAG/etc for the structure of this data
// The document root is an ordinary tag-type Node named 'document', starting in
// no-quirks mode.
5924 doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5925 doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
5926 fragment_root = null // fragment parsing algorithm returns children of this
5928 afe = [] // active formatting elements
5929 template_ins_modes = []
5930 ins_mode = ins_mode_initial
5931 original_ins_mode = ins_mode // TODO check spec
// scripting is treated as enabled unless the caller passes args.scripting
5932 flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get <noscript> to parse correctly
5933 flag_frameset_ok = true
5935 flag_foster_parenting = false
5936 form_element_pointer = null
5937 temporary_buffer = null
5938 pending_table_character_tokens = []
5939 head_element_pointer = null
5940 flag_fragment_parsing = false
5941 context_element = null
5942 prev_node_id = 0 // just for debugging
5944 // tokenizer initialization
5945 tok_state = tok_state_data
// Sets up fragment parsing when the caller supplied a context, either as text
// (args.fragment) or as an existing Node (args.context). With a context
// element it replaces doc with a fresh root, copies the quirks mode from the
// context's document, picks the initial tokenizer state from the context
// element's name, creates fragment_root and records the nearest enclosing
// <form>. Many lines (loops, case labels, assignments) are missing from this
// listing.
5947 parse_init = function () {
5948 var el, f, ns, old_doc, t
5949 // fragment parsing (text arg)
5950 if (args.fragment != null) {
5951 // this handles the fragment from the tests in the format described here:
5952 // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
// a "math " or "svg " prefix on the fragment string is recognised here
// (NOTE(review): presumably it selects the namespace ns -- the assignments are
// missing from this listing)
5955 if (f.substr(0, 5) === 'math ') {
5958 } else if (f.substr(0, 4) === 'svg ') {
// the synthetic context element gets its own no-quirks document node
5963 context_element = token_to_element(t, ns)
5964 context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
5965 context_element.document.flag('quirks mode', QUIRKS_NO)
5967 // fragment parsing (Node arg)
5968 if (args.context != null) {
5969 context_element = args.context
5972 // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
5973 // fragment parsing algorithm
5974 if (context_element != null) {
5975 flag_fragment_parsing = true
5976 doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
5977 // search up the tree from context, to try to find its document,
5978 // because this file only puts a "document" property on the root
5981 el = context_element
5983 if (el.document != null) {
5984 old_doc = el.document
// the new root inherits the quirks mode of the context's document
5994 doc.flag('quirks mode', old_doc.flag('quirks mode'))
// for HTML-namespace contexts the tokenizer's start state depends on the
// context element's name (the case labels are missing from this listing)
5997 if (context_element.namespace === NS_HTML) {
5998 switch (context_element.name) {
6001 tok_state = tok_state_rcdata
6008 tok_state = tok_state_rawtext
6011 tok_state = tok_state_script_data
// this branch switches to rawtext only when scripting is enabled
6014 if (flag_scripting) {
6015 tok_state = tok_state_rawtext
6019 tok_state = tok_state_plaintext
// fragment_root is the only child of doc and the only open element to start with
6022 fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
6023 doc.children.push(fragment_root)
6024 fragment_root.document = doc
6025 open_els = [fragment_root]
6026 if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
6027 template_ins_modes.unshift(ins_mode_in_template)
6029 // fixfull create token for context (it should have its original one already)
6031 // set form_element pointer... in the foreign doc?!
6032 el = context_element
6034 if (el.name === 'form' && el.namespace === NS_HTML) {
6035 form_element_pointer = el
6046 // text pre-processing
6047 // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
// Normalise line endings to "\n": CRLF pairs first, then any remaining lone
// CR, so that a CRLF never turns into two newlines.
6048 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
6049 txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
6052 // http://www.w3.org/TR/html5/syntax.html#tree-construction
// Runs the parser while flag_parsing is set; the loop body is missing from this
// listing. When parsing a fragment, returns the children of fragment_root.
// The return value for the non-fragment case is on a missing line.
6053 parse_main_loop = function () {
6055 while (flag_parsing) {
6059 // fixfull parse error if has self-closing flag, but it wasn't acknowledged
6066 if (flag_fragment_parsing) {
6067 return fragment_root.children
// Public API: the parse entry point (parse_html), the debug_log_* functions,
// and the node-type (TYPE_*), namespace (NS_*) and quirks-mode (QUIRKS_*)
// constants, all defined elsewhere in this file.
6072 exports.parse = parse_html
6074 exports.debug_log_reset = debug_log_reset
6075 exports.debug_log_each = debug_log_each
6076 exports.TYPE_TAG = TYPE_TAG
6077 exports.TYPE_TEXT = TYPE_TEXT
6078 exports.TYPE_COMMENT = TYPE_COMMENT
6079 exports.TYPE_DOCTYPE = TYPE_DOCTYPE
6080 exports.NS_HTML = NS_HTML
6081 exports.NS_MATHML = NS_MATHML
6082 exports.NS_SVG = NS_SVG
6083 exports.QUIRKS_NO = QUIRKS_NO
6084 exports.QUIRKS_LIMITED = QUIRKS_LIMITED
6085 exports.QUIRKS_YES = QUIRKS_YES