X-Git-Url: https://jasonwoof.com/gitweb/?a=blobdiff_plain;f=parser.js;fp=parser.js;h=ea87c858bd44622989ace630e8cfb56fe620cea4;hb=5aef791edd38fb3d70a71266ad0b42cf9fb45593;hp=0000000000000000000000000000000000000000;hpb=10bde13b5d10afeea68c4f0995a2aae0a0137c23;p=peach-html5-editor.git diff --git a/parser.js b/parser.js new file mode 100644 index 0000000..ea87c85 --- /dev/null +++ b/parser.js @@ -0,0 +1,6078 @@ +// todo remove refs and lens, js, ls +// run test suite! + +// Copyright 2015 Jason Woofenden +// This file implements an HTML5 parser +// +// This program is free software: you can redistribute it and/or modify it under +// the terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) any +// later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + + +// This file implements a thorough parser for html5, meant to be used by a +// WYSIWYG editor. + +// The implementation is a pretty direct implementation of the parsing algorithm +// described here: +// +// http://www.w3.org/TR/html5/syntax.html +// +// except for some places marked "WHATWG" that are implemented as described here: +// +// https://html.spec.whatwg.org/multipage/syntax.html +// +// This code passes all of the tests in the .dat files at: +// +// https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction + + +////////////////////////// +// how to use this code // +////////////////////////// +// +// See README.md for how to run this file in the browser or in node.js. 
//
// This file exports a single useful function: parse_html (called as
// peach_parser.parse in the examples below), and some constants
// (see the bottom of this file for those.)
//
// Call it like this:
//
//     peach_parser.parse("<p>hi</p>")
//
// Or, if you don't want <html>/<head>/<body>/etc, do this:
//
//     peach_parser.parse("<p>hi</p>", {fragment: "body"})
//
// return value is an array of Nodes, see "class Node" below.

// This code is a work in progress, eg try search this file for "fixfull",
// "TODO" and "FIXME"


// Notes: stacks/lists
//
// Jason was frequently confused by the terminology used to refer to different
// parts of the stacks and lists in the spec, so he made this chart to help keep
// his head straight:
//
// stacks grow downward (current element is index=0)
//
// example: open_els = [a, b, c, d, e, f, g]
//
// "grows downwards" means it's visualized like this: (index: el "names")
//
//   6: g "start of the list", "topmost", "first"
//   5: f
//   4: e "previous" (to d), "above", "before"
//   3: d   (previous/next are relative to this element)
//   2: c "next", "after", "lower", "below"
//   1: b
//   0: a "end of the list", "current node", "bottommost", "last"

// Work out where we are running and where the public API object lives.
// `context` is 'module' when a CommonJS `module.exports` is available and
// 'browser' otherwise; `exports` is the object the API is attached to.
// Everything here is declared with `var` so that loading the file in strict
// mode (or as an ES module) does not throw on implicit global assignment.
var context, exports
if ((typeof module) !== 'undefined' && (module.exports != null)) {
    context = 'module'
    exports = module.exports
} else {
    context = 'browser'
    exports = {}
    // Publish as window.peach_parser; when there is no window (eg a worker
    // or a non-CommonJS runtime) fall back to the global object instead of
    // throwing a ReferenceError.
    if ((typeof window) !== 'undefined') {
        window.peach_parser = exports
    } else if ((typeof globalThis) !== 'undefined') {
        globalThis.peach_parser = exports
    }
}

// Convert a Unicode code point (number) to a string. Uses
// String.fromCodePoint when the engine has it, otherwise builds the UTF-16
// surrogate pair by hand for code points above 0xffff.
var from_code_point = function (x) {
    if (String.fromCodePoint != null) {
        return String.fromCodePoint(x)
    }
    if (x <= 0xffff) {
        return String.fromCharCode(x)
    }
    x -= 0x10000
    return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
}

// Each node is an object of the Node class. Here are the Node types:
var TYPE_TAG = 'tag' // name, {attributes}, [children]
var TYPE_TEXT = 'text' // "text"
var TYPE_COMMENT = 'comment'
var TYPE_DOCTYPE = 'doctype'
// the following types are emitted by the tokenizer, but shouldn't end up in the tree:
var TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
var TYPE_END_TAG = 5 // name
var TYPE_EOF = 6
var TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
var TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm

// namespace constants
var NS_HTML = 'html'
var NS_MATHML = 'mathml'
var NS_SVG = 'svg'

// quirks mode constants
var QUIRKS_NO = 'no'
var QUIRKS_LIMITED = 'limited'
var QUIRKS_YES = 'yes'

// queue up debug logs, so eg they can be shown only for tests that fail
var g_debug_log = []
var debug_log_reset = function () {
    g_debug_log = []
}
var debug_log = function (str) {
    g_debug_log.push(str)
}
// calls cb(message) for every queued message, oldest first
var debug_log_each = function (cb) {
    var i
    for (i = 0; i < g_debug_log.length; ++i) {
        cb(g_debug_log[i])
    }
}

// counter used to hand out Node ids
var prev_node_id = 0

// Node: used both for tokens and for tree nodes.
//
// type: one of the TYPE_* constants above
// args: optional object; any of name, text, attrs, attrs_a, children,
//       namespace, parent, token, flags, id. Missing (null/undefined)
//       entries get the defaults below.
//
// When args.id is given the new node's id is that id with "+" appended,
// otherwise the node gets the next value of the prev_node_id counter (as a
// string).
function Node (type, args) {
    if (args == null) {
        args = {}
    }
    this.type = type // one of the TYPE_* constants above
    this.name = args.name != null ? args.name : '' // tag name
    this.text = args.text != null ? args.text : '' // contents for text/comment nodes
    this.attrs = args.attrs != null ? args.attrs : {}
    // attrs in progress, TYPE_START_TAG only. The option is accepted under
    // the property's own name (attrs_a); the historical spelling attr_k is
    // still honoured so existing callers keep working.
    if (args.attrs_a != null) {
        this.attrs_a = args.attrs_a
    } else if (args.attr_k != null) {
        this.attrs_a = args.attr_k
    } else {
        this.attrs_a = []
    }
    this.children = args.children != null ? args.children : []
    this.namespace = args.namespace != null ? args.namespace : NS_HTML
    this.parent = args.parent != null ? args.parent : null
    this.token = args.token != null ? args.token : null
    this.flags = args.flags != null ? args.flags : {}
    if (args.id != null) {
        this.id = args.id + "+"
    } else {
        this.id = "" + (++prev_node_id)
    }
}

// Sets the 'did_self_close' flag on this node's token if it has one,
// otherwise on the node itself.
Node.prototype.acknowledge_self_closing = function () {
    if (this.token != null) {
        this.token.flag('did_self_close', true)
    } else {
        this.flag('did_self_close', true)
    }
}

// flag(key, value): set a flag (returns undefined)
// flag(key):        get a flag
// Note that passing null/undefined as the value reads rather than writes.
Node.prototype.flag = function (key, value) {
    if (value != null) {
        this.flags[key] = value
    } else {
        return this.flags[key]
    }
}

// helpers: (only take args that are normally known when parser creates nodes)
var new_open_tag = function (name) {
    return new Node(TYPE_START_TAG, {name: name})
}
var new_end_tag = function (name) {
    return new Node(TYPE_END_TAG, {name: name})
}
var new_element = function (name) {
    return new Node(TYPE_TAG, {name: name})
}
var new_text_node = function (txt) {
    return new Node(TYPE_TEXT, {text: txt})
}
var new_character_token = new_text_node
var new_comment_token = function (txt) {
    return new Node(TYPE_COMMENT, {text: txt})
}
var new_doctype_token = function (name) {
    return new Node(TYPE_DOCTYPE, {name: name})
}
var new_eof_token = function () {
    return new Node(TYPE_EOF)
}
var new_afe_marker = function () {
    return new Node(TYPE_AFE_MARKER)
}
var new_aaa_bookmark = function () {
    return new Node(TYPE_AAA_BOOKMARK)
}

var lc_alpha = "abcdefghijklmnopqrstuvwxyz"
var uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
var digits = "0123456789"
var alnum = lc_alpha + uc_alpha + digits
var hex_chars = digits + "abcdefABCDEF"

// true only for a single-character string that is an ASCII capital letter
var is_uc_alpha = function (str) {
    return str.length === 1 && uc_alpha.indexOf(str) > -1
}
// true only for a single-character string that is an ASCII lowercase letter
var is_lc_alpha = function (str) {
    return str.length === 1 && lc_alpha.indexOf(str) > -1
}

// some SVG elements have dashes in them
var tag_name_chars = alnum + "-"

// http://www.w3.org/TR/html5/infrastructure.html#space-character
var space_chars = "\u0009\u000a\u000c\u000d\u0020"
// true only for a single-character string that is one of space_chars
var is_space = function (txt) {
    return txt.length === 1 && space_chars.indexOf(txt) > -1
}
// true for a text token whose text is exactly one space character
var is_space_tok = function (t) {
    return t.type === TYPE_TEXT && t.text.length === 1 && space_chars.indexOf(t.text) > -1
}

// true for a start tag token whose first "type" attribute (in attrs_a order)
// has the value "hidden", compared case-insensitively
var is_input_hidden_tok = function (t) {
    var i, a
    if (t.type !== TYPE_START_TAG) {
        return false
    }
    for (i = 0; i < t.attrs_a.length; ++i) {
        a = t.attrs_a[i]
        if (a[0] === 'type') {
            // only the first "type" entry is considered
            return a[1].toLowerCase() === 'hidden'
        }
    }
    return false
}

// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
var whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"

// code point -> replacement character
var unicode_fixes = {
    0x00: "\uFFFD",
    0x80: "\u20AC",
    0x82: "\u201A",
    0x83: "\u0192",
    0x84: "\u201E",
    0x85: "\u2026",
    0x86: "\u2020",
    0x87: "\u2021",
    0x88: "\u02C6",
    0x89: "\u2030",
    0x8A: "\u0160",
    0x8B: "\u2039",
    0x8C: "\u0152",
    0x8E: "\u017D",
    0x91: "\u2018",
    0x92: "\u2019",
    0x93: "\u201C",
    0x94: "\u201D",
    0x95: "\u2022",
    0x96: "\u2013",
    0x97: "\u2014",
    0x98: "\u02DC",
    0x99: "\u2122",
    0x9A: "\u0161",
    0x9B: "\u203A",
    0x9C: "\u0153",
    0x9E: "\u017E",
    0x9F: "\u0178"
}

// lowercase prefixes (presumably of doctype public identifiers, going by the
// name and contents -- the code that consumes this list is further down the
// file)
var quirks_yes_pi_prefixes = [
    "+//silmaril//dtd html pro v0r11 19970101//",
    "-//as//dtd html 3.0 aswedit + extensions//",
    "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
    "-//ietf//dtd html 2.0 level 1//",
    "-//ietf//dtd html 2.0 level 2//",
    "-//ietf//dtd html 2.0 strict level 1//",
    "-//ietf//dtd html 2.0 strict level 2//",
    "-//ietf//dtd html 2.0 strict//",
    "-//ietf//dtd html 2.0//",
    "-//ietf//dtd html 2.1e//",
    "-//ietf//dtd html 3.0//",
    "-//ietf//dtd html 3.2 final//",
    "-//ietf//dtd html 3.2//",
    "-//ietf//dtd html 3//",
    "-//ietf//dtd html level 0//",
    "-//ietf//dtd html level 1//",
    "-//ietf//dtd html level 2//",
    "-//ietf//dtd html level 3//",
    "-//ietf//dtd html strict level 0//",
    "-//ietf//dtd html strict level 1//",
    "-//ietf//dtd html strict level 2//",
    "-//ietf//dtd html strict level 3//",
    "-//ietf//dtd html strict//",
    "-//ietf//dtd html//",
    "-//metrius//dtd metrius presentational//",
    "-//microsoft//dtd internet explorer 2.0 html strict//",
    "-//microsoft//dtd internet explorer 2.0 html//",
    "-//microsoft//dtd internet explorer 2.0 tables//",
    "-//microsoft//dtd internet explorer 3.0 html strict//",
    "-//microsoft//dtd internet explorer 3.0 html//",
    "-//microsoft//dtd internet explorer 3.0 tables//",
    "-//netscape comm. corp.//dtd html//",
    "-//netscape comm. corp.//dtd strict html//",
    "-//o'reilly and associates//dtd html 2.0//",
    "-//o'reilly and associates//dtd html extended 1.0//",
    "-//o'reilly and associates//dtd html extended relaxed 1.0//",
    "-//sq//dtd html 2.0 hotmetal + extensions//",
    "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
    "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
    "-//spyglass//dtd html 2.0 extended//",
    "-//sun microsystems corp.//dtd hotjava html//",
    "-//sun microsystems corp.//dtd hotjava strict html//",
    "-//w3c//dtd html 3 1995-03-24//",
    "-//w3c//dtd html 3.2 draft//",
    "-//w3c//dtd html 3.2 final//",
    "-//w3c//dtd html 3.2//",
    "-//w3c//dtd html 3.2s draft//",
    "-//w3c//dtd html 4.0 frameset//",
    "-//w3c//dtd html 4.0 transitional//",
    "-//w3c//dtd html experimental 19960712//",
    "-//w3c//dtd html experimental 970421//",
    "-//w3c//dtd w3 html//",
    "-//w3o//dtd w3 html 3.0//",
    "-//webtechs//dtd mozilla html 2.0//",
    "-//webtechs//dtd mozilla html//",
]

// These are the character references that don't need a terminating semicolon
// min length: 2, max: 6, none are a prefix
of any other. +legacy_char_refs = { + Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ', + aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å', + aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦', + Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©', + curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê', + ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë', + euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>', + Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì', + igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<', + lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬', + Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô', + Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø', + Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±', + pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§', + shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ', + times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù', + ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý', + yen: '¥', yuml: 'ÿ' +} + +//void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'] +//raw_text_elements = ['script', 'style'] +//escapable_raw_text_elements = ['textarea', 'title'] +// http://www.w3.org/TR/SVG/ 1.1 (Second Edition) +svg_elements = [ + 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor', + 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile', + 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix', + 'feComponentTransfer', 'feComposite', 'feConvolveMatrix', + 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood', + 
'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage', + 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight', + 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter', + 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src', + 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern', + 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata', + 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline', + 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg', + 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use', + 'view', 'vkern' +] + +// http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition +mathml_elements = [ + 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos', + 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech', + 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card', + 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain', + 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot', + 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree', + 'determinant', 'diff', 'divergence', 'divide', 'domain', + 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma', + 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor', + 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary', + 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect', + 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit', + 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup', + 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median', + 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min', + 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode', + 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 
'mprescripts', + 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline', + 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup', + 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers', + 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset', + 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece', + 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient', + 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct', + 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff', + 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto', + 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector', + 'vectorproduct', 'xor' +] +// foreign_elements = [svg_elements..., mathml_elements...] +//normal_elements = All other allowed HTML elements are normal elements. + +special_elements = { + // HTML: + address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML, + aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML, + blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML, + caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML, + details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML, + embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML, + footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML, + h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML, + header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML, + img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML, + listing: NS_HTML, main: NS_HTML, marquee: NS_HTML, + + menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these + + meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML, + noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML, + plaintext: NS_HTML, 
pre: NS_HTML, script: NS_HTML, section: NS_HTML, + select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML, + table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML, + textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML, + tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML, + + // MathML: + mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML, + 'annotation-xml': NS_MATHML, + + // SVG: + foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG +} + +formatting_elements = { + a: true, b: true, big: true, code: true, em: true, font: true, i: true, + nobr: true, s: true, small: true, strike: true, strong: true, tt: true, + u: true +} + +mathml_text_integration = { + mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML +} +is_mathml_text_integration_point = function (el) { + return mathml_text_integration[el.name] === el.namespace +} +is_html_integration = function (el) { // DON'T PASS A TOKEN + if (el.namespace === NS_MATHML) { + if (el.name === 'annotation-xml') { + if (el.attrs.encoding != null) { + if (el.attrs.encoding.toLowerCase() === 'text/html') { + return true + } + if (el.attrs.encoding.toLowerCase() === 'application/xhtml+xml') { + return true + } + } + } + return false + } + if (el.namespace === NS_SVG) { + if (el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title') { + return true + } + } + return false +} + +h_tags = { + h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML +} + +foster_parenting_targets = { + table: NS_HTML, + tbody: NS_HTML, + tfoot: NS_HTML, + thead: NS_HTML, + tr: NS_HTML +} + +end_tag_implied = { + dd: NS_HTML, + dt: NS_HTML, + li: NS_HTML, + option: NS_HTML, + optgroup: NS_HTML, + p: NS_HTML, + rb: NS_HTML, + rp: NS_HTML, + rt: NS_HTML, + rtc: NS_HTML +} + +el_is_special = function (e) { + return special_elements[e.name] === e.namespace +} + +adp_els = { address: NS_HTML, div: NS_HTML, 
p: NS_HTML } +el_is_special_not_adp = function (el) { + return special_elements[el.name] === el.namespace && adp_els[el.name] !== el.namespace +} + +svg_name_fixes = { + altglyph: 'altGlyph', + altglyphdef: 'altGlyphDef', + altglyphitem: 'altGlyphItem', + animatecolor: 'animateColor', + animatemotion: 'animateMotion', + animatetransform: 'animateTransform', + clippath: 'clipPath', + feblend: 'feBlend', + fecolormatrix: 'feColorMatrix', + fecomponenttransfer: 'feComponentTransfer', + fecomposite: 'feComposite', + feconvolvematrix: 'feConvolveMatrix', + fediffuselighting: 'feDiffuseLighting', + fedisplacementmap: 'feDisplacementMap', + fedistantlight: 'feDistantLight', + fedropshadow: 'feDropShadow', + feflood: 'feFlood', + fefunca: 'feFuncA', + fefuncb: 'feFuncB', + fefuncg: 'feFuncG', + fefuncr: 'feFuncR', + fegaussianblur: 'feGaussianBlur', + feimage: 'feImage', + femerge: 'feMerge', + femergenode: 'feMergeNode', + femorphology: 'feMorphology', + feoffset: 'feOffset', + fepointlight: 'fePointLight', + fespecularlighting: 'feSpecularLighting', + fespotlight: 'feSpotLight', + fetile: 'feTile', + feturbulence: 'feTurbulence', + foreignobject: 'foreignObject', + glyphref: 'glyphRef', + lineargradient: 'linearGradient', + radialgradient: 'radialGradient', + textpath: 'textPath' +} +svg_attribute_fixes = { + attributename: 'attributeName', + attributetype: 'attributeType', + basefrequency: 'baseFrequency', + baseprofile: 'baseProfile', + calcmode: 'calcMode', + clippathunits: 'clipPathUnits', + contentscripttype: 'contentScriptType', + contentstyletype: 'contentStyleType', + diffuseconstant: 'diffuseConstant', + edgemode: 'edgeMode', + externalresourcesrequired: 'externalResourcesRequired', + // WHATWG removes this: filterres: 'filterRes', + filterunits: 'filterUnits', + glyphref: 'glyphRef', + gradienttransform: 'gradientTransform', + gradientunits: 'gradientUnits', + kernelmatrix: 'kernelMatrix', + kernelunitlength: 'kernelUnitLength', + keypoints: 'keyPoints', + 
keysplines: 'keySplines', + keytimes: 'keyTimes', + lengthadjust: 'lengthAdjust', + limitingconeangle: 'limitingConeAngle', + markerheight: 'markerHeight', + markerunits: 'markerUnits', + markerwidth: 'markerWidth', + maskcontentunits: 'maskContentUnits', + maskunits: 'maskUnits', + numoctaves: 'numOctaves', + pathlength: 'pathLength', + patterncontentunits: 'patternContentUnits', + patterntransform: 'patternTransform', + patternunits: 'patternUnits', + pointsatx: 'pointsAtX', + pointsaty: 'pointsAtY', + pointsatz: 'pointsAtZ', + preservealpha: 'preserveAlpha', + preserveaspectratio: 'preserveAspectRatio', + primitiveunits: 'primitiveUnits', + refx: 'refX', + refy: 'refY', + repeatcount: 'repeatCount', + repeatdur: 'repeatDur', + requiredextensions: 'requiredExtensions', + requiredfeatures: 'requiredFeatures', + specularconstant: 'specularConstant', + specularexponent: 'specularExponent', + spreadmethod: 'spreadMethod', + startoffset: 'startOffset', + stddeviation: 'stdDeviation', + stitchtiles: 'stitchTiles', + surfacescale: 'surfaceScale', + systemlanguage: 'systemLanguage', + tablevalues: 'tableValues', + targetx: 'targetX', + targety: 'targetY', + textlength: 'textLength', + viewbox: 'viewBox', + viewtarget: 'viewTarget', + xchannelselector: 'xChannelSelector', + ychannelselector: 'yChannelSelector', + zoomandpan: 'zoomAndPan' +} +foreign_attr_fixes = { + 'xlink:actuate': 'xlink actuate', + 'xlink:arcrole': 'xlink arcrole', + 'xlink:href': 'xlink href', + 'xlink:role': 'xlink role', + 'xlink:show': 'xlink show', + 'xlink:title': 'xlink title', + 'xlink:type': 'xlink type', + 'xml:base': 'xml base', + 'xml:lang': 'xml lang', + 'xml:space': 'xml space', + 'xmlns': 'xmlns', + 'xmlns:xlink': 'xmlns xlink' +} +adjust_mathml_attributes = function (t) { + var i, a + for (i = 0; i < t.attrs_a.length; ++i) { + a = t.attrs_a[i] + if (a[0] === 'definitionurl') { + a[0] = 'definitionURL' + } + } +} +adjust_svg_attributes = function (t) { + var i, a + for (i = 0; i < 
t.attrs_a.length; ++i) { + a = t.attrs_a[i] + if (svg_attribute_fixes[a[0]] != null) { + a[0] = svg_attribute_fixes[a[0]] + } + } +} +adjust_foreign_attributes = function (t) { + // fixfull + var i, a + for (i = 0; i < t.attrs_a.length; ++i) { + a = t.attrs_a[i] + if (foreign_attr_fixes[a[0]] != null) { + a[0] = foreign_attr_fixes[a[0]] + } + } +} + +// decode_named_char_ref() +// +// The list of named character references is _huge_ so if we're running in a +// browser, we get the browser to decode them, rather than increasing the code +// size to include the table. +if (context === 'module') { + _decode_named_char_ref = require('./parser_no_browser_helper.js') +} else { + decode_named_char_ref_el = document.createElement('textarea') + _decode_named_char_ref = function (txt) { + var decoded + txt = "&" + txt + ";" + decode_named_char_ref_el.innerHTML = txt + decoded = decode_named_char_ref_el.value + if (decoded === txt) { + return null + } + return decoded + } +} +// Pass the name of a named entity _that has a terminating semicolon_ +// Entities without terminating semicolons should use legacy_char_refs[] +// Do not include the "&" or ";" in your argument, eg pass "alpha" +decode_named_char_ref_cache = {} +decode_named_char_ref = function (txt) { + var decoded + decoded = decode_named_char_ref_cache[txt] + if (decoded != null) { + return decoded + } + decoded = _decode_named_char_ref(txt) + return decode_named_char_ref_cache[txt] = decoded +} + +parse_html = function (args_html, args) { + var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, 
flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, ref, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, template_tag_is_open, temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, 
tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt + if (args == null) { + args = {} + } + txt = null + cur = null // index of next char in txt to be parsed + // declare doc and tokenizer 
// variables so they're in scope below
    doc = null
    open_els = null // stack of open elements
    afe = null // active formatting elements
    template_ins_modes = null
    ins_mode = null
    original_ins_mode = null
    tok_state = null
    tok_cur_tag = null // partially parsed tag
    flag_scripting = null
    flag_frameset_ok = null
    flag_parsing = null
    flag_foster_parenting = null
    form_element_pointer = null
    temporary_buffer = null
    pending_table_character_tokens = null
    head_element_pointer = null
    flag_fragment_parsing = null
    context_element = null

    // clears flag_parsing
    stop_parsing = function () {
        flag_parsing = false
    }

    // reports a parse error to args.error_cb (if the caller supplied one),
    // passing the current input position
    parse_error = function () {
        if (args.error_cb != null) {
            args.error_cb(cur)
        }
    }

    // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
    // "Noah's Ark clause" but with three
    //
    // Adds new_el at index 0 of afe. Before that, entries are scanned from
    // index 0 up to (not including) the first marker; if a third entry with
    // the same name, namespace and attributes as new_el is found, that third
    // one is removed from the list.
    afe_push = function (new_el) {
        var i, el, k, same_attrs, matches
        matches = 0
        for (i = 0; i < afe.length; ++i) {
            el = afe[i]
            if (el.type === TYPE_AFE_MARKER) {
                break
            }
            if (el.name !== new_el.name || el.namespace !== new_el.namespace) {
                continue
            }
            // attributes must match in both directions: every attribute of
            // el is on new_el with the same value, and vice versa
            same_attrs = true
            for (k in el.attrs) {
                if (new_el.attrs[k] !== el.attrs[k]) {
                    same_attrs = false
                    break
                }
            }
            if (same_attrs) {
                for (k in new_el.attrs) {
                    if (el.attrs[k] !== new_el.attrs[k]) {
                        same_attrs = false
                        break
                    }
                }
            }
            if (!same_attrs) {
                continue
            }
            matches += 1
            if (matches === 3) {
                afe.splice(i, 1)
                break
            }
        }
        afe.unshift(new_el)
    }

    // adds a marker at index 0 of afe
    afe_push_marker = function () {
        afe.unshift(new_afe_marker())
    }

    // the functions below implement the Tree Construction algorithm
    // http://www.w3.org/TR/html5/syntax.html#tree-construction

    // But first...
// the helpers

    // true when an HTML template element is anywhere on the stack of open
    // elements
    template_tag_is_open = function () {
        var i, el
        for (i = 0; i < open_els.length; ++i) {
            el = open_els[i]
            if (el.name === 'template' && el.namespace === NS_HTML) {
                return true
            }
        }
        return false
    }

    // Generic "has an element in scope" test. Walks open_els from the
    // current node (index 0) upward and returns true on the first element
    // named tag_name (in `namespace`, or in any namespace when `namespace`
    // is null). Returns false as soon as an element listed in `scope`
    // (name -> namespace) is reached first, or when the stack runs out.
    is_in_scope_x = function (tag_name, scope, namespace) {
        var i, el
        for (i = 0; i < open_els.length; ++i) {
            el = open_els[i]
            if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
                return true
            }
            if (scope[el.name] === el.namespace) {
                return false
            }
        }
        return false
    }
    // same as is_in_scope_x, but with two lists of scope-ending elements
    is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
        var i, el
        for (i = 0; i < open_els.length; ++i) {
            el = open_els[i]
            if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
                return true
            }
            if (scope[el.name] === el.namespace || scope2[el.name] === el.namespace) {
                return false
            }
        }
        return false
    }
    standard_scopers = {
        applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
        td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
        template: NS_HTML,

        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
        mtext: NS_MATHML, 'annotation-xml': NS_MATHML,

        foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
    }
    button_scopers = { button: NS_HTML }
    li_scopers = { ol: NS_HTML, ul: NS_HTML }
    table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML }

    // In the five is_in_*_scope functions below, `namespace` is optional;
    // when omitted the element name is matched in any namespace.
    is_in_scope = function (tag_name, namespace) {
        if (namespace == null) {
            namespace = null
        }
        return is_in_scope_x(tag_name, standard_scopers, namespace)
    }
    is_in_button_scope = function (tag_name, namespace) {
        if (namespace == null) {
            namespace = null
        }
        return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
    }
    is_in_table_scope = function (tag_name, namespace) {
        if (namespace == null) {
            namespace = null
        }
        return is_in_scope_x(tag_name, table_scopers, namespace)
    }
    // aka is_in_list_item_scope
    is_in_li_scope = function (tag_name, namespace) {
        if (namespace == null) {
            namespace = null
        }
        return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
    }
    // Select scope is the inverse of the others: every element type EXCEPT
    // HTML optgroup and HTML option ends the search.
    is_in_select_scope = function (tag_name, namespace) {
        var i, t, is_html_opt
        if (namespace == null) {
            namespace = null
        }
        for (i = 0; i < open_els.length; ++i) {
            t = open_els[i]
            if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
                return true
            }
            is_html_opt = t.namespace === NS_HTML && (t.name === 'optgroup' || t.name === 'option')
            if (!is_html_opt) {
                return false
            }
        }
        return false
    }
    // this checks for a particular element, not by name
    // this requires a namespace match
    el_is_in_scope = function (needle) {
        var i, el // el is local: it must not leak into the enclosing scope
        for (i = 0; i < open_els.length; ++i) {
            el = open_els[i]
            if (el === needle) {
                return true
            }
            if (standard_scopers[el.name] === el.namespace) {
                return false
            }
        }
        return false
    }

    // True when el is an HTML-namespace element whose name is an own key of
    // `stoppers`. The own-property test keeps names inherited from
    // Object.prototype (eg "constructor") from matching, and the namespace
    // test keeps foreign elements that merely share a name (eg an svg "tr")
    // from matching.
    var is_html_stopper = function (el, stoppers) {
        return el.namespace === NS_HTML && Object.prototype.hasOwnProperty.call(stoppers, el.name)
    }

    // The three clear_stack_to_*_context functions pop elements off the
    // stack of open elements until the current node is one of the listed
    // HTML elements (or the stack is empty).
    clear_to_table_stopers = {
        'table': true,
        'template': true,
        'html': true
    }
    clear_stack_to_table_context = function () {
        while (open_els.length > 0 && !is_html_stopper(open_els[0], clear_to_table_stopers)) {
            open_els.shift()
        }
    }
    clear_to_table_body_stopers = {
        tbody: NS_HTML,
        tfoot: NS_HTML,
        thead: NS_HTML,
        template: NS_HTML,
        html: NS_HTML
    }
    clear_stack_to_table_body_context = function () {
        while (open_els.length > 0 && !is_html_stopper(open_els[0], clear_to_table_body_stopers)) {
            open_els.shift()
        }
    }
    clear_to_table_row_stopers = {
        'tr': true,
        'template': true,
        'html': true
    }
    clear_stack_to_table_row_context = function () {
        while (open_els.length > 0 && !is_html_stopper(open_els[0], clear_to_table_row_stopers)) {
            open_els.shift()
        }
    }
    // removes entries from the front of afe up to and including the first
    // marker; removes everything if there is no marker
    clear_afe_to_marker = function () {
        // running out of entries happens in fragment case, ?spec error
        while (afe.length > 0) {
            if (afe.shift().type === TYPE_AFE_MARKER) {
                return
            }
        }
    }

    // 8.2.3.1 ...
// http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
//
// "Reset the insertion mode appropriately": walks open_els from the current
// node (index 0) towards the root (last index) and sets ins_mode based on
// the first element that determines it. Step numbers refer to the spec
// section linked above.
reset_ins_mode = function () {
    var depth, last, name, node, probe, probe_i
    // 1. last starts out false
    last = false
    // 2, 18, 19. start at the current node and move one element up the
    // stack per iteration
    for (depth = 0; ; depth += 1) {
        node = open_els[depth]
        // 3. when node is the root of the stack, set last; in the fragment
        // case the context element stands in for node
        if (depth === open_els.length - 1) {
            last = true
            if (flag_fragment_parsing) {
                node = context_element
            }
        }
        name = node.name
        // every rule in steps 4-16 applies to HTML-namespace elements only
        if (node.namespace === NS_HTML) {
            switch (name) {
                case 'select':
                    // 4. look upward for an enclosing table (stopping at a
                    // template) unless node is already the last one
                    if (!last) {
                        probe_i = depth
                        while (probe_i !== open_els.length - 1) {
                            probe_i += 1
                            probe = open_els[probe_i]
                            if (probe.name === 'template' && probe.namespace === NS_HTML) {
                                break
                            }
                            if (probe.name === 'table' && probe.namespace === NS_HTML) {
                                ins_mode = ins_mode_in_select_in_table
                                return
                            }
                        }
                    }
                    // 4.8. Done
                    ins_mode = ins_mode_in_select
                    return
                case 'td':
                case 'th':
                    // 5. only when last is false; otherwise step 17 applies
                    if (!last) {
                        ins_mode = ins_mode_in_cell
                        return
                    }
                    break
                case 'tr':
                    // 6.
                    ins_mode = ins_mode_in_row
                    return
                case 'tbody':
                case 'thead':
                case 'tfoot':
                    // 7.
                    ins_mode = ins_mode_in_table_body
                    return
                case 'caption':
                    // 8.
                    ins_mode = ins_mode_in_caption
                    return
                case 'colgroup':
                    // 9.
                    ins_mode = ins_mode_in_column_group
                    return
                case 'table':
                    // 10.
                    ins_mode = ins_mode_in_table
                    return
                case 'template':
                    // 11. the current template insertion mode
                    ins_mode = template_ins_modes[0]
                    return
                case 'head':
                    // 12. last is true: "in body" ("in body"! not "in
                    //     head"!) (fragment case)
                    // 13. last is false: "in head"
                    ins_mode = last ? ins_mode_in_body : ins_mode_in_head
                    return
                case 'body':
                    // 14.
                    ins_mode = ins_mode_in_body
                    return
                case 'frameset':
                    // 15. (fragment case)
                    ins_mode = ins_mode_in_frameset
                    return
                case 'html':
                    // 16. "before head" when there is no head element yet
                    // (fragment case), "after head" otherwise
                    ins_mode = head_element_pointer === null ? ins_mode_before_head : ins_mode_after_head
                    return
            }
        }
        // 17. nothing matched and there is nowhere further to look
        // (fragment case)
        if (last) {
            ins_mode = ins_mode_in_body
            return
        }
    }
}

// 8.2.3.2

// http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
// The context element when parsing a fragment and only one element is on
// the stack; the current node (open_els[0]) otherwise.
adjusted_current_node = function () {
    return (open_els.length === 1 && flag_fragment_parsing) ? context_element : open_els[0]
}

// http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
// this implementation is structured (mostly) as described at the link above.
// capitalized comments are the "labels" described at the link above.
// Re-opens formatting elements from the front of afe (the list of active
// formatting elements, most recent entry at index 0) that are neither
// markers nor already on the stack of open elements.
reconstruct_afe = function () {
    var el, i
    if (afe.length === 0) {
        return
    }
    if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
        return
    }
    // Rewind
    i = 0
    while (true) {
        if (i === afe.length - 1) {
            break
        }
        i += 1
        if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
            i -= 1 // Advance
            break
        }
    }
    // Create
    while (true) {
        el = insert_html_element(afe[i].token)
        afe[i] = el
        if (i === 0) {
            break
        }
        i -= 1 // Advance
    }
}

// http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
// adoption agency algorithm
// overview here:
//   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
//   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
//   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
//
// subject: tag name of the formatting end tag being processed
adoption_agency = function (subject) {
    // (new_element used to be assigned without a declaration, leaking it
    // into the enclosing scope; unused locals have been removed.)
    var bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, last_node, new_element, new_node, node, node_above, node_in_afe, node_next, outer, t
// this block implements the W3C spec
//        # 1. If the current node is an HTML element whose tag name is subject,
//        # then run these substeps:
//        #
//        # 1. Let element be the current node.
//        #
//        # 2. Pop element off the stack of open elements.
//        #
//        # 3. If element is also in the list of active formatting elements,
//        # remove the element from the list.
//        #
//        # 4. Abort the adoption agency algorithm.
//        if open_els[0].name is subject and open_els[0].namespace is NS_HTML
//            el = open_els.shift()
//            # remove it from the list of active formatting elements (if found)
//            for t, i in afe
//                if t is el
//                    afe.splice i, 1
//                    break
//            return
// WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
    // If the current node is an HTML element whose tag name is subject, and
    // the current node is not in the list of active formatting elements,
    // then pop the current node off the stack of open elements, and abort
    // these steps.
    if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
        // check whether it is in the list of active formatting elements
        in_afe = false
        for (i = 0; i < afe.length; ++i) {
            el = afe[i]
            if (el === open_els[0]) {
                in_afe = true
                break
            }
        }
        if (!in_afe) {
            open_els.shift()
            return
        }
        // fall through
    }
// END WHATWG
    outer = 0
    while (true) {
        if (outer >= 8) {
            return
        }
        outer += 1
        // 5. Let formatting element be the last element in the list of
        // active formatting elements that: is between the end of the list
        // and the last scope marker in the list, if any, or the start of
        // the list otherwise, and has the tag name subject.
        fe = null
        for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
            t = afe[fe_of_afe]
            if (t.type === TYPE_AFE_MARKER) {
                break
            }
            if (t.name === subject) {
                fe = t
                break
            }
        }
        // If there is no such element, then abort these steps and instead
        // act as described in the "any other end tag" entry above.
        if (fe === null) {
            in_body_any_other_end_tag(subject)
            return
        }
        // 6. If formatting element is not in the stack of open elements,
        // then this is a parse error; remove the element from the list, and
        // abort these steps.
        in_open_els = false
        for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
            t = open_els[fe_of_open_els]
            if (t === fe) {
                in_open_els = true
                break
            }
        }
        if (!in_open_els) {
            parse_error()
            // "remove it from the list" must mean afe, since it's not in open_els
            afe.splice(fe_of_afe, 1)
            return
        }
        // 7. If formatting element is in the stack of open elements, but
        // the element is not in scope, then this is a parse error; abort
        // these steps.
        if (!el_is_in_scope(fe)) {
            parse_error()
            return
        }
        // 8. If formatting element is not the current node, this is a parse
        // error. (But do not abort these steps.)
        if (open_els[0] !== fe) {
            parse_error()
            // continue
        }
        // 9. Let furthest block be the topmost node in the stack of open
        // elements that is lower in the stack than formatting element, and
        // is an element in the special category. There might not be one.
        fb = null
        fb_of_open_els = null
        for (i = 0; i < open_els.length; ++i) {
            t = open_els[i]
            if (t === fe) {
                break
            }
            if (el_is_special(t)) {
                fb = t
                fb_of_open_els = i
                // and continue, to see if there's one that's more "topmost"
            }
        }
        // 10. If there is no furthest block, then the UA must first pop all
        // the nodes from the bottom of the stack of open elements, from the
        // current node up to and including formatting element, then remove
        // formatting element from the list of active formatting elements,
        // and finally abort these steps.
        if (fb === null) {
            while (true) {
                t = open_els.shift()
                if (t === fe) {
                    afe.splice(fe_of_afe, 1)
                    return
                }
            }
        }
        // 11. Let common ancestor be the element immediately above
        // formatting element in the stack of open elements.
        ca = open_els[fe_of_open_els + 1] // common ancestor

        node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
        // 12. Let a bookmark note the position of formatting element in the
        // list of active formatting elements relative to the elements on
        // either side of it in the list.
        bookmark = new_aaa_bookmark()
        for (i = 0; i < afe.length; ++i) {
            t = afe[i]
            if (t === fe) {
                afe.splice(i, 0, bookmark)
                break
            }
        }
        node = last_node = fb
        inner = 0
        while (true) {
            inner += 1
            // 3. Let node be the element immediately above node in the
            // stack of open elements, or if node is no longer in the stack
            // of open elements (e.g. because it got removed by this
            // algorithm), the element that was immediately above node in
            // the stack of open elements before node was removed.
            node_next = null
            for (i = 0; i < open_els.length; ++i) {
                t = open_els[i]
                if (t === node) {
                    node_next = open_els[i + 1]
                    break
                }
            }
            node = node_next != null ? node_next : node_above
            // TODO make sure node_above gets re-set if/when node is removed from open_els

            // 4. If node is formatting element, then go to the next step in
            // the overall algorithm.
            if (node === fe) {
                break
            }
            // 5. If inner loop counter is greater than three and node is in
            // the list of active formatting elements, then remove node from
            // the list of active formatting elements.
            node_in_afe = false
            for (i = 0; i < afe.length; ++i) {
                t = afe[i]
                if (t === node) {
                    if (inner > 3) {
                        afe.splice(i, 1)
                    } else {
                        node_in_afe = true
                    }
                    break
                }
            }
            // 6. If node is not in the list of active formatting elements,
            // then remove node from the stack of open elements and then go
            // back to the step labeled inner loop.
            if (!node_in_afe) {
                for (i = 0; i < open_els.length; ++i) {
                    t = open_els[i]
                    if (t === node) {
                        node_above = open_els[i + 1]
                        open_els.splice(i, 1)
                        break
                    }
                }
                continue
            }
            // 7. create an element for the token for which the element node
            // was created, in the HTML namespace, with common ancestor as
            // the intended parent; replace the entry for node in the list
            // of active formatting elements with an entry for the new
            // element, replace the entry for node in the stack of open
            // elements with an entry for the new element, and let node be
            // the new element.
            new_node = token_to_element(node.token, NS_HTML, ca)
            for (i = 0; i < afe.length; ++i) {
                t = afe[i]
                if (t === node) {
                    afe[i] = new_node
                    break
                }
            }
            for (i = 0; i < open_els.length; ++i) {
                t = open_els[i]
                if (t === node) {
                    node_above = open_els[i + 1]
                    open_els[i] = new_node
                    break
                }
            }
            node = new_node
            // 8. If last node is furthest block, then move the
            // aforementioned bookmark to be immediately after the new node
            // in the list of active formatting elements.
            if (last_node === fb) {
                for (i = 0; i < afe.length; ++i) {
                    t = afe[i]
                    if (t === bookmark) {
                        afe.splice(i, 1)
                        break
                    }
                }
                for (i = 0; i < afe.length; ++i) {
                    t = afe[i]
                    if (t === node) {
                        // "after" means lower (a smaller index in afe)
                        afe.splice(i, 0, bookmark)
                        break
                    }
                }
            }
            // 9. Insert last node into node, first removing it from its
            // previous parent node if any.
            if (last_node.parent != null) {
                for (i = 0; i < last_node.parent.children.length; ++i) {
                    c = last_node.parent.children[i]
                    if (c === last_node) {
                        last_node.parent.children.splice(i, 1)
                        break
                    }
                }
            }
            node.children.push(last_node)
            last_node.parent = node
            // 10. Let last node be node.
            last_node = node
            // 11. Return to the step labeled inner loop.
        }
        // 14. Insert whatever last node ended up being in the previous step
        // at the appropriate place for inserting a node, but using common
        // ancestor as the override target.

        // In the case where fe is immediately followed by fb:
        //   * inner loop exits out early (node==fe)
        //   * last_node is fb
        //   * last_node is still in the tree (not a duplicate)
        if (last_node.parent != null) {
            for (i = 0; i < last_node.parent.children.length; ++i) {
                c = last_node.parent.children[i]
                if (c === last_node) {
                    last_node.parent.children.splice(i, 1)
                    break
                }
            }
        }
        // can't use standard insert token thing, because it's already in
        // open_els and must stay at its current position in open_els
        dest = adjusted_insertion_location(ca)
        dest[0].children.splice(dest[1], 0, last_node)
        last_node.parent = dest[0]
        // 15. Create an element for the token for which formatting element
        // was created, in the HTML namespace, with furthest block as the
        // intended parent.
        new_element = token_to_element(fe.token, NS_HTML, fb)
        // 16. Take all of the child nodes of furthest block and append them
        // to the element created in the last step.
        while (fb.children.length) {
            t = fb.children.shift()
            t.parent = new_element
            new_element.children.push(t)
        }
        // 17. Append that new element to furthest block.
        new_element.parent = fb
        fb.children.push(new_element)
        // 18. Remove formatting element from the list of active formatting
        // elements, and insert the new element into the list of active
        // formatting elements at the position of the aforementioned
        // bookmark.
        for (i = 0; i < afe.length; ++i) {
            t = afe[i]
            if (t === fe) {
                afe.splice(i, 1)
                break
            }
        }
        for (i = 0; i < afe.length; ++i) {
            t = afe[i]
            if (t === bookmark) {
                afe[i] = new_element
                break
            }
        }
        // 19. Remove formatting element from the stack of open elements,
        // and insert the new element into the stack of open elements
        // immediately below the position of furthest block in that stack.
        for (i = 0; i < open_els.length; ++i) {
            t = open_els[i]
            if (t === fe) {
                open_els.splice(i, 1)
                break
            }
        }
        for (i = 0; i < open_els.length; ++i) {
            t = open_els[i]
            if (t === fb) {
                open_els.splice(i, 0, new_element)
                break
            }
        }
        // 20. Jump back to the step labeled outer loop.
    }
}

// http://www.w3.org/TR/html5/syntax.html#close-a-p-element
close_p_element = function () {
    var el // previously undeclared (leaked into the enclosing scope)
    generate_implied_end_tags('p') // arg is exception
    if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
        parse_error()
    }
    // pop up to and including the first HTML <p>; never pops the last
    // element left on the stack
    while (open_els.length > 1) { // just in case
        el = open_els.shift()
        if (el.name === 'p' && el.namespace === NS_HTML) {
            return
        }
    }
}
// Closes a <p> element only when one is in button scope.
close_p_if_in_button_scope = function () {
    if (is_in_button_scope('p', NS_HTML)) {
        close_p_element()
    }
}

// http://www.w3.org/TR/html5/syntax.html#insert-a-character
// aka insert_a_character = function (t) {
//
// t is a text token. Its text is appended to the text node immediately
// before the insertion point when there is one; otherwise t itself is
// inserted as a new child.
insert_character = function (t) {
    var dest, prev
    dest = adjusted_insertion_location()
    // fixfull check for Document node
    if (dest[1] > 0) {
        prev = dest[0].children[dest[1] - 1]
        if (prev.type === TYPE_TEXT) {
            prev.text += t.text
            return
        }
    }
    dest[0].children.splice(dest[1], 0, t)
    t.parent = dest[0]
}

// 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
// Tree construction dispatcher: sends token t either to the current
// insertion mode or to in_foreign_content, depending on the adjusted
// current node.
process_token = function (t) {
    var acn
    acn = adjusted_current_node()
    if (acn == null) {
        ins_mode(t)
        return
    }
    if (acn.namespace === NS_HTML) {
        ins_mode(t)
        return
    }
    if (is_mathml_text_integration_point(acn)) {
        if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
            ins_mode(t)
            return
        }
        if (t.type === TYPE_TEXT) {
            ins_mode(t)
            return
        }
    }
    if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
        ins_mode(t)
        return
    }
    if (is_html_integration(acn)) {
        if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
            ins_mode(t)
            return
        }
    }
    if (t.type === TYPE_EOF) {
        ins_mode(t)
        return
    }
    in_foreign_content(t)
}

// 8.2.5.1
// http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
// http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
//
// Returns [parent_node, index_within_parent_children]. override_target is
// optional; the current node is used when it is omitted.
adjusted_insertion_location = function (override_target) {
    var c, el, i, last_table, last_table_i, last_template, last_template_i, previous_element, target, target_i
    // 1. If there was an override target specified, then let target be the
    // override target.
    if (override_target != null) {
        target = override_target
    } else { // Otherwise, let target be the current node.
        target = open_els[0]
    }
    // 2. Determine the adjusted insertion location using the first matching
    // steps from the following list:
    //
    // If foster parenting is enabled and target is a table, tbody, tfoot,
    // thead, or tr element Foster parenting happens when content is
    // misnested in tables.
    if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
        while (true) { // once. this is here so we can ``break`` to "abort these substeps"
            // 1. Let last template be the last template element in the
            // stack of open elements, if any.
            last_template = null
            last_template_i = null
            for (i = 0; i < open_els.length; ++i) {
                el = open_els[i]
                if (el.name === 'template' && el.namespace === NS_HTML) {
                    last_template = el
                    last_template_i = i
                    break
                }
            }
            // 2. Let last table be the last table element in the stack of
            // open elements, if any.
            last_table = null
            last_table_i = null // (was a bare expression statement with no assignment)
            for (i = 0; i < open_els.length; ++i) {
                el = open_els[i]
                if (el.name === 'table' && el.namespace === NS_HTML) {
                    last_table = el
                    last_table_i = i
                    break
                }
            }
            // 3. If there is a last template and either there is no last
            // table, or there is one, but last template is lower (more
            // recently added) than last table in the stack of open
            // elements, then: let adjusted insertion location be inside
            // last template's template contents, after its last child (if
            // any), and abort these substeps.
            if (last_template && (last_table === null || last_template_i < last_table_i)) {
                target = last_template // fixfull should be its contents
                target_i = target.children.length
                break
            }
            // 4. If there is no last table, then let adjusted insertion
            // location be inside the first element in the stack of open
            // elements (the html element), after its last child (if any),
            // and abort these substeps. (fragment case)
            if (last_table === null) {
                // this is odd
                target = open_els[open_els.length - 1]
                target_i = target.children.length
                break
            }
            // 5. If last table has a parent element, then let adjusted
            // insertion location be inside last table's parent element,
            // immediately before last table, and abort these substeps.
            if (last_table.parent != null) {
                for (i = 0; i < last_table.parent.children.length; ++i) {
                    c = last_table.parent.children[i]
                    if (c === last_table) {
                        target = last_table.parent
                        target_i = i
                        break
                    }
                }
                break
            }
            // 6. Let previous element be the element immediately above last
            // table in the stack of open elements.
            //
            // huh? how could it not have a parent?
            previous_element = open_els[last_table_i + 1]
            // 7. Let adjusted insertion location be inside previous
            // element, after its last child (if any).
            target = previous_element
            target_i = target.children.length
            // Note: These steps are involved in part because it's possible
            // for elements, the table element in this case in particular,
            // to have been moved by a script around in the DOM, or indeed
            // removed from the DOM entirely, after the element was inserted
            // by the parser.
            break // don't really loop
        }
    } else {
        // Otherwise Let adjusted insertion location be inside target, after
        // its last child (if any).
        target_i = target.children.length
    }

    // 3. If the adjusted insertion location is inside a template element,
    // let it instead be inside the template element's template contents,
    // after its last child (if any).
    // fixfull (template)

    // 4. Return the adjusted insertion location.
    return [target, target_i]
}

// http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
// aka create_an_element_for_token
//
// Builds a TYPE_TAG Node from start tag token t in the given namespace.
// intended_parent is accepted for parity with the spec but is not used
// here. The new element is not inserted anywhere by this function.
token_to_element = function (t, namespace, intended_parent) {
    var a, attrs, el, i
    // convert attributes into a hash
    attrs = {}
    for (i = 0; i < t.attrs_a.length; ++i) {
        a = t.attrs_a[i]
        attrs[a[0]] = a[1] // TODO check what to do with duplicate attrs
    }
    el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})

    // TODO 2. If the newly created element has an xmlns attribute in the
    // XMLNS namespace whose value is not exactly the same as the element's
    // namespace, that is a parse error. Similarly, if the newly created
    // element has an xmlns:xlink attribute in the XMLNS namespace whose
    // value is not the XLink Namespace, that is a parse error.

    // fixfull: the spec says stuff about form pointers and ownerDocument

    return el
}

// http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
// Creates an element for token in namespace, inserts it at the adjusted
// insertion location, makes it the current node and returns it.
insert_foreign_element = function (token, namespace) {
    var ail, ail_el, ail_i, el
    ail = adjusted_insertion_location()
    ail_el = ail[0]
    ail_i = ail[1]
    el = token_to_element(token, namespace, ail_el)
    // TODO skip this next step if it's broken (eg ail_el is document with child already)
    el.parent = ail_el
    ail_el.children.splice(ail_i, 0, el)
    open_els.unshift(el)
    return el
}
// http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
insert_html_element = function (token) {
    return insert_foreign_element(token, NS_HTML)
}

// http://www.w3.org/TR/html5/syntax.html#insert-a-comment
// position should be [node, index_within_children]
// (defaults to the adjusted insertion location when omitted)
insert_comment = function (t, position) {
    if (position == null) {
        position = adjusted_insertion_location()
    }
    position[0].children.splice(position[1], 0, t)
    return
}

// 8.2.5.2
// http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
// Inserts the element for t, switches the tokenizer to RAWTEXT and the
// insertion mode to "text", remembering the mode to return to.
parse_generic_raw_text = function (t) {
    insert_html_element(t)
    tok_state = tok_state_rawtext
    original_ins_mode = ins_mode
    ins_mode = ins_mode_text
}
// Same as parse_generic_raw_text, but switches the tokenizer to RCDATA.
parse_generic_rcdata_text = function (t) {
    insert_html_element(t)
    tok_state = tok_state_rcdata
    original_ins_mode = ins_mode
    ins_mode = ins_mode_text
}

// 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
// http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
// Pops the current node while it is listed in end_tag_implied (name ->
// namespace) and is not named `except` (optional).
generate_implied_end_tags = function (except) {
    if (except == null) {
        except = null
    }
    while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
        open_els.shift()
    }
}

// 8.2.5.4 The rules for parsing tokens in HTML content
// http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml

// 8.2.5.4.1 The "initial" insertion
mode + // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode + is_quirks_yes_doctype = function (t) { + var i, p, pi + if (t.flag('force-quirks')) { + return true + } + if (t.name !== 'html') { + return true + } + if (t.public_identifier != null) { + pi = t.public_identifier.toLowerCase() + for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) { + p = quirks_yes_pi_prefixes[i] + if (pi.substr(0, p.length) === p) { + return true + } + } + if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') { + return true + } + } + if (t.system_identifier != null) { + if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') { + return true + } + } else if (t.public_identifier != null) { + // already did this: pi = t.public_identifier.toLowerCase() + if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') { + return true + } + } + return false + } + is_quirks_limited_doctype = function (t) { + var pi + if (t.public_identifier != null) { + pi = t.public_identifier.toLowerCase() + if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') { + return true + } + if (t.system_identifier != null) { + if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') { + return true + } + } + } + return false + } + ins_mode_initial = function (t) { + if (is_space_tok(t)) { + return + } + if (t.type === TYPE_COMMENT) { + // ?fixfull + doc.children.push(t) + return + } + if (t.type === TYPE_DOCTYPE) { + // fixfull syntax error from first paragraph and following bullets + // fixfull set doc.doctype + // fixfull is the "not an iframe srcdoc" thing relevant? 
+ if (is_quirks_yes_doctype(t)) { + doc.flag('quirks mode', QUIRKS_YES) + } else if (is_quirks_limited_doctype(t)) { + doc.flag('quirks mode', QUIRKS_LIMITED) + } + doc.children.push(t) + ins_mode = ins_mode_before_html + return + } + // Anything else + // fixfull not iframe srcdoc? + parse_error() + doc.flag('quirks mode', QUIRKS_YES) + ins_mode = ins_mode_before_html + process_token(t) + } + + // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode + ins_mode_before_html = function (t) { + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_COMMENT) { + doc.children.push(t) + return + } + if (is_space_tok(t)) { + return + } + if (t.type === TYPE_START_TAG && t.name === 'html') { + el = token_to_element(t, NS_HTML, doc) + doc.children.push(el) + el.document = doc + open_els.unshift(el) + // fixfull (big paragraph in spec about manifest, fragment, urls, etc) + ins_mode = ins_mode_before_head + return + } + if (t.type === TYPE_END_TAG) { + if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') { + // fall through to "anything else" + } else { + parse_error() + return + } + } + // Anything else + el = token_to_element(new_open_tag('html'), NS_HTML, doc) + doc.children.push(el) + el.document = doc + open_els.unshift(el) + // ?fixfull browsing context + ins_mode = ins_mode_before_head + process_token(t) + } + + // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode + ins_mode_before_head = function (t) { + var el + if (is_space_tok(t)) { + return + } + if (t.type === TYPE_COMMENT) { + insert_comment(t) + return + } + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_START_TAG && t.name === 'html') { + ins_mode_in_body(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'head') { + el = insert_html_element(t) + head_element_pointer = el + ins_mode = ins_mode_in_head + return + } + if (t.type === TYPE_END_TAG) { + if 
(t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') { + // fall through to Anything else below + } else { + parse_error() + return + } + } + // Anything else + el = insert_html_element(new_open_tag('head')) + head_element_pointer = el + ins_mode = ins_mode_in_head + process_token(t) + } + + // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead + ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control + open_els.shift() // spec says this will be a 'head' node + ins_mode = ins_mode_after_head + process_token(t) + } + ins_mode_in_head = function (t) { + var ail, el + if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) { + insert_character(t) + return + } + if (t.type === TYPE_COMMENT) { + insert_comment(t) + return + } + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_START_TAG && t.name === 'html') { + ins_mode_in_body(t) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) { + el = insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + return + } + if (t.type === TYPE_START_TAG && t.name === 'meta') { + el = insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + // fixfull encoding stuff + return + } + if (t.type === TYPE_START_TAG && t.name === 'title') { + parse_generic_rcdata_text(t) + return + } + if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) { + parse_generic_raw_text(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) { + insert_html_element(t) + ins_mode = ins_mode_in_head_noscript + return + } + if (t.type === TYPE_START_TAG && t.name === 'script') { + ail = adjusted_insertion_location() + el = token_to_element(t, NS_HTML, ail) + 
el.flag('parser-inserted', true) + // fixfull frament case + ail[0].children.splice(ail[1], 0, el) + open_els.unshift(el) + tok_state = tok_state_script_data + original_ins_mode = ins_mode // make sure orig... is defined + ins_mode = ins_mode_text + return + } + if (t.type === TYPE_END_TAG && t.name === 'head') { + open_els.shift() // will be a head element... spec says so + ins_mode = ins_mode_after_head + return + } + if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) { + ins_mode_in_head_else(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'template') { + insert_html_element(t) + afe_push_marker() + flag_frameset_ok = false + ins_mode = ins_mode_in_template + template_ins_modes.unshift(ins_mode_in_template) + return + } + if (t.type === TYPE_END_TAG && t.name === 'template') { + if (template_tag_is_open()) { + generate_implied_end_tags + if (open_els[0].name !== 'template') { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === 'template' && el.namespace === NS_HTML) { + break + } + } + clear_afe_to_marker() + template_ins_modes.shift() + reset_ins_mode() + } else { + parse_error() + } + return + } + if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) { + parse_error() + return + } + ins_mode_in_head_else(t) + } + + // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript + ins_mode_in_head_noscript_else = function (t) { + parse_error() + open_els.shift() + ins_mode = ins_mode_in_head + process_token(t) + } + ins_mode_in_head_noscript = function (t) { + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_START_TAG && t.name === 'html') { + ins_mode_in_body(t) + return + } + if (t.type === TYPE_END_TAG && t.name === 'noscript') { + open_els.shift() + ins_mode = ins_mode_in_head + return + } + if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name 
=== 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) { + ins_mode_in_head(t) + return + } + if (t.type === TYPE_END_TAG && t.name === 'br') { + ins_mode_in_head_noscript_else(t) + return + } + if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) { + parse_error() + return + } + // Anything else + ins_mode_in_head_noscript_else(t) + } + + // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode + ins_mode_after_head_else = function (t) { + var body_tok + body_tok = new_open_tag('body') + insert_html_element(body_tok) + ins_mode = ins_mode_in_body + process_token(t) + } + ins_mode_after_head = function (t) { + var el, i, j, len + if (is_space_tok(t)) { + insert_character(t) + return + } + if (t.type === TYPE_COMMENT) { + insert_comment(t) + return + } + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_START_TAG && t.name === 'html') { + ins_mode_in_body(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'body') { + insert_html_element(t) + flag_frameset_ok = false + ins_mode = ins_mode_in_body + return + } + if (t.type === TYPE_START_TAG && t.name === 'frameset') { + insert_html_element(t) + ins_mode = ins_mode_in_frameset + return + } + if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) { + parse_error() + open_els.unshift(head_element_pointer) + ins_mode_in_head(t) + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (el === head_element_pointer) { + open_els.splice(i, 1) + return + } + } + return + } + if (t.type === TYPE_END_TAG && t.name === 'template') { + ins_mode_in_head(t) + return + } + if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || 
t.name === 'br')) { + ins_mode_after_head_else(t) + return + } + if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) { + parse_error() + return + } + // Anything else + ins_mode_after_head_else(t) + } + + // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody + in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it + var el, i, node + node = open_els[0] + while (true) { + if (node.name === name && node.namespace === NS_HTML) { + generate_implied_end_tags(name) // arg is exception + if (node !== open_els[0]) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el === node) { + return + } + } + } + if (special_elements[node.name] === node.namespace) { + parse_error() + return + } + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (node === el) { + node = open_els[i + 1] + break + } + } + } + } + ins_mode_in_body = function (t) { + var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, ref, ref1, ref2, ref3, ref4, root_attrs, s, second, second_i, u, w, y, z + if (t.type === TYPE_TEXT && t.text === "\u0000") { + parse_error() + return + } + if (is_space_tok(t)) { + reconstruct_afe() + insert_character(t) + return + } + if (t.type === TYPE_TEXT) { + reconstruct_afe() + insert_character(t) + flag_frameset_ok = false + return + } + if (t.type === TYPE_COMMENT) { + insert_comment(t) + return + } + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_START_TAG && t.name === 'html') { + parse_error() + if (template_tag_is_open()) { + return + } + root_attrs = open_els[open_els.length - 1].attrs + for (i = 0; i < t.attrs_a.length; ++i) { + a = t.attrs_a[i] + if (root_attrs[a[0]] == null) { + root_attrs[a[0]] = a[1] + } + } + return + } + + if ((t.type === TYPE_START_TAG && (t.name === 'base' || 
t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) { + ins_mode_in_head(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'body') { + parse_error() + if (open_els.length < 2) { + return + } + second = open_els[open_els.length - 2] + if (second.namespace !== NS_HTML) { + return + } + if (second.name !== 'body') { + return + } + if (template_tag_is_open()) { + return + } + flag_frameset_ok = false + for (i = 0; i < t.attrs_a.length; ++i) { + a = t.attrs_a[i] + if (second.attrs[a[0]] == null) { + second.attrs[a[0]] = a[1] + } + } + return + } + if (t.type === TYPE_START_TAG && t.name === 'frameset') { + parse_error() + if (open_els.length < 2) { + return + } + second_i = open_els.length - 2 + second = open_els[second_i] + if (second.namespace !== NS_HTML) { + return + } + if (second.name !== 'body') { + return + } + if (flag_frameset_ok === false) { + return + } + if (second.parent != null) { + for (i = 0; i < second.parent.children.length; ++i) { + el = second.parent.children[i] + if (el === second) { + second.parent.children.splice(i, 1) + break + } + } + } + open_els.splice(second_i, 1) + // pop everything except the "root html element" + while (open_els.length > 1) { + open_els.shift() + } + insert_html_element(t) + ins_mode = ins_mode_in_frameset + return + } + if (t.type === TYPE_EOF) { + ok_tags = { + dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML, + td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, + tr: NS_HTML, body: NS_HTML, html: NS_HTML + } + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (ok_tags[t.name] !== el.namespace) { + parse_error() + break + } + } + if (template_ins_modes.length > 0) { + ins_mode_in_template(t) + } else { + stop_parsing() + } + return + } + if (t.type === TYPE_END_TAG 
&& t.name === 'body') { + if (!is_in_scope('body', NS_HTML)) { + parse_error() + return + } + ok_tags = { + dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML, + option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML, + rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML, + th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML, + html: NS_HTML + } + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (ok_tags[t.name] !== el.namespace) { + parse_error() + break + } + } + ins_mode = ins_mode_after_body + return + } + if (t.type === TYPE_END_TAG && t.name === 'html') { + if (!is_in_scope('body', NS_HTML)) { + parse_error() + return + } + ok_tags = { + dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML, + option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML, + rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML, + th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML, + html: NS_HTML + } + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (ok_tags[t.name] !== el.namespace) { + parse_error() + break + } + } + ins_mode = ins_mode_after_body + process_token(t) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) { + close_p_if_in_button_scope() + insert_html_element(t) + return + } + if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that + close_p_if_in_button_scope() + if (h_tags[open_els[0].name] === open_els[0].namespace) { + parse_error() 
+ open_els.shift() + } + insert_html_element(t) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) { + close_p_if_in_button_scope() + insert_html_element(t) + eat_next_token_if_newline() + flag_frameset_ok = false + return + } + if (t.type === TYPE_START_TAG && t.name === 'form') { + if (!(form_element_pointer === null || template_tag_is_open())) { + parse_error() + return + } + close_p_if_in_button_scope() + el = insert_html_element(t) + if (!template_tag_is_open()) { + form_element_pointer = el + } + return + } + if (t.type === TYPE_START_TAG && t.name === 'li') { + flag_frameset_ok = false + for (i = 0; i < open_els.length; ++i) { + node = open_els[i] + if (node.name === 'li' && node.namespace === NS_HTML) { + generate_implied_end_tags('li') // arg is exception + if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === 'li' && el.namespace === NS_HTML) { + break + } + } + break + } + if (el_is_special_not_adp(node)) { + break + } + } + close_p_if_in_button_scope() + insert_html_element(t) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) { + flag_frameset_ok = false + for (i = 0; i < open_els.length; ++i) { + node = open_els[i] + if (node.name === 'dd' && node.namespace === NS_HTML) { + generate_implied_end_tags('dd') // arg is exception + if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === 'dd' && el.namespace === NS_HTML) { + break + } + } + break + } + if (node.name === 'dt' && node.namespace === NS_HTML) { + generate_implied_end_tags('dt') // arg is exception + if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === 'dt' && el.namespace === NS_HTML) { + break + } + } + break + } + if 
(el_is_special_not_adp(node)) { + break + } + } + close_p_if_in_button_scope() + insert_html_element(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'plaintext') { + close_p_if_in_button_scope() + insert_html_element(t) + tok_state = tok_state_plaintext + return + } + if (t.type === TYPE_START_TAG && t.name === 'button') { + if (is_in_scope('button', NS_HTML)) { + parse_error() + generate_implied_end_tags() + while (true) { + el = open_els.shift() + if (el.name === 'button' && el.namespace === NS_HTML) { + break + } + } + } + reconstruct_afe() + insert_html_element(t) + flag_frameset_ok = false + return + } + if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) { + if (!is_in_scope(t.name, NS_HTML)) { + parse_error() + return + } + generate_implied_end_tags() + if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === t.name && el.namespace === NS_HTML) { + return + } + } + return + } + if (t.type === TYPE_END_TAG && t.name === 'form') { + if (!template_tag_is_open()) { + node = form_element_pointer + form_element_pointer = null + if (node === null || !el_is_in_scope(node)) { + parse_error() + return + } + generate_implied_end_tags() + if (open_els[0] !== node) { + parse_error() + } + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (el === node) { + open_els.splice(i, 1) + break + } + } + } else { + if 
(!is_in_scope('form', NS_HTML)) { + parse_error() + return + } + generate_implied_end_tags() + if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === 'form' && el.namespace === NS_HTML) { + break + } + } + } + return + } + if (t.type === TYPE_END_TAG && t.name === 'p') { + if (!is_in_button_scope('p', NS_HTML)) { + parse_error() + insert_html_element(new_open_tag('p')) + } + close_p_element() + return + } + if (t.type === TYPE_END_TAG && t.name === 'li') { + if (!is_in_li_scope('li', NS_HTML)) { + parse_error() + return + } + generate_implied_end_tags('li') // arg is exception + if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === 'li' && el.namespace === NS_HTML) { + break + } + } + return + } + if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) { + if (!is_in_scope(t.name, NS_HTML)) { + parse_error() + return + } + generate_implied_end_tags(t.name) // arg is exception + if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === t.name && el.namespace === NS_HTML) { + break + } + } + return + } + if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did + h_in_scope = false + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (h_tags[el.name] === el.namespace) { + h_in_scope = true + break + } + if (standard_scopers[el.name] === el.namespace) { + break + } + } + if (!h_in_scope) { + parse_error() + return + } + generate_implied_end_tags() + if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (h_tags[el.name] === el.namespace) { + break + } + } + return + } + // deep breath! 
+ if (t.type === TYPE_START_TAG && t.name === 'a') { + // If the list of active formatting elements contains an a element + // between the end of the list and the last marker on the list (or + // the start of the list if there is no marker on the list), then + // this is a parse error; run the adoption agency algorithm for the + // tag name "a", then remove that element from the list of active + // formatting elements and the stack of open elements if the + // adoption agency algorithm didn't already remove it (it might not + // have if the element is not in table scope). + found = false + for (i = 0; i < afe.length; ++i) { + el = afe[i] + if (el.type === TYPE_AFE_MARKER) { + break + } + if (el.name === 'a' && el.namespace === NS_HTML) { + found = el + } + } + if (found != null) { + parse_error() + adoption_agency('a') + for (i = 0; i < afe.length; ++i) { + el = afe[i] + if (el === found) { + afe.splice(i, 1) + } + } + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (el === found) { + open_els.splice(i, 1) + } + } + } + reconstruct_afe() + el = insert_html_element(t) + afe_push(el) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) { + reconstruct_afe() + el = insert_html_element(t) + afe_push(el) + return + } + if (t.type === TYPE_START_TAG && t.name === 'nobr') { + reconstruct_afe() + if (is_in_scope('nobr', NS_HTML)) { + parse_error() + adoption_agency('nobr') + reconstruct_afe() + } + el = insert_html_element(t) + afe_push(el) + return + } + if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || 
t.name === 'tt' || t.name === 'u')) { + adoption_agency(t.name) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) { + reconstruct_afe() + insert_html_element(t) + afe_push_marker() + flag_frameset_ok = false + return + } + if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) { + if (!is_in_scope(t.name, NS_HTML)) { + parse_error() + return + } + generate_implied_end_tags() + if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === t.name && el.namespace === NS_HTML) { + break + } + } + clear_afe_to_marker() + return + } + if (t.type === TYPE_START_TAG && t.name === 'table') { + if (doc.flag('quirks mode') !== QUIRKS_YES) { + close_p_if_in_button_scope() // test + } + insert_html_element(t) + flag_frameset_ok = false + ins_mode = ins_mode_in_table + return + } + if (t.type === TYPE_END_TAG && t.name === 'br') { + parse_error() + // W3C: t.type = TYPE_START_TAG + t = new_open_tag('br') // WHATWG + // fall through + } + if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) { + reconstruct_afe() + insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + flag_frameset_ok = false + return + } + if (t.type === TYPE_START_TAG && t.name === 'input') { + reconstruct_afe() + insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + if (!is_input_hidden_tok(t)) { + flag_frameset_ok = false + } + return + } + if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) { + // WHATWG adds 'menuitem' for this block + insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + return + } + if (t.type === TYPE_START_TAG && t.name === 'hr') { + 
close_p_if_in_button_scope() + insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + flag_frameset_ok = false + return + } + if (t.type === TYPE_START_TAG && t.name === 'image') { + parse_error() + t.name = 'img' + process_token(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'isindex') { + parse_error() + if (template_tag_is_open() === false && form_element_pointer !== null) { + return + } + t.acknowledge_self_closing() + flag_frameset_ok = false + close_p_if_in_button_scope() + el = insert_html_element(new_open_tag('form')) + if (!template_tag_is_open()) { + form_element_pointer = el + } + for (i = 0; i < t.attrs_a.length; ++i) { + a = t.attrs_a[i] + if (a[0] === 'action') { + el.attrs['action'] = a[1] + break + } + } + insert_html_element(new_open_tag('hr')) + open_els.shift() + reconstruct_afe() + insert_html_element(new_open_tag('label')) + // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once + input_el = new_open_tag('input') + prompt = null + for (i = 0; i < t.attrs_a.length; ++i) { + a = t.attrs_a[i] + if (a[0] === 'prompt') { + prompt = a[1] + } + if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') { + input_el.attrs_a.push([a[0], a[1]]) + } + } + input_el.attrs_a.push(['name', 'isindex']) + // fixfull this next bit is in english... internationalize? + if (prompt == null) { + prompt = "This is a searchable index. 
Enter search keywords: " + } + insert_character(new_character_token(prompt)) // fixfull split + // TODO submit typo "balue" in spec + insert_html_element(input_el) + open_els.shift() + // insert_character('') // you can put chars here if prompt attr missing + open_els.shift() + insert_html_element(new_open_tag('hr')) + open_els.shift() + open_els.shift() + if (!template_tag_is_open()) { + form_element_pointer = null + } + return + } + if (t.type === TYPE_START_TAG && t.name === 'textarea') { + insert_html_element(t) + eat_next_token_if_newline() + tok_state = tok_state_rcdata + original_ins_mode = ins_mode + flag_frameset_ok = false + ins_mode = ins_mode_text + return + } + if (t.type === TYPE_START_TAG && t.name === 'xmp') { + close_p_if_in_button_scope() + reconstruct_afe() + flag_frameset_ok = false + parse_generic_raw_text(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'iframe') { + flag_frameset_ok = false + parse_generic_raw_text(t) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) { + parse_generic_raw_text(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'select') { + reconstruct_afe() + insert_html_element(t) + flag_frameset_ok = false + if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) { + ins_mode = ins_mode_in_select_in_table + } else { + ins_mode = ins_mode_in_select + } + return + } + if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) { + if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) { + open_els.shift() + } + reconstruct_afe() + insert_html_element(t) + return + } +// this comment block implements the W3C spec +// if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc') +// if is_in_scope 'ruby', NS_HTML +// generate_implied_end_tags() +// unless 
open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML +// parse_error() +// insert_html_element t +// return +// if t.type === TYPE_START_TAG && t.name === 'rt' +// if is_in_scope 'ruby', NS_HTML +// generate_implied_end_tags 'rtc' // arg === exception +// unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML +// parse_error() +// insert_html_element t +// return +// below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody + if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) { + if (is_in_scope('ruby', NS_HTML)) { + generate_implied_end_tags() + if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) { + parse_error() + } + } + insert_html_element(t) + return + } + if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) { + if (is_in_scope('ruby', NS_HTML)) { + generate_implied_end_tags('rtc') + if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) { + parse_error() + } + } + insert_html_element(t) + return + } +// end WHATWG chunk + if (t.type === TYPE_START_TAG && t.name === 'math') { + reconstruct_afe() + adjust_mathml_attributes(t) + adjust_foreign_attributes(t) + insert_foreign_element(t, NS_MATHML) + if (t.flag('self-closing')) { + open_els.shift() + t.acknowledge_self_closing() + } + return + } + if (t.type === TYPE_START_TAG && t.name === 'svg') { + reconstruct_afe() + adjust_svg_attributes(t) + adjust_foreign_attributes(t) + insert_foreign_element(t, NS_SVG) + if (t.flag('self-closing')) { + open_els.shift() + t.acknowledge_self_closing() + } + return + } + if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) { + parse_error() + return + 
} + if (t.type === TYPE_START_TAG) { // any other start tag + reconstruct_afe() + insert_html_element(t) + return + } + if (t.type === TYPE_END_TAG) { // any other end tag + in_body_any_other_end_tag(t.name) + return + } + } + + // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata + ins_mode_text = function (t) { + if (t.type === TYPE_TEXT) { + insert_character(t) + return + } + if (t.type === TYPE_EOF) { + parse_error() + if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) { + open_els[0].flag('already started', true) + } + open_els.shift() + ins_mode = original_ins_mode + process_token(t) + return + } + if (t.type === TYPE_END_TAG && t.name === 'script') { + open_els.shift() + ins_mode = original_ins_mode + // fixfull the spec seems to assume that I'm going to run the script + // http://www.w3.org/TR/html5/syntax.html#scriptEndTag + return + } + if (t.type === TYPE_END_TAG) { + open_els.shift() + ins_mode = original_ins_mode + return + } + } + + // the functions below implement the tokenizer stats described here: + // http://www.w3.org/TR/html5/syntax.html#tokenization + + // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable + ins_mode_in_table_else = function (t) { + parse_error() + flag_foster_parenting = true + ins_mode_in_body(t) + flag_foster_parenting = false + } + ins_mode_in_table = function (t) { + var el + switch (t.type) { + case TYPE_TEXT: + if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) { + pending_table_character_tokens = [] + original_ins_mode = ins_mode + ins_mode = ins_mode_in_table_text + process_token(t) + } else { + ins_mode_in_table_else(t) + } + break + case TYPE_COMMENT: + insert_comment(t) + break + case TYPE_DOCTYPE: + parse_error() + break + case TYPE_START_TAG: + switch (t.name) { + case 'caption': + 
clear_stack_to_table_context() + afe_push_marker() + insert_html_element(t) + ins_mode = ins_mode_in_caption + break + case 'colgroup': + clear_stack_to_table_context() + insert_html_element(t) + ins_mode = ins_mode_in_column_group + break + case 'col': + clear_stack_to_table_context() + insert_html_element(new_open_tag('colgroup')) + ins_mode = ins_mode_in_column_group + process_token(t) + break + case 'tbody': + case 'tfoot': + case 'thead': + clear_stack_to_table_context() + insert_html_element(t) + ins_mode = ins_mode_in_table_body + break + case 'td': + case 'th': + case 'tr': + clear_stack_to_table_context() + insert_html_element(new_open_tag('tbody')) + ins_mode = ins_mode_in_table_body + process_token(t) + break + case 'table': + parse_error() + if (is_in_table_scope('table', NS_HTML)) { + while (true) { + el = open_els.shift() + if (el.name === 'table' && el.namespace === NS_HTML) { + break + } + } + reset_ins_mode() + process_token(t) + } + break + case 'style': + case 'script': + case 'template': + ins_mode_in_head(t) + break + case 'input': + if (!is_input_hidden_tok(t)) { + ins_mode_in_table_else(t) + } else { + parse_error() + el = insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + } + break + case 'form': + parse_error() + if (form_element_pointer != null) { + return + } + if (template_tag_is_open()) { + return + } + form_element_pointer = insert_html_element(t) + open_els.shift() + break + default: + ins_mode_in_table_else(t) + } + break + case TYPE_END_TAG: + switch (t.name) { + case 'table': + if (is_in_table_scope('table', NS_HTML)) { + while (true) { + el = open_els.shift() + if (el.name === 'table' && el.namespace === NS_HTML) { + break + } + } + reset_ins_mode() + } else { + parse_error() + } + break + case 'body': + case 'caption': + case 'col': + case 'colgroup': + case 'html': + case 'tbody': + case 'td': + case 'tfoot': + case 'th': + case 'thead': + case 'tr': + parse_error() + break + case 'template': + 
ins_mode_in_head(t) + break + default: + ins_mode_in_table_else(t) + } + break + case TYPE_EOF: + ins_mode_in_body(t) + break + default: + ins_mode_in_table_else(t) + } + } + + // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext + ins_mode_in_table_text = function (t) { + var all_space, i, l, m, old + if (t.type === TYPE_TEXT && t.text === "\u0000") { + // from javascript? + parse_error() + return + } + if (t.type === TYPE_TEXT) { + pending_table_character_tokens.push(t) + return + } + // Anything else + all_space = true + for (i = 0; i < pending_table_character_tokens.length; ++i) { + old = pending_table_character_tokens[i] + if (!is_space_tok(old)) { + all_space = false + break + } + } + if (all_space) { + for (i = 0; i < pending_table_character_tokens.length; ++i) { + old = pending_table_character_tokens[i] + insert_character(old) + } + } else { + for (i = 0; i < pending_table_character_tokens.length; ++i) { + old = pending_table_character_tokens[i] + ins_mode_in_table_else(old) + } + } + pending_table_character_tokens = [] + ins_mode = original_ins_mode + process_token(t) + } + + // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption + ins_mode_in_caption = function (t) { + var el + if (t.type === TYPE_END_TAG && t.name === 'caption') { + if (is_in_table_scope('caption', NS_HTML)) { + generate_implied_end_tags() + if (open_els[0].name !== 'caption') { + parse_error() + } + while (true) { + el = open_els.shift() + if (el.name === 'caption' && el.namespace === NS_HTML) { + break + } + } + clear_afe_to_marker() + ins_mode = ins_mode_in_table + } else { + parse_error() + // fragment case + } + return + } + if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') { + parse_error() + if 
(is_in_table_scope('caption', NS_HTML)) { + while (true) { + el = open_els.shift() + if (el.name === 'caption' && el.namespace === NS_HTML) { + break + } + } + clear_afe_to_marker() + ins_mode = ins_mode_in_table + process_token(t) + } + // else fragment case + return + } + if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) { + parse_error() + return + } + // Anything else + ins_mode_in_body(t) + } + + // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup + ins_mode_in_column_group = function (t) { + var el + if (is_space_tok(t)) { + insert_character(t) + return + } + if (t.type === TYPE_COMMENT) { + insert_comment(t) + return + } + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_START_TAG && t.name === 'html') { + ins_mode_in_body(t) + return + } + if (t.type === TYPE_START_TAG && t.name === 'col') { + el = insert_html_element(t) + open_els.shift() + t.acknowledge_self_closing() + return + } + if (t.type === TYPE_END_TAG && t.name === 'colgroup') { + if (open_els[0].name === 'colgroup' && open_els.namespace === NS_HTML) { + open_els.shift() + ins_mode = ins_mode_in_table + } else { + parse_error() + } + return + } + if (t.type === TYPE_END_TAG && t.name === 'col') { + parse_error() + return + } + if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') { + ins_mode_in_head(t) + return + } + if (t.type === TYPE_EOF) { + ins_mode_in_body(t) + return + } + // Anything else + if (open_els[0].name !== 'colgroup') { + parse_error() + return + } + open_els.shift() + ins_mode = ins_mode_in_table + process_token(t) + } + + // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody + ins_mode_in_table_body = function (t) { + var el, has, i + if (t.type === TYPE_START_TAG && t.name === 
'tr') { + clear_stack_to_table_body_context() + insert_html_element(t) + ins_mode = ins_mode_in_row + return + } + if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) { + parse_error() + clear_stack_to_table_body_context() + insert_html_element(new_open_tag('tr')) + ins_mode = ins_mode_in_row + process_token(t) + return + } + if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) { + if (!is_in_table_scope(t.name, NS_HTML)) { + parse_error() + return + } + clear_stack_to_table_body_context() + open_els.shift() + ins_mode = ins_mode_in_table + return + } + if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) { + has = false + for (i = 0; i < open_els.length; ++i) { + el = open_els[i] + if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) { + has = true + break + } + if (table_scopers[el.name] === el.namespace) { + break + } + } + if (!has) { + parse_error() + return + } + clear_stack_to_table_body_context() + open_els.shift() + ins_mode = ins_mode_in_table + process_token(t) + return + } + if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) { + parse_error() + return + } + // Anything else + ins_mode_in_table(t) + } + + // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr + ins_mode_in_row = function (t) { + if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) { + clear_stack_to_table_row_context() + insert_html_element(t) + ins_mode = ins_mode_in_cell + afe_push_marker() + return + } + if (t.type === TYPE_END_TAG && t.name === 'tr') { + if (is_in_table_scope('tr', NS_HTML)) { + clear_stack_to_table_row_context() + 
// http://www.w3.org/TR/html5/syntax.html#close-the-cell
//
// Closes the currently open table cell: generates implied end tags, reports
// a parse error unless the current node (open_els[0]) is an HTML td or th,
// pops elements up to and including the first HTML td/th, clears the list
// of active formatting elements back to the last marker and switches the
// insertion mode to "in row".
close_the_cell = function () {
    var el
    generate_implied_end_tags()
    el = open_els[0]
    // compare the element's *name* in both alternatives; comparing the
    // element object itself to 'th' is never true
    if (!((el.name === 'td' || el.name === 'th') && el.namespace === NS_HTML)) {
        parse_error()
    }
    while (true) {
        el = open_els.shift()
        if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
            break
        }
    }
    clear_afe_to_marker()
    ins_mode = ins_mode_in_row
}
// 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
//
// Handles one token while the insertion mode is "in select".
// open_els[0] is treated as the current node and open_els[1] as the node
// immediately below it (elements are popped with shift()).
//
// t: the token to process. Returns nothing.
ins_mode_in_select = function (t) {
    var current_is, pop_through_select
    // true when the current node is an HTML element called `name`
    current_is = function (name) {
        return open_els[0].name === name && open_els[0].namespace === NS_HTML
    }
    // pops elements until an HTML <select> has been popped
    pop_through_select = function () {
        var el
        while (true) {
            el = open_els.shift()
            if (el.name === 'select' && el.namespace === NS_HTML) {
                break
            }
        }
    }
    if (t.type === TYPE_TEXT && t.text === "\u0000") {
        parse_error()
        return
    }
    if (t.type === TYPE_TEXT) {
        insert_character(t)
        return
    }
    if (t.type === TYPE_COMMENT) {
        insert_comment(t)
        return
    }
    if (t.type === TYPE_DOCTYPE) {
        parse_error()
        return
    }
    if (t.type === TYPE_START_TAG && t.name === 'html') {
        ins_mode_in_body(t)
        return
    }
    if (t.type === TYPE_START_TAG && t.name === 'option') {
        if (current_is('option')) {
            open_els.shift()
        }
        insert_html_element(t)
        return
    }
    if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
        if (current_is('option')) {
            open_els.shift()
        }
        if (current_is('optgroup')) {
            open_els.shift()
        }
        insert_html_element(t)
        return
    }
    if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
        // Only pop the <option> when the node right below it is an HTML
        // <optgroup>: both the name and the namespace are read from
        // open_els[1].
        if (current_is('option') && open_els[1] != null && open_els[1].name === 'optgroup' && open_els[1].namespace === NS_HTML) {
            open_els.shift()
        }
        if (current_is('optgroup')) {
            open_els.shift()
        } else {
            parse_error()
        }
        return
    }
    if (t.type === TYPE_END_TAG && t.name === 'option') {
        if (current_is('option')) {
            open_els.shift()
        } else {
            parse_error()
        }
        return
    }
    if (t.type === TYPE_END_TAG && t.name === 'select') {
        if (is_in_select_scope('select', NS_HTML)) {
            pop_through_select()
            reset_ins_mode()
        } else {
            parse_error()
        }
        return
    }
    if (t.type === TYPE_START_TAG && t.name === 'select') {
        parse_error()
        pop_through_select()
        reset_ins_mode()
        // spec says that this is the same as </select> but it doesn't say
        // to check scope first
        return
    }
    if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
        parse_error()
        if (!is_in_select_scope('select', NS_HTML)) {
            return
        }
        pop_through_select()
        reset_ins_mode()
        process_token(t)
        return
    }
    // <script>, <template> and </template> use the "in head" rules
    if ((t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
        ins_mode_in_head(t)
        return
    }
    if (t.type === TYPE_EOF) {
        ins_mode_in_body(t)
        return
    }
    // Anything else
    parse_error()
}
// 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
//
// Start and end tags for table structure (caption, table, tbody, tfoot,
// thead, tr, td, th) are a parse error here: the open <select> is popped,
// the insertion mode is reset and the token is reprocessed. An end tag is
// ignored instead when no matching element is in table scope. Every other
// token is handled by ins_mode_in_select.
ins_mode_in_select_in_table = function (t) {
    var popped, table_tag
    table_tag = false
    if (t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) {
        switch (t.name) {
            case 'caption':
            case 'table':
            case 'tbody':
            case 'tfoot':
            case 'thead':
            case 'tr':
            case 'td':
            case 'th':
                table_tag = true
        }
    }
    if (!table_tag) {
        // Anything else
        ins_mode_in_select(t)
        return
    }
    parse_error()
    if (t.type === TYPE_END_TAG && !is_in_table_scope(t.name, NS_HTML)) {
        return
    }
    do {
        popped = open_els.shift()
    } while (popped.name !== 'select' || popped.namespace !== NS_HTML)
    reset_ins_mode()
    process_token(t)
}
// 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
//
// Handles one token once the body has been closed. Whitespace and <html>
// go through the "in body" rules, comments are appended to the last entry
// of open_els, </html> moves on to "after after body" (or is an error when
// flag_fragment_parsing is set), EOF stops parsing, and any other token is
// a parse error that switches back to "in body" and is reprocessed.
ins_mode_after_body = function (t) {
    var bottom
    if (is_space_tok(t)) {
        ins_mode_in_body(t)
        return
    }
    switch (t.type) {
        case TYPE_COMMENT:
            bottom = open_els[open_els.length - 1]
            insert_comment(t, [bottom, bottom.children.length])
            return
        case TYPE_DOCTYPE:
            parse_error()
            return
        case TYPE_START_TAG:
            if (t.name === 'html') {
                ins_mode_in_body(t)
                return
            }
            break
        case TYPE_END_TAG:
            if (t.name === 'html') {
                if (flag_fragment_parsing) {
                    parse_error()
                } else {
                    ins_mode = ins_mode_after_after_body
                }
                return
            }
            break
        case TYPE_EOF:
            stop_parsing()
            return
    }
    // Anything else
    parse_error()
    ins_mode = ins_mode_in_body
    process_token(t)
}
// 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
//
// Handles one token after the outermost frameset has been closed:
// whitespace and comments are inserted, <html> uses the "in body" rules,
// <noframes> uses the "in head" rules, </html> switches to "after after
// frameset", EOF stops parsing, and everything else is a parse error that
// is otherwise ignored.
ins_mode_after_frameset = function (t) {
    if (is_space_tok(t)) {
        insert_character(t)
        return
    }
    switch (t.type) {
        case TYPE_COMMENT:
            insert_comment(t)
            return
        case TYPE_EOF:
            stop_parsing()
            return
        case TYPE_START_TAG:
            if (t.name === 'html') {
                ins_mode_in_body(t)
                return
            }
            if (t.name === 'noframes') {
                ins_mode_in_head(t)
                return
            }
            break
        case TYPE_END_TAG:
            if (t.name === 'html') {
                ins_mode = ins_mode_after_after_frameset
                return
            }
            break
    }
    // DOCTYPE and anything else
    parse_error()
}
// 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
//
// Reports whether token t carries a "color", "face" or "size" attribute.
// t.attrs_a is read as a list of [name, value] pairs; only the name
// (index 0) of each pair is inspected.
has_color_face_or_size = function (t) {
    return t.attrs_a.some(function (pair) {
        switch (pair[0]) {
            case 'color':
            case 'face':
            case 'size':
                return true
        }
        return false
    })
}
TYPE_TEXT && t.text === "\u0000") { + parse_error() + insert_character(new_character_token("\ufffd")) + return + } + if (is_space_tok(t)) { + insert_character(t) + return + } + if (t.type === TYPE_TEXT) { + flag_frameset_ok = false + insert_character(t) + return + } + if (t.type === TYPE_COMMENT) { + insert_comment(t) + return + } + if (t.type === TYPE_DOCTYPE) { + parse_error() + return + } + if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) { + parse_error() + if (flag_fragment_parsing) { + in_foreign_content_other_start(t) + return + } + while (true) { // is this safe? 
// 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
//
// Consumes one character of txt at cur. Returns a token for that
// character (or for a parsed character reference, or EOF), or null when
// the character was '<' and only the tokenizer state changed.
tok_state_data = function () {
    var c
    c = txt.charAt(cur++)
    if (c === '<') {
        tok_state = tok_state_tag_open
        return null
    }
    if (c === '') { // EOF
        return new_eof_token()
    }
    if (c === '&') {
        return new_text_node(parse_character_reference())
    }
    if (c === "\u0000") {
        parse_error()
    }
    return new_text_node(c)
}
// 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
//
// Consumes one character of txt at cur. '<' only switches to the RAWTEXT
// less-than-sign state (returns null); NUL is reported and replaced by
// U+FFFD; end of input yields an EOF token; any other character is
// returned as a character token.
tok_state_rawtext = function () {
    var c
    c = txt.charAt(cur++)
    if (c === '<') {
        tok_state = tok_state_rawtext_less_than_sign
        return null
    }
    if (c === '') { // EOF
        return new_eof_token()
    }
    if (c === "\u0000") {
        parse_error()
        c = "\ufffd"
    }
    return new_character_token(c)
}
// 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
//
// Entered after a '<' inside RCDATA. A following '/' is consumed, resets
// temporary_buffer and moves to the RCDATA end tag open state (returns
// null). Otherwise nothing is consumed: the state returns to RCDATA and
// the '<' is emitted as a character token.
tok_state_rcdata_less_than_sign = function () {
    if (txt.charAt(cur) === '/') {
        cur += 1
        temporary_buffer = ''
        tok_state = tok_state_rcdata_end_tag_open
        return null
    }
    // Anything else: leave the character for the RCDATA state to consume
    tok_state = tok_state_rcdata
    return new_character_token('<')
}
+ } + // Anything else + tok_state = tok_state_rcdata + cur -= 1 // reconsume the input character + return new_character_token("') { + if (is_appropriate_end_tag(tok_cur_tag)) { + tok_state = tok_state_data + return tok_cur_tag + } + // else fall through to "Anything else" + } + if (is_uc_alpha(c)) { + tok_cur_tag.name += c.toLowerCase() + temporary_buffer += c + return null + } + if (is_lc_alpha(c)) { + tok_cur_tag.name += c + temporary_buffer += c + return null + } + // Anything else + tok_state = tok_state_rcdata + cur -= 1 // reconsume the input character + return new_character_token('') { + if (is_appropriate_end_tag(tok_cur_tag)) { + tok_state = tok_state_data + return tok_cur_tag + } + // else fall through to "Anything else" + } + if (is_uc_alpha(c)) { + tok_cur_tag.name += c.toLowerCase() + temporary_buffer += c + return null + } + if (is_lc_alpha(c)) { + tok_cur_tag.name += c + temporary_buffer += c + return null + } + // Anything else + tok_state = tok_state_rawtext + cur -= 1 // reconsume the input character + return new_character_token('') { + if (is_appropriate_end_tag(tok_cur_tag)) { + tok_state = tok_state_data + return tok_cur_tag + } + // fall through + } + if (is_uc_alpha(c)) { + tok_cur_tag.name += c.toLowerCase() + temporary_buffer += c + return + } + if (is_lc_alpha(c)) { + tok_cur_tag.name += c + temporary_buffer += c + return + } + // Anything else + tok_state = tok_state_script_data + cur -= 1 // reconsume + return new_character_token("') { + tok_state = tok_state_script_data + return new_character_token('>') + } + if (c === "\u0000") { + parse_error() + tok_state = tok_state_script_data_escaped + return new_character_token("\ufffd") + } + if (c === '') { // EOF + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return + } + // Anything else + tok_state = tok_state_script_data_escaped + return new_character_token(c) + } + + // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state + 
// 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
//
// Entered after a '<' inside escaped script data.
//   '/'    -> reset temporary_buffer, go to the escaped end tag open state
//             (nothing is returned)
//   letter -> start temporary_buffer with the lowercased letter, go to the
//             double escape start state and return "<" plus the letter as
//             one character token
//   other  -> reconsume in the escaped state and return '<'
tok_state_script_data_escaped_less_than_sign = function () {
    var c, upper
    c = txt.charAt(cur++)
    if (c === '/') {
        temporary_buffer = ''
        tok_state = tok_state_script_data_escaped_end_tag_open
        return
    }
    upper = is_uc_alpha(c)
    if (upper || is_lc_alpha(c)) {
        temporary_buffer = upper ? c.toLowerCase() : c // yes, really
        tok_state = tok_state_script_data_double_escape_start
        return new_character_token("<" + c) // fixfull split
    }
    // Anything else
    tok_state = tok_state_script_data_escaped
    cur -= 1 // reconsume
    return new_character_token('<')
}
// 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
//
// Consumes one character of txt at cur and returns it as a character
// token. '-' and '<' additionally switch state, NUL is reported and
// replaced by U+FFFD. At end of input a parse error is reported, the
// state goes back to data, the position is rewound and nothing is
// returned.
tok_state_script_data_double_escaped = function () {
    var c
    c = txt.charAt(cur++)
    switch (c) {
        case '-':
            tok_state = tok_state_script_data_double_escaped_dash
            break
        case '<':
            tok_state = tok_state_script_data_double_escaped_less_than_sign
            break
        case "\u0000":
            parse_error()
            c = "\ufffd"
            break
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            cur -= 1 // reconsume
            return
    }
    return new_character_token(c)
}
// 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
//
// Consumes one character of txt at cur.
//   tab, LF, FF, space, '/', '>' -> leave double escaping only if
//       temporary_buffer spells "script"; the character is returned as a
//       character token
//   letter -> appended (lowercased) to temporary_buffer and returned as a
//       character token
//   other  -> reconsumed in the double escaped state, nothing returned
tok_state_script_data_double_escape_end = function () {
    var c, letter
    c = txt.charAt(cur++)
    switch (c) {
        case "\t":
        case "\u000a":
        case "\u000c":
        case ' ':
        case '/':
        case '>':
            tok_state = (temporary_buffer === 'script') ? tok_state_script_data_escaped : tok_state_script_data_double_escaped
            return new_character_token(c)
    }
    letter = null
    if (is_uc_alpha(c)) {
        letter = c.toLowerCase() // yes, really lowercase
    } else if (is_lc_alpha(c)) {
        letter = c
    }
    if (letter !== null) {
        temporary_buffer += letter
        return new_character_token(c)
    }
    // Anything else
    tok_state = tok_state_script_data_double_escaped
    cur -= 1 // reconsume
}
// 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
//
// Consumes one character of txt at cur while an attribute name is being
// read. The name being built is the first element of the first entry of
// tok_cur_tag.attrs_a. Returns the finished tag token on '>', otherwise
// null.
tok_state_attribute_name = function () {
    var c, piece, tag
    c = txt.charAt(cur++)
    if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
        tok_state = tok_state_after_attribute_name
        return null
    }
    if (c === '/') {
        tok_state = tok_state_self_closing_start_tag
        return null
    }
    if (c === '=') {
        tok_state = tok_state_before_attribute_value
        return null
    }
    if (c === '>') {
        tok_state = tok_state_data
        tag = tok_cur_tag
        tok_cur_tag = null
        return tag
    }
    if (c === '') { // EOF
        parse_error()
        tok_state = tok_state_data
        return null
    }
    // every remaining character extends the attribute name
    if (c === "\u0000") {
        parse_error()
        piece = "\ufffd"
    } else if (c === '"' || c === "'" || c === '<') {
        parse_error()
        piece = c
    } else if (is_uc_alpha(c)) {
        piece = c.toLowerCase()
    } else {
        piece = c
    }
    tok_cur_tag.attrs_a[0][0] += piece
    return null
}
// 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
//
// Consumes one character of txt at cur after an attribute's '='. The value
// being built is the second element of the first entry of
// tok_cur_tag.attrs_a. Returns the finished tag token on '>', otherwise
// null.
//
// Every case the spec marks as a parse error calls parse_error(): NUL,
// '>' (missing value), EOF, and '<', '=' or '`' starting an unquoted
// value.
tok_state_before_attribute_value = function () {
    var c, tmp
    switch (c = txt.charAt(cur++)) {
        case "\t":
        case "\n":
        case "\u000c":
        case ' ':
            return null
        case '"':
            tok_state = tok_state_attribute_value_double_quoted
            break
        case '&':
            // let the unquoted state handle the character reference
            tok_state = tok_state_attribute_value_unquoted
            cur -= 1
            break
        case "'":
            tok_state = tok_state_attribute_value_single_quoted
            break
        case "\u0000":
            parse_error()
            tok_cur_tag.attrs_a[0][1] += "\ufffd"
            tok_state = tok_state_attribute_value_unquoted
            break
        case '>':
            parse_error()
            tok_state = tok_state_data
            tmp = tok_cur_tag
            tok_cur_tag = null
            return tmp
        case '': // EOF
            parse_error()
            tok_state = tok_state_data
            break
        case '<':
        case '=':
        case '`':
            parse_error()
            // fall through: otherwise treated like any other character
        default:
            tok_cur_tag.attrs_a[0][1] += c
            tok_state = tok_state_attribute_value_unquoted
    }
    return null
}
// EOF + parse_error() + tok_state = tok_state_data + break + default: + tok_cur_tag.attrs_a[0][1] += c + } + return null + } + + // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state + tok_state_attribute_value_single_quoted = function () { + var c + switch (c = txt.charAt(cur++)) { + case "'": + tok_state = tok_state_after_attribute_value_quoted + break + case '&': + tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true) + break + case "\u0000": + // Parse error + tok_cur_tag.attrs_a[0][1] += "\ufffd" + break + case '': // EOF + parse_error() + tok_state = tok_state_data + break + default: + tok_cur_tag.attrs_a[0][1] += c + } + return null + } + + // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state + tok_state_attribute_value_unquoted = function () { + var c, tmp + switch (c = txt.charAt(cur++)) { + case "\t": + case "\n": + case "\u000c": + case ' ': + tok_state = tok_state_before_attribute_name + break + case '&': + tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true) + break + case '>': + tok_state = tok_state_data + tmp = tok_cur_tag + tok_cur_tag = null + return tmp + break + case "\u0000": + tok_cur_tag.attrs_a[0][1] += "\ufffd" + break + case '': // EOF + parse_error() + tok_state = tok_state_data + break + default: + // Parse Error if ', <, = or ` (backtick) + tok_cur_tag.attrs_a[0][1] += c + } + return null + } + + // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state + tok_state_after_attribute_value_quoted = function () { + var c, tmp + switch (c = txt.charAt(cur++)) { + case "\t": + case "\n": + case "\u000c": + case ' ': + tok_state = tok_state_before_attribute_name + break + case '/': + tok_state = tok_state_self_closing_start_tag + break + case '>': + tok_state = tok_state_data + tmp = tok_cur_tag + tok_cur_tag = null + return tmp + break + case '': // EOF + parse_error() + tok_state = tok_state_data + break + default: + // 
Parse Error + tok_state = tok_state_before_attribute_name + cur -= 1 // we didn't handle that char + } + return null + } + + // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state + tok_state_self_closing_start_tag = function () { + var c + c = txt.charAt(cur++) + if (c === '>') { + tok_cur_tag.flag('self-closing', true) + tok_state = tok_state_data + return tok_cur_tag + } + if (c === '') { + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return + } + // Anything else + parse_error() + tok_state = tok_state_before_attribute_name + cur -= 1 // reconsume + } + + // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state + // WARNING: put a comment token in tok_cur_tag before setting this state + tok_state_bogus_comment = function () { + var next_gt, val + next_gt = txt.indexOf('>', cur) + if (next_gt === -1) { + val = txt.substr(cur) + cur = txt.length + } else { + val = txt.substr(cur, next_gt - cur) + cur = next_gt + 1 + } + val = val.replace(new RegExp("\u0000", 'g'), "\ufffd") + tok_cur_tag.text += val + tok_state = tok_state_data + return tok_cur_tag + } + + // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state + tok_state_markup_declaration_open = function () { + var acn + if (txt.substr(cur, 2) === '--') { + cur += 2 + tok_cur_tag = new_comment_token('') + tok_state = tok_state_comment_start + return + } + if (txt.substr(cur, 7).toLowerCase() === 'doctype') { + cur += 7 + tok_state = tok_state_doctype + return + } + acn = adjusted_current_node() + if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') { + cur += 7 + tok_state = tok_state_cdata_section + return + } + // Otherwise + parse_error() + tok_cur_tag = new_comment_token('') + tok_state = tok_state_bogus_comment + } + + // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state + tok_state_comment_start = function () { + var c + switch (c = txt.charAt(cur++)) { + case '-': + tok_state = 
tok_state_comment_start_dash + break + case "\u0000": + parse_error() + tok_state = tok_state_comment + return new_character_token("\ufffd") + break + case '>': + parse_error() + tok_state = tok_state_data + return tok_cur_tag + break + case '': // EOF + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return tok_cur_tag + break + default: + tok_cur_tag.text += c + tok_state = tok_state_comment + } + return null + } + + // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state + tok_state_comment_start_dash = function () { + var c + switch (c = txt.charAt(cur++)) { + case '-': + tok_state = tok_state_comment_end + break + case "\u0000": + parse_error() + tok_cur_tag.text += "-\ufffd" + tok_state = tok_state_comment + break + case '>': + parse_error() + tok_state = tok_state_data + return tok_cur_tag + break + case '': // EOF + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return tok_cur_tag + break + default: + tok_cur_tag.text += "-" + c + tok_state = tok_state_comment + } + return null + } + + // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state + tok_state_comment = function () { + var c + switch (c = txt.charAt(cur++)) { + case '-': + tok_state = tok_state_comment_end_dash + break + case "\u0000": + parse_error() + tok_cur_tag.text += "\ufffd" + break + case '': // EOF + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return tok_cur_tag + break + default: + tok_cur_tag.text += c + } + return null + } + + // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state + tok_state_comment_end_dash = function () { + var c + switch (c = txt.charAt(cur++)) { + case '-': + tok_state = tok_state_comment_end + break + case "\u0000": + parse_error() + tok_cur_tag.text += "-\ufffd" + tok_state = tok_state_comment + break + case '': // EOF + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return tok_cur_tag + break + default: + tok_cur_tag.text += 
"-" + c + tok_state = tok_state_comment + } + return null + } + + // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state + tok_state_comment_end = function () { + var c + switch (c = txt.charAt(cur++)) { + case '>': + tok_state = tok_state_data + return tok_cur_tag + break + case "\u0000": + parse_error() + tok_cur_tag.text += "--\ufffd" + tok_state = tok_state_comment + break + case '!': + parse_error() + tok_state = tok_state_comment_end_bang + break + case '-': + parse_error() + tok_cur_tag.text += '-' + break + case '': // EOF + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return tok_cur_tag + break + default: + parse_error() + tok_cur_tag.text += "--" + c + tok_state = tok_state_comment + } + return null + } + + // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state + tok_state_comment_end_bang = function () { + var c + switch (c = txt.charAt(cur++)) { + case '-': + tok_cur_tag.text += "--!" + c + tok_state = tok_state_comment_end_dash + break + case '>': + tok_state = tok_state_data + return tok_cur_tag + break + case "\u0000": + parse_error() + tok_cur_tag.text += "--!\ufffd" + tok_state = tok_state_comment + break + case '': // EOF + parse_error() + tok_state = tok_state_data + cur -= 1 // reconsume + return tok_cur_tag + break + default: + tok_cur_tag.text += "--!" 
+ c + tok_state = tok_state_comment + } + return null + } + + // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state + tok_state_doctype = function () { + var c, el + switch (c = txt.charAt(cur++)) { + case "\t": + case "\u000a": + case "\u000c": + case ' ': + tok_state = tok_state_before_doctype_name + break + case '': // EOF + parse_error() + tok_state = tok_state_data + el = new_doctype_token('') + el.flag('force-quirks', true) + cur -= 1 // reconsume + return el + break + default: + parse_error() + tok_state = tok_state_before_doctype_name + cur -= 1 // reconsume + } + return null + } + + // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state + tok_state_before_doctype_name = function () { + var c, el + c = txt.charAt(cur++) + if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') { + return + } + if (is_uc_alpha(c)) { + tok_cur_tag = new_doctype_token(c.toLowerCase()) + tok_state = tok_state_doctype_name + return + } + if (c === "\u0000") { + parse_error() + tok_cur_tag = new_doctype_token("\ufffd") + tok_state = tok_state_doctype_name + return + } + if (c === '>') { + parse_error() + el = new_doctype_token('') + el.flag('force-quirks', true) + tok_state = tok_state_data + return el + } + if (c === '') { // EOF + parse_error() + tok_state = tok_state_data + el = new_doctype_token('') + el.flag('force-quirks', true) + cur -= 1 // reconsume + return el + } + // Anything else + tok_cur_tag = new_doctype_token(c) + tok_state = tok_state_doctype_name + return null + } + + // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state + tok_state_doctype_name = function () { + var c + c = txt.charAt(cur++) + if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') { + tok_state = tok_state_after_doctype_name + return + } + if (c === '>') { + tok_state = tok_state_data + return tok_cur_tag + } + if (is_uc_alpha(c)) { + tok_cur_tag.name += c.toLowerCase() + return + } + if (c === "\u0000") { + parse_error() + 
tok_cur_tag.name += "\ufffd" + return + } + if (c === '') { // EOF + parse_error() + tok_state = tok_state_data + tok_cur_tag.flag('force-quirks', true) + cur -= 1 // reconsume + return tok_cur_tag + } + // Anything else + tok_cur_tag.name += c + return null + } + + // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state + tok_state_after_doctype_name = function () { + var c + c = txt.charAt(cur++) + if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') { + return + } + if (c === '>') { + tok_state = tok_state_data + return tok_cur_tag + } + if (c === '') { // EOF + parse_error() + tok_state = tok_state_data + tok_cur_tag.flag('force-quirks', true) + cur -= 1 // reconsume + return tok_cur_tag + } + // Anything else + if (txt.substr(cur - 1, 6).toLowerCase() === 'public') { + cur += 5 + tok_state = tok_state_after_doctype_public_keyword + return + } + if (txt.substr(cur - 1, 6).toLowerCase() === 'system') { + cur += 5 + tok_state = tok_state_after_doctype_system_keyword + return + } + parse_error() + tok_cur_tag.flag('force-quirks', true) + tok_state = tok_state_bogus_doctype + return null + } + + // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state + tok_state_after_doctype_public_keyword = function () { + var c + c = txt.charAt(cur++) + if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') { + tok_state = tok_state_before_doctype_public_identifier + return + } + if (c === '"') { + parse_error() + tok_cur_tag.public_identifier = '' + tok_state = tok_state_doctype_public_identifier_double_quoted + return + } + if (c === "'") { + parse_error() + tok_cur_tag.public_identifier = '' + tok_state = tok_state_doctype_public_identifier_single_quoted + return + } + if (c === '>') { + parse_error() + tok_cur_tag.flag('force-quirks', true) + tok_state = tok_state_data + return tok_cur_tag + } + if (c === '') { // EOF + parse_error() + tok_state = tok_state_data + tok_cur_tag.flag('force-quirks', true) + 
cur -= 1 // reconsume + return tok_cur_tag + } + // Anything else + parse_error() + tok_cur_tag.flag('force-quirks', true) + tok_state = tok_state_bogus_doctype + return null + } + + // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state + tok_state_before_doctype_public_identifier = function () { + var c + c = txt.charAt(cur++) + if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') { + return + } + if (c === '"') { + parse_error() + tok_cur_tag.public_identifier = '' + tok_state = tok_state_doctype_public_identifier_double_quoted + return + } + if (c === "'") { + parse_error() + tok_cur_tag.public_identifier = '' + tok_state = tok_state_doctype_public_identifier_single_quoted + return + } + if (c === '>') { + parse_error() + tok_cur_tag.flag('force-quirks', true) + tok_state = tok_state_data + return tok_cur_tag + } + if (c === '') { // EOF + parse_error() + tok_state = tok_state_data + tok_cur_tag.flag('force-quirks', true) + cur -= 1 // reconsume + return tok_cur_tag + } + // Anything else + parse_error() + tok_cur_tag.flag('force-quirks', true) + tok_state = tok_state_bogus_doctype + return null + } + + + // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state + tok_state_doctype_public_identifier_double_quoted = function () { + var c + c = txt.charAt(cur++) + if (c === '"') { + tok_state = tok_state_after_doctype_public_identifier + return + } + if (c === "\u0000") { + parse_error() + tok_cur_tag.public_identifier += "\ufffd" + return + } + if (c === '>') { + parse_error() + tok_cur_tag.flag('force-quirks', true) + tok_state = tok_state_data + return tok_cur_tag + } + if (c === '') { // EOF + parse_error() + tok_state = tok_state_data + tok_cur_tag.flag('force-quirks', true) + cur -= 1 // reconsume + return tok_cur_tag + } + // Anything else + tok_cur_tag.public_identifier += c + return null + } + + // 8.2.4.59 
http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
	// Accumulates the public identifier until the closing "'".
	// Returns the doctype token (force-quirks) on '>' or EOF, a falsy
	// value otherwise.
	tok_state_doctype_public_identifier_single_quoted = function () {
		var c
		c = txt.charAt(cur++)
		if (c === "'") {
			tok_state = tok_state_after_doctype_public_identifier
			return
		}
		if (c === "\u0000") {
			parse_error()
			tok_cur_tag.public_identifier += "\ufffd"
			return
		}
		if (c === '>') {
			parse_error()
			tok_cur_tag.flag('force-quirks', true)
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		tok_cur_tag.public_identifier += c
		return null
	}

	// 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
	// Character right after the public identifier's closing quote. A quote
	// here (no whitespace in between) is a parse error but still starts the
	// system identifier. Returns the doctype token on '>' or EOF
	// (force-quirks only on EOF).
	tok_state_after_doctype_public_identifier = function () {
		var c
		c = txt.charAt(cur++)
		if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
			tok_state = tok_state_between_doctype_public_and_system_identifiers
			return
		}
		if (c === '>') {
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '"') {
			parse_error()
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_double_quoted
			return
		}
		if (c === "'") {
			parse_error()
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_single_quoted
			return
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		parse_error()
		tok_cur_tag.flag('force-quirks', true)
		tok_state = tok_state_bogus_doctype
		return null
	}

	// 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
	// Skips whitespace between the public and system identifiers.
	// Returns the doctype token on '>' or EOF (force-quirks only on EOF).
	tok_state_between_doctype_public_and_system_identifiers = function () {
		var c
		c = txt.charAt(cur++)
		if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
			return
		}
		if (c === '>') {
			tok_state = tok_state_data
			return tok_cur_tag
		}
		// a quote after whitespace is the well-formed case: no parse error
		if (c === '"') {
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_double_quoted
			return
		}
		if (c === "'") {
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_single_quoted
			return
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		parse_error()
		tok_cur_tag.flag('force-quirks', true)
		tok_state = tok_state_bogus_doctype
		return null
	}

	// 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
	// Character right after the SYSTEM keyword. A quote here (no whitespace
	// in between) is a parse error but still starts the system identifier.
	// Returns the doctype token (force-quirks) on '>' or EOF.
	tok_state_after_doctype_system_keyword = function () {
		var c
		c = txt.charAt(cur++)
		if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
			tok_state = tok_state_before_doctype_system_identifier
			return
		}
		if (c === '"') {
			parse_error()
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_double_quoted
			return
		}
		if (c === "'") {
			parse_error()
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_single_quoted
			return
		}
		if (c === '>') {
			parse_error()
			tok_cur_tag.flag('force-quirks', true)
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		parse_error()
		tok_cur_tag.flag('force-quirks', true)
		tok_state = tok_state_bogus_doctype
		return null
	}

	// 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
	// Skips whitespace between the SYSTEM keyword and the identifier's
	// opening quote. Returns the doctype token (force-quirks) on '>' or EOF.
	tok_state_before_doctype_system_identifier = function () {
		var c
		c = txt.charAt(cur++)
		if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
			return
		}
		if (c === '"') {
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_double_quoted
			return
		}
		if (c === "'") {
			tok_cur_tag.system_identifier = ''
			tok_state = tok_state_doctype_system_identifier_single_quoted
			return
		}
		if (c === '>') {
			parse_error()
			tok_cur_tag.flag('force-quirks', true)
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		parse_error()
		tok_cur_tag.flag('force-quirks', true)
		tok_state = tok_state_bogus_doctype
		return null
	}

	// 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
	// Accumulates the system identifier until the closing '"'.
	// Returns the doctype token (force-quirks) on '>' or EOF.
	tok_state_doctype_system_identifier_double_quoted = function () {
		var c
		c = txt.charAt(cur++)
		if (c === '"') {
			tok_state = tok_state_after_doctype_system_identifier
			return
		}
		if (c === "\u0000") {
			parse_error()
			tok_cur_tag.system_identifier += "\ufffd"
			return
		}
		if (c === '>') {
			parse_error()
			tok_cur_tag.flag('force-quirks', true)
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		tok_cur_tag.system_identifier += c
		return null
	}

	// 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
	// Accumulates the system identifier until the closing "'".
	// Returns the doctype token (force-quirks) on '>' or EOF.
	tok_state_doctype_system_identifier_single_quoted = function () {
		var c
		c = txt.charAt(cur++)
		if (c === "'") {
			tok_state = tok_state_after_doctype_system_identifier
			return
		}
		if (c === "\u0000") {
			parse_error()
			tok_cur_tag.system_identifier += "\ufffd"
			return
		}
		if (c === '>') {
			parse_error()
			tok_cur_tag.flag('force-quirks', true)
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		tok_cur_tag.system_identifier += c
		return null
	}

	// 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
	// Skips whitespace after the system identifier. Returns the doctype
	// token on '>' or EOF (force-quirks only on EOF).
	tok_state_after_doctype_system_identifier = function () {
		var c
		c = txt.charAt(cur++)
		if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
			return
		}
		if (c === '>') {
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '') { // EOF
			parse_error()
			tok_state = tok_state_data
			tok_cur_tag.flag('force-quirks', true)
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		parse_error()
		// do _not_ tok_cur_tag.flag 'force-quirks', true
		tok_state = tok_state_bogus_doctype
		return null
	}

	// 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
	// Discards characters until '>' or EOF, then returns the doctype token
	// as it stands.
	tok_state_bogus_doctype = function () {
		var c
		c = txt.charAt(cur++)
		if (c === '>') {
			tok_state = tok_state_data
			return tok_cur_tag
		}
		if (c === '') { // EOF
			tok_state = tok_state_data
			cur -= 1 // reconsume
			return tok_cur_tag
		}
		// Anything else
		return null
	}

	// 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
	// Consumes everything up to and including the next "]]>" (or to the end
	// of txt if there is none). Returns one character token holding that
	// text with NULs replaced by U+FFFD, or null if the section was empty.
	tok_state_cdata_section = function () {
		var next_gt, val
		tok_state = tok_state_data
		next_gt = txt.indexOf(']]>', cur)
		if (next_gt === -1) {
			val = txt.substr(cur)
			cur = txt.length
		} else {
			val = txt.substr(cur, next_gt - cur)
			cur = next_gt + 3 // skip the "]]>" as well
		}
		val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
		if (val.length > 0) {
			return new_character_token(val) // fixfull split
		}
		return null
	}

	// 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
	// Don't set this as a state, just call it
	// returns a string (NOT a text node)
	//
	// Call with cur pointing just past the '&'. If a character reference is
	// recognized, cur is advanced past it and the decoded text is returned;
	// otherwise cur is left alone and '&' is returned.
	//
	// allowed_char: optional extra character that, when it directly follows
	//               the '&', means "not a character reference"
	// in_attr:      optional (default false); true when called from an
	//               attribute value, which makes unterminated legacy
	//               references followed by '=' or an alphanumeric stay literal
	parse_character_reference = function (allowed_char, in_attr) {
		var base, c, charset, code_point, decoded, i, max, start
		if (allowed_char == null) {
			allowed_char = null
		}
		if (in_attr == null) {
			in_attr = false
		}
		if (cur >= txt.length) {
			return '&'
		}
		switch (c = txt.charAt(cur)) {
			case "\t":
			case "\n":
			case "\u000c":
			case ' ':
			case '<':
			case '&':
			case '':
			case allowed_char:
				// explicitly not a parse error
				return '&'
			case ';':
				// there has to be "one or more" alnums between & and ; to be a parse error
				return '&'
			case '#':
				if (cur + 1 >= txt.length) {
					return '&'
				}
				if (txt.charAt(cur + 1).toLowerCase() === 'x') {
					base = 16
					charset = hex_chars
					start = cur + 2
				} else {
					charset = digits
					start = cur + 1
					base = 10
				}
				// i = number of digits found
				i = 0
				while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
					i += 1
				}
				if (i === 0) {
					return '&'
				}
				cur = start + i
				if (txt.charAt(start + i) === ';') {
					cur += 1
				} else {
					parse_error()
				}
				code_point = txt.substr(start, i)
				while (code_point.charAt(0) === '0' && code_point.length > 1) {
					code_point = code_point.substr(1)
				}
				code_point = parseInt(code_point, base)
				if (unicode_fixes[code_point] != null) {
					parse_error()
					return unicode_fixes[code_point]
				}
				if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
					// surrogates and values beyond Unicode
					parse_error()
					return "\ufffd"
				}
				// Control characters and noncharacters are a parse error but
				// are still returned. code_point is <= 0x10ffff here, so the
				// mask test matches exactly U+FFFE and U+FFFF of each of the
				// 17 planes (0xFFFE, 0xFFFF, 0x1FFFE, ... 0x10FFFF).
				if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || (code_point & 0xFFFE) === 0xFFFE) {
					parse_error()
				}
				return from_code_point(code_point)
			default:
				// i = number of leading alphanumerics (capped at 31).
				// The length check matters because charAt() returns '' past
				// the end of txt, and ''.indexOf-style lookups on a string
				// report '' as found.
				for (i = 0; i < 31; ++i) {
					if (cur + i >= txt.length || alnum.indexOf(txt.charAt(cur + i)) === -1) {
						break
					}
				}
				if (i === 0) {
					// exit early, because parse_error() below needs at least one alnum
					return '&'
				}
				if (txt.charAt(cur + i) === ';') {
					decoded = decode_named_char_ref(txt.substr(cur, i))
					i += 1 // scan past the ';' (after, so we don't pass it to decode)
					if (decoded != null) {
						cur += i
						return decoded
					}
					// else FALL THROUGH (check for match without last char(s) or ";")
				}
				// no ';' terminator (only legacy char refs)
				max = i
				for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
					c = legacy_char_refs[txt.substr(cur, i)]
					if (c != null) {
						if (in_attr) {
							if (txt.charAt(cur + i) === '=') {
								// "because some legacy user agents will
								// misinterpret the markup in those cases"
								parse_error()
								return '&'
							}
							if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
								// this makes attributes forgiving about url args
								return '&'
							}
						}
						// ok, and besides the weird exceptions for attributes...
						// return the matching char
						cur += i // consume entity chars
						parse_error() // because no terminating ";"
						return c
					}
				}
				parse_error()
				return '&'
		}
		// never reached
	}

	// Runs the tokenizer for one token and keeps it consumed only if it is a
	// text token holding a single newline; otherwise rewinds cur so the
	// token is produced again later. (tok_state is not rewound.)
	eat_next_token_if_newline = function () {
		var old_cur, t
		old_cur = cur
		t = null
		while (t == null) {
			t = tok_state()
		}
		if (t.type === TYPE_TEXT) {
			// definition of a newline depends on whether it was a character ref or not
			if (cur - old_cur === 1) {
				// not a character reference
				if (t.text === "\u000d" || t.text === "\u000a") {
					return
				}
			} else {
				if (t.text === "\u000a") {
					return
				}
			}
		}
		// not a "newline"
		cur = old_cur
	}

	// tree constructor initialization
	// see comments on TYPE_TAG/etc for the structure of this data
	txt = args_html
	cur = 0
	doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
	doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
	fragment_root = null // fragment parsing algorithm returns children of this
	open_els = []
	afe = [] // active formatting elements
	template_ins_modes = []
	ins_mode = ins_mode_initial
	original_ins_mode = ins_mode // TODO check spec
	flag_scripting = args.scripting != null ? args.scripting : true // TODO might need an extra flag to get