+// todo remove refs and lens, js, ls
+// run test suite!
+
+// Copyright 2015 Jason Woofenden
+// This file implements an HTML5 parser
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option) any
+// later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+// This file implements a thorough parser for html5, meant to be used by a
+// WYSIWYG editor.
+
+// The implementation is a pretty direct implementation of the parsing algorithm
+// described here:
+//
+// http://www.w3.org/TR/html5/syntax.html
+//
+// except for some places marked "WHATWG" that are implemented as described here:
+//
+// https://html.spec.whatwg.org/multipage/syntax.html
+//
+// This code passes all of the tests in the .dat files at:
+//
+// https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
+
+
+//////////////////////////
+// how to use this code //
+//////////////////////////
+//
+// See README.md for how to run this file in the browser or in node.js.
+//
+// This file exports a single useful function: parse (implemented below as
+// parse_html), and some constants (see the bottom of this file for those.)
+//
+// Call it like this:
+//
+// peach_parser.parse("<p><b>hi</p>")
+//
+// Or, if you don't want <html><head><body>/etc, do this:
+//
+// peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
+//
+// return value is an array of Nodes, see "class Node" below.
+
+// This code is a work in progress; e.g. try searching this file for "fixfull",
+// "TODO" and "FIXME".
+
+
+// Notes: stacks/lists
+//
+// Jason was frequently confused by the terminology used to refer to different
+// parts of the stacks and lists in the spec, so he made this chart to help keep
+// his head straight:
+//
+// stacks grow downward (current element is index=0)
+//
+// example: open_els = [a, b, c, d, e, f, g]
+//
+// "grows downwards" means it's visualized like this: (index: el "names")
+//
+// 6: g "start of the list", "topmost", "first"
+// 5: f
+// 4: e "previous" (to d), "above", "before"
+// 3: d (previous/next are relative to this element)
+// 2: c "next", "after", "lower", "below"
+// 1: b
+// 0: a "end of the list", "current node", "bottommost", "last"
+
+if ((typeof module) !== 'undefined' && (module.exports != null)) {
+ context = 'module'
+ exports = module.exports
+} else {
+ context = 'browser'
+ window.peach_parser = {}
+ exports = window.peach_parser
+}
+
+from_code_point = function (x) {
+ if (String.fromCodePoint != null) {
+ return String.fromCodePoint(x)
+ } else {
+ if (x <= 0xffff) {
+ return String.fromCharCode(x)
+ }
+ x -= 0x10000
+ return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
+ }
+}
+
+// Each node is an object of the Node class. Here are the Node types:
+TYPE_TAG = 'tag' // name, {attributes}, [children]
+TYPE_TEXT = 'text' // "text"
+TYPE_COMMENT = 'comment' // "text" (Node.text holds the contents for text/comment nodes)
+TYPE_DOCTYPE = 'doctype' // name
+// the following types are emitted by the tokenizer, but shouldn't end up in the tree:
+TYPE_START_TAG = 4 // name, [attributes ([key,value]...) in reverse order], [children]
+TYPE_END_TAG = 5 // name
+TYPE_EOF = 6 // created without any args
+TYPE_AFE_MARKER = 7 // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
+TYPE_AAA_BOOKMARK = 8 // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
+
+// namespace constants (stored in Node.namespace; NS_HTML is the default there)
+NS_HTML = 'html'
+NS_MATHML = 'mathml'
+NS_SVG = 'svg'
+
+// quirks mode constants
+QUIRKS_NO = 'no'
+QUIRKS_LIMITED = 'limited'
+QUIRKS_YES = 'yes'
+
+// queue up debug logs, so eg they can be shown only for tests that fail
+g_debug_log = []
+debug_log_reset = function () {
+ g_debug_log = []
+}
+debug_log = function (str) {
+ g_debug_log.push(str)
+}
+debug_log_each = function (cb) {
+ var i
+ for (i = 0; i < g_debug_log.length; ++i) {
+ cb(g_debug_log[i])
+ }
+}
+
+prev_node_id = 0
+function Node (type, args) {
+ if (args == null) {
+ args = {}
+ }
+ this.type = type // one of the TYPE_* constants above
+ this.name = args.name != null ? args.name : '' // tag name
+ this.text = args.text != null ? args.text : '' // contents for text/comment nodes
+ this.attrs = args.attrs != null ? args.attrs : {}
+ this.attrs_a = args.attr_k != null ? args.attr_k : [] // attrs in progress, TYPE_START_TAG only
+ this.children = args.children != null ? args.children : []
+ this.namespace = args.namespace != null ? args.namespace : NS_HTML
+ this.parent = args.parent != null ? args.parent : null
+ this.token = args.token != null ? args.token : null
+ this.flags = args.flags != null ? args.flags : {}
+ if (args.id != null) {
+ this.id = args.id + "+"
+ } else {
+ this.id = "" + (++prev_node_id)
+ }
+}
+
+Node.prototype.acknowledge_self_closing = function () {
+ if (this.token != null) {
+ this.token.flag('did_self_close', true)
+ } else {
+ this.flag('did_self_close', true)
+ }
+}
+
+Node.prototype.flag = function (key, value) {
+ if (value != null) {
+ this.flags[key] = value
+ } else {
+ return this.flags[key]
+ }
+}
+
+// helpers: (only take args that are normally known when parser creates nodes)
+new_open_tag = function (name) {
+ return new Node(TYPE_START_TAG, {name: name})
+}
+new_end_tag = function (name) {
+ return new Node(TYPE_END_TAG, {name: name})
+}
+new_element = function (name) {
+ return new Node(TYPE_TAG, {name: name})
+}
+new_text_node = function (txt) {
+ return new Node(TYPE_TEXT, {text: txt})
+}
+new_character_token = new_text_node
+new_comment_token = function (txt) {
+ return new Node(TYPE_COMMENT, {text: txt})
+}
+new_doctype_token = function (name) {
+ return new Node(TYPE_DOCTYPE, {name: name})
+}
+new_eof_token = function () {
+ return new Node(TYPE_EOF)
+}
+new_afe_marker = function () {
+ return new Node(TYPE_AFE_MARKER)
+}
+new_aaa_bookmark = function () {
+ return new Node(TYPE_AAA_BOOKMARK)
+}
+
+lc_alpha = "abcdefghijklmnopqrstuvwxyz"
+uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+digits = "0123456789"
+alnum = lc_alpha + uc_alpha + digits
+hex_chars = digits + "abcdefABCDEF"
+
+is_uc_alpha = function (str) {
+ return str.length === 1 && uc_alpha.indexOf(str) > -1
+}
+is_lc_alpha = function (str) {
+ return str.length === 1 && lc_alpha.indexOf(str) > -1
+}
+
+// some SVG elements have dashes in them
+tag_name_chars = alnum + "-"
+
+// http://www.w3.org/TR/html5/infrastructure.html#space-character
+space_chars = "\u0009\u000a\u000c\u000d\u0020"
+is_space = function (txt) {
+ return txt.length === 1 && space_chars.indexOf(txt) > -1
+}
+is_space_tok = function (t) {
+ return t.type === TYPE_TEXT && t.text.length === 1 && space_chars.indexOf(t.text) > -1
+}
+
+is_input_hidden_tok = function (t) {
+ var i, a
+ if (t.type !== TYPE_START_TAG) {
+ return false
+ }
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (a[0] === 'type') {
+ if (a[1].toLowerCase() === 'hidden') {
+ return true
+ }
+ return false
+ }
+ }
+ return false
+}
+
+// https://en.wikipedia.org/wiki/Whitespace_character#Unicode
+whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
+
+unicode_fixes = {}
+unicode_fixes[0x00] = "\uFFFD"
+unicode_fixes[0x80] = "\u20AC"
+unicode_fixes[0x82] = "\u201A"
+unicode_fixes[0x83] = "\u0192"
+unicode_fixes[0x84] = "\u201E"
+unicode_fixes[0x85] = "\u2026"
+unicode_fixes[0x86] = "\u2020"
+unicode_fixes[0x87] = "\u2021"
+unicode_fixes[0x88] = "\u02C6"
+unicode_fixes[0x89] = "\u2030"
+unicode_fixes[0x8A] = "\u0160"
+unicode_fixes[0x8B] = "\u2039"
+unicode_fixes[0x8C] = "\u0152"
+unicode_fixes[0x8E] = "\u017D"
+unicode_fixes[0x91] = "\u2018"
+unicode_fixes[0x92] = "\u2019"
+unicode_fixes[0x93] = "\u201C"
+unicode_fixes[0x94] = "\u201D"
+unicode_fixes[0x95] = "\u2022"
+unicode_fixes[0x96] = "\u2013"
+unicode_fixes[0x97] = "\u2014"
+unicode_fixes[0x98] = "\u02DC"
+unicode_fixes[0x99] = "\u2122"
+unicode_fixes[0x9A] = "\u0161"
+unicode_fixes[0x9B] = "\u203A"
+unicode_fixes[0x9C] = "\u0153"
+unicode_fixes[0x9E] = "\u017E"
+unicode_fixes[0x9F] = "\u0178"
+
+quirks_yes_pi_prefixes = [
+ "+//silmaril//dtd html pro v0r11 19970101//",
+ "-//as//dtd html 3.0 aswedit + extensions//",
+ "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+ "-//ietf//dtd html 2.0 level 1//",
+ "-//ietf//dtd html 2.0 level 2//",
+ "-//ietf//dtd html 2.0 strict level 1//",
+ "-//ietf//dtd html 2.0 strict level 2//",
+ "-//ietf//dtd html 2.0 strict//",
+ "-//ietf//dtd html 2.0//",
+ "-//ietf//dtd html 2.1e//",
+ "-//ietf//dtd html 3.0//",
+ "-//ietf//dtd html 3.2 final//",
+ "-//ietf//dtd html 3.2//",
+ "-//ietf//dtd html 3//",
+ "-//ietf//dtd html level 0//",
+ "-//ietf//dtd html level 1//",
+ "-//ietf//dtd html level 2//",
+ "-//ietf//dtd html level 3//",
+ "-//ietf//dtd html strict level 0//",
+ "-//ietf//dtd html strict level 1//",
+ "-//ietf//dtd html strict level 2//",
+ "-//ietf//dtd html strict level 3//",
+ "-//ietf//dtd html strict//",
+ "-//ietf//dtd html//",
+ "-//metrius//dtd metrius presentational//",
+ "-//microsoft//dtd internet explorer 2.0 html strict//",
+ "-//microsoft//dtd internet explorer 2.0 html//",
+ "-//microsoft//dtd internet explorer 2.0 tables//",
+ "-//microsoft//dtd internet explorer 3.0 html strict//",
+ "-//microsoft//dtd internet explorer 3.0 html//",
+ "-//microsoft//dtd internet explorer 3.0 tables//",
+ "-//netscape comm. corp.//dtd html//",
+ "-//netscape comm. corp.//dtd strict html//",
+ "-//o'reilly and associates//dtd html 2.0//",
+ "-//o'reilly and associates//dtd html extended 1.0//",
+ "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+ "-//sq//dtd html 2.0 hotmetal + extensions//",
+ "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+ "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+ "-//spyglass//dtd html 2.0 extended//",
+ "-//sun microsystems corp.//dtd hotjava html//",
+ "-//sun microsystems corp.//dtd hotjava strict html//",
+ "-//w3c//dtd html 3 1995-03-24//",
+ "-//w3c//dtd html 3.2 draft//",
+ "-//w3c//dtd html 3.2 final//",
+ "-//w3c//dtd html 3.2//",
+ "-//w3c//dtd html 3.2s draft//",
+ "-//w3c//dtd html 4.0 frameset//",
+ "-//w3c//dtd html 4.0 transitional//",
+ "-//w3c//dtd html experimental 19960712//",
+ "-//w3c//dtd html experimental 970421//",
+ "-//w3c//dtd w3 html//",
+ "-//w3o//dtd w3 html 3.0//",
+ "-//webtechs//dtd mozilla html 2.0//",
+ "-//webtechs//dtd mozilla html//",
+]
+
+// These are the character references that don't need a terminating semicolon
+// min length: 2, max: 6, none are a prefix of any other.
+legacy_char_refs = {
+ Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
+ aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
+ aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
+ Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
+ curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
+ ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
+ euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
+ Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
+ igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
+ lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
+ Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
+ Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
+ Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
+ pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
+ shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
+ times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
+ ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
+ yen: '¥', yuml: 'ÿ'
+}
+
+//void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
+//raw_text_elements = ['script', 'style']
+//escapable_raw_text_elements = ['textarea', 'title']
+// http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
+svg_elements = [
+ 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
+ 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
+ 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
+ 'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
+ 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
+ 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
+ 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
+ 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
+ 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
+ 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
+ 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
+ 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
+ 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
+ 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
+ 'view', 'vkern'
+]
+
+// http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
+mathml_elements = [
+ 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
+ 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
+ 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
+ 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
+ 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
+ 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
+ 'determinant', 'diff', 'divergence', 'divide', 'domain',
+ 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
+ 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
+ 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
+ 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
+ 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
+ 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
+ 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
+ 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
+ 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
+ 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
+ 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
+ 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
+ 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
+ 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
+ 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
+ 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
+ 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
+ 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
+ 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
+ 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
+ 'vectorproduct', 'xor'
+]
+// foreign_elements = [svg_elements..., mathml_elements...]
+//normal_elements = All other allowed HTML elements are normal elements.
+
+special_elements = {
+ // HTML:
+ address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
+ aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
+ blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
+ caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML, dd: NS_HTML,
+ details: NS_HTML, dir: NS_HTML, div: NS_HTML, dl: NS_HTML, dt: NS_HTML,
+ embed: NS_HTML, fieldset: NS_HTML, figcaption: NS_HTML, figure: NS_HTML,
+ footer: NS_HTML, form: NS_HTML, frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML,
+ h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML, head: NS_HTML,
+ header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML, html: NS_HTML, iframe: NS_HTML,
+ img: NS_HTML, input: NS_HTML, isindex: NS_HTML, li: NS_HTML, link: NS_HTML,
+ listing: NS_HTML, main: NS_HTML, marquee: NS_HTML,
+
+ menu: NS_HTML,menuitem: NS_HTML, // WHATWG adds these
+
+ meta: NS_HTML, nav: NS_HTML, noembed: NS_HTML, noframes: NS_HTML,
+ noscript: NS_HTML, object: NS_HTML, ol: NS_HTML, p: NS_HTML, param: NS_HTML,
+ plaintext: NS_HTML, pre: NS_HTML, script: NS_HTML, section: NS_HTML,
+ select: NS_HTML, source: NS_HTML, style: NS_HTML, summary: NS_HTML,
+ table: NS_HTML, tbody: NS_HTML, td: NS_HTML, template: NS_HTML,
+ textarea: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
+ tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML, xmp: NS_HTML,
+
+ // MathML:
+ mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
+ 'annotation-xml': NS_MATHML,
+
+ // SVG:
+ foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
+}
+
+formatting_elements = {
+ a: true, b: true, big: true, code: true, em: true, font: true, i: true,
+ nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
+ u: true
+}
+
+mathml_text_integration = {
+ mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
+}
+is_mathml_text_integration_point = function (el) {
+ return mathml_text_integration[el.name] === el.namespace
+}
+is_html_integration = function (el) { // DON'T PASS A TOKEN
+ if (el.namespace === NS_MATHML) {
+ if (el.name === 'annotation-xml') {
+ if (el.attrs.encoding != null) {
+ if (el.attrs.encoding.toLowerCase() === 'text/html') {
+ return true
+ }
+ if (el.attrs.encoding.toLowerCase() === 'application/xhtml+xml') {
+ return true
+ }
+ }
+ }
+ return false
+ }
+ if (el.namespace === NS_SVG) {
+ if (el.name === 'foreignObject' || el.name === 'desc' || el.name === 'title') {
+ return true
+ }
+ }
+ return false
+}
+
+h_tags = {
+ h1: NS_HTML, h2: NS_HTML, h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML
+}
+
+foster_parenting_targets = {
+ table: NS_HTML,
+ tbody: NS_HTML,
+ tfoot: NS_HTML,
+ thead: NS_HTML,
+ tr: NS_HTML
+}
+
+end_tag_implied = {
+ dd: NS_HTML,
+ dt: NS_HTML,
+ li: NS_HTML,
+ option: NS_HTML,
+ optgroup: NS_HTML,
+ p: NS_HTML,
+ rb: NS_HTML,
+ rp: NS_HTML,
+ rt: NS_HTML,
+ rtc: NS_HTML
+}
+
+el_is_special = function (e) {
+ return special_elements[e.name] === e.namespace
+}
+
+adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
+el_is_special_not_adp = function (el) {
+ return special_elements[el.name] === el.namespace && adp_els[el.name] !== el.namespace
+}
+
+svg_name_fixes = {
+ altglyph: 'altGlyph',
+ altglyphdef: 'altGlyphDef',
+ altglyphitem: 'altGlyphItem',
+ animatecolor: 'animateColor',
+ animatemotion: 'animateMotion',
+ animatetransform: 'animateTransform',
+ clippath: 'clipPath',
+ feblend: 'feBlend',
+ fecolormatrix: 'feColorMatrix',
+ fecomponenttransfer: 'feComponentTransfer',
+ fecomposite: 'feComposite',
+ feconvolvematrix: 'feConvolveMatrix',
+ fediffuselighting: 'feDiffuseLighting',
+ fedisplacementmap: 'feDisplacementMap',
+ fedistantlight: 'feDistantLight',
+ fedropshadow: 'feDropShadow',
+ feflood: 'feFlood',
+ fefunca: 'feFuncA',
+ fefuncb: 'feFuncB',
+ fefuncg: 'feFuncG',
+ fefuncr: 'feFuncR',
+ fegaussianblur: 'feGaussianBlur',
+ feimage: 'feImage',
+ femerge: 'feMerge',
+ femergenode: 'feMergeNode',
+ femorphology: 'feMorphology',
+ feoffset: 'feOffset',
+ fepointlight: 'fePointLight',
+ fespecularlighting: 'feSpecularLighting',
+ fespotlight: 'feSpotLight',
+ fetile: 'feTile',
+ feturbulence: 'feTurbulence',
+ foreignobject: 'foreignObject',
+ glyphref: 'glyphRef',
+ lineargradient: 'linearGradient',
+ radialgradient: 'radialGradient',
+ textpath: 'textPath'
+}
+svg_attribute_fixes = {
+ attributename: 'attributeName',
+ attributetype: 'attributeType',
+ basefrequency: 'baseFrequency',
+ baseprofile: 'baseProfile',
+ calcmode: 'calcMode',
+ clippathunits: 'clipPathUnits',
+ contentscripttype: 'contentScriptType',
+ contentstyletype: 'contentStyleType',
+ diffuseconstant: 'diffuseConstant',
+ edgemode: 'edgeMode',
+ externalresourcesrequired: 'externalResourcesRequired',
+ // WHATWG removes this: filterres: 'filterRes',
+ filterunits: 'filterUnits',
+ glyphref: 'glyphRef',
+ gradienttransform: 'gradientTransform',
+ gradientunits: 'gradientUnits',
+ kernelmatrix: 'kernelMatrix',
+ kernelunitlength: 'kernelUnitLength',
+ keypoints: 'keyPoints',
+ keysplines: 'keySplines',
+ keytimes: 'keyTimes',
+ lengthadjust: 'lengthAdjust',
+ limitingconeangle: 'limitingConeAngle',
+ markerheight: 'markerHeight',
+ markerunits: 'markerUnits',
+ markerwidth: 'markerWidth',
+ maskcontentunits: 'maskContentUnits',
+ maskunits: 'maskUnits',
+ numoctaves: 'numOctaves',
+ pathlength: 'pathLength',
+ patterncontentunits: 'patternContentUnits',
+ patterntransform: 'patternTransform',
+ patternunits: 'patternUnits',
+ pointsatx: 'pointsAtX',
+ pointsaty: 'pointsAtY',
+ pointsatz: 'pointsAtZ',
+ preservealpha: 'preserveAlpha',
+ preserveaspectratio: 'preserveAspectRatio',
+ primitiveunits: 'primitiveUnits',
+ refx: 'refX',
+ refy: 'refY',
+ repeatcount: 'repeatCount',
+ repeatdur: 'repeatDur',
+ requiredextensions: 'requiredExtensions',
+ requiredfeatures: 'requiredFeatures',
+ specularconstant: 'specularConstant',
+ specularexponent: 'specularExponent',
+ spreadmethod: 'spreadMethod',
+ startoffset: 'startOffset',
+ stddeviation: 'stdDeviation',
+ stitchtiles: 'stitchTiles',
+ surfacescale: 'surfaceScale',
+ systemlanguage: 'systemLanguage',
+ tablevalues: 'tableValues',
+ targetx: 'targetX',
+ targety: 'targetY',
+ textlength: 'textLength',
+ viewbox: 'viewBox',
+ viewtarget: 'viewTarget',
+ xchannelselector: 'xChannelSelector',
+ ychannelselector: 'yChannelSelector',
+ zoomandpan: 'zoomAndPan'
+}
+foreign_attr_fixes = {
+ 'xlink:actuate': 'xlink actuate',
+ 'xlink:arcrole': 'xlink arcrole',
+ 'xlink:href': 'xlink href',
+ 'xlink:role': 'xlink role',
+ 'xlink:show': 'xlink show',
+ 'xlink:title': 'xlink title',
+ 'xlink:type': 'xlink type',
+ 'xml:base': 'xml base',
+ 'xml:lang': 'xml lang',
+ 'xml:space': 'xml space',
+ 'xmlns': 'xmlns',
+ 'xmlns:xlink': 'xmlns xlink'
+}
+adjust_mathml_attributes = function (t) {
+ var i, a
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (a[0] === 'definitionurl') {
+ a[0] = 'definitionURL'
+ }
+ }
+}
+adjust_svg_attributes = function (t) {
+ var i, a
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (svg_attribute_fixes[a[0]] != null) {
+ a[0] = svg_attribute_fixes[a[0]]
+ }
+ }
+}
+adjust_foreign_attributes = function (t) {
+ // fixfull
+ var i, a
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (foreign_attr_fixes[a[0]] != null) {
+ a[0] = foreign_attr_fixes[a[0]]
+ }
+ }
+}
+
+// decode_named_char_ref()
+//
+// The list of named character references is _huge_ so if we're running in a
+// browser, we get the browser to decode them, rather than increasing the code
+// size to include the table.
+if (context === 'module') {
+ _decode_named_char_ref = require('./parser_no_browser_helper.js')
+} else {
+ decode_named_char_ref_el = document.createElement('textarea')
+ _decode_named_char_ref = function (txt) {
+ var decoded
+ txt = "&" + txt + ";"
+ decode_named_char_ref_el.innerHTML = txt
+ decoded = decode_named_char_ref_el.value
+ if (decoded === txt) {
+ return null
+ }
+ return decoded
+ }
+}
+// Pass the name of a named entity _that has a terminating semicolon_
+// Entities without terminating semicolons should use legacy_char_refs[]
+// Do not include the "&" or ";" in your argument, eg pass "alpha"
+decode_named_char_ref_cache = {}
+decode_named_char_ref = function (txt) {
+ var decoded
+ decoded = decode_named_char_ref_cache[txt]
+ if (decoded != null) {
+ return decoded
+ }
+ decoded = _decode_named_char_ref(txt)
+ return decode_named_char_ref_cache[txt] = decoded
+}
+
+parse_html = function (args_html, args) {
+ var adjusted_current_node, adjusted_insertion_location, adoption_agency, afe, afe_push, afe_push_marker, button_scopers, clear_afe_to_marker, clear_stack_to_table_body_context, clear_stack_to_table_context, clear_stack_to_table_row_context, clear_to_table_body_stopers, clear_to_table_row_stopers, clear_to_table_stopers, close_p_element, close_p_if_in_button_scope, close_the_cell, context_element, cur, doc, eat_next_token_if_newline, el_is_in_scope, flag_foster_parenting, flag_fragment_parsing, flag_frameset_ok, flag_parsing, flag_scripting, form_element_pointer, fragment_root, generate_implied_end_tags, has_color_face_or_size, head_element_pointer, in_body_any_other_end_tag, in_foreign_content, in_foreign_content_end_script, in_foreign_content_other_start, ins_mode, ins_mode_after_after_body, ins_mode_after_after_frameset, ins_mode_after_body, ins_mode_after_frameset, ins_mode_after_head, ins_mode_after_head_else, ins_mode_before_head, ins_mode_before_html, ins_mode_in_body, ins_mode_in_caption, ins_mode_in_cell, ins_mode_in_column_group, ins_mode_in_frameset, ins_mode_in_head, ins_mode_in_head_else, ins_mode_in_head_noscript, ins_mode_in_head_noscript_else, ins_mode_in_row, ins_mode_in_select, ins_mode_in_select_in_table, ins_mode_in_table, ins_mode_in_table_body, ins_mode_in_table_else, ins_mode_in_table_text, ins_mode_in_template, ins_mode_initial, ins_mode_text, insert_character, insert_comment, insert_foreign_element, insert_html_element, is_appropriate_end_tag, is_in_button_scope, is_in_li_scope, is_in_scope, is_in_scope_x, is_in_scope_x_y, is_in_select_scope, is_in_table_scope, is_quirks_limited_doctype, is_quirks_yes_doctype, li_scopers, open_els, original_ins_mode, parse_character_reference, parse_error, parse_generic_raw_text, parse_generic_rcdata_text, parse_init, parse_main_loop, pending_table_character_tokens, process_token, reconstruct_afe, ref, reset_ins_mode, standard_scopers, stop_parsing, table_scopers, template_ins_modes, template_tag_is_open, 
temporary_buffer, tok_cur_tag, tok_state, tok_state_after_attribute_name, tok_state_after_attribute_value_quoted, tok_state_after_doctype_name, tok_state_after_doctype_public_identifier, tok_state_after_doctype_public_keyword, tok_state_after_doctype_system_identifier, tok_state_after_doctype_system_keyword, tok_state_attribute_name, tok_state_attribute_value_double_quoted, tok_state_attribute_value_single_quoted, tok_state_attribute_value_unquoted, tok_state_before_attribute_name, tok_state_before_attribute_value, tok_state_before_doctype_name, tok_state_before_doctype_public_identifier, tok_state_before_doctype_system_identifier, tok_state_between_doctype_public_and_system_identifiers, tok_state_bogus_comment, tok_state_bogus_doctype, tok_state_cdata_section, tok_state_comment, tok_state_comment_end, tok_state_comment_end_bang, tok_state_comment_end_dash, tok_state_comment_start, tok_state_comment_start_dash, tok_state_data, tok_state_doctype, tok_state_doctype_name, tok_state_doctype_public_identifier_double_quoted, tok_state_doctype_public_identifier_single_quoted, tok_state_doctype_system_identifier_double_quoted, tok_state_doctype_system_identifier_single_quoted, tok_state_end_tag_open, tok_state_markup_declaration_open, tok_state_plaintext, tok_state_rawtext, tok_state_rawtext_end_tag_name, tok_state_rawtext_end_tag_open, tok_state_rawtext_less_than_sign, tok_state_rcdata, tok_state_rcdata_end_tag_name, tok_state_rcdata_end_tag_open, tok_state_rcdata_less_than_sign, tok_state_script_data, tok_state_script_data_double_escape_end, tok_state_script_data_double_escape_start, tok_state_script_data_double_escaped, tok_state_script_data_double_escaped_dash, tok_state_script_data_double_escaped_dash_dash, tok_state_script_data_double_escaped_less_than_sign, tok_state_script_data_end_tag_name, tok_state_script_data_end_tag_open, tok_state_script_data_escape_start, tok_state_script_data_escape_start_dash, tok_state_script_data_escaped, 
tok_state_script_data_escaped_dash, tok_state_script_data_escaped_dash_dash, tok_state_script_data_escaped_end_tag_name, tok_state_script_data_escaped_end_tag_open, tok_state_script_data_escaped_less_than_sign, tok_state_script_data_less_than_sign, tok_state_self_closing_start_tag, tok_state_tag_name, tok_state_tag_open, token_to_element, txt
+ if (args == null) {
+ args = {}
+ }
+ txt = null
+ cur = null // index of next char in txt to be parsed
+ // declare doc and tokenizer variables so they're in scope below
+ doc = null
+ open_els = null // stack of open elements
+ afe = null // active formatting elements
+ template_ins_modes = null
+ ins_mode = null
+ original_ins_mode = null
+ tok_state = null
+ tok_cur_tag = null // partially parsed tag
+ flag_scripting = null
+ flag_frameset_ok = null
+ flag_parsing = null
+ flag_foster_parenting = null
+ form_element_pointer = null
+ temporary_buffer = null
+ pending_table_character_tokens = null
+ head_element_pointer = null
+ flag_fragment_parsing = null
+ context_element = null
+
+ stop_parsing = function () { // takes no arguments, returns nothing
+ flag_parsing = false // NOTE(review): presumably polled by the main loop -- confirm
+ }
+
+ parse_error = function () {
+ if (args.error_cb != null) {
+ args.error_cb(cur)
+ }
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
+ // "Noah's Ark clause" but with three
+ afe_push = function (new_el) {
+ var attrs_match, el, i, j, k, len, matches, ref, ref1, v
+ matches = 0
+ for (i = 0; i < afe.length; ++i) {
+ el = afe[i]
+ if (el.type === TYPE_AFE_MARKER) {
+ break
+ }
+ if (el.name === new_el.name && el.namespace === new_el.namespace) {
+ attrs_match = true
+ for (k in el.attrs) {
+ v = el.attrs[k]
+ if (new_el.attrs[k] !== v) {
+ attrs_match = false
+ break
+ }
+ }
+ if (attrs_match) {
+ for (k in new_el.attrs) {
+ v = new_el.attrs[k]
+ if (el.attrs[k] !== v) {
+ attrs_match = false
+ break
+ }
+ }
+ }
+ if (attrs_match) {
+ matches += 1
+ if (matches === 3) {
+ afe.splice(i, 1)
+ break
+ }
+ }
+ }
+ }
+ afe.unshift(new_el)
+ }
+
+ afe_push_marker = function () {
+ afe.unshift(new_afe_marker())
+ }
+
+ // the functions below implement the Tree Construction algorithm
+ // http://www.w3.org/TR/html5/syntax.html#tree-construction
+
+ // But first... the helpers
+ template_tag_is_open = function () {
+ var i, el
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el.name === 'template' && el.namespace === NS_HTML) {
+ return true
+ }
+ }
+ return false
+ }
+ is_in_scope_x = function (tag_name, scope, namespace) {
+ var i, el
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
+ return true
+ }
+ if (scope[el.name] === el.namespace) {
+ return false
+ }
+ }
+ return false
+ }
+ is_in_scope_x_y = function (tag_name, scope, scope2, namespace) {
+ var i, el
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el.name === tag_name && (namespace === null || namespace === el.namespace)) {
+ return true
+ }
+ if (scope[el.name] === el.namespace) {
+ return false
+ }
+ if (scope2[el.name] === el.namespace) {
+ return false
+ }
+ }
+ return false
+ }
+ standard_scopers = { // name -> namespace of the elements that end a scope search (is_in_scope, el_is_in_scope)
+ applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
+ td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
+ template: NS_HTML,
+
+ mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
+ mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
+
+ foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
+ }
+ button_scopers = { button: NS_HTML } // extra boundary used by is_in_button_scope, on top of standard_scopers
+ li_scopers = { ol: NS_HTML, ul: NS_HTML } // extra boundaries used by is_in_li_scope, on top of standard_scopers
+ table_scopers = { html: NS_HTML, table: NS_HTML, template: NS_HTML } // the only boundaries used by is_in_table_scope
+ is_in_scope = function (tag_name, namespace) {
+ if (namespace == null) {
+ namespace = null
+ }
+ return is_in_scope_x(tag_name, standard_scopers, namespace)
+ }
+ is_in_button_scope = function (tag_name, namespace) {
+ if (namespace == null) {
+ namespace = null
+ }
+ return is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
+ }
+ is_in_table_scope = function (tag_name, namespace) {
+ if (namespace == null) {
+ namespace = null
+ }
+ return is_in_scope_x(tag_name, table_scopers, namespace)
+ }
+ // aka is_in_list_item_scope
+ is_in_li_scope = function (tag_name, namespace) {
+ if (namespace == null) {
+ namespace = null
+ }
+ return is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
+ }
+ is_in_select_scope = function (tag_name, namespace) {
+ var i, t
+ if (namespace == null) {
+ namespace = null
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ t = open_els[i]
+ if (t.name === tag_name && (namespace === null || namespace === t.namespace)) {
+ return true
+ }
+ if (t.namespace !== NS_HTML && t.name !== 'optgroup' && t.name !== 'option') {
+ return false
+ }
+ }
+ return false
+ }
+ // this checks for a particular element, not by name
+ // this requires a namespace match
+ el_is_in_scope = function (needle) {
+ var i
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el === needle) {
+ return true
+ }
+ if (standard_scopers[el.name] === el.namespace) {
+ return false
+ }
+ }
+ return false
+ }
+
+ clear_to_table_stopers = {
+ 'table': true,
+ 'template': true,
+ 'html': true
+ }
+ clear_stack_to_table_context = function () {
+ while (true) {
+ if (clear_to_table_stopers[open_els[0].name] != null) {
+ break
+ }
+ open_els.shift()
+ }
+ }
+ clear_to_table_body_stopers = {
+ tbody: NS_HTML,
+ tfoot: NS_HTML,
+ thead: NS_HTML,
+ template: NS_HTML,
+ html: NS_HTML
+ }
+ clear_stack_to_table_body_context = function () {
+ while (true) {
+ if (clear_to_table_body_stopers[open_els[0].name] === open_els[0].namespace) {
+ break
+ }
+ open_els.shift()
+ }
+ }
+ clear_to_table_row_stopers = {
+ 'tr': true,
+ 'template': true,
+ 'html': true
+ }
+ clear_stack_to_table_row_context = function () {
+ while (true) {
+ if (clear_to_table_row_stopers[open_els[0].name] != null) {
+ break
+ }
+ open_els.shift()
+ }
+ }
+ clear_afe_to_marker = function () {
+ var el
+ while (true) {
+ if (!(afe.length > 0)) { // this happens in fragment case, ?spec error
+ return
+ }
+ el = afe.shift()
+ if (el.type === TYPE_AFE_MARKER) {
+ return
+ }
+ }
+ }
+
+ // 8.2.3.1 ...
+ // http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
+ reset_ins_mode = function () {
+ var ancestor, ancestor_i, last, node, node_i
+ // 1. Let last be false.
+ last = false
+ // 2. Let node be the last node in the stack of open elements.
+ node_i = 0
+ node = open_els[node_i]
+ // 3. Loop: If node is the first node in the stack of open elements,
+ // then set last to true, and, if the parser was originally created as
+ // part of the HTML fragment parsing algorithm (fragment case) set node
+ // to the context element.
+ while (true) {
+ if (node_i === open_els.length - 1) {
+ last = true
+ if (flag_fragment_parsing) {
+ node = context_element
+ }
+ }
+ // 4. If node is a select element, run these substeps:
+ if (node.name === 'select' && node.namespace === NS_HTML) {
+ // 1. If last is true, jump to the step below labeled done.
+ if (!last) {
+ // 2. Let ancestor be node.
+ ancestor_i = node_i
+ ancestor = node
+ // 3. Loop: If ancestor is the first node in the stack of
+ // open elements, jump to the step below labeled done.
+ while (true) {
+ if (ancestor_i === open_els.length - 1) {
+ break
+ }
+ // 4. Let ancestor be the node before ancestor in the stack
+ // of open elements.
+ ancestor_i += 1
+ ancestor = open_els[ancestor_i]
+ // 5. If ancestor is a template node, jump to the step below
+ // labeled done.
+ if (ancestor.name === 'template' && ancestor.namespace === NS_HTML) {
+ break
+ }
+ // 6. If ancestor is a table node, switch the insertion mode
+ // to "in select in table" and abort these steps.
+ if (ancestor.name === 'table' && ancestor.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_select_in_table
+ return
+ }
+ // 7. Jump back to the step labeled loop.
+ }
+ }
+ // 8. Done: Switch the insertion mode to "in select" and abort
+ // these steps.
+ ins_mode = ins_mode_in_select
+ return
+ }
+ // 5. If node is a td or th element and last is false, then switch
+ // the insertion mode to "in cell" and abort these steps.
+ if ((node.name === 'td' || node.name === 'th') && node.namespace === NS_HTML && last === false) {
+ ins_mode = ins_mode_in_cell
+ return
+ }
+ // 6. If node is a tr element, then switch the insertion mode to "in
+ // row" and abort these steps.
+ if (node.name === 'tr' && node.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_row
+ return
+ }
+ // 7. If node is a tbody, thead, or tfoot element, then switch the
+ // insertion mode to "in table body" and abort these steps.
+ if ((node.name === 'tbody' || node.name === 'thead' || node.name === 'tfoot') && node.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_table_body
+ return
+ }
+ // 8. If node is a caption element, then switch the insertion mode
+ // to "in caption" and abort these steps.
+ if (node.name === 'caption' && node.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_caption
+ return
+ }
+ // 9. If node is a colgroup element, then switch the insertion mode
+ // to "in column group" and abort these steps.
+ if (node.name === 'colgroup' && node.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_column_group
+ return
+ }
+ // 10. If node is a table element, then switch the insertion mode to
+ // "in table" and abort these steps.
+ if (node.name === 'table' && node.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_table
+ return
+ }
+ // 11. If node is a template element, then switch the insertion mode
+ // to the current template insertion mode and abort these steps.
+ if (node.name === 'template' && node.namespace === NS_HTML) {
+ ins_mode = template_ins_modes[0]
+ return
+ }
+ // 12. If node is a head element and last is true, then switch the
+ // insertion mode to "in body" ("in body"! not "in head"!) and abort
+ // these steps. (fragment case)
+ if (node.name === 'head' && node.namespace === NS_HTML && last) {
+ ins_mode = ins_mode_in_body
+ return
+ }
+ // 13. If node is a head element and last is false, then switch the
+ // insertion mode to "in head" and abort these steps.
+ if (node.name === 'head' && node.namespace === NS_HTML && last === false) {
+ ins_mode = ins_mode_in_head
+ return
+ }
+ // 14. If node is a body element, then switch the insertion mode to
+ // "in body" and abort these steps.
+ if (node.name === 'body' && node.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_body
+ return
+ }
+ // 15. If node is a frameset element, then switch the insertion mode
+ // to "in frameset" and abort these steps. (fragment case)
+ if (node.name === 'frameset' && node.namespace === NS_HTML) {
+ ins_mode = ins_mode_in_frameset
+ return
+ }
+ // 16. If node is an html element, run these substeps:
+ if (node.name === 'html' && node.namespace === NS_HTML) {
+ // 1. If the head element pointer is null, switch the insertion
+ // mode to "before head" and abort these steps. (fragment case)
+ if (head_element_pointer === null) {
+ ins_mode = ins_mode_before_head
+ } else {
+ // 2. Otherwise, the head element pointer is not null,
+ // switch the insertion mode to "after head" and abort these
+ // steps.
+ ins_mode = ins_mode_after_head
+ }
+ return
+ }
+ // 17. If last is true, then switch the insertion mode to "in body"
+ // and abort these steps. (fragment case)
+ if (last) {
+ ins_mode = ins_mode_in_body
+ return
+ }
+ // 18. Let node now be the node before node in the stack of open
+ // elements.
+ node_i += 1
+ node = open_els[node_i]
+ // 19. Return to the step labeled loop.
+ }
+ }
+
+ // 8.2.3.2
+
+ // http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
+ adjusted_current_node = function () {
+ if (open_els.length === 1 && flag_fragment_parsing) {
+ return context_element
+ }
+ return open_els[0]
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
+ // this implementation is structured (mostly) as described at the link above.
+ // capitalized comments are the "labels" described at the link above.
+ reconstruct_afe = function () {
+ var el, i
+ if (afe.length === 0) {
+ return
+ }
+ if (afe[0].type === TYPE_AFE_MARKER || open_els.indexOf(afe[0]) >= 0) {
+ return
+ }
+ // Rewind
+ i = 0
+ while (true) {
+ if (i === afe.length - 1) {
+ break
+ }
+ i += 1
+ if (afe[i].type === TYPE_AFE_MARKER || open_els.indexOf(afe[i]) >= 0) {
+ i -= 1 // Advance
+ break
+ }
+ }
+ // Create
+ while (true) {
+ el = insert_html_element(afe[i].token)
+ afe[i] = el
+ if (i === 0) {
+ break
+ }
+ i -= 1 // Advance
+ }
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
+ // adoption agency algorithm
+ // overview here:
+ // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
+ // http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
+ // http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
+ adoption_agency = function (subject) {
+ var aa, ab, ac, ad, ae, af, bookmark, c, ca, dest, el, fb, fb_of_open_els, fe, fe_of_afe, fe_of_open_els, i, in_afe, in_open_els, inner, j, l, last_node, len, len1, len10, len11, len12, len13, len14, len15, len16, len17, len2, len3, len4, len5, len6, len7, len8, len9, m, n, new_node, node, node_above, node_in_afe, node_next, o, outer, q, r, ref, ref1, s, t, u, w, y, z
+// this block implements tha W3C spec
+// # 1. If the current node is an HTML element whose tag name is subject,
+// # then run these substeps:
+// #
+// # 1. Let element be the current node.
+// #
+// # 2. Pop element off the stack of open elements.
+// #
+// # 3. If element is also in the list of active formatting elements,
+// # remove the element from the list.
+// #
+// # 4. Abort the adoption agency algorithm.
+// if open_els[0].name is subject and open_els[0].namespace is NS_HTML
+// el = open_els.shift()
+// # remove it from the list of active formatting elements (if found)
+// for t, i in afe
+// if t is el
+// afe.splice i, 1
+// break
+// return
+// WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
+ // If the current node is an HTML element whose tag name is subject, and
+ // the current node is not in the list of active formatting elements,
+ // then pop the current node off the stack of open elements, and abort
+ // these steps.
+ if (open_els[0].name === subject && open_els[0].namespace === NS_HTML) {
+ // remove it from the list of active formatting elements (if found)
+ in_afe = false
+ for (i = 0; i < afe.length; ++i) {
+ el = afe[i]
+ if (el === open_els[0]) {
+ in_afe = true
+ break
+ }
+ }
+ if (!in_afe) {
+ open_els.shift()
+ return
+ }
+ // fall through
+ }
+// END WHATWG
+ outer = 0
+ while (true) {
+ if (outer >= 8) {
+ return
+ }
+ outer += 1
+ // 5. Let formatting element be the last element in the list of
+ // active formatting elements that: is between the end of the list
+ // and the last scope marker in the list, if any, or the start of
+ // the list otherwise, and has the tag name subject.
+ fe = null
+ for (fe_of_afe = 0; fe_of_afe < afe.length; ++fe_of_afe) {
+ t = afe[fe_of_afe]
+ if (t.type === TYPE_AFE_MARKER) {
+ break
+ }
+ if (t.name === subject) {
+ fe = t
+ break
+ }
+ }
+ // If there is no such element, then abort these steps and instead
+ // act as described in the "any other end tag" entry above.
+ if (fe === null) {
+ in_body_any_other_end_tag(subject)
+ return
+ }
+ // 6. If formatting element is not in the stack of open elements,
+ // then this is a parse error; remove the element from the list, and
+ // abort these steps.
+ in_open_els = false
+ for (fe_of_open_els = 0; fe_of_open_els < open_els.length; ++fe_of_open_els) {
+ t = open_els[fe_of_open_els]
+ if (t === fe) {
+ in_open_els = true
+ break
+ }
+ }
+ if (!in_open_els) {
+ parse_error()
+ // "remove it from the list" must mean afe, since it's not in open_els
+ afe.splice(fe_of_afe, 1)
+ return
+ }
+ // 7. If formatting element is in the stack of open elements, but
+ // the element is not in scope, then this is a parse error; abort
+ // these steps.
+ if (!el_is_in_scope(fe)) {
+ parse_error()
+ return
+ }
+ // 8. If formatting element is not the current node, this is a parse
+ // error. (But do not abort these steps.)
+ if (open_els[0] !== fe) {
+ parse_error()
+ // continue
+ }
+ // 9. Let furthest block be the topmost node in the stack of open
+ // elements that is lower in the stack than formatting element, and
+ // is an element in the special category. There might not be one.
+ fb = null
+ fb_of_open_els = null
+ for (i = 0; i < open_els.length; ++i) {
+ t = open_els[i]
+ if (t === fe) {
+ break
+ }
+ if (el_is_special(t)) {
+ fb = t
+ fb_of_open_els = i
+ // and continue, to see if there's one that's more "topmost"
+ }
+ }
+ // 10. If there is no furthest block, then the UA must first pop all
+ // the nodes from the bottom of the stack of open elements, from the
+ // current node up to and including formatting element, then remove
+ // formatting element from the list of active formatting elements,
+ // and finally abort these steps.
+ if (fb === null) {
+ while (true) {
+ t = open_els.shift()
+ if (t === fe) {
+ afe.splice(fe_of_afe, 1)
+ return
+ }
+ }
+ }
+ // 11. Let common ancestor be the element immediately above
+ // formatting element in the stack of open elements.
+ ca = open_els[fe_of_open_els + 1] // common ancestor
+
+ node_above = open_els[fb_of_open_els + 1] // next node if node isn't in open_els anymore
+ // 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
+ bookmark = new_aaa_bookmark()
+ for (i = 0; i < afe.length; ++i) {
+ t = afe[i]
+ if (t === fe) {
+ afe.splice(i, 0, bookmark)
+ break
+ }
+ }
+ node = last_node = fb
+ inner = 0
+ while (true) {
+ inner += 1
+ // 3. Let node be the element immediately above node in the
+ // stack of open elements, or if node is no longer in the stack
+ // of open elements (e.g. because it got removed by this
+ // algorithm), the element that was immediately above node in
+ // the stack of open elements before node was removed.
+ node_next = null
+ for (i = 0; i < open_els.length; ++i) {
+ t = open_els[i]
+ if (t === node) {
+ node_next = open_els[i + 1]
+ break
+ }
+ }
+ node = node_next != null ? node_next : node_above
+ // TODO make sure node_above gets re-set if/when node is removed from open_els
+
+ // 4. If node is formatting element, then go to the next step in
+ // the overall algorithm.
+ if (node === fe) {
+ break
+ }
+ // 5. If inner loop counter is greater than three and node is in
+ // the list of active formatting elements, then remove node from
+ // the list of active formatting elements.
+ node_in_afe = false
+ for (i = 0; i < afe.length; ++i) {
+ t = afe[i]
+ if (t === node) {
+ if (inner > 3) {
+ afe.splice(i, 1)
+ } else {
+ node_in_afe = true
+ }
+ break
+ }
+ }
+ // 6. If node is not in the list of active formatting elements,
+ // then remove node from the stack of open elements and then go
+ // back to the step labeled inner loop.
+ if (!node_in_afe) {
+ for (i = 0; i < open_els.length; ++i) {
+ t = open_els[i]
+ if (t === node) {
+ node_above = open_els[i + 1]
+ open_els.splice(i, 1)
+ break
+ }
+ }
+ continue
+ }
+ // 7. create an element for the token for which the element node
+ // was created, in the HTML namespace, with common ancestor as
+ // the intended parent; replace the entry for node in the list
+ // of active formatting elements with an entry for the new
+ // element, replace the entry for node in the stack of open
+ // elements with an entry for the new element, and let node be
+ // the new element.
+ new_node = token_to_element(node.token, NS_HTML, ca)
+ for (i = 0; i < afe.length; ++i) {
+ t = afe[i]
+ if (t === node) {
+ afe[i] = new_node
+ break
+ }
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ t = open_els[i]
+ if (t === node) {
+ node_above = open_els[i + 1]
+ open_els[i] = new_node
+ break
+ }
+ }
+ node = new_node
+ // 8. If last node is furthest block, then move the
+ // aforementioned bookmark to be immediately after the new node
+ // in the list of active formatting elements.
+ if (last_node === fb) {
+ for (i = 0; i < afe.length; ++i) {
+ t = afe[i]
+ if (t === bookmark) {
+ afe.splice(i, 1)
+ break
+ }
+ }
+ for (i = 0; i < afe.length; ++i) {
+ t = afe[i]
+ if (t === node) {
+ // "after" means lower
+ afe.splice(i, 0, bookmark) // "after as <-
+ break
+ }
+ }
+ }
+ // 9. Insert last node into node, first removing it from its
+ // previous parent node if any.
+ if (last_node.parent != null) {
+ for (i = 0; i < last_node.parent.children.length; ++i) {
+ c = last_node.parent.children[i]
+ if (c === last_node) {
+ last_node.parent.children.splice(i, 1)
+ break
+ }
+ }
+ }
+ node.children.push(last_node)
+ last_node.parent = node
+ // 10. Let last node be node.
+ last_node = node
+ // 11. Return to the step labeled inner loop.
+ }
+ // 14. Insert whatever last node ended up being in the previous step
+ // at the appropriate place for inserting a node, but using common
+ // ancestor as the override target.
+
+ // In the case where fe is immediately followed by fb:
+ // * inner loop exits out early (node==fe)
+ // * last_node is fb
+ // * last_node is still in the tree (not a duplicate)
+ if (last_node.parent != null) {
+ for (i = 0; i < last_node.parent.children.length; ++i) {
+ c = last_node.parent.children[i]
+ if (c === last_node) {
+ last_node.parent.children.splice(i, 1)
+ break
+ }
+ }
+ }
+ // can't use standard insert token thing, because it's already in
+ // open_els and must stay at it's current position in open_els
+ dest = adjusted_insertion_location(ca)
+ dest[0].children.splice(dest[1], 0, last_node)
+ last_node.parent = dest[0]
+ // 15. Create an element for the token for which formatting element
+ // was created, in the HTML namespace, with furthest block as the
+ // intended parent.
+ new_element = token_to_element(fe.token, NS_HTML, fb)
+ // 16. Take all of the child nodes of furthest block and append them
+ // to the element created in the last step.
+ while (fb.children.length) {
+ t = fb.children.shift()
+ t.parent = new_element
+ new_element.children.push(t)
+ }
+ // 17. Append that new element to furthest block.
+ new_element.parent = fb
+ fb.children.push(new_element)
+ // 18. Remove formatting element from the list of active formatting
+ // elements, and insert the new element into the list of active
+ // formatting elements at the position of the aforementioned
+ // bookmark.
+ for (i = 0; i < afe.length; ++i) {
+ t = afe[i]
+ if (t === fe) {
+ afe.splice(i, 1)
+ break
+ }
+ }
+ for (i = 0; i < afe.length; ++i) {
+ t = afe[i]
+ if (t === bookmark) {
+ afe[i] = new_element
+ break
+ }
+ }
+ // 19. Remove formatting element from the stack of open elements,
+ // and insert the new element into the stack of open elements
+ // immediately below the position of furthest block in that stack.
+ for (i = 0; i < open_els.length; ++i) {
+ t = open_els[i]
+ if (t === fe) {
+ open_els.splice(i, 1)
+ break
+ }
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ t = open_els[i]
+ if (t === fb) {
+ open_els.splice(i, 0, new_element)
+ break
+ }
+ }
+ // 20. Jump back to the step labeled outer loop.
+ }
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#close-a-p-element
+ close_p_element = function () {
+ generate_implied_end_tags('p') // arg is exception
+ if (!(open_els[0].name === 'p' && open_els[0].namespace === NS_HTML)) {
+ parse_error()
+ }
+ while (open_els.length > 1) { // just in case
+ el = open_els.shift()
+ if (el.name === 'p' && el.namespace === NS_HTML) {
+ return
+ }
+ }
+ }
+ close_p_if_in_button_scope = function () {
+ if (is_in_button_scope('p', NS_HTML)) {
+ close_p_element()
+ }
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#insert-a-character
+ // aka insert_a_character = function (t) {
+ insert_character = function (t) {
+ var dest, prev
+ dest = adjusted_insertion_location()
+ // fixfull check for Document node
+ if (dest[1] > 0) {
+ prev = dest[0].children[dest[1] - 1]
+ if (prev.type === TYPE_TEXT) {
+ prev.text += t.text
+ return
+ }
+ }
+ dest[0].children.splice(dest[1], 0, t)
+ t.parent = dest[0]
+ }
+
+ // 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
+ process_token = function (t) {
+ var acn
+ acn = adjusted_current_node()
+ if (acn == null) {
+ ins_mode(t)
+ return
+ }
+ if (acn.namespace === NS_HTML) {
+ ins_mode(t)
+ return
+ }
+ if (is_mathml_text_integration_point(acn)) {
+ if (t.type === TYPE_START_TAG && !(t.name === 'mglyph' || t.name === 'malignmark')) {
+ ins_mode(t)
+ return
+ }
+ if (t.type === TYPE_TEXT) {
+ ins_mode(t)
+ return
+ }
+ }
+ if (acn.namespace === NS_MATHML && acn.name === 'annotation-xml' && t.type === TYPE_START_TAG && t.name === 'svg') {
+ ins_mode(t)
+ return
+ }
+ if (is_html_integration(acn)) {
+ if (t.type === TYPE_START_TAG || t.type === TYPE_TEXT) {
+ ins_mode(t)
+ return
+ }
+ }
+ if (t.type === TYPE_EOF) {
+ ins_mode(t)
+ return
+ }
+ in_foreign_content(t)
+ }
+
+ // 8.2.5.1
+ // http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
+ // http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
+ adjusted_insertion_location = function (override_target) {
+ var c, el, i, j, l, last_table, last_table_i, last_template, last_template_i, len, len1, len2, m, previous_element, ref, target, target_i
+ // 1. If there was an override target specified, then let target be the
+ // override target.
+ if (override_target != null) {
+ target = override_target
+ } else { // Otherwise, let target be the current node.
+ target = open_els[0]
+ }
+ // 2. Determine the adjusted insertion location using the first matching
+ // steps from the following list:
+ //
+ // If foster parenting is enabled and target is a table, tbody, tfoot,
+ // thead, or tr element Foster parenting happens when content is
+ // misnested in tables.
+ if (flag_foster_parenting && foster_parenting_targets[target.name] === target.namespace) {
+ while (true) { // once. this is here so we can ``break`` to "abort these substeps"
+ // 1. Let last template be the last template element in the
+ // stack of open elements, if any.
+ last_template = null
+ last_template_i = null
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el.name === 'template' && el.namespace === NS_HTML) {
+ last_template = el
+ last_template_i = i
+ break
+ }
+ }
+ // 2. Let last table be the last table element in the stack of
+ // open elements, if any.
+ last_table = null
+ last_table_i
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el.name === 'table' && el.namespace === NS_HTML) {
+ last_table = el
+ last_table_i = i
+ break
+ }
+ }
+ // 3. If there is a last template and either there is no last
+ // table, or there is one, but last template is lower (more
+ // recently added) than last table in the stack of open
+ // elements, then: let adjusted insertion location be inside
+ // last template's template contents, after its last child (if
+ // any), and abort these substeps.
+ if (last_template && (last_table === null || last_template_i < last_table_i)) {
+ target = last_template // fixfull should be it's contents
+ target_i = target.children.length
+ break
+ }
+ // 4. If there is no last table, then let adjusted insertion
+ // location be inside the first element in the stack of open
+ // elements (the html element), after its last child (if any),
+ // and abort these substeps. (fragment case)
+ if (last_table === null) {
+ // this is odd
+ target = open_els[open_els.length - 1]
+ target_i = target.children.length
+ break
+ }
+ // 5. If last table has a parent element, then let adjusted
+ // insertion location be inside last table's parent element,
+ // immediately before last table, and abort these substeps.
+ if (last_table.parent != null) {
+ for (i = 0; i < last_table.parent.children.length; ++i) {
+ c = last_table.parent.children[i]
+ if (c === last_table) {
+ target = last_table.parent
+ target_i = i
+ break
+ }
+ }
+ break
+ }
+ // 6. Let previous element be the element immediately above last
+ // table in the stack of open elements.
+ //
+ // huh? how could it not have a parent?
+ previous_element = open_els[last_table_i + 1]
+ // 7. Let adjusted insertion location be inside previous
+ // element, after its last child (if any).
+ target = previous_element
+ target_i = target.children.length
+ // Note: These steps are involved in part because it's possible
+ // for elements, the table element in this case in particular,
+ // to have been moved by a script around in the DOM, or indeed
+ // removed from the DOM entirely, after the element was inserted
+ // by the parser.
+ break // don't really loop
+ }
+ } else {
+ // Otherwise Let adjusted insertion location be inside target, after
+ // its last child (if any).
+ target_i = target.children.length
+ }
+
+ // 3. If the adjusted insertion location is inside a template element,
+ // let it instead be inside the template element's template contents,
+ // after its last child (if any).
+ // fixfull (template)
+
+ // 4. Return the adjusted insertion location.
+ return [target, target_i]
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
+ // aka create_an_element_for_token
+ token_to_element = function (t, namespace, intended_parent) {
+ var a, attrs, el, i
+ // convert attributes into a hash
+ attrs = {}
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ attrs[a[0]] = a[1] // TODO check what to do with dupilcate attrs
+ }
+ el = new Node(TYPE_TAG, {name: t.name, namespace: namespace, attrs: attrs, token: t})
+
+ // TODO 2. If the newly created element has an xmlns attribute in the
+ // XMLNS namespace whose value is not exactly the same as the element's
+ // namespace, that is a parse error. Similarly, if the newly created
+ // element has an xmlns:xlink attribute in the XMLNS namespace whose
+ // value is not the XLink Namespace, that is a parse error.
+
+ // fixfull: the spec says stuff about form pointers and ownerDocument
+
+ return el
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
+ insert_foreign_element = function (token, namespace) {
+ var ail, ail_el, ail_i, el
+ ail = adjusted_insertion_location()
+ ail_el = ail[0]
+ ail_i = ail[1]
+ el = token_to_element(token, namespace, ail_el)
+ // TODO skip this next step if it's broken (eg ail_el is document with child already)
+ el.parent = ail_el
+ ail_el.children.splice(ail_i, 0, el)
+ open_els.unshift(el)
+ return el
+ }
+ // http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
+ insert_html_element = function (token) {
+ return insert_foreign_element(token, NS_HTML)
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#insert-a-comment
+ // position should be [node, index_within_children]
+ insert_comment = function (t, position) {
+ if (position == null) {
+ position = adjusted_insertion_location()
+ }
+ position[0].children.splice(position[1], 0, t)
+ return
+ }
+
+ // 8.2.5.2
+ // http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
+ parse_generic_raw_text = function (t) {
+ insert_html_element(t)
+ tok_state = tok_state_rawtext
+ original_ins_mode = ins_mode
+ ins_mode = ins_mode_text
+ }
+ parse_generic_rcdata_text = function (t) {
+ insert_html_element(t)
+ tok_state = tok_state_rcdata
+ original_ins_mode = ins_mode
+ ins_mode = ins_mode_text
+ }
+
+ // 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
+ // http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
+ generate_implied_end_tags = function (except) {
+ if (except == null) {
+ except = null
+ }
+ while (end_tag_implied[open_els[0].name] === open_els[0].namespace && open_els[0].name !== except) {
+ open_els.shift()
+ }
+ }
+
+ // 8.2.5.4 The rules for parsing tokens in HTML content
+ // http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
+
+ // 8.2.5.4.1 The "initial" insertion mode
+ // http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
+ is_quirks_yes_doctype = function (t) {
+ var i, p, pi
+ if (t.flag('force-quirks')) {
+ return true
+ }
+ if (t.name !== 'html') {
+ return true
+ }
+ if (t.public_identifier != null) {
+ pi = t.public_identifier.toLowerCase()
+ for (i = 0; i < quirks_yes_pi_prefixes.length; ++i) {
+ p = quirks_yes_pi_prefixes[i]
+ if (pi.substr(0, p.length) === p) {
+ return true
+ }
+ }
+ if (pi === '-//w3o//dtd w3 html strict 3.0//en//' || pi === '-/w3c/dtd html 4.0 transitional/en' || pi === 'html') {
+ return true
+ }
+ }
+ if (t.system_identifier != null) {
+ if (t.system_identifier.toLowerCase() === 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd') {
+ return true
+ }
+ } else if (t.public_identifier != null) {
+ // already did this: pi = t.public_identifier.toLowerCase()
+ if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
+ return true
+ }
+ }
+ return false
+ }
+ is_quirks_limited_doctype = function (t) {
+ var pi
+ if (t.public_identifier != null) {
+ pi = t.public_identifier.toLowerCase()
+ if (pi.substr(0, 32) === '-//w3c//dtd xhtml 1.0 frameset//' || pi.substr(0, 36) === '-//w3c//dtd xhtml 1.0 transitional//') {
+ return true
+ }
+ if (t.system_identifier != null) {
+ if (pi.substr(0, 32) === '-//w3c//dtd html 4.01 frameset//' || pi.substr(0, 36) === '-//w3c//dtd html 4.01 transitional//') {
+ return true
+ }
+ }
+ }
+ return false
+ }
+ ins_mode_initial = function (t) {
+ if (is_space_tok(t)) {
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ // ?fixfull
+ doc.children.push(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ // fixfull syntax error from first paragraph and following bullets
+ // fixfull set doc.doctype
+ // fixfull is the "not an iframe srcdoc" thing relevant?
+ if (is_quirks_yes_doctype(t)) {
+ doc.flag('quirks mode', QUIRKS_YES)
+ } else if (is_quirks_limited_doctype(t)) {
+ doc.flag('quirks mode', QUIRKS_LIMITED)
+ }
+ doc.children.push(t)
+ ins_mode = ins_mode_before_html
+ return
+ }
+ // Anything else
+ // fixfull not iframe srcdoc?
+ parse_error()
+ doc.flag('quirks mode', QUIRKS_YES)
+ ins_mode = ins_mode_before_html
+ process_token(t)
+ }
+
+ // 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
+ ins_mode_before_html = function (t) {
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ doc.children.push(t)
+ return
+ }
+ if (is_space_tok(t)) {
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ el = token_to_element(t, NS_HTML, doc)
+ doc.children.push(el)
+ el.document = doc
+ open_els.unshift(el)
+ // fixfull (big paragraph in spec about manifest, fragment, urls, etc)
+ ins_mode = ins_mode_before_head
+ return
+ }
+ if (t.type === TYPE_END_TAG) {
+ if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
+ // fall through to "anything else"
+ } else {
+ parse_error()
+ return
+ }
+ }
+ // Anything else
+ el = token_to_element(new_open_tag('html'), NS_HTML, doc)
+ doc.children.push(el)
+ el.document = doc
+ open_els.unshift(el)
+ // ?fixfull browsing context
+ ins_mode = ins_mode_before_head
+ process_token(t)
+ }
+
+ // 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
+ ins_mode_before_head = function (t) {
+ var el
+ if (is_space_tok(t)) {
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'head') {
+ el = insert_html_element(t)
+ head_element_pointer = el
+ ins_mode = ins_mode_in_head
+ return
+ }
+ if (t.type === TYPE_END_TAG) {
+ if (t.name === 'head' || t.name === 'body' || t.name === 'html' || t.name === 'br') {
+ // fall through to Anything else below
+ } else {
+ parse_error()
+ return
+ }
+ }
+ // Anything else
+ el = insert_html_element(new_open_tag('head'))
+ head_element_pointer = el
+ ins_mode = ins_mode_in_head
+ process_token(t)
+ }
+
+ // 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
+ // "Anything else" branch of the "in head" insertion mode (called from
+ // ins_mode_in_head): removes the current node from the stack of open
+ // elements, switches to the "after head" mode and passes the same token to
+ // process_token.
+ ins_mode_in_head_else = function (t) { // factored out for same-as-spec flow control
+ open_els.shift() // spec says this will be a 'head' node
+ ins_mode = ins_mode_after_head
+ process_token(t)
+ }
+ ins_mode_in_head = function (t) {
+ var ail, el
+ if (t.type === TYPE_TEXT && (t.text === "\t" || t.text === "\n" || t.text === "\u000c" || t.text === ' ')) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link')) {
+ el = insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'meta') {
+ el = insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ // fixfull encoding stuff
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'title') {
+ parse_generic_rcdata_text(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && ((t.name === 'noscript' && flag_scripting) || t.name === 'noframes' || t.name === 'style')) {
+ parse_generic_raw_text(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'noscript' && flag_scripting === false) {
+ insert_html_element(t)
+ ins_mode = ins_mode_in_head_noscript
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'script') {
+ ail = adjusted_insertion_location()
+ el = token_to_element(t, NS_HTML, ail)
+ el.flag('parser-inserted', true)
+ // fixfull frament case
+ ail[0].children.splice(ail[1], 0, el)
+ open_els.unshift(el)
+ tok_state = tok_state_script_data
+ original_ins_mode = ins_mode // make sure orig... is defined
+ ins_mode = ins_mode_text
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'head') {
+ open_els.shift() // will be a head element... spec says so
+ ins_mode = ins_mode_after_head
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
+ ins_mode_in_head_else(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'template') {
+ insert_html_element(t)
+ afe_push_marker()
+ flag_frameset_ok = false
+ ins_mode = ins_mode_in_template
+ template_ins_modes.unshift(ins_mode_in_template)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'template') {
+ if (template_tag_is_open()) {
+ generate_implied_end_tags
+ if (open_els[0].name !== 'template') {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'template' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ clear_afe_to_marker()
+ template_ins_modes.shift()
+ reset_ins_mode()
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
+ parse_error()
+ return
+ }
+ ins_mode_in_head_else(t)
+ }
+
+ // 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
+ // "Anything else" branch of the "in head noscript" insertion mode (called
+ // from ins_mode_in_head_noscript): reports a parse error, removes the current
+ // node from the stack of open elements, switches back to the "in head" mode
+ // and passes the same token to process_token.
+ ins_mode_in_head_noscript_else = function (t) {
+ parse_error()
+ open_els.shift()
+ ins_mode = ins_mode_in_head
+ process_token(t)
+ }
+ ins_mode_in_head_noscript = function (t) {
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'noscript') {
+ open_els.shift()
+ ins_mode = ins_mode_in_head
+ return
+ }
+ if (is_space_tok(t) || t.type === TYPE_COMMENT || (t.type === TYPE_START_TAG && (t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'style'))) {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'br') {
+ ins_mode_in_head_noscript_else(t)
+ return
+ }
+ if ((t.type === TYPE_START_TAG && (t.name === 'head' || t.name === 'noscript')) || t.type === TYPE_END_TAG) {
+ parse_error()
+ return
+ }
+ // Anything else
+ ins_mode_in_head_noscript_else(t)
+ }
+
+ // 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
+ ins_mode_after_head_else = function (t) {
+ var body_tok
+ body_tok = new_open_tag('body')
+ insert_html_element(body_tok)
+ ins_mode = ins_mode_in_body
+ process_token(t)
+ }
+ ins_mode_after_head = function (t) {
+ var el, i, j, len
+ if (is_space_tok(t)) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'body') {
+ insert_html_element(t)
+ flag_frameset_ok = false
+ ins_mode = ins_mode_in_body
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'frameset') {
+ insert_html_element(t)
+ ins_mode = ins_mode_in_frameset
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) {
+ parse_error()
+ open_els.unshift(head_element_pointer)
+ ins_mode_in_head(t)
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el === head_element_pointer) {
+ open_els.splice(i, 1)
+ return
+ }
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'template') {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'html' || t.name === 'br')) {
+ ins_mode_after_head_else(t)
+ return
+ }
+ if ((t.type === TYPE_START_TAG && t.name === 'head') || t.type === TYPE_END_TAG) {
+ parse_error()
+ return
+ }
+ // Anything else
+ ins_mode_after_head_else(t)
+ }
+
+ // 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
+ in_body_any_other_end_tag = function (name) { // factored out because adoption agency calls it
+ var el, i, node
+ node = open_els[0]
+ while (true) {
+ if (node.name === name && node.namespace === NS_HTML) {
+ generate_implied_end_tags(name) // arg is exception
+ if (node !== open_els[0]) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el === node) {
+ return
+ }
+ }
+ }
+ if (special_elements[node.name] === node.namespace) {
+ parse_error()
+ return
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (node === el) {
+ node = open_els[i + 1]
+ break
+ }
+ }
+ }
+ }
+ ins_mode_in_body = function (t) {
+ var a, aa, ab, ac, el, found, h_in_scope, i, input_el, j, l, len, len1, len10, len11, len12, len13, len14, len2, len3, len4, len5, len6, len7, len8, len9, m, n, node, o, ok_tags, prompt, q, r, ref, ref1, ref2, ref3, ref4, root_attrs, s, second, second_i, u, w, y, z
+ if (t.type === TYPE_TEXT && t.text === "\u0000") {
+ parse_error()
+ return
+ }
+ if (is_space_tok(t)) {
+ reconstruct_afe()
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_TEXT) {
+ reconstruct_afe()
+ insert_character(t)
+ flag_frameset_ok = false
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ parse_error()
+ if (template_tag_is_open()) {
+ return
+ }
+ root_attrs = open_els[open_els.length - 1].attrs
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (root_attrs[a[0]] == null) {
+ root_attrs[a[0]] = a[1]
+ }
+ }
+ return
+ }
+
+ if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'body') {
+ parse_error()
+ if (open_els.length < 2) {
+ return
+ }
+ second = open_els[open_els.length - 2]
+ if (second.namespace !== NS_HTML) {
+ return
+ }
+ if (second.name !== 'body') {
+ return
+ }
+ if (template_tag_is_open()) {
+ return
+ }
+ flag_frameset_ok = false
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (second.attrs[a[0]] == null) {
+ second.attrs[a[0]] = a[1]
+ }
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'frameset') {
+ parse_error()
+ if (open_els.length < 2) {
+ return
+ }
+ second_i = open_els.length - 2
+ second = open_els[second_i]
+ if (second.namespace !== NS_HTML) {
+ return
+ }
+ if (second.name !== 'body') {
+ return
+ }
+ if (flag_frameset_ok === false) {
+ return
+ }
+ if (second.parent != null) {
+ for (i = 0; i < second.parent.children.length; ++i) {
+ el = second.parent.children[i]
+ if (el === second) {
+ second.parent.children.splice(i, 1)
+ break
+ }
+ }
+ }
+ open_els.splice(second_i, 1)
+ // pop everything except the "root html element"
+ while (open_els.length > 1) {
+ open_els.shift()
+ }
+ insert_html_element(t)
+ ins_mode = ins_mode_in_frameset
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ ok_tags = {
+ dd: NS_HTML, dt: NS_HTML, li: NS_HTML, p: NS_HTML, tbody: NS_HTML,
+ td: NS_HTML, tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML,
+ tr: NS_HTML, body: NS_HTML, html: NS_HTML
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (ok_tags[t.name] !== el.namespace) {
+ parse_error()
+ break
+ }
+ }
+ if (template_ins_modes.length > 0) {
+ ins_mode_in_template(t)
+ } else {
+ stop_parsing()
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'body') {
+ if (!is_in_scope('body', NS_HTML)) {
+ parse_error()
+ return
+ }
+ ok_tags = {
+ dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
+ option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
+ rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
+ th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
+ html: NS_HTML
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (ok_tags[t.name] !== el.namespace) {
+ parse_error()
+ break
+ }
+ }
+ ins_mode = ins_mode_after_body
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'html') {
+ if (!is_in_scope('body', NS_HTML)) {
+ parse_error()
+ return
+ }
+ ok_tags = {
+ dd: NS_HTML, dt: NS_HTML, li: NS_HTML, optgroup: NS_HTML,
+ option: NS_HTML, p: NS_HTML, rb: NS_HTML, rp: NS_HTML, rt: NS_HTML,
+ rtc: NS_HTML, tbody: NS_HTML, td: NS_HTML, tfoot: NS_HTML,
+ th: NS_HTML, thead: NS_HTML, tr: NS_HTML, body: NS_HTML,
+ html: NS_HTML
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (ok_tags[t.name] !== el.namespace) {
+ parse_error()
+ break
+ }
+ }
+ ins_mode = ins_mode_after_body
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'p' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
+ close_p_if_in_button_scope()
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did that
+ close_p_if_in_button_scope()
+ if (h_tags[open_els[0].name] === open_els[0].namespace) {
+ parse_error()
+ open_els.shift()
+ }
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'pre' || t.name === 'listing')) {
+ close_p_if_in_button_scope()
+ insert_html_element(t)
+ eat_next_token_if_newline()
+ flag_frameset_ok = false
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'form') {
+ if (!(form_element_pointer === null || template_tag_is_open())) {
+ parse_error()
+ return
+ }
+ close_p_if_in_button_scope()
+ el = insert_html_element(t)
+ if (!template_tag_is_open()) {
+ form_element_pointer = el
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'li') {
+ flag_frameset_ok = false
+ for (i = 0; i < open_els.length; ++i) {
+ node = open_els[i]
+ if (node.name === 'li' && node.namespace === NS_HTML) {
+ generate_implied_end_tags('li') // arg is exception
+ if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'li' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ break
+ }
+ if (el_is_special_not_adp(node)) {
+ break
+ }
+ }
+ close_p_if_in_button_scope()
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'dd' || t.name === 'dt')) {
+ flag_frameset_ok = false
+ for (i = 0; i < open_els.length; ++i) {
+ node = open_els[i]
+ if (node.name === 'dd' && node.namespace === NS_HTML) {
+ generate_implied_end_tags('dd') // arg is exception
+ if (open_els[0].name !== 'dd' || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'dd' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ break
+ }
+ if (node.name === 'dt' && node.namespace === NS_HTML) {
+ generate_implied_end_tags('dt') // arg is exception
+ if (open_els[0].name !== 'dt' || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'dt' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ break
+ }
+ if (el_is_special_not_adp(node)) {
+ break
+ }
+ }
+ close_p_if_in_button_scope()
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'plaintext') {
+ close_p_if_in_button_scope()
+ insert_html_element(t)
+ tok_state = tok_state_plaintext
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'button') {
+ if (is_in_scope('button', NS_HTML)) {
+ parse_error()
+ generate_implied_end_tags()
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'button' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ }
+ reconstruct_afe()
+ insert_html_element(t)
+ flag_frameset_ok = false
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'address' || t.name === 'article' || t.name === 'aside' || t.name === 'blockquote' || t.name === 'button' || t.name === 'center' || t.name === 'details' || t.name === 'dialog' || t.name === 'dir' || t.name === 'div' || t.name === 'dl' || t.name === 'fieldset' || t.name === 'figcaption' || t.name === 'figure' || t.name === 'footer' || t.name === 'header' || t.name === 'hgroup' || t.name === 'listing' || t.name === 'main' || t.name === 'nav' || t.name === 'ol' || t.name === 'pre' || t.name === 'section' || t.name === 'summary' || t.name === 'ul')) {
+ if (!is_in_scope(t.name, NS_HTML)) {
+ parse_error()
+ return
+ }
+ generate_implied_end_tags()
+ if (!(open_els[0].name === t.name && open_els[0].namespace === NS_HTML)) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === t.name && el.namespace === NS_HTML) {
+ return
+ }
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'form') {
+ if (!template_tag_is_open()) {
+ node = form_element_pointer
+ form_element_pointer = null
+ if (node === null || !el_is_in_scope(node)) {
+ parse_error()
+ return
+ }
+ generate_implied_end_tags()
+ if (open_els[0] !== node) {
+ parse_error()
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el === node) {
+ open_els.splice(i, 1)
+ break
+ }
+ }
+ } else {
+ if (!is_in_scope('form', NS_HTML)) {
+ parse_error()
+ return
+ }
+ generate_implied_end_tags()
+ if (open_els[0].name !== 'form' || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'form' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'p') {
+ if (!is_in_button_scope('p', NS_HTML)) {
+ parse_error()
+ insert_html_element(new_open_tag('p'))
+ }
+ close_p_element()
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'li') {
+ if (!is_in_li_scope('li', NS_HTML)) {
+ parse_error()
+ return
+ }
+ generate_implied_end_tags('li') // arg is exception
+ if (open_els[0].name !== 'li' || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'li' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'dd' || t.name === 'dt')) {
+ if (!is_in_scope(t.name, NS_HTML)) {
+ parse_error()
+ return
+ }
+ generate_implied_end_tags(t.name) // arg is exception
+ if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === t.name && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && (h_tags[t.name] != null)) { // extra perens because Coffeescript did
+ h_in_scope = false
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (h_tags[el.name] === el.namespace) {
+ h_in_scope = true
+ break
+ }
+ if (standard_scopers[el.name] === el.namespace) {
+ break
+ }
+ }
+ if (!h_in_scope) {
+ parse_error()
+ return
+ }
+ generate_implied_end_tags()
+ if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (h_tags[el.name] === el.namespace) {
+ break
+ }
+ }
+ return
+ }
+ // deep breath!
+ if (t.type === TYPE_START_TAG && t.name === 'a') {
+ // If the list of active formatting elements contains an a element
+ // between the end of the list and the last marker on the list (or
+ // the start of the list if there is no marker on the list), then
+ // this is a parse error; run the adoption agency algorithm for the
+ // tag name "a", then remove that element from the list of active
+ // formatting elements and the stack of open elements if the
+ // adoption agency algorithm didn't already remove it (it might not
+ // have if the element is not in table scope).
+ found = false
+ for (i = 0; i < afe.length; ++i) {
+ el = afe[i]
+ if (el.type === TYPE_AFE_MARKER) {
+ break
+ }
+ if (el.name === 'a' && el.namespace === NS_HTML) {
+ found = el
+ }
+ }
+ if (found != null) {
+ parse_error()
+ adoption_agency('a')
+ for (i = 0; i < afe.length; ++i) {
+ el = afe[i]
+ if (el === found) {
+ afe.splice(i, 1)
+ }
+ }
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el === found) {
+ open_els.splice(i, 1)
+ }
+ }
+ }
+ reconstruct_afe()
+ el = insert_html_element(t)
+ afe_push(el)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
+ reconstruct_afe()
+ el = insert_html_element(t)
+ afe_push(el)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'nobr') {
+ reconstruct_afe()
+ if (is_in_scope('nobr', NS_HTML)) {
+ parse_error()
+ adoption_agency('nobr')
+ reconstruct_afe()
+ }
+ el = insert_html_element(t)
+ afe_push(el)
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'a' || t.name === 'b' || t.name === 'big' || t.name === 'code' || t.name === 'em' || t.name === 'font' || t.name === 'i' || t.name === 'nobr' || t.name === 's' || t.name === 'small' || t.name === 'strike' || t.name === 'strong' || t.name === 'tt' || t.name === 'u')) {
+ adoption_agency(t.name)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
+ reconstruct_afe()
+ insert_html_element(t)
+ afe_push_marker()
+ flag_frameset_ok = false
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'applet' || t.name === 'marquee' || t.name === 'object')) {
+ if (!is_in_scope(t.name, NS_HTML)) {
+ parse_error()
+ return
+ }
+ generate_implied_end_tags()
+ if (open_els[0].name !== t.name || open_els[0].namespace !== NS_HTML) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === t.name && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ clear_afe_to_marker()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'table') {
+ if (doc.flag('quirks mode') !== QUIRKS_YES) {
+ close_p_if_in_button_scope() // test
+ }
+ insert_html_element(t)
+ flag_frameset_ok = false
+ ins_mode = ins_mode_in_table
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'br') {
+ parse_error()
+ // W3C: t.type = TYPE_START_TAG
+ t = new_open_tag('br') // WHATWG
+ // fall through
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'area' || t.name === 'br' || t.name === 'embed' || t.name === 'img' || t.name === 'keygen' || t.name === 'wbr')) {
+ reconstruct_afe()
+ insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ flag_frameset_ok = false
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'input') {
+ reconstruct_afe()
+ insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ if (!is_input_hidden_tok(t)) {
+ flag_frameset_ok = false
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'menuitem' || t.name === 'param' || t.name === 'source' || t.name === 'track')) {
+ // WHATWG adds 'menuitem' for this block
+ insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'hr') {
+ close_p_if_in_button_scope()
+ insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ flag_frameset_ok = false
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'image') {
+ parse_error()
+ t.name = 'img'
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'isindex') {
+ parse_error()
+ if (template_tag_is_open() === false && form_element_pointer !== null) {
+ return
+ }
+ t.acknowledge_self_closing()
+ flag_frameset_ok = false
+ close_p_if_in_button_scope()
+ el = insert_html_element(new_open_tag('form'))
+ if (!template_tag_is_open()) {
+ form_element_pointer = el
+ }
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (a[0] === 'action') {
+ el.attrs['action'] = a[1]
+ break
+ }
+ }
+ insert_html_element(new_open_tag('hr'))
+ open_els.shift()
+ reconstruct_afe()
+ insert_html_element(new_open_tag('label'))
+ // note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
+ input_el = new_open_tag('input')
+ prompt = null
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (a[0] === 'prompt') {
+ prompt = a[1]
+ }
+ if (a[0] !== 'name' && a[0] !== 'action' && a[0] !== 'prompt') {
+ input_el.attrs_a.push([a[0], a[1]])
+ }
+ }
+ input_el.attrs_a.push(['name', 'isindex'])
+ // fixfull this next bit is in english... internationalize?
+ if (prompt == null) {
+ prompt = "This is a searchable index. Enter search keywords: "
+ }
+ insert_character(new_character_token(prompt)) // fixfull split
+ // TODO submit typo "balue" in spec
+ insert_html_element(input_el)
+ open_els.shift()
+ // insert_character('') // you can put chars here if prompt attr missing
+ open_els.shift()
+ insert_html_element(new_open_tag('hr'))
+ open_els.shift()
+ open_els.shift()
+ if (!template_tag_is_open()) {
+ form_element_pointer = null
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'textarea') {
+ insert_html_element(t)
+ eat_next_token_if_newline()
+ tok_state = tok_state_rcdata
+ original_ins_mode = ins_mode
+ flag_frameset_ok = false
+ ins_mode = ins_mode_text
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'xmp') {
+ close_p_if_in_button_scope()
+ reconstruct_afe()
+ flag_frameset_ok = false
+ parse_generic_raw_text(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'iframe') {
+ flag_frameset_ok = false
+ parse_generic_raw_text(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'noembed' || (t.name === 'noscript' && flag_scripting))) {
+ parse_generic_raw_text(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'select') {
+ reconstruct_afe()
+ insert_html_element(t)
+ flag_frameset_ok = false
+ if (ins_mode === ins_mode_in_table || ins_mode === ins_mode_in_caption || ins_mode === ins_mode_in_table_body || ins_mode === ins_mode_in_row || ins_mode === ins_mode_in_cell) {
+ ins_mode = ins_mode_in_select_in_table
+ } else {
+ ins_mode = ins_mode_in_select
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'optgroup' || t.name === 'option')) {
+ if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
+ open_els.shift()
+ }
+ reconstruct_afe()
+ insert_html_element(t)
+ return
+ }
+// this comment block implements the W3C spec
+// if t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rp' || t.name === 'rtc')
+// if is_in_scope 'ruby', NS_HTML
+// generate_implied_end_tags()
+// unless open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML
+// parse_error()
+// insert_html_element t
+// return
+// if t.type === TYPE_START_TAG && t.name === 'rt'
+// if is_in_scope 'ruby', NS_HTML
+// generate_implied_end_tags 'rtc' // arg === exception
+// unless (open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML
+// parse_error()
+// insert_html_element t
+// return
+// below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
+ if (t.type === TYPE_START_TAG && (t.name === 'rb' || t.name === 'rtc')) {
+ if (is_in_scope('ruby', NS_HTML)) {
+ generate_implied_end_tags()
+ if (!(open_els[0].name === 'ruby' && open_els[0].namespace === NS_HTML)) {
+ parse_error()
+ }
+ }
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'rp' || t.name === 'rt')) {
+ if (is_in_scope('ruby', NS_HTML)) {
+ generate_implied_end_tags('rtc')
+ if (!((open_els[0].name === 'ruby' || open_els[0].name === 'rtc') && open_els[0].namespace === NS_HTML)) {
+ parse_error()
+ }
+ }
+ insert_html_element(t)
+ return
+ }
+// end WHATWG chunk
+ if (t.type === TYPE_START_TAG && t.name === 'math') {
+ reconstruct_afe()
+ adjust_mathml_attributes(t)
+ adjust_foreign_attributes(t)
+ insert_foreign_element(t, NS_MATHML)
+ if (t.flag('self-closing')) {
+ open_els.shift()
+ t.acknowledge_self_closing()
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'svg') {
+ reconstruct_afe()
+ adjust_svg_attributes(t)
+ adjust_foreign_attributes(t)
+ insert_foreign_element(t, NS_SVG)
+ if (t.flag('self-closing')) {
+ open_els.shift()
+ t.acknowledge_self_closing()
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'frame' || t.name === 'head' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG) { // any other start tag
+ reconstruct_afe()
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG) { // any other end tag
+ in_body_any_other_end_tag(t.name)
+ return
+ }
+ }
+
+ // 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
+ ins_mode_text = function (t) {
+ if (t.type === TYPE_TEXT) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ parse_error()
+ if (open_els[0].name === 'script' && open_els[0].namespace === NS_HTML) {
+ open_els[0].flag('already started', true)
+ }
+ open_els.shift()
+ ins_mode = original_ins_mode
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'script') {
+ open_els.shift()
+ ins_mode = original_ins_mode
+ // fixfull the spec seems to assume that I'm going to run the script
+ // http://www.w3.org/TR/html5/syntax.html#scriptEndTag
+ return
+ }
+ if (t.type === TYPE_END_TAG) {
+ open_els.shift()
+ ins_mode = original_ins_mode
+ return
+ }
+ }
+
+ // the functions below implement the tokenizer states described here:
+ // http://www.w3.org/TR/html5/syntax.html#tokenization
+
+ // 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
+ // "Anything else" branch of the "in table" insertion mode: reports a parse
+ // error and processes the token with the "in body" rules while
+ // flag_foster_parenting is set. The flag is cleared again as soon as
+ // ins_mode_in_body returns, so statement order matters here.
+ ins_mode_in_table_else = function (t) {
+ parse_error()
+ flag_foster_parenting = true
+ ins_mode_in_body(t)
+ flag_foster_parenting = false
+ }
+ ins_mode_in_table = function (t) {
+ var el
+ switch (t.type) {
+ case TYPE_TEXT:
+ if ((open_els[0].name === 'table' || open_els[0].name === 'tbody' || open_els[0].name === 'tfoot' || open_els[0].name === 'thead' || open_els[0].name === 'tr') && open_els[0].namespace === NS_HTML) {
+ pending_table_character_tokens = []
+ original_ins_mode = ins_mode
+ ins_mode = ins_mode_in_table_text
+ process_token(t)
+ } else {
+ ins_mode_in_table_else(t)
+ }
+ break
+ case TYPE_COMMENT:
+ insert_comment(t)
+ break
+ case TYPE_DOCTYPE:
+ parse_error()
+ break
+ case TYPE_START_TAG:
+ switch (t.name) {
+ case 'caption':
+ clear_stack_to_table_context()
+ afe_push_marker()
+ insert_html_element(t)
+ ins_mode = ins_mode_in_caption
+ break
+ case 'colgroup':
+ clear_stack_to_table_context()
+ insert_html_element(t)
+ ins_mode = ins_mode_in_column_group
+ break
+ case 'col':
+ clear_stack_to_table_context()
+ insert_html_element(new_open_tag('colgroup'))
+ ins_mode = ins_mode_in_column_group
+ process_token(t)
+ break
+ case 'tbody':
+ case 'tfoot':
+ case 'thead':
+ clear_stack_to_table_context()
+ insert_html_element(t)
+ ins_mode = ins_mode_in_table_body
+ break
+ case 'td':
+ case 'th':
+ case 'tr':
+ clear_stack_to_table_context()
+ insert_html_element(new_open_tag('tbody'))
+ ins_mode = ins_mode_in_table_body
+ process_token(t)
+ break
+ case 'table':
+ parse_error()
+ if (is_in_table_scope('table', NS_HTML)) {
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'table' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ reset_ins_mode()
+ process_token(t)
+ }
+ break
+ case 'style':
+ case 'script':
+ case 'template':
+ ins_mode_in_head(t)
+ break
+ case 'input':
+ if (!is_input_hidden_tok(t)) {
+ ins_mode_in_table_else(t)
+ } else {
+ parse_error()
+ el = insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ }
+ break
+ case 'form':
+ parse_error()
+ if (form_element_pointer != null) {
+ return
+ }
+ if (template_tag_is_open()) {
+ return
+ }
+ form_element_pointer = insert_html_element(t)
+ open_els.shift()
+ break
+ default:
+ ins_mode_in_table_else(t)
+ }
+ break
+ case TYPE_END_TAG:
+ switch (t.name) {
+ case 'table':
+ if (is_in_table_scope('table', NS_HTML)) {
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'table' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ reset_ins_mode()
+ } else {
+ parse_error()
+ }
+ break
+ case 'body':
+ case 'caption':
+ case 'col':
+ case 'colgroup':
+ case 'html':
+ case 'tbody':
+ case 'td':
+ case 'tfoot':
+ case 'th':
+ case 'thead':
+ case 'tr':
+ parse_error()
+ break
+ case 'template':
+ ins_mode_in_head(t)
+ break
+ default:
+ ins_mode_in_table_else(t)
+ }
+ break
+ case TYPE_EOF:
+ ins_mode_in_body(t)
+ break
+ default:
+ ins_mode_in_table_else(t)
+ }
+ }
+
+ // 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
+ ins_mode_in_table_text = function (t) {
+ var all_space, i, l, m, old
+ if (t.type === TYPE_TEXT && t.text === "\u0000") {
+ // from javascript?
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_TEXT) {
+ pending_table_character_tokens.push(t)
+ return
+ }
+ // Anything else
+ all_space = true
+ for (i = 0; i < pending_table_character_tokens.length; ++i) {
+ old = pending_table_character_tokens[i]
+ if (!is_space_tok(old)) {
+ all_space = false
+ break
+ }
+ }
+ if (all_space) {
+ for (i = 0; i < pending_table_character_tokens.length; ++i) {
+ old = pending_table_character_tokens[i]
+ insert_character(old)
+ }
+ } else {
+ for (i = 0; i < pending_table_character_tokens.length; ++i) {
+ old = pending_table_character_tokens[i]
+ ins_mode_in_table_else(old)
+ }
+ }
+ pending_table_character_tokens = []
+ ins_mode = original_ins_mode
+ process_token(t)
+ }
+
+ // 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
+ ins_mode_in_caption = function (t) {
+ var el
+ if (t.type === TYPE_END_TAG && t.name === 'caption') {
+ if (is_in_table_scope('caption', NS_HTML)) {
+ generate_implied_end_tags()
+ if (open_els[0].name !== 'caption') {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'caption' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ clear_afe_to_marker()
+ ins_mode = ins_mode_in_table
+ } else {
+ parse_error()
+ // fragment case
+ }
+ return
+ }
+ if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
+ parse_error()
+ if (is_in_table_scope('caption', NS_HTML)) {
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'caption' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ clear_afe_to_marker()
+ ins_mode = ins_mode_in_table
+ process_token(t)
+ }
+ // else fragment case
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
+ parse_error()
+ return
+ }
+ // Anything else
+ ins_mode_in_body(t)
+ }
+
+ // 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
+ ins_mode_in_column_group = function (t) {
+ var el
+ if (is_space_tok(t)) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'col') {
+ el = insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'colgroup') {
+ if (open_els[0].name === 'colgroup' && open_els.namespace === NS_HTML) {
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'col') {
+ parse_error()
+ return
+ }
+ if ((t.type === TYPE_START_TAG || t.type === TYPE_END_TAG) && t.name === 'template') {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ ins_mode_in_body(t)
+ return
+ }
+ // Anything else
+ if (open_els[0].name !== 'colgroup') {
+ parse_error()
+ return
+ }
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ process_token(t)
+ }
+
+ // 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
+ ins_mode_in_table_body = function (t) {
+ var el, has, i
+ if (t.type === TYPE_START_TAG && t.name === 'tr') {
+ clear_stack_to_table_body_context()
+ insert_html_element(t)
+ ins_mode = ins_mode_in_row
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
+ parse_error()
+ clear_stack_to_table_body_context()
+ insert_html_element(new_open_tag('tr'))
+ ins_mode = ins_mode_in_row
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
+ if (!is_in_table_scope(t.name, NS_HTML)) {
+ parse_error()
+ return
+ }
+ clear_stack_to_table_body_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ return
+ }
+ if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) || (t.type === TYPE_END_TAG && t.name === 'table')) {
+ has = false
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el.namespace === NS_HTML && (el.name === 'tbody' || el.name === 'tfoot' || el.name === 'thead')) {
+ has = true
+ break
+ }
+ if (table_scopers[el.name] === el.namespace) {
+ break
+ }
+ }
+ if (!has) {
+ parse_error()
+ return
+ }
+ clear_stack_to_table_body_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th' || t.name === 'tr')) {
+ parse_error()
+ return
+ }
+ // Anything else
+ ins_mode_in_table(t)
+ }
+
+ // 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
+ ins_mode_in_row = function (t) {
+ if (t.type === TYPE_START_TAG && (t.name === 'th' || t.name === 'td')) {
+ clear_stack_to_table_row_context()
+ insert_html_element(t)
+ ins_mode = ins_mode_in_cell
+ afe_push_marker()
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'tr') {
+ if (is_in_table_scope('tr', NS_HTML)) {
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if ((t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) || t.type === TYPE_END_TAG && t.name === 'table') {
+ if (is_in_table_scope('tr', NS_HTML)) {
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ process_token(t)
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
+ if (is_in_table_scope(t.name, NS_HTML)) {
+ if (is_in_table_scope('tr', NS_HTML)) {
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ process_token(t)
+ }
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html' || t.name === 'td' || t.name === 'th')) {
+ parse_error()
+ return
+ }
+ // Anything else
+ ins_mode_in_table(t)
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#close-the-cell
+ close_the_cell = function () {
+ var el
+ generate_implied_end_tags()
+ if (!((open_els[0].name === 'td' || open_els[0] === 'th') && open_els[0].namespace === NS_HTML)) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
+ break
+ }
+ }
+ clear_afe_to_marker()
+ ins_mode = ins_mode_in_row
+ }
+
+ // 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
+ ins_mode_in_cell = function (t) {
+ var el, has, i
+ if (t.type === TYPE_END_TAG && (t.name === 'td' || t.name === 'th')) {
+ if (is_in_table_scope(t.name, NS_HTML)) {
+ generate_implied_end_tags()
+ if (!((open_els[0].name === t.name) && open_els[0].namespace === NS_HTML)) {
+ parse_error()
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === t.name && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ clear_afe_to_marker()
+ ins_mode = ins_mode_in_row
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'td' || t.name === 'tfoot' || t.name === 'th' || t.name === 'thead' || t.name === 'tr')) {
+ has = false
+ for (i = 0; i < open_els.length; ++i) {
+ el = open_els[i]
+ if (el.namespace === NS_HTML && (el.name === 'td' || el.name === 'th')) {
+ has = true
+ break
+ }
+ if (table_scopers[el.name] === el.namespace) {
+ break
+ }
+ }
+ if (!has) {
+ parse_error()
+ return
+ }
+ close_the_cell()
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'body' || t.name === 'caption' || t.name === 'col' || t.name === 'colgroup' || t.name === 'html')) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr')) {
+ if (is_in_table_scope(t.name, NS_HTML)) {
+ close_the_cell()
+ process_token(t)
+ } else {
+ parse_error()
+ }
+ return
+ }
+ // Anything Else
+ ins_mode_in_body(t)
+ }
+
+ // 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
+ ins_mode_in_select = function (t) {
+ var el
+ if (t.type === TYPE_TEXT && t.text === "\u0000") {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_TEXT) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'option') {
+ if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
+ open_els.shift()
+ }
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'optgroup') {
+ if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
+ open_els.shift()
+ }
+ if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
+ open_els.shift()
+ }
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'optgroup') {
+ if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
+ if (open_els[1].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
+ open_els.shift()
+ }
+ }
+ if (open_els[0].name === 'optgroup' && open_els[0].namespace === NS_HTML) {
+ open_els.shift()
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'option') {
+ if (open_els[0].name === 'option' && open_els[0].namespace === NS_HTML) {
+ open_els.shift()
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'select') {
+ if (is_in_select_scope('select', NS_HTML)) {
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'select' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ reset_ins_mode()
+ } else {
+ parse_error()
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'select') {
+ parse_error()
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'select' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ reset_ins_mode()
+ // spec says that this is the same as </select> but it doesn't say
+ // to check scope first
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'input' || t.name === 'keygen' || t.name === 'textarea')) {
+ parse_error()
+ if (!is_in_select_scope('select', NS_HTML)) {
+ return
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'select' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ reset_ins_mode()
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'script' || t.name === 'template')) {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ ins_mode_in_body(t)
+ return
+ }
+ // Anything else
+ parse_error()
+ }
+
+ // 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
+ ins_mode_in_select_in_table = function (t) {
+ var el
+ if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
+ parse_error()
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'select' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ reset_ins_mode()
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && (t.name === 'caption' || t.name === 'table' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead' || t.name === 'tr' || t.name === 'td' || t.name === 'th')) {
+ parse_error()
+ if (!is_in_table_scope(t.name, NS_HTML)) {
+ return
+ }
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'select' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ reset_ins_mode()
+ process_token(t)
+ return
+ }
+ // Anything else
+ ins_mode_in_select(t)
+ }
+
+ // 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
+ ins_mode_in_template = function (t) {
+ var el
+ if (t.type === TYPE_TEXT || t.type === TYPE_COMMENT || t.type === TYPE_DOCTYPE) {
+ ins_mode_in_body(t)
+ return
+ }
+ if ((t.type === TYPE_START_TAG && (t.name === 'base' || t.name === 'basefont' || t.name === 'bgsound' || t.name === 'link' || t.name === 'meta' || t.name === 'noframes' || t.name === 'script' || t.name === 'style' || t.name === 'template' || t.name === 'title')) || (t.type === TYPE_END_TAG && t.name === 'template')) {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'caption' || t.name === 'colgroup' || t.name === 'tbody' || t.name === 'tfoot' || t.name === 'thead')) {
+ template_ins_modes.shift()
+ template_ins_modes.unshift(ins_mode_in_table)
+ ins_mode = ins_mode_in_table
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'col') {
+ template_ins_modes.shift()
+ template_ins_modes.unshift(ins_mode_in_column_group)
+ ins_mode = ins_mode_in_column_group
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'tr') {
+ template_ins_modes.shift()
+ template_ins_modes.unshift(ins_mode_in_table_body)
+ ins_mode = ins_mode_in_table_body
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'td' || t.name === 'th')) {
+ template_ins_modes.shift()
+ template_ins_modes.unshift(ins_mode_in_row)
+ ins_mode = ins_mode_in_row
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG) {
+ template_ins_modes.shift()
+ template_ins_modes.unshift(ins_mode_in_body)
+ ins_mode = ins_mode_in_body
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ if (!template_tag_is_open()) {
+ stop_parsing()
+ return
+ }
+ parse_error()
+ while (true) {
+ el = open_els.shift()
+ if (el.name === 'template' && el.namespace === NS_HTML) {
+ break
+ }
+ }
+ clear_afe_to_marker()
+ template_ins_modes.shift()
+ reset_ins_mode()
+ process_token(t)
+ }
+ }
+
+ // 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
+ ins_mode_after_body = function (t) {
+ var first
+ if (is_space_tok(t)) {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ first = open_els[open_els.length - 1]
+ insert_comment(t, [first, first.children.length])
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'html') {
+ if (flag_fragment_parsing) {
+ parse_error()
+ return
+ }
+ ins_mode = ins_mode_after_after_body
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ stop_parsing()
+ return
+ }
+ // Anything ELse
+ parse_error()
+ ins_mode = ins_mode_in_body
+ process_token(t)
+ }
+
+ // 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
+ ins_mode_in_frameset = function (t) {
+ if (is_space_tok(t)) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'frameset') {
+ insert_html_element(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'frameset') {
+ if (open_els.length === 1) {
+ parse_error()
+ return // fragment case
+ }
+ open_els.shift()
+ if (flag_fragment_parsing === false && open_els[0].name !== 'frameset') {
+ ins_mode = ins_mode_after_frameset
+ }
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'frame') {
+ insert_html_element(t)
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'noframes') {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ if (open_els.length !== 1) {
+ parse_error()
+ }
+ stop_parsing()
+ return
+ }
+ // Anything else
+ parse_error()
+ }
+
+ // 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
+ ins_mode_after_frameset = function (t) {
+ if (is_space_tok(t)) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'html') {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'html') {
+ ins_mode = ins_mode_after_after_frameset
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'noframes') {
+ ins_mode_in_head(t)
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ stop_parsing()
+ return
+ }
+ // Anything else
+ parse_error()
+ }
+
+ // 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
+ ins_mode_after_after_body = function (t) {
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t, [doc, doc.children.length])
+ return
+ }
+ if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ stop_parsing()
+ return
+ }
+ // Anything else
+ parse_error()
+ ins_mode = ins_mode_in_body
+ process_token(t)
+ }
+
+ // 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
+ ins_mode_after_after_frameset = function (t) {
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t, [doc, doc.children.length])
+ return
+ }
+ if (t.type === TYPE_DOCTYPE || is_space_tok(t) || (t.type === TYPE_START_TAG && t.name === 'html')) {
+ ins_mode_in_body(t)
+ return
+ }
+ if (t.type === TYPE_EOF) {
+ stop_parsing()
+ return
+ }
+ if (t.type === TYPE_START_TAG && t.name === 'noframes') {
+ ins_mode_in_head(t)
+ return
+ }
+ // Anything else
+ parse_error()
+ return
+ }
+
+ // 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
+ has_color_face_or_size = function (t) {
+ var a, i
+ for (i = 0; i < t.attrs_a.length; ++i) {
+ a = t.attrs_a[i]
+ if (a[0] === 'color' || a[0] === 'face' || a[0] === 'size') {
+ return true
+ }
+ }
+ return false
+ }
+ in_foreign_content_end_script = function () { // end-of-script handling for a script element in foreign content
+ open_els.shift() // remove the current node from the front of open_els
+ // fixfull: presumably the rest of the spec's script-end processing still needs implementing; confirm
+ }
+ in_foreign_content_other_start = function (t) {
+ var acn
+ acn = adjusted_current_node()
+ if (acn.namespace === NS_MATHML) {
+ adjust_mathml_attributes(t)
+ }
+ if (acn.namespace === NS_SVG && (svg_name_fixes[t.name] != null)) { // extra perens because Coffeescript did
+ t.name = svg_name_fixes[t.name]
+ }
+ if (acn.namespace === NS_SVG) {
+ adjust_svg_attributes(t)
+ }
+ adjust_foreign_attributes(t)
+ insert_foreign_element(t, acn.namespace)
+ if (t.flag('self-closing')) {
+ if (t.name === 'script') {
+ t.acknowledge_self_closing()
+ in_foreign_content_end_script()
+ // fixfull
+ } else {
+ open_els.shift()
+ t.acknowledge_self_closing()
+ }
+ }
+ }
+ in_foreign_content = function (t) {
+ var el, i, node
+ if (t.type === TYPE_TEXT && t.text === "\u0000") {
+ parse_error()
+ insert_character(new_character_token("\ufffd"))
+ return
+ }
+ if (is_space_tok(t)) {
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_TEXT) {
+ flag_frameset_ok = false
+ insert_character(t)
+ return
+ }
+ if (t.type === TYPE_COMMENT) {
+ insert_comment(t)
+ return
+ }
+ if (t.type === TYPE_DOCTYPE) {
+ parse_error()
+ return
+ }
+ if (t.type === TYPE_START_TAG && (t.name === 'b' || t.name === 'big' || t.name === 'blockquote' || t.name === 'body' || t.name === 'br' || t.name === 'center' || t.name === 'code' || t.name === 'dd' || t.name === 'div' || t.name === 'dl' || t.name === 'dt' || t.name === 'em' || t.name === 'embed' || t.name === 'h1' || t.name === 'h2' || t.name === 'h3' || t.name === 'h4' || t.name === 'h5' || t.name === 'h6' || t.name === 'head' || t.name === 'hr' || t.name === 'i' || t.name === 'img' || t.name === 'li' || t.name === 'listing' || t.name === 'main' || t.name === 'meta' || t.name === 'nobr' || t.name === 'ol' || t.name === 'p' || t.name === 'pre' || t.name === 'ruby' || t.name === 's' || t.name === 'small' || t.name === 'span' || t.name === 'strong' || t.name === 'strike' || t.name === 'sub' || t.name === 'sup' || t.name === 'table' || t.name === 'tt' || t.name === 'u' || t.name === 'ul' || t.name === 'var' || (t.name === 'font' && has_color_face_or_size(t)))) {
+ parse_error()
+ if (flag_fragment_parsing) {
+ in_foreign_content_other_start(t)
+ return
+ }
+ while (true) { // is this safe?
+ open_els.shift()
+ if (is_mathml_text_integration_point(open_els[0]) || is_html_integration(open_els[0]) || open_els[0].namespace === NS_HTML) {
+ break
+ }
+ }
+ process_token(t)
+ return
+ }
+ if (t.type === TYPE_START_TAG) {
+ in_foreign_content_other_start(t)
+ return
+ }
+ if (t.type === TYPE_END_TAG && t.name === 'script' && open_els[0].name === 'script' && open_els[0].namespace === NS_SVG) {
+ in_foreign_content_end_script()
+ return
+ }
+ if (t.type === TYPE_END_TAG) {
+ i = 0
+ node = open_els[i]
+ if (node.name.toLowerCase() !== t.name) {
+ parse_error()
+ }
+ while (true) {
+ if (node === open_els[open_els.length - 1]) {
+ return
+ }
+ if (node.name.toLowerCase() === t.name) {
+ while (true) {
+ el = open_els.shift()
+ if (el === node) {
+ return
+ }
+ }
+ }
+ i += 1
+ node = open_els[i]
+ if (node.namespace === NS_HTML) {
+ break
+ }
+ }
+ ins_mode(t) // explicitly call HTML insertion mode
+ }
+ }
+
+
+ // 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
+ tok_state_data = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '&':
+ return new_text_node(parse_character_reference())
+ break
+ case '<':
+ tok_state = tok_state_tag_open
+ break
+ case "\u0000":
+ parse_error()
+ return new_text_node(c)
+ break
+ case '': // EOF
+ return new_eof_token()
+ break
+ default:
+ return new_text_node(c)
+ }
+ return null
+ }
+
+ // 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
+ // not needed: tok_state_character_reference_in_data = function () {
+ // just call parse_character_reference()
+
+ // 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
+ tok_state_rcdata = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '&':
+ return new_text_node(parse_character_reference())
+ break
+ case '<':
+ tok_state = tok_state_rcdata_less_than_sign
+ break
+ case "\u0000":
+ parse_error()
+ return new_character_token("\ufffd")
+ break
+ case '': // EOF
+ return new_eof_token()
+ break
+ default:
+ return new_character_token(c)
+ }
+ return null
+ }
+
+ // 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
+ // not needed: tok_state_character_reference_in_rcdata = function () {
+ // just call parse_character_reference()
+
+ // 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
+ tok_state_rawtext = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '<':
+ tok_state = tok_state_rawtext_less_than_sign
+ break
+ case "\u0000":
+ parse_error()
+ return new_character_token("\ufffd")
+ break
+ case '': // EOF
+ return new_eof_token()
+ break
+ default:
+ return new_character_token(c)
+ }
+ return null
+ }
+
+ // 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
+ tok_state_script_data = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '<':
+ tok_state = tok_state_script_data_less_than_sign
+ break
+ case "\u0000":
+ parse_error()
+ return new_character_token("\ufffd")
+ break
+ case '': // EOF
+ return new_eof_token()
+ break
+ default:
+ return new_character_token(c)
+ }
+ return null
+ }
+
+ // 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
+ tok_state_plaintext = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case "\u0000":
+ parse_error()
+ return new_character_token("\ufffd")
+ break
+ case '': // EOF
+ return new_eof_token()
+ break
+ default:
+ return new_character_token(c)
+ }
+ return null
+ }
+
+ // 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
+ tok_state_tag_open = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '!') {
+ tok_state = tok_state_markup_declaration_open
+ return
+ }
+ if (c === '/') {
+ tok_state = tok_state_end_tag_open
+ return
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag = new_open_tag(c.toLowerCase())
+ tok_state = tok_state_tag_name
+ return
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag = new_open_tag(c)
+ tok_state = tok_state_tag_name
+ return
+ }
+ if (c === '?') {
+ parse_error()
+ tok_cur_tag = new_comment_token('?') // FIXME right?
+ tok_state = tok_state_bogus_comment
+ return
+ }
+ // Anything else
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // we didn't parse/handle the char after <
+ return new_text_node('<')
+ }
+
+ // 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
+ tok_state_end_tag_open = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (is_uc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c.toLowerCase())
+ tok_state = tok_state_tag_name
+ return
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c)
+ tok_state = tok_state_tag_name
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_state = tok_state_data
+ return
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ return new_text_node('</')
+ }
+ // Anything else
+ parse_error()
+ tok_cur_tag = new_comment_token(c)
+ tok_state = tok_state_bogus_comment
+ return null
+ }
+
+ // 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
+ tok_state_tag_name = function () {
+ var c, tmp
+ switch (c = txt.charAt(cur++)) {
+ case "\t":
+ case "\n":
+ case "\u000c":
+ case ' ':
+ tok_state = tok_state_before_attribute_name
+ break
+ case '/':
+ tok_state = tok_state_self_closing_start_tag
+ break
+ case '>':
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ break
+ case "\u0000":
+ parse_error()
+ tok_cur_tag.name += "\ufffd"
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.name += c.toLowerCase()
+ } else {
+ tok_cur_tag.name += c
+ }
+ }
+ return null
+ }
+
+ // 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
+ tok_state_rcdata_less_than_sign = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '/') {
+ temporary_buffer = ''
+ tok_state = tok_state_rcdata_end_tag_open
+ return null
+ }
+ // Anything else
+ tok_state = tok_state_rcdata
+ cur -= 1 // reconsume the input character
+ return new_character_token('<')
+ }
+
+ // 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
+ tok_state_rcdata_end_tag_open = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (is_uc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c.toLowerCase())
+ temporary_buffer += c
+ tok_state = tok_state_rcdata_end_tag_name
+ return null
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c)
+ temporary_buffer += c
+ tok_state = tok_state_rcdata_end_tag_name
+ return null
+ }
+ // Anything else
+ tok_state = tok_state_rcdata
+ cur -= 1 // reconsume the input character
+ return new_character_token("</") // fixfull separate these
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
+ is_appropriate_end_tag = function (t) {
+ // fixfull: this assumes that open_els[0].name is "the tag name of the last
+ // start tag to have been emitted from this tokenizer"
+ return t.type === TYPE_END_TAG && t.name === open_els[0].name
+ }
+
+ // 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
+ tok_state_rcdata_end_tag_name = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_before_attribute_name
+ return
+ }
+ // else fall through to "Anything else"
+ }
+ if (c === '/') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_self_closing_start_tag // FIXME spec typo?
+ return
+ }
+ // else fall through to "Anything else"
+ }
+ if (c === '>') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ // else fall through to "Anything else"
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c
+ return null
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag.name += c
+ temporary_buffer += c
+ return null
+ }
+ // Anything else
+ tok_state = tok_state_rcdata
+ cur -= 1 // reconsume the input character
+ return new_character_token('</' + temporary_buffer) // fixfull separate these
+ }
+
+ // 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
+ tok_state_rawtext_less_than_sign = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '/') {
+ temporary_buffer = ''
+ tok_state = tok_state_rawtext_end_tag_open
+ return null
+ }
+ // Anything else
+ tok_state = tok_state_rawtext
+ cur -= 1 // reconsume the input character
+ return new_character_token('<')
+ }
+
+ // 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
+ tok_state_rawtext_end_tag_open = function () {
+ c = txt.charAt(cur++)
+ if (is_uc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c.toLowerCase())
+ temporary_buffer += c
+ tok_state = tok_state_rawtext_end_tag_name
+ return null
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c)
+ temporary_buffer += c
+ tok_state = tok_state_rawtext_end_tag_name
+ return null
+ }
+ // Anything else
+ tok_state = tok_state_rawtext
+ cur -= 1 // reconsume the input character
+ return new_character_token("</") // fixfull separate these
+ }
+
+ // 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
+ tok_state_rawtext_end_tag_name = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_before_attribute_name
+ return
+ }
+ // else fall through to "Anything else"
+ }
+ if (c === '/') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_self_closing_start_tag
+ return
+ }
+ // else fall through to "Anything else"
+ }
+ if (c === '>') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ // else fall through to "Anything else"
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c
+ return null
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag.name += c
+ temporary_buffer += c
+ return null
+ }
+ // Anything else
+ tok_state = tok_state_rawtext
+ cur -= 1 // reconsume the input character
+ return new_character_token('</' + temporary_buffer) // fixfull separate these
+ }
+
+ // 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
+ tok_state_script_data_less_than_sign = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '/') {
+ temporary_buffer = ''
+ tok_state = tok_state_script_data_end_tag_open
+ return
+ }
+ if (c === '!') {
+ tok_state = tok_state_script_data_escape_start
+ return new_character_token('<!') // fixfull split
+ }
+ // Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 // reconsume
+ return new_character_token('<')
+ }
+
+ // 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
+ tok_state_script_data_end_tag_open = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (is_uc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c.toLowerCase())
+ temporary_buffer += c
+ tok_state = tok_state_script_data_end_tag_name
+ return
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c)
+ temporary_buffer += c
+ tok_state = tok_state_script_data_end_tag_name
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 // reconsume
+ return new_character_token('</')
+ }
+
+ // 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
+ tok_state_script_data_end_tag_name = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_before_attribute_name
+ return
+ }
+ // fall through
+ }
+ if (c === '/') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_self_closing_start_tag
+ return
+ }
+ // fall through
+ }
+ if (c === '>') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ // fall through
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c
+ return
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag.name += c
+ temporary_buffer += c
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 // reconsume
+ return new_character_token("</" + temporary_buffer) // fixfull split
+ }
+
+ // 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
+ tok_state_script_data_escape_start = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ tok_state = tok_state_script_data_escape_start_dash
+ return new_character_token('-')
+ }
+ // Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 // reconsume
+ }
+
+ // 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
+ tok_state_script_data_escape_start_dash = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ tok_state = tok_state_script_data_escaped_dash_dash
+ return new_character_token('-')
+ }
+ // Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 // reconsume
+ }
+
+ // 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
+ tok_state_script_data_escaped = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ tok_state = tok_state_script_data_escaped_dash
+ return new_character_token('-')
+ }
+ if (c === '<') {
+ tok_state = tok_state_script_data_escaped_less_than_sign
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ return new_character_token("\ufffd")
+ }
+ if (c === '') { // EOF
+ tok_state = tok_state_data
+ parse_error()
+ cur -= 1 // reconsume
+ return
+ }
+ // Anything else
+ return new_character_token(c)
+ }
+
+ // 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
+ tok_state_script_data_escaped_dash = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ tok_state = tok_state_script_data_escaped_dash_dash
+ return new_character_token('-')
+ }
+ if (c === '<') {
+ tok_state = tok_state_script_data_escaped_less_than_sign
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_state = tok_state_script_data_escaped
+ return new_character_token("\ufffd")
+ }
+ if (c === '') { // EOF
+ tok_state = tok_state_data
+ parse_error()
+ cur -= 1 // reconsume
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data_escaped
+ return new_character_token(c)
+ }
+
+ // 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
+ tok_state_script_data_escaped_dash_dash = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ return new_character_token('-')
+ }
+ if (c === '<') {
+ tok_state = tok_state_script_data_escaped_less_than_sign
+ return
+ }
+ if (c === '>') {
+ tok_state = tok_state_script_data
+ return new_character_token('>')
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_state = tok_state_script_data_escaped
+ return new_character_token("\ufffd")
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data_escaped
+ return new_character_token(c)
+ }
+
+ // 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
+ tok_state_script_data_escaped_less_than_sign = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '/') {
+ temporary_buffer = ''
+ tok_state = tok_state_script_data_escaped_end_tag_open
+ return
+ }
+ if (is_uc_alpha(c)) {
+ temporary_buffer = c.toLowerCase() // yes, really
+ tok_state = tok_state_script_data_double_escape_start
+ return new_character_token("<" + c) // fixfull split
+ }
+ if (is_lc_alpha(c)) {
+ temporary_buffer = c
+ tok_state = tok_state_script_data_double_escape_start
+ return new_character_token("<" + c) // fixfull split
+ }
+ // Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 // reconsume
+ return new_character_token('<')
+ }
+
+ // 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
+ tok_state_script_data_escaped_end_tag_open = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (is_uc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c.toLowerCase())
+ temporary_buffer += c
+ tok_state = tok_state_script_data_escaped_end_tag_name
+ return
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag = new_end_tag(c)
+ temporary_buffer += c
+ tok_state = tok_state_script_data_escaped_end_tag_name
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 // reconsume
+ return new_character_token('</') // fixfull split
+ }
+
+ // 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
+ tok_state_script_data_escaped_end_tag_name = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_before_attribute_name
+ return
+ }
+ // fall through
+ }
+ if (c === '/') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_self_closing_start_tag
+ return
+ }
+ // fall through
+ }
+ if (c === '>') {
+ if (is_appropriate_end_tag(tok_cur_tag)) {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ // fall through
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c.toLowerCase()
+ return
+ }
+ if (is_lc_alpha(c)) {
+ tok_cur_tag.name += c
+ temporary_buffer += c.toLowerCase()
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 // reconsume
+ return new_character_token("</" + temporary_buffer) // fixfull split
+ }
+
+ // 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
+ tok_state_script_data_double_escape_start = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
+ if (temporary_buffer === 'script') {
+ tok_state = tok_state_script_data_double_escaped
+ } else {
+ tok_state = tok_state_script_data_escaped
+ }
+ return new_character_token(c)
+ }
+ if (is_uc_alpha(c)) {
+ temporary_buffer += c.toLowerCase() // yes, really lowercase
+ return new_character_token(c)
+ }
+ if (is_lc_alpha(c)) {
+ temporary_buffer += c
+ return new_character_token(c)
+ }
+ // Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 // reconsume
+ }
+
+ // 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
+ tok_state_script_data_double_escaped = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ tok_state = tok_state_script_data_double_escaped_dash
+ return new_character_token('-')
+ }
+ if (c === '<') {
+ tok_state = tok_state_script_data_double_escaped_less_than_sign
+ return new_character_token('<')
+ }
+ if (c === "\u0000") {
+ parse_error()
+ return new_character_token("\ufffd")
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return
+ }
+ // Anything else
+ return new_character_token(c)
+ }
+
+ // 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
+ tok_state_script_data_double_escaped_dash = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ tok_state = tok_state_script_data_double_escaped_dash_dash
+ return new_character_token('-')
+ }
+ if (c === '<') {
+ tok_state = tok_state_script_data_double_escaped_less_than_sign
+ return new_character_token('<')
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token("\ufffd")
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token(c)
+ }
+
+ // 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
+ tok_state_script_data_double_escaped_dash_dash = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '-') {
+ return new_character_token('-')
+ }
+ if (c === '<') {
+ tok_state = tok_state_script_data_double_escaped_less_than_sign
+ return new_character_token('<')
+ }
+ if (c === '>') {
+ tok_state = tok_state_script_data
+ return new_character_token('>')
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token("\ufffd")
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return
+ }
+ // Anything else
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token(c)
+ }
+
+ // 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
+ tok_state_script_data_double_escaped_less_than_sign = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '/') {
+ temporary_buffer = ''
+ tok_state = tok_state_script_data_double_escape_end
+ return new_character_token('/')
+ }
+ // Anything else
+ tok_state = tok_state_script_data_double_escaped
+ cur -= 1 // reconsume
+ }
+
+ // 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
+ tok_state_script_data_double_escape_end = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ' || c === '/' || c === '>') {
+ if (temporary_buffer === 'script') {
+ tok_state = tok_state_script_data_escaped
+ } else {
+ tok_state = tok_state_script_data_double_escaped
+ }
+ return new_character_token(c)
+ }
+ if (is_uc_alpha(c)) {
+ temporary_buffer += c.toLowerCase() // yes, really lowercase
+ return new_character_token(c)
+ }
+ if (is_lc_alpha(c)) {
+ temporary_buffer += c
+ return new_character_token(c)
+ }
+ // Anything else
+ tok_state = tok_state_script_data_double_escaped
+ cur -= 1 // reconsume
+ }
+
+ // 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
+ tok_state_before_attribute_name = function () {
+ var attr_name, c, tmp
+ attr_name = null
+ switch (c = txt.charAt(cur++)) {
+ case "\t":
+ case "\n":
+ case "\u000c":
+ case ' ':
+ return null
+ break
+ case '/':
+ tok_state = tok_state_self_closing_start_tag
+ return null
+ break
+ case '>':
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ break
+ case "\u0000":
+ parse_error()
+ attr_name = "\ufffd"
+ break
+ case '"':
+ case "'":
+ case '<':
+ case '=':
+ parse_error()
+ attr_name = c
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ if (is_uc_alpha(c)) {
+ attr_name = c.toLowerCase()
+ } else {
+ attr_name = c
+ }
+ }
+ if (attr_name != null) {
+ tok_cur_tag.attrs_a.unshift([attr_name, ''])
+ tok_state = tok_state_attribute_name
+ }
+ return null
+ }
+
+ // 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
+ tok_state_attribute_name = function () {
+ var c, tmp
+ switch (c = txt.charAt(cur++)) {
+ case "\t":
+ case "\n":
+ case "\u000c":
+ case ' ':
+ tok_state = tok_state_after_attribute_name
+ break
+ case '/':
+ tok_state = tok_state_self_closing_start_tag
+ break
+ case '=':
+ tok_state = tok_state_before_attribute_value
+ break
+ case '>':
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ break
+ case "\u0000":
+ parse_error()
+ tok_cur_tag.attrs_a[0][0] += "\ufffd"
+ break
+ case '"':
+ case "'":
+ case '<':
+ parse_error()
+ tok_cur_tag.attrs_a[0][0] += c
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
+ } else {
+ tok_cur_tag.attrs_a[0][0] += c
+ }
+ }
+ return null
+ }
+
+ // 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
+ tok_state_after_attribute_name = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\n" || c === "\u000c" || c === ' ') {
+ return
+ }
+ if (c === '/') {
+ tok_state = tok_state_self_closing_start_tag
+ return
+ }
+ if (c === '=') {
+ tok_state = tok_state_before_attribute_value
+ return
+ }
+ if (c === '>') {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.attrs_a.unshift([c.toLowerCase(), ''])
+ tok_state = tok_state_attribute_name
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_cur_tag.attrs_a.unshift(["\ufffd", ''])
+ tok_state = tok_state_attribute_name
+ return
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return
+ }
+ if (c === '"' || c === "'" || c === '<') {
+ parse_error()
+ // fall through to Anything else
+ }
+ // Anything else
+ tok_cur_tag.attrs_a.unshift([c, ''])
+ tok_state = tok_state_attribute_name
+ }
+
+ // 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
+ tok_state_before_attribute_value = function () {
+ var c, tmp
+ switch (c = txt.charAt(cur++)) {
+ case "\t":
+ case "\n":
+ case "\u000c":
+ case ' ':
+ return null
+ break
+ case '"':
+ tok_state = tok_state_attribute_value_double_quoted
+ break
+ case '&':
+ tok_state = tok_state_attribute_value_unquoted
+ cur -= 1
+ break
+ case "'":
+ tok_state = tok_state_attribute_value_single_quoted
+ break
+ case "\u0000":
+ // Parse error
+ tok_cur_tag.attrs_a[0][1] += "\ufffd"
+ tok_state = tok_state_attribute_value_unquoted
+ break
+ case '>':
+ // Parse error
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ tok_cur_tag.attrs_a[0][1] += c
+ tok_state = tok_state_attribute_value_unquoted
+ }
+ return null
+ }
+
+ // 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
+ tok_state_attribute_value_double_quoted = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '"':
+ tok_state = tok_state_after_attribute_value_quoted
+ break
+ case '&':
+ tok_cur_tag.attrs_a[0][1] += parse_character_reference('"', true)
+ break
+ case "\u0000":
+ // Parse error
+ tok_cur_tag.attrs_a[0][1] += "\ufffd"
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ tok_cur_tag.attrs_a[0][1] += c
+ }
+ return null
+ }
+
+ // 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
+ tok_state_attribute_value_single_quoted = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case "'":
+ tok_state = tok_state_after_attribute_value_quoted
+ break
+ case '&':
+ tok_cur_tag.attrs_a[0][1] += parse_character_reference("'", true)
+ break
+ case "\u0000":
+ // Parse error
+ tok_cur_tag.attrs_a[0][1] += "\ufffd"
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ tok_cur_tag.attrs_a[0][1] += c
+ }
+ return null
+ }
+
+ // 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
+ tok_state_attribute_value_unquoted = function () {
+ var c, tmp
+ switch (c = txt.charAt(cur++)) {
+ case "\t":
+ case "\n":
+ case "\u000c":
+ case ' ':
+ tok_state = tok_state_before_attribute_name
+ break
+ case '&':
+ tok_cur_tag.attrs_a[0][1] += parse_character_reference('>', true)
+ break
+ case '>':
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ break
+ case "\u0000":
+ tok_cur_tag.attrs_a[0][1] += "\ufffd"
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ // Parse Error if ', <, = or ` (backtick)
+ tok_cur_tag.attrs_a[0][1] += c
+ }
+ return null
+ }
+
+ // 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
+ tok_state_after_attribute_value_quoted = function () {
+ var c, tmp
+ switch (c = txt.charAt(cur++)) {
+ case "\t":
+ case "\n":
+ case "\u000c":
+ case ' ':
+ tok_state = tok_state_before_attribute_name
+ break
+ case '/':
+ tok_state = tok_state_self_closing_start_tag
+ break
+ case '>':
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ break
+ default:
+ // Parse Error
+ tok_state = tok_state_before_attribute_name
+ cur -= 1 // we didn't handle that char
+ }
+ return null
+ }
+
+ // 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
+ tok_state_self_closing_start_tag = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '>') {
+ tok_cur_tag.flag('self-closing', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') {
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return
+ }
+ // Anything else
+ parse_error()
+ tok_state = tok_state_before_attribute_name
+ cur -= 1 // reconsume
+ }
+
+ // 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
+ // WARNING: put a comment token in tok_cur_tag before setting this state
+ tok_state_bogus_comment = function () {
+ var next_gt, val
+ next_gt = txt.indexOf('>', cur)
+ if (next_gt === -1) {
+ val = txt.substr(cur)
+ cur = txt.length
+ } else {
+ val = txt.substr(cur, next_gt - cur)
+ cur = next_gt + 1
+ }
+ val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
+ tok_cur_tag.text += val
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+
+ // 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
+ tok_state_markup_declaration_open = function () {
+ var acn
+ if (txt.substr(cur, 2) === '--') {
+ cur += 2
+ tok_cur_tag = new_comment_token('')
+ tok_state = tok_state_comment_start
+ return
+ }
+ if (txt.substr(cur, 7).toLowerCase() === 'doctype') {
+ cur += 7
+ tok_state = tok_state_doctype
+ return
+ }
+ acn = adjusted_current_node()
+ if (acn && acn.namespace !== NS_HTML && txt.substr(cur, 7) === '[CDATA[') {
+ cur += 7
+ tok_state = tok_state_cdata_section
+ return
+ }
+ // Otherwise
+ parse_error()
+ tok_cur_tag = new_comment_token('')
+ tok_state = tok_state_bogus_comment
+ }
+
+ // 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
+ tok_state_comment_start = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '-':
+ tok_state = tok_state_comment_start_dash
+ break
+ case "\u0000":
+ parse_error()
+ tok_state = tok_state_comment
+ return new_character_token("\ufffd")
+ break
+ case '>':
+ parse_error()
+ tok_state = tok_state_data
+ return tok_cur_tag
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ break
+ default:
+ tok_cur_tag.text += c
+ tok_state = tok_state_comment
+ }
+ return null
+ }
+
+ // 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
+ tok_state_comment_start_dash = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '-':
+ tok_state = tok_state_comment_end
+ break
+ case "\u0000":
+ parse_error()
+ tok_cur_tag.text += "-\ufffd"
+ tok_state = tok_state_comment
+ break
+ case '>':
+ parse_error()
+ tok_state = tok_state_data
+ return tok_cur_tag
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ break
+ default:
+ tok_cur_tag.text += "-" + c
+ tok_state = tok_state_comment
+ }
+ return null
+ }
+
+ // 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
+ tok_state_comment = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '-':
+ tok_state = tok_state_comment_end_dash
+ break
+ case "\u0000":
+ parse_error()
+ tok_cur_tag.text += "\ufffd"
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ break
+ default:
+ tok_cur_tag.text += c
+ }
+ return null
+ }
+
+ // 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
+ tok_state_comment_end_dash = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '-':
+ tok_state = tok_state_comment_end
+ break
+ case "\u0000":
+ parse_error()
+ tok_cur_tag.text += "-\ufffd"
+ tok_state = tok_state_comment
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ break
+ default:
+ tok_cur_tag.text += "-" + c
+ tok_state = tok_state_comment
+ }
+ return null
+ }
+
+ // 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
+ tok_state_comment_end = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '>':
+ tok_state = tok_state_data
+ return tok_cur_tag
+ break
+ case "\u0000":
+ parse_error()
+ tok_cur_tag.text += "--\ufffd"
+ tok_state = tok_state_comment
+ break
+ case '!':
+ parse_error()
+ tok_state = tok_state_comment_end_bang
+ break
+ case '-':
+ parse_error()
+ tok_cur_tag.text += '-'
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ break
+ default:
+ parse_error()
+ tok_cur_tag.text += "--" + c
+ tok_state = tok_state_comment
+ }
+ return null
+ }
+
+ // 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
+ tok_state_comment_end_bang = function () {
+ var c
+ switch (c = txt.charAt(cur++)) {
+ case '-':
+ tok_cur_tag.text += "--!" + c
+ tok_state = tok_state_comment_end_dash
+ break
+ case '>':
+ tok_state = tok_state_data
+ return tok_cur_tag
+ break
+ case "\u0000":
+ parse_error()
+ tok_cur_tag.text += "--!\ufffd"
+ tok_state = tok_state_comment
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ break
+ default:
+ tok_cur_tag.text += "--!" + c
+ tok_state = tok_state_comment
+ }
+ return null
+ }
+
+ // 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
+ tok_state_doctype = function () {
+ var c, el
+ switch (c = txt.charAt(cur++)) {
+ case "\t":
+ case "\u000a":
+ case "\u000c":
+ case ' ':
+ tok_state = tok_state_before_doctype_name
+ break
+ case '': // EOF
+ parse_error()
+ tok_state = tok_state_data
+ el = new_doctype_token('')
+ el.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return el
+ break
+ default:
+ parse_error()
+ tok_state = tok_state_before_doctype_name
+ cur -= 1 // reconsume
+ }
+ return null
+ }
+
+ // 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
+ tok_state_before_doctype_name = function () {
+ var c, el
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ return
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag = new_doctype_token(c.toLowerCase())
+ tok_state = tok_state_doctype_name
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_cur_tag = new_doctype_token("\ufffd")
+ tok_state = tok_state_doctype_name
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ el = new_doctype_token('')
+ el.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return el
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ el = new_doctype_token('')
+ el.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return el
+ }
+ // Anything else
+ tok_cur_tag = new_doctype_token(c)
+ tok_state = tok_state_doctype_name
+ return null
+ }
+
+ // 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
+ tok_state_doctype_name = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ tok_state = tok_state_after_doctype_name
+ return
+ }
+ if (c === '>') {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (is_uc_alpha(c)) {
+ tok_cur_tag.name += c.toLowerCase()
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_cur_tag.name += "\ufffd"
+ return
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ tok_cur_tag.name += c
+ return null
+ }
+
+ // 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
+ tok_state_after_doctype_name = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ return
+ }
+ if (c === '>') {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ if (txt.substr(cur - 1, 6).toLowerCase() === 'public') {
+ cur += 5
+ tok_state = tok_state_after_doctype_public_keyword
+ return
+ }
+ if (txt.substr(cur - 1, 6).toLowerCase() === 'system') {
+ cur += 5
+ tok_state = tok_state_after_doctype_system_keyword
+ return
+ }
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+ // 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
+ tok_state_after_doctype_public_keyword = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ tok_state = tok_state_before_doctype_public_identifier
+ return
+ }
+ if (c === '"') {
+ parse_error()
+ tok_cur_tag.public_identifier = ''
+ tok_state = tok_state_doctype_public_identifier_double_quoted
+ return
+ }
+ if (c === "'") {
+ parse_error()
+ tok_cur_tag.public_identifier = ''
+ tok_state = tok_state_doctype_public_identifier_single_quoted
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+ // 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
+ tok_state_before_doctype_public_identifier = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ return
+ }
+ if (c === '"') {
+ parse_error()
+ tok_cur_tag.public_identifier = ''
+ tok_state = tok_state_doctype_public_identifier_double_quoted
+ return
+ }
+ if (c === "'") {
+ parse_error()
+ tok_cur_tag.public_identifier = ''
+ tok_state = tok_state_doctype_public_identifier_single_quoted
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+
+ // 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
+ tok_state_doctype_public_identifier_double_quoted = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '"') {
+ tok_state = tok_state_after_doctype_public_identifier
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_cur_tag.public_identifier += "\ufffd"
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ tok_cur_tag.public_identifier += c
+ return null
+ }
+
+ // 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
+ tok_state_doctype_public_identifier_single_quoted = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "'") {
+ tok_state = tok_state_after_doctype_public_identifier
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_cur_tag.public_identifier += "\ufffd"
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ tok_cur_tag.public_identifier += c
+ return null
+ }
+
+ // 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
+ tok_state_after_doctype_public_identifier = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ tok_state = tok_state_between_doctype_public_and_system_identifiers
+ return
+ }
+ if (c === '>') {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '"') {
+ parse_error()
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_double_quoted
+ return
+ }
+ if (c === "'") {
+ parse_error()
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_single_quoted
+ return
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+ // 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
+ tok_state_between_doctype_public_and_system_identifiers = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ return
+ }
+ if (c === '>') {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '"') {
+ parse_error()
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_double_quoted
+ return
+ }
+ if (c === "'") {
+ parse_error()
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_single_quoted
+ return
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+ // 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
+ tok_state_after_doctype_system_keyword = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ tok_state = tok_state_before_doctype_system_identifier
+ return
+ }
+ if (c === '"') {
+ parse_error()
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_double_quoted
+ return
+ }
+ if (c === "'") {
+ parse_error()
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_single_quoted
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+ // 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
+ tok_state_before_doctype_system_identifier = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ return
+ }
+ if (c === '"') {
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_double_quoted
+ return
+ }
+ if (c === "'") {
+ tok_cur_tag.system_identifier = ''
+ tok_state = tok_state_doctype_system_identifier_single_quoted
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+ // 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
+ tok_state_doctype_system_identifier_double_quoted = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '"') {
+ tok_state = tok_state_after_doctype_system_identifier
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_cur_tag.system_identifier += "\ufffd"
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ tok_cur_tag.system_identifier += c
+ return null
+ }
+
+ // 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
+ tok_state_doctype_system_identifier_single_quoted = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "'") {
+ tok_state = tok_state_after_doctype_system_identifier
+ return
+ }
+ if (c === "\u0000") {
+ parse_error()
+ tok_cur_tag.system_identifier += "\ufffd"
+ return
+ }
+ if (c === '>') {
+ parse_error()
+ tok_cur_tag.flag('force-quirks', true)
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ tok_cur_tag.system_identifier += c
+ return null
+ }
+
+ // 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
+ tok_state_after_doctype_system_identifier = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === "\t" || c === "\u000a" || c === "\u000c" || c === ' ') {
+ return
+ }
+ if (c === '>') {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ parse_error()
+ tok_state = tok_state_data
+ tok_cur_tag.flag('force-quirks', true)
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ parse_error()
+ // do _not_ tok_cur_tag.flag 'force-quirks', true
+ tok_state = tok_state_bogus_doctype
+ return null
+ }
+
+ // 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
+ tok_state_bogus_doctype = function () {
+ var c
+ c = txt.charAt(cur++)
+ if (c === '>') {
+ tok_state = tok_state_data
+ return tok_cur_tag
+ }
+ if (c === '') { // EOF
+ tok_state = tok_state_data
+ cur -= 1 // reconsume
+ return tok_cur_tag
+ }
+ // Anything else
+ return null
+ }
+
+ // 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
+ tok_state_cdata_section = function () {
+ var next_gt, val
+ tok_state = tok_state_data
+ next_gt = txt.indexOf(']]>', cur)
+ if (next_gt === -1) {
+ val = txt.substr(cur)
+ cur = txt.length
+ } else {
+ val = txt.substr(cur, next_gt - cur)
+ cur = next_gt + 3
+ }
+ val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
+ if (val.length > 0) {
+ return new_character_token(val) // fixfull split
+ }
+ return null
+ }
+
+ // 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
+ // Don't set this as a state, just call it
+ // returns a string (NOT a text node)
+ parse_character_reference = function (allowed_char, in_attr) {
+ var base, c, charset, code_point, decoded, i, max, start
+ if (allowed_char == null) {
+ allowed_char = null
+ }
+ if (in_attr == null) {
+ in_attr = false
+ }
+ if (cur >= txt.length) {
+ return '&'
+ }
+ switch (c = txt.charAt(cur)) {
+ case "\t":
+ case "\n":
+ case "\u000c":
+ case ' ':
+ case '<':
+ case '&':
+ case '':
+ case allowed_char:
+ // explicitly not a parse error
+ return '&'
+ break
+ case ';':
+ // there has to be "one or more" alnums between & and ; to be a parse error
+ return '&'
+ break
+ case '#':
+ if (cur + 1 >= txt.length) {
+ return '&'
+ }
+ if (txt.charAt(cur + 1).toLowerCase() === 'x') {
+ base = 16
+ charset = hex_chars
+ start = cur + 2
+ } else {
+ charset = digits
+ start = cur + 1
+ base = 10
+ }
+ i = 0
+ while (start + i < txt.length && charset.indexOf(txt.charAt(start + i)) > -1) {
+ i += 1
+ }
+ if (i === 0) {
+ return '&'
+ }
+ cur = start + i
+ if (txt.charAt(start + i) === ';') {
+ cur += 1
+ } else {
+ parse_error()
+ }
+ code_point = txt.substr(start, i)
+ while (code_point.charAt(0) === '0' && code_point.length > 1) {
+ code_point = code_point.substr(1)
+ }
+ code_point = parseInt(code_point, base)
+ if (unicode_fixes[code_point] != null) {
+ parse_error()
+ return unicode_fixes[code_point]
+ } else {
+ if ((code_point >= 0xd800 && code_point <= 0xdfff) || code_point > 0x10ffff) {
+ parse_error()
+ return "\ufffd"
+ } else {
+ if ((code_point >= 0x0001 && code_point <= 0x0008) || (code_point >= 0x000D && code_point <= 0x001F) || (code_point >= 0x007F && code_point <= 0x009F) || (code_point >= 0xFDD0 && code_point <= 0xFDEF) || code_point === 0x000B || code_point === 0xFFFE || code_point === 0xFFFF || code_point === 0x1FFFE || code_point === 0x1FFFF || code_point === 0x2FFFE || code_point === 0x2FFFF || code_point === 0x3FFFE || code_point === 0x3FFFF || code_point === 0x4FFFE || code_point === 0x4FFFF || code_point === 0x5FFFE || code_point === 0x5FFFF || code_point === 0x6FFFE || code_point === 0x6FFFF || code_point === 0x7FFFE || code_point === 0x7FFFF || code_point === 0x8FFFE || code_point === 0x8FFFF || code_point === 0x9FFFE || code_point === 0x9FFFF || code_point === 0xAFFFE || code_point === 0xAFFFF || code_point === 0xBFFFE || code_point === 0xBFFFF || code_point === 0xCFFFE || code_point === 0xCFFFF || code_point === 0xDFFFE || code_point === 0xDFFFF || code_point === 0xEFFFE || code_point === 0xEFFFF || code_point === 0xFFFFE || code_point === 0xFFFFF || code_point === 0x10FFFE || code_point === 0x10FFFF) {
+ parse_error()
+ }
+ return from_code_point(code_point)
+ }
+ }
+ return
+ break
+ default:
+ for (i = 0; i < 31; ++i) {
+ if (alnum.indexOf(txt.charAt(cur + i)) === -1) {
+ break
+ }
+ }
+ if (i === 0) {
+ // exit early, because parse_error() below needs at least one alnum
+ return '&'
+ }
+ if (txt.charAt(cur + i) === ';') {
+ decoded = decode_named_char_ref(txt.substr(cur, i))
+ i += 1 // scan past the ';' (after, so we dno't pass it to decode)
+ if (decoded != null) {
+ cur += i
+ return decoded
+ }
+ // else FALL THROUGH (check for match without last char(s) or ";")
+ }
+ // no ';' terminator (only legacy char refs)
+ max = i
+ for (i = 2; i <= max; ++i) { // no prefix matches, so ok to check shortest first
+ c = legacy_char_refs[txt.substr(cur, i)]
+ if (c != null) {
+ if (in_attr) {
+ if (txt.charAt(cur + i) === '=') {
+ // "because some legacy user agents will
+ // misinterpret the markup in those cases"
+ parse_error()
+ return '&'
+ }
+ if (alnum.indexOf(txt.charAt(cur + i)) > -1) {
+ // this makes attributes forgiving about url args
+ return '&'
+ }
+ }
+ // ok, and besides the weird exceptions for attributes...
+ // return the matching char
+ cur += i // consume entity chars
+ parse_error() // because no terminating ";"
+ return c
+ }
+ }
+ parse_error()
+ return '&'
+ }
+ // never reached
+ }
+
+ eat_next_token_if_newline = function () {
+ var old_cur, t
+ old_cur = cur
+ t = null
+ while (t == null) {
+ t = tok_state()
+ }
+ if (t.type === TYPE_TEXT) {
+ // definition of a newline depends on whether it was a character ref or not
+ if (cur - old_cur === 1) {
+ // not a character reference
+ if (t.text === "\u000d" || t.text === "\u000a") {
+ return
+ }
+ } else {
+ if (t.text === "\u000a") {
+ return
+ }
+ }
+ }
+ // not a "newline"
+ cur = old_cur
+ }
+
+ // tree constructor initialization
+ // see comments on TYPE_TAG/etc for the structure of this data
+ txt = args_html // text the tokenizer reads from; parse_init normalizes its line endings
+ cur = 0 // index into txt of the next character the tokenizer will read
+ doc = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML}) // root of the result; parse_init replaces it when parsing a fragment
+ doc.flag('quirks mode', QUIRKS_NO) // TODO bugreport spec for not specifying this
+ fragment_root = null // fragment parsing algorithm returns children of this
+ open_els = [] // presumably the stack of open elements; parse_init seeds it with fragment_root for fragments — confirm
+ afe = [] // active formatting elements
+ template_ins_modes = [] // parse_init unshifts ins_mode_in_template here for a template context element
+ ins_mode = ins_mode_initial
+ original_ins_mode = ins_mode // TODO check spec
+ flag_scripting = args.scripting != null ? args.scripting : true // defaults to true when args.scripting is not given. TODO might need an extra flag to get <noscript> to parse correctly
+ flag_frameset_ok = true
+ flag_parsing = true // parse_main_loop keeps running the tokenizer while this is true
+ flag_foster_parenting = false
+ form_element_pointer = null // parse_init points this at the nearest HTML form ancestor of the context element
+ temporary_buffer = null
+ pending_table_character_tokens = []
+ head_element_pointer = null
+ flag_fragment_parsing = false // set to true by parse_init when there is a context element
+ context_element = null // set by parse_init from args.fragment or args.context
+ prev_node_id = 0 // just for debugging
+
+ // tokenizer initialization
+ tok_state = tok_state_data // current tokenizer state function; each call to it performs one tokenizer step
+
+ parse_init = function () {
+ var el, f, ns, old_doc, t
+ // fragment parsing (text arg)
+ if (args.fragment != null) {
+ // this handles the fragment from the tests in the format described here:
+ // https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
+ f = args.fragment
+ ns = NS_HTML
+ if (f.substr(0, 5) === 'math ') {
+ f = f.substr(5)
+ ns = NS_MATHML
+ } else if (f.substr(0, 4) === 'svg ') {
+ f = f.substr(4)
+ ns = NS_SVG
+ }
+ t = new_open_tag(f)
+ context_element = token_to_element(t, ns)
+ context_element.document = new Node(TYPE_TAG, {name: 'document', namespace: NS_HTML})
+ context_element.document.flag('quirks mode', QUIRKS_NO)
+ }
+ // fragment parsing (Node arg)
+ if (args.context != null) {
+ context_element = args.context
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
+ // fragment parsing algorithm
+ if (context_element != null) {
+ flag_fragment_parsing = true
+ doc = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
+ // search up the tree from context, to try to find it's document,
+ // because this file only puts a "document" property on the root
+ // element.
+ old_doc = null
+ el = context_element
+ while (true) {
+ if (el.document != null) {
+ old_doc = el.document
+ break
+ }
+ if (el.parent) {
+ el = el.parent
+ } else {
+ break
+ }
+ }
+ if (old_doc) {
+ doc.flag('quirks mode', old_doc.flag('quirks mode'))
+ }
+ // set tok_state
+ if (context_element.namespace === NS_HTML) {
+ switch (context_element.name) {
+ case 'title':
+ case 'textarea':
+ tok_state = tok_state_rcdata
+ break
+ case 'style':
+ case 'xmp':
+ case 'iframe':
+ case 'noembed':
+ case 'noframes':
+ tok_state = tok_state_rawtext
+ break
+ case 'script':
+ tok_state = tok_state_script_data
+ break
+ case 'noscript':
+ if (flag_scripting) {
+ tok_state = tok_state_rawtext
+ }
+ break
+ case 'plaintext':
+ tok_state = tok_state_plaintext
+ }
+ }
+ fragment_root = new Node(TYPE_TAG, {name: 'html', namespace: NS_HTML})
+ doc.children.push(fragment_root)
+ fragment_root.document = doc
+ open_els = [fragment_root]
+ if (context_element.name === 'template' && context_element.namespace === NS_HTML) {
+ template_ins_modes.unshift(ins_mode_in_template)
+ }
+ // fixfull create token for context (it should have it's original one already)
+ reset_ins_mode()
+ // set form_element pointer... in the foreign doc?!
+ el = context_element
+ while (true) {
+ if (el.name === 'form' && el.namespace === NS_HTML) {
+ form_element_pointer = el
+ break
+ }
+ if (el.parent) {
+ el = el.parent
+ } else {
+ break
+ }
+ }
+ }
+
+ // text pre-processing
+ // FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
+ txt = txt.replace(new RegExp("\r\n", 'g'), "\n") // fixfull spec doesn't say this
+ txt = txt.replace(new RegExp("\r", 'g'), "\n") // fixfull spec doesn't say this
+ }
+
+ // http://www.w3.org/TR/html5/syntax.html#tree-construction
+ parse_main_loop = function () {
+ var t
+ while (flag_parsing) {
+ t = tok_state()
+ if (t != null) {
+ process_token(t)
+ // fixfull parse error if has self-closing flag, but it wasn't acknolwedged
+ }
+ }
+ }
+ parse_init()
+ parse_main_loop()
+
+ if (flag_fragment_parsing) {
+ return fragment_root.children
+ }
+ return doc.children
+}
+
+exports.parse = parse_html // entry point: parses HTML text and returns an array of Nodes
+exports.Node = Node // class of the nodes in the returned tree
+exports.debug_log_reset = debug_log_reset // presumably clears the debug log — confirm against its definition
+exports.debug_log_each = debug_log_each // presumably iterates over debug log entries — confirm against its definition
+exports.TYPE_TAG = TYPE_TAG // Node type constants (first argument to new Node, compared against token/node .type)
+exports.TYPE_TEXT = TYPE_TEXT
+exports.TYPE_COMMENT = TYPE_COMMENT
+exports.TYPE_DOCTYPE = TYPE_DOCTYPE
+exports.NS_HTML = NS_HTML // namespace constants (values of a Node's namespace)
+exports.NS_MATHML = NS_MATHML
+exports.NS_SVG = NS_SVG
+exports.QUIRKS_NO = QUIRKS_NO // values of the document's 'quirks mode' flag
+exports.QUIRKS_LIMITED = QUIRKS_LIMITED
+exports.QUIRKS_YES = QUIRKS_YES