1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # This file implements a parser for html snippets, meant to be used by a
21 # The implementation is a pretty direct implementation of the parsing algorithm
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
25 # Deviations from that spec:
27 # Purposeful: search this file for "WHATWG"
29 # Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
# the spec uses many different words to indicate which ends of lists/stacks
35 # they are talking about (and relative movement within the lists/stacks). This
36 # section splains. I'm implementing "lists" (afe and open_els) the same way
39 # stacks grow downward (current element is index=0)
41 # example: open_els = [a, b, c, d, e, f, g]
43 # "grows downwards" means it's visualized like this: (index: el, names)
45 # 6: g "start of the list", "topmost", "first"
47 # 4: e "previous" (to d), "above", "before"
48 # 3: d (previous/next are relative to this element)
49 # 2: c "next", "after", "lower", "below"
51 # 0: a "end of the list", "current node", "bottommost", "last"
55 # note: to get this to run outside a browser, you'll have to write a native
56 # implementation of decode_named_char_ref()
57 unless module?.exports?
59 module = exports: window.wheic
# Convert a Unicode code point (an integer) into a JavaScript string.
#
# Uses the native String.fromCodePoint when the runtime provides it.
# Otherwise code points below 0x10000 become a single UTF-16 code unit and
# anything higher is encoded by hand as a surrogate pair.
from_code_point = (x) ->
  if String.fromCodePoint?
    return String.fromCodePoint x
  # fallback for runtimes without String.fromCodePoint
  if x < 0x10000
    return String.fromCharCode x
  # supplementary plane: remove the 0x10000 offset, then split the remaining
  # 20 bits into the high (lead) and low (trail) surrogate halves
  x -= 0x10000
  return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
# Each node is an object of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
# the following types are emitted by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
87 # quirks mode constants
92 # queue up debug logs, so eg they can be shown only for tests that fail
# Call `cb` once for every entry in g_debug_log, in array order.
#
# cb: function taking one queued log string.
# Returns nothing (explicit return so the loop results are not collected).
debug_log_each = (cb) ->
  for str in g_debug_log
    cb str
  return
107 constructor: (type, args = {}) ->
108 @type = type # one of the TYPE_* constants above
109 @name = args.name ? '' # tag name
110 @text = args.text ? '' # contents for text/comment nodes
111 @attrs = args.attrs ? {}
112 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
113 @children = args.children ? []
114 @namespace = args.namespace ? NS_HTML
115 @parent = args.parent ? null
116 @token = args.token ? null
117 @flags = args.flags ? {}
121 @id = "#{++prev_node_id}"
122 acknowledge_self_closing: ->
124 @token.flag 'did_self_close', true
126 @flag 'did_self_close', true
128 flag: (key, value = null) ->
135 # helpers: (only take args that are normally known when parser creates nodes)
# Build a TYPE_START_TAG node carrying the given tag name.
new_open_tag = (name) ->
  new Node TYPE_START_TAG, {name: name}
# Build a TYPE_END_TAG node carrying the given tag name.
new_end_tag = (name) ->
  new Node TYPE_END_TAG, {name: name}
# Build a TYPE_TAG (element) node carrying the given tag name.
new_element = (name) ->
  new Node TYPE_TAG, {name: name}
# Build a TYPE_TEXT node whose text is `txt`.
new_text_node = (txt) ->
  new Node TYPE_TEXT, {text: txt}
# Character tokens are created by the very same function as text nodes.
new_character_token = new_text_node
# Build a TYPE_COMMENT node whose text is `txt`.
new_comment_token = (txt) ->
  new Node TYPE_COMMENT, {text: txt}
# Build a TYPE_DOCTYPE node carrying the given name.
new_doctype_token = (name) ->
  new Node TYPE_DOCTYPE, {name: name}
150 return new Node TYPE_EOF
152 return new Node TYPE_AFE_MARKER
# Build a TYPE_AAA_BOOKMARK node (no name, text or attributes supplied).
new_aaa_bookmark = ->
  new Node TYPE_AAA_BOOKMARK
# Character classes (as plain strings) used for membership tests via indexOf.
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = "#{lc_alpha}#{uc_alpha}#{digits}"
hex_chars = "#{digits}abcdefABCDEF"

# True only for a single character in A-Z.
is_uc_alpha = (str) ->
  return false unless str.length is 1
  return uc_alpha.indexOf(str) isnt -1
# True only for a single character in a-z.
is_lc_alpha = (str) ->
  return false unless str.length is 1
  return lc_alpha.indexOf(str) isnt -1
167 # some SVG elements have dashes in them
168 tag_name_chars = alnum + "-"
170 # http://www.w3.org/TR/html5/infrastructure.html#space-character
171 space_chars = "\u0009\u000a\u000c\u000d\u0020"
173 return txt.length is 1 and space_chars.indexOf(txt) > -1
# True when `t` is a TYPE_TEXT token whose text is exactly one character
# found in space_chars.
is_space_tok = (t) ->
  return false unless t.type is TYPE_TEXT
  return false unless t.text.length is 1
  return space_chars.indexOf(t.text) > -1
177 is_input_hidden_tok = (t) ->
178 return false unless t.type is TYPE_START_TAG
181 if a[1].toLowerCase() is 'hidden'
186 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
187 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
190 unicode_fixes[0x00] = "\uFFFD"
191 unicode_fixes[0x80] = "\u20AC"
192 unicode_fixes[0x82] = "\u201A"
193 unicode_fixes[0x83] = "\u0192"
194 unicode_fixes[0x84] = "\u201E"
195 unicode_fixes[0x85] = "\u2026"
196 unicode_fixes[0x86] = "\u2020"
197 unicode_fixes[0x87] = "\u2021"
198 unicode_fixes[0x88] = "\u02C6"
199 unicode_fixes[0x89] = "\u2030"
200 unicode_fixes[0x8A] = "\u0160"
201 unicode_fixes[0x8B] = "\u2039"
202 unicode_fixes[0x8C] = "\u0152"
203 unicode_fixes[0x8E] = "\u017D"
204 unicode_fixes[0x91] = "\u2018"
205 unicode_fixes[0x92] = "\u2019"
206 unicode_fixes[0x93] = "\u201C"
207 unicode_fixes[0x94] = "\u201D"
208 unicode_fixes[0x95] = "\u2022"
209 unicode_fixes[0x96] = "\u2013"
210 unicode_fixes[0x97] = "\u2014"
211 unicode_fixes[0x98] = "\u02DC"
212 unicode_fixes[0x99] = "\u2122"
213 unicode_fixes[0x9A] = "\u0161"
214 unicode_fixes[0x9B] = "\u203A"
215 unicode_fixes[0x9C] = "\u0153"
216 unicode_fixes[0x9E] = "\u017E"
217 unicode_fixes[0x9F] = "\u0178"
219 quirks_yes_pi_prefixes = [
220 "+//silmaril//dtd html pro v0r11 19970101//"
221 "-//as//dtd html 3.0 aswedit + extensions//"
222 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
223 "-//ietf//dtd html 2.0 level 1//"
224 "-//ietf//dtd html 2.0 level 2//"
225 "-//ietf//dtd html 2.0 strict level 1//"
226 "-//ietf//dtd html 2.0 strict level 2//"
227 "-//ietf//dtd html 2.0 strict//"
228 "-//ietf//dtd html 2.0//"
229 "-//ietf//dtd html 2.1e//"
230 "-//ietf//dtd html 3.0//"
231 "-//ietf//dtd html 3.2 final//"
232 "-//ietf//dtd html 3.2//"
233 "-//ietf//dtd html 3//"
234 "-//ietf//dtd html level 0//"
235 "-//ietf//dtd html level 1//"
236 "-//ietf//dtd html level 2//"
237 "-//ietf//dtd html level 3//"
238 "-//ietf//dtd html strict level 0//"
239 "-//ietf//dtd html strict level 1//"
240 "-//ietf//dtd html strict level 2//"
241 "-//ietf//dtd html strict level 3//"
242 "-//ietf//dtd html strict//"
243 "-//ietf//dtd html//"
244 "-//metrius//dtd metrius presentational//"
245 "-//microsoft//dtd internet explorer 2.0 html strict//"
246 "-//microsoft//dtd internet explorer 2.0 html//"
247 "-//microsoft//dtd internet explorer 2.0 tables//"
248 "-//microsoft//dtd internet explorer 3.0 html strict//"
249 "-//microsoft//dtd internet explorer 3.0 html//"
250 "-//microsoft//dtd internet explorer 3.0 tables//"
251 "-//netscape comm. corp.//dtd html//"
252 "-//netscape comm. corp.//dtd strict html//"
253 "-//o'reilly and associates//dtd html 2.0//"
254 "-//o'reilly and associates//dtd html extended 1.0//"
255 "-//o'reilly and associates//dtd html extended relaxed 1.0//"
256 "-//sq//dtd html 2.0 hotmetal + extensions//"
257 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
258 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
259 "-//spyglass//dtd html 2.0 extended//"
260 "-//sun microsystems corp.//dtd hotjava html//"
261 "-//sun microsystems corp.//dtd hotjava strict html//"
262 "-//w3c//dtd html 3 1995-03-24//"
263 "-//w3c//dtd html 3.2 draft//"
264 "-//w3c//dtd html 3.2 final//"
265 "-//w3c//dtd html 3.2//"
266 "-//w3c//dtd html 3.2s draft//"
267 "-//w3c//dtd html 4.0 frameset//"
268 "-//w3c//dtd html 4.0 transitional//"
269 "-//w3c//dtd html experimental 19960712//"
270 "-//w3c//dtd html experimental 970421//"
271 "-//w3c//dtd w3 html//"
272 "-//w3o//dtd w3 html 3.0//"
273 "-//webtechs//dtd mozilla html 2.0//"
274 "-//webtechs//dtd mozilla html//"
277 # These are the character references that don't need a terminating semicolon
278 # min length: 2, max: 6, none are a prefix of any other.
280 Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
281 aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
282 aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
283 Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
284 curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
285 ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
286 euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
287 Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
288 igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
289 lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
290 Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
291 Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
292 Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
293 pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
294 shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
295 times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
296 ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
300 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
301 raw_text_elements = ['script', 'style']
302 escapable_raw_text_elements = ['textarea', 'title']
303 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
305 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
306 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
307 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
308 'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
309 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
310 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
311 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
312 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
313 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
314 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
315 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
316 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
317 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
318 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
322 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
324 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
325 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
326 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
327 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
328 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
329 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
330 'determinant', 'diff', 'divergence', 'divide', 'domain',
331 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
332 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
333 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
334 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
335 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
336 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
337 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
338 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
339 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
340 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
341 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
342 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
343 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
344 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
345 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
346 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
347 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
348 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
349 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
350 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
351 'vectorproduct', 'xor'
353 # foreign_elements = [svg_elements..., mathml_elements...]
354 #normal_elements = All other allowed HTML elements are normal elements.
358 address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
359 aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
360 blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
361 caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
362 details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
363 embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
364 footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
365 h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
366 header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
367 img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
368 listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
370 menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
372 meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
373 noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
374 plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
375 select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
376 table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
377 textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
378 tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
381 mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
382 'annotation-xml':NS_MATHML,
385 foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
388 formatting_elements = {
389 a: true, b: true, big: true, code: true, em: true, font: true, i: true,
390 nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
394 mathml_text_integration = {
395 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
# True when the namespace listed for el.name in mathml_text_integration is
# identical to el.namespace.
is_mathml_text_integration_point = (el) ->
  listed_ns = mathml_text_integration[el.name]
  return listed_ns is el.namespace
399 is_html_integration = (el) -> # DON'T PASS A TOKEN
400 if el.namespace is NS_MATHML
401 if el.name is 'annotation-xml'
402 if el.attrs.encoding?
403 if el.attrs.encoding.toLowerCase() is 'text/html'
405 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
408 if el.namespace is NS_SVG
409 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
414 h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
417 foster_parenting_targets = {
# True when the namespace listed for e.name in special_elements is identical
# to e.namespace.
el_is_special = (e) ->
  listed_ns = special_elements[e.name]
  return listed_ns is e.namespace
# address, div and p: excluded by el_is_special_not_adp below.
adp_els =
  address: NS_HTML
  div: NS_HTML
  p: NS_HTML
# True when `el` matches special_elements (by name and namespace) and does
# not match adp_els.
el_is_special_not_adp = (el) ->
  return false unless special_elements[el.name] is el.namespace
  return adp_els[el.name] isnt el.namespace
447 altglyphdef: 'altGlyphDef'
448 altglyphitem: 'altGlyphItem'
449 animatecolor: 'animateColor'
450 animatemotion: 'animateMotion'
451 animatetransform: 'animateTransform'
454 fecolormatrix: 'feColorMatrix'
455 fecomponenttransfer: 'feComponentTransfer'
456 fecomposite: 'feComposite'
457 feconvolvematrix: 'feConvolveMatrix'
458 fediffuselighting: 'feDiffuseLighting'
459 fedisplacementmap: 'feDisplacementMap'
460 fedistantlight: 'feDistantLight'
461 fedropshadow: 'feDropShadow'
467 fegaussianblur: 'feGaussianBlur'
470 femergenode: 'feMergeNode'
471 femorphology: 'feMorphology'
473 fepointlight: 'fePointLight'
474 fespecularlighting: 'feSpecularLighting'
475 fespotlight: 'feSpotLight'
477 feturbulence: 'feTurbulence'
478 foreignobject: 'foreignObject'
480 lineargradient: 'linearGradient'
481 radialgradient: 'radialGradient'
484 svg_attribute_fixes = {
485 attributename: 'attributeName'
486 attributetype: 'attributeType'
487 basefrequency: 'baseFrequency'
488 baseprofile: 'baseProfile'
490 clippathunits: 'clipPathUnits'
491 contentscripttype: 'contentScriptType'
492 contentstyletype: 'contentStyleType'
493 diffuseconstant: 'diffuseConstant'
495 externalresourcesrequired: 'externalResourcesRequired'
496 # WHATWG removes this: filterres: 'filterRes'
497 filterunits: 'filterUnits'
499 gradienttransform: 'gradientTransform'
500 gradientunits: 'gradientUnits'
501 kernelmatrix: 'kernelMatrix'
502 kernelunitlength: 'kernelUnitLength'
503 keypoints: 'keyPoints'
504 keysplines: 'keySplines'
506 lengthadjust: 'lengthAdjust'
507 limitingconeangle: 'limitingConeAngle'
508 markerheight: 'markerHeight'
509 markerunits: 'markerUnits'
510 markerwidth: 'markerWidth'
511 maskcontentunits: 'maskContentUnits'
512 maskunits: 'maskUnits'
513 numoctaves: 'numOctaves'
514 pathlength: 'pathLength'
515 patterncontentunits: 'patternContentUnits'
516 patterntransform: 'patternTransform'
517 patternunits: 'patternUnits'
518 pointsatx: 'pointsAtX'
519 pointsaty: 'pointsAtY'
520 pointsatz: 'pointsAtZ'
521 preservealpha: 'preserveAlpha'
522 preserveaspectratio: 'preserveAspectRatio'
523 primitiveunits: 'primitiveUnits'
526 repeatcount: 'repeatCount'
527 repeatdur: 'repeatDur'
528 requiredextensions: 'requiredExtensions'
529 requiredfeatures: 'requiredFeatures'
530 specularconstant: 'specularConstant'
531 specularexponent: 'specularExponent'
532 spreadmethod: 'spreadMethod'
533 startoffset: 'startOffset'
534 stddeviation: 'stdDeviation'
535 stitchtiles: 'stitchTiles'
536 surfacescale: 'surfaceScale'
537 systemlanguage: 'systemLanguage'
538 tablevalues: 'tableValues'
541 textlength: 'textLength'
543 viewtarget: 'viewTarget'
544 xchannelselector: 'xChannelSelector'
545 ychannelselector: 'yChannelSelector'
546 zoomandpan: 'zoomAndPan'
548 foreign_attr_fixes = {
549 'xlink:actuate': 'xlink actuate'
550 'xlink:arcrole': 'xlink arcrole'
551 'xlink:href': 'xlink href'
552 'xlink:role': 'xlink role'
553 'xlink:show': 'xlink show'
554 'xlink:title': 'xlink title'
555 'xlink:type': 'xlink type'
556 'xml:base': 'xml base'
557 'xml:lang': 'xml lang'
558 'xml:space': 'xml space'
560 'xmlns:xlink': 'xmlns xlink'
562 adjust_mathml_attributes = (t) ->
564 if a[0] is 'definitionurl'
565 a[0] = 'definitionURL'
567 adjust_svg_attributes = (t) ->
569 if svg_attribute_fixes[a[0]]?
570 a[0] = svg_attribute_fixes[a[0]]
572 adjust_foreign_attributes = (t) ->
575 if foreign_attr_fixes[a[0]]?
576 a[0] = foreign_attr_fixes[a[0]]
579 # decode_named_char_ref()
581 # The list of named character references is _huge_ so ask the browser to decode
582 # for us instead of wasting bandwidth/space on including the table here.
584 # Pass without the "&" but with the ";" examples:
585 # for "&" pass "amp;"
586 # for "′" pass "x2032;"
589 textarea: document.createElement('textarea')
591 # TODO test this in IE8
592 decode_named_char_ref = (txt) ->
594 decoded = g_dncr.cache[txt]
595 return decoded if decoded?
596 g_dncr.textarea.innerHTML = txt
597 decoded = g_dncr.textarea.value
598 return null if decoded is txt
599 return g_dncr.cache[txt] = decoded
601 parse_html = (args) ->
603 cur = null # index of next char in txt to be parsed
604 # declare doc and tokenizer variables so they're in scope below
606 open_els = null # stack of open elements
607 afe = null # active formatting elements
608 template_ins_modes = null
610 original_ins_mode = null
612 tok_cur_tag = null # partially parsed tag
613 flag_scripting = null
614 flag_frameset_ok = null
616 flag_foster_parenting = null
617 form_element_pointer = null
618 temporary_buffer = null
619 pending_table_character_tokens = null
620 head_element_pointer = null
621 flag_fragment_parsing = null
622 context_element = null
632 console.log "Parse error at character #{cur} of #{txt.length}"
635 # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
636 # "Noah's Ark clause" but with three
637 afe_push = (new_el) ->
640 if el.type is TYPE_AFE_MARKER
642 if el.name is new_el.name and el.namespace is new_el.namespace
645 unless new_el.attrs[k] is v
649 for k, v of new_el.attrs
650 unless el.attrs[k] is v
662 afe.unshift new_afe_marker()
# the functions below implement the Tree Construction algorithm
666 # http://www.w3.org/TR/html5/syntax.html#tree-construction
668 # But first... the helpers
669 template_tag_is_open = ->
671 if el.name is 'template' and el.namespace is NS_HTML
674 is_in_scope_x = (tag_name, scope, namespace) ->
676 if el.name is tag_name and (namespace is null or namespace is el.namespace)
678 if scope[el.name] is el.namespace
681 is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
683 if el.name is tag_name and (namespace is null or namespace is el.namespace)
685 if scope[el.name] is el.namespace
687 if scope2[el.name] is el.namespace
691 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
692 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
695 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
696 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
698 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
# Scope tables (tag name -> namespace) handed to is_in_scope_x /
# is_in_scope_x_y by the is_in_*_scope helpers.
button_scopers = {button: NS_HTML}
li_scopers = {ol: NS_HTML, ul: NS_HTML}
table_scopers = {html: NS_HTML, table: NS_HTML, template: NS_HTML}
# Scope check using standard_scopers as the scope table. `namespace`
# defaults to null and is forwarded unchanged to is_in_scope_x.
is_in_scope = (tag_name, namespace = null) ->
  is_in_scope_x(tag_name, standard_scopers, namespace)
# Scope check using standard_scopers plus button_scopers as scope tables.
# `namespace` defaults to null and is forwarded unchanged to is_in_scope_x_y.
is_in_button_scope = (tag_name, namespace = null) ->
  is_in_scope_x_y(tag_name, standard_scopers, button_scopers, namespace)
# Scope check using table_scopers as the only scope table. `namespace`
# defaults to null and is forwarded unchanged to is_in_scope_x.
is_in_table_scope = (tag_name, namespace = null) ->
  is_in_scope_x(tag_name, table_scopers, namespace)
709 # aka is_in_list_item_scope
# Scope check using standard_scopers plus li_scopers as scope tables.
# `namespace` defaults to null and is forwarded unchanged to is_in_scope_x_y.
is_in_li_scope = (tag_name, namespace = null) ->
  is_in_scope_x_y(tag_name, standard_scopers, li_scopers, namespace)
712 is_in_select_scope = (tag_name, namespace = null) ->
714 if t.name is tag_name and (namespace is null or namespace is t.namespace)
716 if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
719 # this checks for a particular element, not by name
720 # this requires a namespace match
721 el_is_in_scope = (needle) ->
725 if standard_scopers[el.name] is el.namespace
729 clear_to_table_stopers = {
734 clear_stack_to_table_context = ->
736 if clear_to_table_stopers[open_els[0].name]?
740 clear_to_table_body_stopers = {
747 clear_stack_to_table_body_context = ->
749 if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
753 clear_to_table_row_stopers = {
758 clear_stack_to_table_row_context = ->
760 if clear_to_table_row_stopers[open_els[0].name]?
764 clear_afe_to_marker = ->
766 return unless afe.length > 0 # this happens in fragment case, ?spec error
768 if el.type is TYPE_AFE_MARKER
773 # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
775 # 1. Let last be false.
777 # 2. Let node be the last node in the stack of open elements.
779 node = open_els[node_i]
780 # 3. Loop: If node is the first node in the stack of open elements,
781 # then set last to true, and, if the parser was originally created as
782 # part of the HTML fragment parsing algorithm (fragment case) set node
783 # to the context element.
785 if node_i is open_els.length - 1
787 if flag_fragment_parsing
788 node = context_element
789 # 4. If node is a select element, run these substeps:
790 if node.name is 'select' and node.namespace is NS_HTML
791 # 1. If last is true, jump to the step below labeled done.
793 # 2. Let ancestor be node.
796 # 3. Loop: If ancestor is the first node in the stack of
797 # open elements, jump to the step below labeled done.
799 if ancestor_i is open_els.length - 1
801 # 4. Let ancestor be the node before ancestor in the stack
804 ancestor = open_els[ancestor_i]
805 # 5. If ancestor is a template node, jump to the step below
807 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
809 # 6. If ancestor is a table node, switch the insertion mode
810 # to "in select in table" and abort these steps.
811 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
812 ins_mode = ins_mode_in_select_in_table
814 # 7. Jump back to the step labeled loop.
815 # 8. Done: Switch the insertion mode to "in select" and abort
817 ins_mode = ins_mode_in_select
819 # 5. If node is a td or th element and last is false, then switch
820 # the insertion mode to "in cell" and abort these steps.
821 if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
822 ins_mode = ins_mode_in_cell
824 # 6. If node is a tr element, then switch the insertion mode to "in
825 # row" and abort these steps.
826 if node.name is 'tr' and node.namespace is NS_HTML
827 ins_mode = ins_mode_in_row
829 # 7. If node is a tbody, thead, or tfoot element, then switch the
830 # insertion mode to "in table body" and abort these steps.
831 if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
832 ins_mode = ins_mode_in_table_body
834 # 8. If node is a caption element, then switch the insertion mode
835 # to "in caption" and abort these steps.
836 if node.name is 'caption' and node.namespace is NS_HTML
837 ins_mode = ins_mode_in_caption
839 # 9. If node is a colgroup element, then switch the insertion mode
840 # to "in column group" and abort these steps.
841 if node.name is 'colgroup' and node.namespace is NS_HTML
842 ins_mode = ins_mode_in_column_group
844 # 10. If node is a table element, then switch the insertion mode to
845 # "in table" and abort these steps.
846 if node.name is 'table' and node.namespace is NS_HTML
847 ins_mode = ins_mode_in_table
849 # 11. If node is a template element, then switch the insertion mode
850 # to the current template insertion mode and abort these steps.
851 if node.name is 'template' and node.namespace is NS_HTML
852 ins_mode = template_ins_modes[0]
854 # 12. If node is a head element and last is true, then switch the
855 # insertion mode to "in body" ("in body"! not "in head"!) and abort
856 # these steps. (fragment case)
857 if node.name is 'head' and node.namespace is NS_HTML and last
858 ins_mode = ins_mode_in_body
860 # 13. If node is a head element and last is false, then switch the
861 # insertion mode to "in head" and abort these steps.
862 if node.name is 'head' and node.namespace is NS_HTML and last is false
863 ins_mode = ins_mode_in_head
865 # 14. If node is a body element, then switch the insertion mode to
866 # "in body" and abort these steps.
867 if node.name is 'body' and node.namespace is NS_HTML
868 ins_mode = ins_mode_in_body
870 # 15. If node is a frameset element, then switch the insertion mode
871 # to "in frameset" and abort these steps. (fragment case)
872 if node.name is 'frameset' and node.namespace is NS_HTML
873 ins_mode = ins_mode_in_frameset
875 # 16. If node is an html element, run these substeps:
876 if node.name is 'html' and node.namespace is NS_HTML
877 # 1. If the head element pointer is null, switch the insertion
878 # mode to "before head" and abort these steps. (fragment case)
879 if head_element_pointer is null
880 ins_mode = ins_mode_before_head
882 # 2. Otherwise, the head element pointer is not null,
883 # switch the insertion mode to "after head" and abort these
885 ins_mode = ins_mode_after_head
887 # 17. If last is true, then switch the insertion mode to "in body"
888 # and abort these steps. (fragment case)
890 ins_mode = ins_mode_in_body
892 # 18. Let node now be the node before node in the stack of open
895 node = open_els[node_i]
896 # 19. Return to the step labeled loop.
901 # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
# Returns the "adjusted current node": the context element when parsing a
# fragment and only one element is on the stack of open elements, otherwise
# the current node (index 0 of open_els, since the stack grows downward).
adjusted_current_node = ->
  if open_els.length is 1 and flag_fragment_parsing
    return context_element
  # not the fragment case: the adjusted current node is the current node
  return open_els[0]
907 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
908 # this implementation is structured (mostly) as described at the link above.
909 # capitalized comments are the "labels" described at the link above.
911 return if afe.length is 0
912 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
917 if i is afe.length - 1
920 if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
925 el = insert_html_element afe[i].token
931 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
932 # adoption agency algorithm
934 # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
935 # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
936 # http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
937 adoption_agency = (subject) ->
# this block implements the W3C spec
939 # # 1. If the current node is an HTML element whose tag name is subject,
940 # # then run these substeps:
942 # # 1. Let element be the current node.
944 # # 2. Pop element off the stack of open elements.
946 # # 3. If element is also in the list of active formatting elements,
947 # # remove the element from the list.
949 # # 4. Abort the adoption agency algorithm.
950 # if open_els[0].name is subject and open_els[0].namespace is NS_HTML
951 # el = open_els.shift()
952 # # remove it from the list of active formatting elements (if found)
958 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
959 # If the current node is an HTML element whose tag name is subject, and
960 # the current node is not in the list of active formatting elements,
961 # then pop the current node off the stack of open elements, and abort
963 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
964 # remove it from the list of active formatting elements (if found)
980 # 5. Let formatting element be the last element in the list of
981 # active formatting elements that: is between the end of the list
982 # and the last scope marker in the list, if any, or the start of
983 # the list otherwise, and has the tag name subject.
985 for t, fe_of_afe in afe
986 if t.type is TYPE_AFE_MARKER
991 # If there is no such element, then abort these steps and instead
992 # act as described in the "any other end tag" entry above.
994 in_body_any_other_end_tag subject
996 # 6. If formatting element is not in the stack of open elements,
997 # then this is a parse error; remove the element from the list, and
1000 for t, fe_of_open_els in open_els
1006 # "remove it from the list" must mean afe, since it's not in open_els
1007 afe.splice fe_of_afe, 1
1009 # 7. If formatting element is in the stack of open elements, but
1010 # the element is not in scope, then this is a parse error; abort
1012 unless el_is_in_scope fe
1015 # 8. If formatting element is not the current node, this is a parse
1016 # error. (But do not abort these steps.)
1017 unless open_els[0] is fe
1020 # 9. Let furthest block be the topmost node in the stack of open
1021 # elements that is lower in the stack than formatting element, and
1022 # is an element in the special category. There might not be one.
1024 fb_of_open_els = null
1025 for t, i in open_els
1031 # and continue, to see if there's one that's more "topmost"
1032 # 10. If there is no furthest block, then the UA must first pop all
1033 # the nodes from the bottom of the stack of open elements, from the
1034 # current node up to and including formatting element, then remove
1035 # formatting element from the list of active formatting elements,
1036 # and finally abort these steps.
1039 t = open_els.shift()
1041 afe.splice fe_of_afe, 1
1043 # 11. Let common ancestor be the element immediately above
1044 # formatting element in the stack of open elements.
1045 ca = open_els[fe_of_open_els + 1] # common ancestor
1047 node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1048 # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1049 bookmark = new_aaa_bookmark()
1052 afe.splice i, 0, bookmark
1054 node = last_node = fb
1058 # 3. Let node be the element immediately above node in the
1059 # stack of open elements, or if node is no longer in the stack
1060 # of open elements (e.g. because it got removed by this
1061 # algorithm), the element that was immediately above node in
1062 # the stack of open elements before node was removed.
1064 for t, i in open_els
1066 node_next = open_els[i + 1]
1068 node = node_next ? node_above
1069 # TODO make sure node_above gets re-set if/when node is removed from open_els
1071 # 4. If node is formatting element, then go to the next step in
1072 # the overall algorithm.
1075 # 5. If inner loop counter is greater than three and node is in
1076 # the list of active formatting elements, then remove node from
1077 # the list of active formatting elements.
1086 # 6. If node is not in the list of active formatting elements,
1087 # then remove node from the stack of open elements and then go
1088 # back to the step labeled inner loop.
1090 for t, i in open_els
1092 node_above = open_els[i + 1]
1093 open_els.splice i, 1
1096 # 7. create an element for the token for which the element node
1097 # was created, in the HTML namespace, with common ancestor as
1098 # the intended parent; replace the entry for node in the list
1099 # of active formatting elements with an entry for the new
1100 # element, replace the entry for node in the stack of open
1101 # elements with an entry for the new element, and let node be
1103 new_node = token_to_element node.token, NS_HTML, ca
1108 for t, i in open_els
1110 node_above = open_els[i + 1]
1111 open_els[i] = new_node
1114 # 8. If last node is furthest block, then move the
1115 # aforementioned bookmark to be immediately after the new node
1116 # in the list of active formatting elements.
1124 # "after" means lower
1125 afe.splice i, 0, bookmark # "after as <-
1127 # 9. Insert last node into node, first removing it from its
1128 # previous parent node if any.
1129 if last_node.parent?
1130 for c, i in last_node.parent.children
1132 last_node.parent.children.splice i, 1
1134 node.children.push last_node
1135 last_node.parent = node
1136 # 10. Let last node be node.
1138 # 11. Return to the step labeled inner loop.
1139 # 14. Insert whatever last node ended up being in the previous step
1140 # at the appropriate place for inserting a node, but using common
1141 # ancestor as the override target.
1143 # In the case where fe is immediately followed by fb:
1144 # * inner loop exits out early (node==fe)
1146 # * last_node is still in the tree (not a duplicate)
1147 if last_node.parent?
1148 for c, i in last_node.parent.children
1150 last_node.parent.children.splice i, 1
1152 # can't use standard insert token thing, because it's already in
1153 # open_els and must stay at its current position in open_els
1154 dest = adjusted_insertion_location ca
1155 dest[0].children.splice dest[1], 0, last_node
1156 last_node.parent = dest[0]
1157 # 15. Create an element for the token for which formatting element
1158 # was created, in the HTML namespace, with furthest block as the
1160 new_element = token_to_element fe.token, NS_HTML, fb
1161 # 16. Take all of the child nodes of furthest block and append them
1162 # to the element created in the last step.
1163 while fb.children.length
1164 t = fb.children.shift()
1165 t.parent = new_element
1166 new_element.children.push t
1167 # 17. Append that new element to furthest block.
1168 new_element.parent = fb
1169 fb.children.push new_element
1170 # 18. Remove formatting element from the list of active formatting
1171 # elements, and insert the new element into the list of active
1172 # formatting elements at the position of the aforementioned
1180 afe[i] = new_element
1182 # 19. Remove formatting element from the stack of open elements,
1183 # and insert the new element into the stack of open elements
1184 # immediately below the position of furthest block in that stack.
1185 for t, i in open_els
1187 open_els.splice i, 1
1189 for t, i in open_els
1191 open_els.splice i, 0, new_element
1193 # 20. Jump back to the step labeled outer loop.
1196 # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1197 close_p_element = ->
1198 generate_implied_end_tags 'p' # arg is exception
1199 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1201 while open_els.length > 1 # just in case
1202 el = open_els.shift()
1203 if el.name is 'p' and el.namespace is NS_HTML
1206 close_p_if_in_button_scope = ->
1207 if is_in_button_scope 'p', NS_HTML
1211 # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1212 # aka insert_a_character = (t) ->
1213 insert_character = (t) ->
1214 dest = adjusted_insertion_location()
1215 # fixfull check for Document node
1217 prev = dest[0].children[dest[1] - 1]
1218 if prev.type is TYPE_TEXT
1221 dest[0].children.splice dest[1], 0, t
1224 # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1225 process_token = (t) ->
1226 acn = adjusted_current_node()
1230 if acn.namespace is NS_HTML
1233 if is_mathml_text_integration_point(acn)
1234 if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
1237 if t.type is TYPE_TEXT
1240 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1243 if is_html_integration acn
1244 if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1247 if t.type is TYPE_EOF
1250 in_foreign_content t
1254 # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1255 # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1256 adjusted_insertion_location = (override_target = null) ->
1257 # 1. If there was an override target specified, then let target be the
1260 target = override_target
1261 else # Otherwise, let target be the current node.
1262 target = open_els[0]
1263 # 2. Determine the adjusted insertion location using the first matching
1264 # steps from the following list:
1266 # If foster parenting is enabled and target is a table, tbody, tfoot,
1267 # thead, or tr element Foster parenting happens when content is
1268 # misnested in tables.
1269 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1270 loop # once. this is here so we can ``break`` to "abort these substeps"
1271 # 1. Let last template be the last template element in the
1272 # stack of open elements, if any.
1273 last_template = null
1274 last_template_i = null
1275 for el, i in open_els
1276 if el.name is 'template' and el.namespace is NS_HTML
1280 # 2. Let last table be the last table element in the stack of
1281 # open elements, if any.
1284 for el, i in open_els
1285 if el.name is 'table' and el.namespace is NS_HTML
1289 # 3. If there is a last template and either there is no last
1290 # table, or there is one, but last template is lower (more
1291 # recently added) than last table in the stack of open
1292 # elements, then: let adjusted insertion location be inside
1293 # last template's template contents, after its last child (if
1294 # any), and abort these substeps.
1295 if last_template and (last_table is null or last_template_i < last_table_i)
1296 target = last_template # fixfull should be it's contents
1297 target_i = target.children.length
1299 # 4. If there is no last table, then let adjusted insertion
1300 # location be inside the first element in the stack of open
1301 # elements (the html element), after its last child (if any),
1302 # and abort these substeps. (fragment case)
1303 if last_table is null
1305 target = open_els[open_els.length - 1]
1306 target_i = target.children.length
1308 # 5. If last table has a parent element, then let adjusted
1309 # insertion location be inside last table's parent element,
1310 # immediately before last table, and abort these substeps.
1311 if last_table.parent?
1312 for c, i in last_table.parent.children
1314 target = last_table.parent
1318 # 6. Let previous element be the element immediately above last
1319 # table in the stack of open elements.
1321 # huh? how could it not have a parent?
1322 previous_element = open_els[last_table_i + 1]
1323 # 7. Let adjusted insertion location be inside previous
1324 # element, after its last child (if any).
1325 target = previous_element
1326 target_i = target.children.length
1327 # Note: These steps are involved in part because it's possible
1328 # for elements, the table element in this case in particular,
1329 # to have been moved by a script around in the DOM, or indeed
1330 # removed from the DOM entirely, after the element was inserted
1332 break # don't really loop
1334 # Otherwise Let adjusted insertion location be inside target, after
1335 # its last child (if any).
1336 target_i = target.children.length
1338 # 3. If the adjusted insertion location is inside a template element,
1339 # let it instead be inside the template element's template contents,
1340 # after its last child (if any).
1341 # fixfull (template)
1343 # 4. Return the adjusted insertion location.
1344 return [target, target_i]
1346 # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1347 # aka create_an_element_for_token
1348 token_to_element = (t, namespace, intended_parent) ->
1349 # convert attributes into a hash
1352 attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1353 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1355 # TODO 2. If the newly created element has an xmlns attribute in the
1356 # XMLNS namespace whose value is not exactly the same as the element's
1357 # namespace, that is a parse error. Similarly, if the newly created
1358 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1359 # value is not the XLink Namespace, that is a parse error.
1361 # fixfull: the spec says stuff about form pointers and ownerDocument
1365 # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1366 insert_foreign_element = (token, namespace) ->
1367 ail = adjusted_insertion_location()
1370 el = token_to_element token, namespace, ail_el
1371 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1373 ail_el.children.splice ail_i, 0, el
1376 # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
# Convenience wrapper: inserts an element for `token` in the HTML namespace
# by delegating to insert_foreign_element with NS_HTML.
#
# token - start-tag token describing the element to create
#
# Returns whatever insert_foreign_element returns (passed through via
# CoffeeScript's implicit return).
insert_html_element = (token) ->
	insert_foreign_element(token, NS_HTML)
1380 # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1381 # position should be [node, index_within_children]
# Splices the comment node `t` into a parent's children list.
#
# t        - the comment node to insert
# position - optional [node, index_within_children] pair; when null or
#            undefined the adjusted insertion location is used instead
insert_comment = (t, position = null) ->
	# only compute the default location when the caller gave no position
	[parent, index] = position ? adjusted_insertion_location()
	parent.children.splice(index, 0, t)
1388 # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
# Generic raw text element parsing: inserts an HTML element for start-tag
# token `t`, puts the tokenizer into tok_state_rawtext, and switches the
# tree builder to ins_mode_text.
#
# t - the start-tag token being processed
parse_generic_raw_text = (t) ->
	insert_html_element(t)
	# capture the current insertion mode before it is overwritten below
	original_ins_mode = ins_mode
	tok_state = tok_state_rawtext
	ins_mode = ins_mode_text
# Generic RCDATA element parsing: same sequence as parse_generic_raw_text,
# except the tokenizer is switched to tok_state_rcdata.
#
# t - the start-tag token being processed
parse_generic_rcdata_text = (t) ->
	insert_html_element(t)
	# capture the current insertion mode before it is overwritten below
	original_ins_mode = ins_mode
	tok_state = tok_state_rcdata
	ins_mode = ins_mode_text
1402 # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1403 # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1404 generate_implied_end_tags = (except = null) ->
1405 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1409 # 8.2.5.4 The rules for parsing tokens in HTML content
1410 # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1412 # 8.2.5.4.1 The "initial" insertion mode
1413 # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1414 is_quirks_yes_doctype = (t) ->
1415 if t.flag 'force-quirks'
1417 if t.name isnt 'html'
1419 if t.public_identifier?
1420 pi = t.public_identifier.toLowerCase()
1421 for p in quirks_yes_pi_prefixes
1422 if pi.substr(0, p.length) is p
1424 if pi is '-//w3o//dtd w3 html strict 3.0//en//' or pi is '-/w3c/dtd html 4.0 transitional/en' or pi is 'html'
1426 if t.system_identifier?
1427 if t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
1429 else if t.public_identifier?
1430 # already did this: pi = t.public_identifier.toLowerCase()
1431 if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1434 is_quirks_limited_doctype = (t) ->
1435 if t.public_identifier?
1436 pi = t.public_identifier.toLowerCase()
1437 if pi.substr(0, 32) is '-//w3c//dtd xhtml 1.0 frameset//' or pi.substr(0, 36) is '-//w3c//dtd xhtml 1.0 transitional//'
1439 if t.system_identifier?
1440 if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1443 ins_mode_initial = (t) ->
1446 if t.type is TYPE_COMMENT
1450 if t.type is TYPE_DOCTYPE
1451 # fixfull syntax error from first paragraph and following bullets
1452 # fixfull set doc.doctype
1453 # fixfull is the "not an iframe srcdoc" thing relevant?
1454 if is_quirks_yes_doctype t
1455 doc.flag 'quirks mode', QUIRKS_YES
1456 else if is_quirks_limited_doctype t
1457 doc.flag 'quirks mode', QUIRKS_LIMITED
1459 ins_mode = ins_mode_before_html
1462 # fixfull not iframe srcdoc?
1464 doc.flag 'quirks mode', QUIRKS_YES
1465 ins_mode = ins_mode_before_html
1469 # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1470 ins_mode_before_html = (t) ->
1471 if t.type is TYPE_DOCTYPE
1474 if t.type is TYPE_COMMENT
1479 if t.type is TYPE_START_TAG and t.name is 'html'
1480 el = token_to_element t, NS_HTML, doc
1481 doc.children.push el
1483 open_els.unshift(el)
1484 # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1485 ins_mode = ins_mode_before_head
1487 if t.type is TYPE_END_TAG
1488 if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1489 # fall through to "anything else"
1494 el = token_to_element new_open_tag('html'), NS_HTML, doc
1495 doc.children.push el
1498 # ?fixfull browsing context
1499 ins_mode = ins_mode_before_head
1503 # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1504 ins_mode_before_head = (t) ->
1507 if t.type is TYPE_COMMENT
1510 if t.type is TYPE_DOCTYPE
1513 if t.type is TYPE_START_TAG and t.name is 'html'
1516 if t.type is TYPE_START_TAG and t.name is 'head'
1517 el = insert_html_element t
1518 head_element_pointer = el
1519 ins_mode = ins_mode_in_head
1521 if t.type is TYPE_END_TAG
1522 if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1523 # fall through to Anything else below
1528 el = insert_html_element new_open_tag 'head'
1529 head_element_pointer = el
1530 ins_mode = ins_mode_in_head
1534 # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1535 ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1536 open_els.shift() # spec says this will be a 'head' node
1537 ins_mode = ins_mode_after_head
1540 ins_mode_in_head = (t) ->
1541 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1544 if t.type is TYPE_COMMENT
1547 if t.type is TYPE_DOCTYPE
1550 if t.type is TYPE_START_TAG and t.name is 'html'
1553 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1554 el = insert_html_element t
1556 t.acknowledge_self_closing()
1558 if t.type is TYPE_START_TAG and t.name is 'meta'
1559 el = insert_html_element t
1561 t.acknowledge_self_closing()
1562 # fixfull encoding stuff
1564 if t.type is TYPE_START_TAG and t.name is 'title'
1565 parse_generic_rcdata_text t
1567 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1568 parse_generic_raw_text t
1570 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1571 insert_html_element t
1572 ins_mode = ins_mode_in_head_noscript
1574 if t.type is TYPE_START_TAG and t.name is 'script'
1575 ail = adjusted_insertion_location()
1576 el = token_to_element t, NS_HTML, ail
1577 el.flag 'parser-inserted', true
1578 # fixfull fragment case
1579 ail[0].children.splice ail[1], 0, el
1581 tok_state = tok_state_script_data
1582 original_ins_mode = ins_mode # make sure orig... is defined
1583 ins_mode = ins_mode_text
1585 if t.type is TYPE_END_TAG and t.name is 'head'
1586 open_els.shift() # will be a head element... spec says so
1587 ins_mode = ins_mode_after_head
1589 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1590 ins_mode_in_head_else t
1592 if t.type is TYPE_START_TAG and t.name is 'template'
1593 insert_html_element t
1595 flag_frameset_ok = false
1596 ins_mode = ins_mode_in_template
1597 template_ins_modes.unshift ins_mode_in_template
1599 if t.type is TYPE_END_TAG and t.name is 'template'
1600 if template_tag_is_open()
1601 generate_implied_end_tags
1602 if open_els[0].name isnt 'template'
1605 el = open_els.shift()
1606 if el.name is 'template' and el.namespace is NS_HTML
1608 clear_afe_to_marker()
1609 template_ins_modes.shift()
1614 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1617 ins_mode_in_head_else t
1620 # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1621 ins_mode_in_head_noscript_else = (t) ->
1624 ins_mode = ins_mode_in_head
1627 ins_mode_in_head_noscript = (t) ->
1628 if t.type is TYPE_DOCTYPE
1631 if t.type is TYPE_START_TAG and t.name is 'html'
1634 if t.type is TYPE_END_TAG and t.name is 'noscript'
1636 ins_mode = ins_mode_in_head
1638 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1641 if t.type is TYPE_END_TAG and t.name is 'br'
1642 ins_mode_in_head_noscript_else t
1644 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1648 ins_mode_in_head_noscript_else t
1651 # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1652 ins_mode_after_head_else = (t) ->
1653 body_tok = new_open_tag 'body'
1654 insert_html_element body_tok
1655 ins_mode = ins_mode_in_body
1658 ins_mode_after_head = (t) ->
1662 if t.type is TYPE_COMMENT
1665 if t.type is TYPE_DOCTYPE
1668 if t.type is TYPE_START_TAG and t.name is 'html'
1671 if t.type is TYPE_START_TAG and t.name is 'body'
1672 insert_html_element t
1673 flag_frameset_ok = false
1674 ins_mode = ins_mode_in_body
1676 if t.type is TYPE_START_TAG and t.name is 'frameset'
1677 insert_html_element t
1678 ins_mode = ins_mode_in_frameset
1680 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1682 open_els.unshift head_element_pointer
1684 for el, i in open_els
1685 if el is head_element_pointer
1686 open_els.splice i, 1
1689 if t.type is TYPE_END_TAG and t.name is 'template'
1692 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1693 ins_mode_after_head_else t
1695 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1699 ins_mode_after_head_else t
1702 # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1703 in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1706 if node.name is name and node.namespace is NS_HTML
1707 generate_implied_end_tags name # arg is exception
1708 unless node is open_els[0]
1711 el = open_els.shift()
1714 if special_elements[node.name] is node.namespace
1717 for el, i in open_els
1719 node = open_els[i + 1]
1722 ins_mode_in_body = (t) ->
1723 if t.type is TYPE_TEXT and t.text is "\u0000"
1730 if t.type is TYPE_TEXT
1733 flag_frameset_ok = false
1735 if t.type is TYPE_COMMENT
1738 if t.type is TYPE_DOCTYPE
1741 if t.type is TYPE_START_TAG and t.name is 'html'
1743 return if template_tag_is_open()
1744 root_attrs = open_els[open_els.length - 1].attrs
1746 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1749 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1752 if t.type is TYPE_START_TAG and t.name is 'body'
1754 return if open_els.length < 2
1755 second = open_els[open_els.length - 2]
1756 return unless second.namespace is NS_HTML
1757 return unless second.name is 'body'
1758 return if template_tag_is_open()
1759 flag_frameset_ok = false
1761 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1763 if t.type is TYPE_START_TAG and t.name is 'frameset'
1765 return if open_els.length < 2
1766 second_i = open_els.length - 2
1767 second = open_els[second_i]
1768 return unless second.namespace is NS_HTML
1769 return unless second.name is 'body'
1770 if flag_frameset_ok is false
1773 for el, i in second.parent.children
1775 second.parent.children.splice i, 1
1777 open_els.splice second_i, 1
1778 # pop everything except the "root html element"
1779 while open_els.length > 1
1781 insert_html_element t
1782 ins_mode = ins_mode_in_frameset
1784 if t.type is TYPE_EOF
1786 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1787 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1788 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1791 unless ok_tags[t.name] is el.namespace
1794 if template_ins_modes.length > 0
1795 ins_mode_in_template t
1799 if t.type is TYPE_END_TAG and t.name is 'body'
1800 unless is_in_scope 'body', NS_HTML
1804 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1805 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1806 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1807 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1811 unless ok_tags[t.name] is el.namespace
1814 ins_mode = ins_mode_after_body
1816 if t.type is TYPE_END_TAG and t.name is 'html'
1817 unless is_in_scope 'body', NS_HTML
1821 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1822 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1823 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1824 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1828 unless ok_tags[t.name] is el.namespace
1831 ins_mode = ins_mode_after_body
1834 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1835 close_p_if_in_button_scope()
1836 insert_html_element t
1838 if t.type is TYPE_START_TAG and h_tags[t.name]?
1839 close_p_if_in_button_scope()
1840 if h_tags[open_els[0].name] is open_els[0].namespace
1843 insert_html_element t
1845 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1846 close_p_if_in_button_scope()
1847 insert_html_element t
1848 eat_next_token_if_newline()
1849 flag_frameset_ok = false
1851 if t.type is TYPE_START_TAG and t.name is 'form'
1852 unless form_element_pointer is null or template_tag_is_open()
1855 close_p_if_in_button_scope()
1856 el = insert_html_element t
1857 unless template_tag_is_open()
1858 form_element_pointer = el
1860 if t.type is TYPE_START_TAG and t.name is 'li'
1861 flag_frameset_ok = false
1862 for node in open_els
1863 if node.name is 'li' and node.namespace is NS_HTML
1864 generate_implied_end_tags 'li' # arg is exception
1865 if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1868 el = open_els.shift()
1869 if el.name is 'li' and el.namespace is NS_HTML
1872 if el_is_special_not_adp node
1874 close_p_if_in_button_scope()
1875 insert_html_element t
1877 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1878 flag_frameset_ok = false
1879 for node in open_els
1880 if node.name is 'dd' and node.namespace is NS_HTML
1881 generate_implied_end_tags 'dd' # arg is exception
1882 if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1885 el = open_els.shift()
1886 if el.name is 'dd' and el.namespace is NS_HTML
1889 if node.name is 'dt' and node.namespace is NS_HTML
1890 generate_implied_end_tags 'dt' # arg is exception
1891 if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1894 el = open_els.shift()
1895 if el.name is 'dt' and el.namespace is NS_HTML
1898 if el_is_special_not_adp node
1900 close_p_if_in_button_scope()
1901 insert_html_element t
1903 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1904 close_p_if_in_button_scope()
1905 insert_html_element t
1906 tok_state = tok_state_plaintext
1908 if t.type is TYPE_START_TAG and t.name is 'button'
1909 if is_in_scope 'button', NS_HTML
1911 generate_implied_end_tags()
1913 el = open_els.shift()
1914 if el.name is 'button' and el.namespace is NS_HTML
1917 insert_html_element t
1918 flag_frameset_ok = false
1920 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1921 unless is_in_scope t.name, NS_HTML
1924 generate_implied_end_tags()
1925 unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1928 el = open_els.shift()
1929 if el.name is t.name and el.namespace is NS_HTML
1932 if t.type is TYPE_END_TAG and t.name is 'form'
1933 unless template_tag_is_open()
1934 node = form_element_pointer
1935 form_element_pointer = null
1936 if node is null or not el_is_in_scope node
1939 generate_implied_end_tags()
1940 if open_els[0] isnt node
1942 for el, i in open_els
1944 open_els.splice i, 1
1947 unless is_in_scope 'form', NS_HTML
1950 generate_implied_end_tags()
1951 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1954 el = open_els.shift()
1955 if el.name is 'form' and el.namespace is NS_HTML
1958 if t.type is TYPE_END_TAG and t.name is 'p'
1959 unless is_in_button_scope 'p', NS_HTML
1961 insert_html_element new_open_tag 'p'
1964 if t.type is TYPE_END_TAG and t.name is 'li'
1965 unless is_in_li_scope 'li', NS_HTML
1968 generate_implied_end_tags 'li' # arg is exception
1969 if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1972 el = open_els.shift()
1973 if el.name is 'li' and el.namespace is NS_HTML
1976 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1977 unless is_in_scope t.name, NS_HTML
1980 generate_implied_end_tags t.name # arg is exception
1981 if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1984 el = open_els.shift()
1985 if el.name is t.name and el.namespace is NS_HTML
1988 if t.type is TYPE_END_TAG and h_tags[t.name]?
1991 if h_tags[el.name] is el.namespace
1994 if standard_scopers[el.name] is el.namespace
1999 generate_implied_end_tags()
2000 if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2003 el = open_els.shift()
2004 if h_tags[el.name] is el.namespace
2008 if t.type is TYPE_START_TAG and t.name is 'a'
2009 # If the list of active formatting elements contains an a element
2010 # between the end of the list and the last marker on the list (or
2011 # the start of the list if there is no marker on the list), then
2012 # this is a parse error; run the adoption agency algorithm for the
2013 # tag name "a", then remove that element from the list of active
2014 # formatting elements and the stack of open elements if the
2015 # adoption agency algorithm didn't already remove it (it might not
2016 # have if the element is not in table scope).
2019 if el.type is TYPE_AFE_MARKER
2021 if el.name is 'a' and el.namespace is NS_HTML
2029 for el, i in open_els
2031 open_els.splice i, 1
2033 el = insert_html_element t
2036 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2038 el = insert_html_element t
2041 if t.type is TYPE_START_TAG and t.name is 'nobr'
2043 if is_in_scope 'nobr', NS_HTML
2045 adoption_agency 'nobr'
2047 el = insert_html_element t
2050 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2051 adoption_agency t.name
2053 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2055 insert_html_element t
2057 flag_frameset_ok = false
2059 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2060 unless is_in_scope t.name, NS_HTML
2063 generate_implied_end_tags()
2064 if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2067 el = open_els.shift()
2068 if el.name is t.name and el.namespace is NS_HTML
2070 clear_afe_to_marker()
2072 if t.type is TYPE_START_TAG and t.name is 'table'
2073 unless doc.flag('quirks mode') is QUIRKS_YES
2074 close_p_if_in_button_scope() # test
2075 insert_html_element t
2076 flag_frameset_ok = false
2077 ins_mode = ins_mode_in_table
2079 if t.type is TYPE_END_TAG and t.name is 'br'
2081 # W3C: t.type = TYPE_START_TAG
2082 t = new_open_tag 'br' # WHATWG
2084 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2086 insert_html_element t
2088 t.acknowledge_self_closing()
2089 flag_frameset_ok = false
2091 if t.type is TYPE_START_TAG and t.name is 'input'
2093 insert_html_element t
2095 t.acknowledge_self_closing()
2096 unless is_input_hidden_tok t
2097 flag_frameset_ok = false
2099 if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
2100 # WHATWG adds 'menuitem' for this block
2101 insert_html_element t
2103 t.acknowledge_self_closing()
2105 if t.type is TYPE_START_TAG and t.name is 'hr'
2106 close_p_if_in_button_scope()
2107 insert_html_element t
2109 t.acknowledge_self_closing()
2110 flag_frameset_ok = false
2112 if t.type is TYPE_START_TAG and t.name is 'image'
2117 if t.type is TYPE_START_TAG and t.name is 'isindex'
2119 if template_tag_is_open() is false and form_element_pointer isnt null
2121 t.acknowledge_self_closing()
2122 flag_frameset_ok = false
2123 close_p_if_in_button_scope()
2124 el = insert_html_element new_open_tag 'form'
2125 unless template_tag_is_open()
2126 form_element_pointer = el
2129 el.attrs['action'] = a[1]
2131 insert_html_element new_open_tag 'hr'
2134 insert_html_element new_open_tag 'label'
2135 # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2136 input_el = new_open_tag 'input'
2141 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2142 input_el.attrs_a.push [a[0], a[1]]
2143 input_el.attrs_a.push ['name', 'isindex']
2144 # fixfull this next bit is in english... internationalize?
2145 prompt ?= "This is a searchable index. Enter search keywords: "
2146 insert_character new_character_token prompt # fixfull split
2147 # TODO submit typo "balue" in spec
2148 insert_html_element input_el
2150 # insert_character '' # you can put chars here if prompt attr missing
2152 insert_html_element new_open_tag 'hr'
2155 unless template_tag_is_open()
2156 form_element_pointer = null
2158 if t.type is TYPE_START_TAG and t.name is 'textarea'
2159 insert_html_element t
2160 eat_next_token_if_newline()
2161 tok_state = tok_state_rcdata
2162 original_ins_mode = ins_mode
2163 flag_frameset_ok = false
2164 ins_mode = ins_mode_text
2166 if t.type is TYPE_START_TAG and t.name is 'xmp'
2167 close_p_if_in_button_scope()
2169 flag_frameset_ok = false
2170 parse_generic_raw_text t
2172 if t.type is TYPE_START_TAG and t.name is 'iframe'
2173 flag_frameset_ok = false
2174 parse_generic_raw_text t
2176 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2177 parse_generic_raw_text t
2179 if t.type is TYPE_START_TAG and t.name is 'select'
2181 insert_html_element t
2182 flag_frameset_ok = false
2183 if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2184 ins_mode = ins_mode_in_select_in_table
2186 ins_mode = ins_mode_in_select
2188 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2189 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2192 insert_html_element t
2194 # this comment block implements the W3C spec
2195 # if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2196 # if is_in_scope 'ruby', NS_HTML
2197 # generate_implied_end_tags()
2198 # unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2200 # insert_html_element t
2202 # if t.type is TYPE_START_TAG and t.name is 'rt'
2203 # if is_in_scope 'ruby', NS_HTML
2204 # generate_implied_end_tags 'rtc' # arg is exception
2205 # unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2207 # insert_html_element t
2209 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2210 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2211 if is_in_scope 'ruby', NS_HTML
2212 generate_implied_end_tags()
2213 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2215 insert_html_element t
2217 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2218 if is_in_scope 'ruby', NS_HTML
2219 generate_implied_end_tags 'rtc'
2220 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2222 insert_html_element t
2225 if t.type is TYPE_START_TAG and t.name is 'math'
2227 adjust_mathml_attributes t
2228 adjust_foreign_attributes t
2229 insert_foreign_element t, NS_MATHML
2230 if t.flag 'self-closing'
2232 t.acknowledge_self_closing()
2234 if t.type is TYPE_START_TAG and t.name is 'svg'
2236 adjust_svg_attributes t
2237 adjust_foreign_attributes t
2238 insert_foreign_element t, NS_SVG
2239 if t.flag 'self-closing'
2241 t.acknowledge_self_closing()
2243 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2246 if t.type is TYPE_START_TAG # any other start tag
2248 insert_html_element t
2250 if t.type is TYPE_END_TAG # any other end tag
2251 in_body_any_other_end_tag t.name
2255 # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
# Insertion-mode handler for the "text" mode; receives one token `t`.
# The EOF branch and both end-tag branches restore ins_mode from
# original_ins_mode.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2256 ins_mode_text = (t) ->
# text token
2257 if t.type is TYPE_TEXT
# end of input while still in text mode
2260 if t.type is TYPE_EOF
# when the current node is an HTML <script>, flag it as 'already started'
2262 if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2263 open_els[0].flag 'already started', true
2265 ins_mode = original_ins_mode
# </script>
2268 if t.type is TYPE_END_TAG and t.name is 'script'
2270 ins_mode = original_ins_mode
2271 # fixfull the spec seems to assume that I'm going to run the script
2272 # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
# any other end tag
2274 if t.type is TYPE_END_TAG
2276 ins_mode = original_ins_mode
2280 # the functions below implement the tokenizer states described here:
2281 # http://www.w3.org/TR/html5/syntax.html#tokenization
2283 # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
# Fallback for tokens the table handlers do not process themselves; it is
# called from ins_mode_in_table and ins_mode_in_table_text with a token `t`.
# Turns flag_foster_parenting on and then off again. The statement(s) that run
# while the flag is set are not visible in this listing — presumably the token
# is processed with the in-body rules in between (TODO confirm).
2284 ins_mode_in_table_else = (t) ->
2286 flag_foster_parenting = true
2288 flag_foster_parenting = false
# Insertion-mode handler for the "in table" mode; receives one token `t`.
# Visible behaviour: table-structure start tags clear the stack to a table
# context, insert an element and switch ins_mode; tokens with no dedicated
# handling are passed to ins_mode_in_table_else.
# NOTE(review): this listing has lost its indentation and several lines
# (including most switch/when headers), so the branch each statement belongs to
# is only partly visible — confirm against the full file.
2290 ins_mode_in_table = (t) ->
# current node is an HTML table/tbody/tfoot/thead/tr: empty the pending buffer,
# remember the current mode and continue in ins_mode_in_table_text
2293 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2294 pending_table_character_tokens = []
2295 original_ins_mode = ins_mode
2296 ins_mode = ins_mode_in_table_text
2299 ins_mode_in_table_else t
# insert the token's element, then continue in ins_mode_in_caption
2307 clear_stack_to_table_context()
2309 insert_html_element t
2310 ins_mode = ins_mode_in_caption
# insert the token's element, then continue in ins_mode_in_column_group
2312 clear_stack_to_table_context()
2313 insert_html_element t
2314 ins_mode = ins_mode_in_column_group
# insert a synthesized <colgroup>, then continue in ins_mode_in_column_group
2316 clear_stack_to_table_context()
2317 insert_html_element new_open_tag 'colgroup'
2318 ins_mode = ins_mode_in_column_group
2320 when 'tbody', 'tfoot', 'thead'
2321 clear_stack_to_table_context()
2322 insert_html_element t
2323 ins_mode = ins_mode_in_table_body
# a cell or row without a section: insert a synthesized <tbody> first
2324 when 'td', 'th', 'tr'
2325 clear_stack_to_table_context()
2326 insert_html_element new_open_tag 'tbody'
2327 ins_mode = ins_mode_in_table_body
# with a <table> in table scope, open elements are popped (shift) and compared
# against an HTML <table> — presumably the stop condition of a pop loop
2331 if is_in_table_scope 'table', NS_HTML
2333 el = open_els.shift()
2334 if el.name is 'table' and el.namespace is NS_HTML
2338 when 'style', 'script', 'template'
# a token that is_input_hidden_tok rejects goes to the fallback; the insert and
# acknowledge below are presumably the other branch
2341 unless is_input_hidden_tok t
2342 ins_mode_in_table_else t
2345 el = insert_html_element t
2347 t.acknowledge_self_closing()
# form handling: consults form_element_pointer and template_tag_is_open();
# the inserted element becomes the new form_element_pointer
2350 if form_element_pointer?
2352 if template_tag_is_open()
2354 form_element_pointer = insert_html_element t
2357 ins_mode_in_table_else t
# same pop-until-<table> pattern as above
2361 if is_in_table_scope 'table', NS_HTML
2363 el = open_els.shift()
2364 if el.name is 'table' and el.namespace is NS_HTML
2369 when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2374 ins_mode_in_table_else t
2378 ins_mode_in_table_else t
2382 # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
# Insertion-mode handler for the "in table text" mode; receives one token `t`.
# Text tokens are buffered in pending_table_character_tokens. The remaining
# statements walk that buffer, empty it and restore ins_mode from
# original_ins_mode.
# NOTE(review): this listing has lost its indentation and some lines, so the
# conditions choosing between the loops below are not visible here.
2383 ins_mode_in_table_text = (t) ->
# NUL text token
2384 if t.type is TYPE_TEXT and t.text is "\u0000"
# any other text token is appended to the buffer
2388 if t.type is TYPE_TEXT
2389 pending_table_character_tokens.push t
# scan the buffer for a token that is not a space token
2393 for old in pending_table_character_tokens
2394 unless is_space_tok old
# one loop inserts the buffered tokens as characters ...
2398 for old in pending_table_character_tokens
2399 insert_character old
# ... the other sends each buffered token through the in-table fallback
2401 for old in pending_table_character_tokens
2402 ins_mode_in_table_else old
2403 pending_table_character_tokens = []
2404 ins_mode = original_ins_mode
2408 # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
# Insertion-mode handler for the "in caption" mode; receives one token `t`.
# Both visible closing paths pop open elements looking for an HTML <caption>,
# call clear_afe_to_marker() and switch to ins_mode_in_table.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2409 ins_mode_in_caption = (t) ->
# </caption>
2410 if t.type is TYPE_END_TAG and t.name is 'caption'
2411 if is_in_table_scope 'caption', NS_HTML
2412 generate_implied_end_tags()
# NOTE(review): this check looks only at the name, not the namespace
2413 if open_els[0].name isnt 'caption'
2416 el = open_els.shift()
2417 if el.name is 'caption' and el.namespace is NS_HTML
2419 clear_afe_to_marker()
2420 ins_mode = ins_mode_in_table
# a table-structure start tag, or </table>
2425 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2427 if is_in_table_scope 'caption', NS_HTML
2429 el = open_els.shift()
2430 if el.name is 'caption' and el.namespace is NS_HTML
2432 clear_afe_to_marker()
2433 ins_mode = ins_mode_in_table
2435 # else fragment case
# end tags matched explicitly by this mode (handling not visible here)
2437 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2444 # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
# Insertion-mode handler for the "in column group" mode; receives one token `t`.
# Visible behaviour: <col> start tags are inserted and their self-closing flag
# acknowledged; the </colgroup> branch and the trailing statements switch to
# ins_mode_in_table.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2445 ins_mode_in_column_group = (t) ->
2449 if t.type is TYPE_COMMENT
2452 if t.type is TYPE_DOCTYPE
2455 if t.type is TYPE_START_TAG and t.name is 'html'
# <col>
2458 if t.type is TYPE_START_TAG and t.name is 'col'
2459 el = insert_html_element t
2461 t.acknowledge_self_closing()
# </colgroup>
2463 if t.type is TYPE_END_TAG and t.name is 'colgroup'
# The namespace must be read from the current node (open_els[0]). Reading it
# from the open_els array itself yields undefined, which made this test always
# false.
2464 if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
2466 ins_mode = ins_mode_in_table
# </col>
2470 if t.type is TYPE_END_TAG and t.name is 'col'
# <template> or </template>
2473 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2476 if t.type is TYPE_EOF
# remaining statements: check that the current node is a colgroup, then
# continue in ins_mode_in_table
2480 if open_els[0].name isnt 'colgroup'
2484 ins_mode = ins_mode_in_table
2488 # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
# Insertion-mode handler for the "in table body" mode; receives one token `t`.
# Visible behaviour: <tr> (or a synthesized <tr> for a bare cell) is inserted
# after clearing the stack to a table-body context and the mode becomes
# ins_mode_in_row; section end tags and table-level tags lead back to
# ins_mode_in_table.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2489 ins_mode_in_table_body = (t) ->
# <tr>
2490 if t.type is TYPE_START_TAG and t.name is 'tr'
2491 clear_stack_to_table_body_context()
2492 insert_html_element t
2493 ins_mode = ins_mode_in_row
# <th>/<td> directly inside a section: insert a synthesized <tr> first
2495 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2497 clear_stack_to_table_body_context()
2498 insert_html_element new_open_tag 'tr'
2499 ins_mode = ins_mode_in_row
# </tbody>, </tfoot>, </thead>
2502 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2503 unless is_in_table_scope t.name, NS_HTML
2506 clear_stack_to_table_body_context()
2508 ins_mode = ins_mode_in_table
# table-level start tags, or </table>
2510 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
# `el` is presumably the variable of a loop over open_els whose header is
# missing from this listing: looks for an HTML tbody/tfoot/thead and tests
# each element against the table_scopers map
2513 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2516 if table_scopers[el.name] is el.namespace
2521 clear_stack_to_table_body_context()
2523 ins_mode = ins_mode_in_table
# end tags matched explicitly by this mode (handling not visible here)
2526 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2533 # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
# Insertion-mode handler for the "in row" mode; receives one token `t`.
# Visible behaviour: cell start tags are inserted and the mode becomes
# ins_mode_in_cell; the row-closing branches clear the stack to a table-row
# context and switch to ins_mode_in_table_body.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2534 ins_mode_in_row = (t) ->
# <th>/<td>
2535 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2536 clear_stack_to_table_row_context()
2537 insert_html_element t
2538 ins_mode = ins_mode_in_cell
# </tr>
2541 if t.type is TYPE_END_TAG and t.name is 'tr'
2542 if is_in_table_scope 'tr', NS_HTML
2543 clear_stack_to_table_row_context()
2545 ins_mode = ins_mode_in_table_body
# table-level start tags, or </table>
2549 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2550 if is_in_table_scope 'tr', NS_HTML
2551 clear_stack_to_table_row_context()
2553 ins_mode = ins_mode_in_table_body
# </tbody>, </tfoot>, </thead>: checks scope for the named section and for <tr>
2558 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2559 if is_in_table_scope t.name, NS_HTML
2560 if is_in_table_scope 'tr', NS_HTML
2561 clear_stack_to_table_row_context()
2563 ins_mode = ins_mode_in_table_body
# end tags matched explicitly by this mode (handling not visible here)
2568 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2575 # http://www.w3.org/TR/html5/syntax.html#close-the-cell
# Body of the "close the cell" steps: generate implied end tags, pop open
# elements looking for an HTML <td>/<th>, call clear_afe_to_marker() and switch
# to ins_mode_in_row.
# NOTE(review): the line that names this function is missing from this listing
# (presumably close_the_cell — confirm), as are indentation and some body lines.
2577 generate_implied_end_tags()
# Both alternatives compare the current node's name. The 'th' alternative used
# to compare the node object itself to the string, so it could never match.
2578 unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
# pop from the stack; the test below is presumably the pop loop's stop condition
2581 el = open_els.shift()
2582 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2584 clear_afe_to_marker()
2585 ins_mode = ins_mode_in_row
2588 # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
# Insertion-mode handler for the "in cell" mode; receives one token `t`.
# Visible behaviour: a matching </td>/</th> generates implied end tags, pops
# open elements looking for that cell, calls clear_afe_to_marker() and switches
# to ins_mode_in_row.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2589 ins_mode_in_cell = (t) ->
# </td> or </th>
2590 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2591 if is_in_table_scope t.name, NS_HTML
2592 generate_implied_end_tags()
2593 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2596 el = open_els.shift()
2597 if el.name is t.name and el.namespace is NS_HTML
2599 clear_afe_to_marker()
2600 ins_mode = ins_mode_in_row
# table-structure start tags
2604 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
# `el` is presumably the variable of a loop over open_els whose header is
# missing from this listing: looks for an HTML td/th and tests each element
# against the table_scopers map
2607 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2610 if table_scopers[el.name] is el.namespace
# end tags matched explicitly by this mode (handling not visible here)
2618 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
# table-level end tags: only the table-scope check is visible
2621 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2622 if is_in_table_scope t.name, NS_HTML
2632 # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
# Insertion-mode handler for the "in select" mode; receives one token `t`.
# Visible behaviour: <option>/<optgroup> start tags are inserted after checking
# whether the current node is an option/optgroup; several branches pop open
# elements looking for an HTML <select>.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2633 ins_mode_in_select = (t) ->
# NUL text token
2634 if t.type is TYPE_TEXT and t.text is "\u0000"
2637 if t.type is TYPE_TEXT
2640 if t.type is TYPE_COMMENT
2643 if t.type is TYPE_DOCTYPE
2646 if t.type is TYPE_START_TAG and t.name is 'html'
# <option>
2649 if t.type is TYPE_START_TAG and t.name is 'option'
2650 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2652 insert_html_element t
# <optgroup>
2654 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2655 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2657 if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2659 insert_html_element t
# </optgroup>
2661 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2662 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
# Name and namespace are both read from open_els[1], the node next to the
# current one. The namespace used to be read from open_els[0], which the line
# above has already established to be NS_HTML.
2663 if open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
2665 if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
# </option>
2670 if t.type is TYPE_END_TAG and t.name is 'option'
2671 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
# </select>: with a <select> in select scope, pop looking for an HTML <select>
2676 if t.type is TYPE_END_TAG and t.name is 'select'
2677 if is_in_select_scope 'select', NS_HTML
2679 el = open_els.shift()
2680 if el.name is 'select' and el.namespace is NS_HTML
# nested <select>
2686 if t.type is TYPE_START_TAG and t.name is 'select'
2689 el = open_els.shift()
2690 if el.name is 'select' and el.namespace is NS_HTML
2693 # spec says that this is the same as </select> but it doesn't say
2694 # to check scope first
# <input>, <keygen>, <textarea>
2696 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2698 unless is_in_select_scope 'select', NS_HTML
2701 el = open_els.shift()
2702 if el.name is 'select' and el.namespace is NS_HTML
2707 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2710 if t.type is TYPE_EOF
2717 # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
# Insertion-mode handler for the "in select in table" mode; receives one token
# `t`. Table-structure start and end tags pop open elements looking for an HTML
# <select>; the final statement delegates to ins_mode_in_select.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2718 ins_mode_in_select_in_table = (t) ->
# table-structure start tags
2719 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2722 el = open_els.shift()
2723 if el.name is 'select' and el.namespace is NS_HTML
# table-structure end tags: table scope for the named element is checked first
2728 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2730 unless is_in_table_scope t.name, NS_HTML
2733 el = open_els.shift()
2734 if el.name is 'select' and el.namespace is NS_HTML
# hand the token to the plain "in select" handler
2740 ins_mode_in_select t
2743 # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
# Insertion-mode handler for the "in template" mode; receives one token `t`.
# For start tags, the first entry of template_ins_modes is replaced (shift then
# unshift) with the mode chosen from the tag name, and ins_mode is set to that
# same mode.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2744 ins_mode_in_template = (t) ->
2745 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
# head-type start tags, or </template>
2748 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
# caption/colgroup/tbody/tfoot/thead -> "in table"
2751 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2752 template_ins_modes.shift()
2753 template_ins_modes.unshift ins_mode_in_table
2754 ins_mode = ins_mode_in_table
# col -> "in column group"
2757 if t.type is TYPE_START_TAG and t.name is 'col'
2758 template_ins_modes.shift()
2759 template_ins_modes.unshift ins_mode_in_column_group
2760 ins_mode = ins_mode_in_column_group
# tr -> "in table body"
2763 if t.type is TYPE_START_TAG and t.name is 'tr'
2764 template_ins_modes.shift()
2765 template_ins_modes.unshift ins_mode_in_table_body
2766 ins_mode = ins_mode_in_table_body
# td/th -> "in row"
2769 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2770 template_ins_modes.shift()
2771 template_ins_modes.unshift ins_mode_in_row
2772 ins_mode = ins_mode_in_row
# any other start tag -> "in body"
2775 if t.type is TYPE_START_TAG
2776 template_ins_modes.shift()
2777 template_ins_modes.unshift ins_mode_in_body
2778 ins_mode = ins_mode_in_body
# any other end tag
2781 if t.type is TYPE_END_TAG
# EOF: checks template_tag_is_open(), pops looking for an HTML <template>, then
# calls clear_afe_to_marker() and drops the first template_ins_modes entry
2784 if t.type is TYPE_EOF
2785 unless template_tag_is_open()
2790 el = open_els.shift()
2791 if el.name is 'template' and el.namespace is NS_HTML
2793 clear_afe_to_marker()
2794 template_ins_modes.shift()
2799 # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
# Insertion-mode handler for the "after body" mode; receives one token `t`.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2800 ins_mode_after_body = (t) ->
# comments are appended as the last child of the last entry in open_els
2804 if t.type is TYPE_COMMENT
2805 first = open_els[open_els.length - 1]
2806 insert_comment t, [first, first.children.length]
2808 if t.type is TYPE_DOCTYPE
2811 if t.type is TYPE_START_TAG and t.name is 'html'
# </html>: consults flag_fragment_parsing; ins_mode_after_after_body is the
# next mode
2814 if t.type is TYPE_END_TAG and t.name is 'html'
2815 if flag_fragment_parsing
2818 ins_mode = ins_mode_after_after_body
2820 if t.type is TYPE_EOF
# remaining statement: switch back to ins_mode_in_body
2825 ins_mode = ins_mode_in_body
2829 # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
# Insertion-mode handler for the "in frameset" mode; receives one token `t`.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2830 ins_mode_in_frameset = (t) ->
2834 if t.type is TYPE_COMMENT
2837 if t.type is TYPE_DOCTYPE
2840 if t.type is TYPE_START_TAG and t.name is 'html'
# <frameset>
2843 if t.type is TYPE_START_TAG and t.name is 'frameset'
2844 insert_html_element t
# </frameset>
2846 if t.type is TYPE_END_TAG and t.name is 'frameset'
# only one open element left
2847 if open_els.length is 1
2849 return # fragment case
# not parsing a fragment and the current node is not a frameset:
# continue in ins_mode_after_frameset
2851 if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2852 ins_mode = ins_mode_after_frameset
# <frame>: inserted, self-closing flag acknowledged
2854 if t.type is TYPE_START_TAG and t.name is 'frame'
2855 insert_html_element t
2857 t.acknowledge_self_closing()
2859 if t.type is TYPE_START_TAG and t.name is 'noframes'
2862 if t.type is TYPE_EOF
2863 if open_els.length isnt 1
2871 # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
# Insertion-mode handler for the "after frameset" mode; receives one token `t`.
# The only state change visible here is </html> switching to
# ins_mode_after_after_frameset.
# NOTE(review): this listing has lost its indentation and some lines, so the
# other branch bodies are not visible here.
2872 ins_mode_after_frameset = (t) ->
2876 if t.type is TYPE_COMMENT
2879 if t.type is TYPE_DOCTYPE
2882 if t.type is TYPE_START_TAG and t.name is 'html'
2885 if t.type is TYPE_END_TAG and t.name is 'html'
2886 ins_mode = ins_mode_after_after_frameset
2888 if t.type is TYPE_START_TAG and t.name is 'noframes'
2891 if t.type is TYPE_EOF
2898 # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
# Insertion-mode handler for the "after after body" mode; receives one token
# `t`.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2899 ins_mode_after_after_body = (t) ->
# comments are appended as the last child of doc
2900 if t.type is TYPE_COMMENT
2901 insert_comment t, [doc, doc.children.length]
# doctype, space token, or <html>
2903 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2906 if t.type is TYPE_EOF
# remaining statement: switch back to ins_mode_in_body
2911 ins_mode = ins_mode_in_body
2915 # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
# Insertion-mode handler for the "after after frameset" mode; receives one
# token `t`.
# NOTE(review): this listing has lost its indentation and some lines, so most
# branch bodies are not visible here.
2916 ins_mode_after_after_frameset = (t) ->
# comments are appended as the last child of doc
2917 if t.type is TYPE_COMMENT
2918 insert_comment t, [doc, doc.children.length]
# doctype, space token, or <html>
2920 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2923 if t.type is TYPE_EOF
2926 if t.type is TYPE_START_TAG and t.name is 'noframes'
2933 # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
# Predicate on a tag token `t`, used by in_foreign_content for <font> start
# tags. Tests attribute names against 'color', 'face' and 'size'; `a` is
# presumably a [name, value] pair from a loop over the token's attributes (the
# loop header and return lines are missing from this listing).
2934 has_color_face_or_size = (t) ->
2936 if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
# Zero-argument helper called from in_foreign_content_other_start (for a
# self-closing script tag) and from in_foreign_content (for </script> when the
# current node is an SVG script).
# NOTE(review): the body of this function is missing from this listing.
2939 in_foreign_content_end_script = ->
# Handles a start tag `t` in foreign content: adjusts the token according to
# the namespace of the adjusted current node, then inserts it as a foreign
# element in that namespace.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting of the self-closing handling is not visible here.
2943 in_foreign_content_other_start = (t) ->
2944 acn = adjusted_current_node()
# MathML: fix up attributes
2945 if acn.namespace is NS_MATHML
2946 adjust_mathml_attributes t
# SVG: replace the tag name when svg_name_fixes has an entry for it
2947 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2948 t.name = svg_name_fixes[t.name]
# SVG: fix up attributes
2949 if acn.namespace is NS_SVG
2950 adjust_svg_attributes t
2951 adjust_foreign_attributes t
2952 insert_foreign_element t, acn.namespace
# self-closing tokens are acknowledged; a self-closing script also runs the
# end-script helper
2953 if t.flag 'self-closing'
2954 if t.name is 'script'
2955 t.acknowledge_self_closing()
2956 in_foreign_content_end_script()
2960 t.acknowledge_self_closing()
# Token handler for foreign (non-HTML) content; receives one token `t`.
# Visible behaviour: NUL text is replaced by U+FFFD; other text clears
# flag_frameset_ok; start tags go through in_foreign_content_other_start; end
# tags are matched against open elements by lower-cased name, with a fallback
# to the current HTML insertion mode.
# NOTE(review): this listing has lost its indentation and some lines, so the
# nesting and parts of the branch bodies are not visible here.
2962 in_foreign_content = (t) ->
# NUL text token -> replacement character
2963 if t.type is TYPE_TEXT and t.text is "\u0000"
2965 insert_character new_character_token "\ufffd"
2970 if t.type is TYPE_TEXT
2971 flag_frameset_ok = false
2974 if t.type is TYPE_COMMENT
2977 if t.type is TYPE_DOCTYPE
# start tags from this HTML-name list (font only when has_color_face_or_size)
2980 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
# when parsing a fragment the tag is treated like any other foreign start tag
2982 if flag_fragment_parsing
2983 in_foreign_content_other_start t
# loop whose visible test is: current node is a MathML text integration point,
# an HTML integration point, or an HTML element
2985 loop # is this safe?
2987 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
# any other start tag
2991 if t.type is TYPE_START_TAG
2992 in_foreign_content_other_start t
# </script> with an SVG script as current node
2994 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2995 in_foreign_content_end_script()
# any other end tag; where `node` is assigned is not visible in this listing
2997 if t.type is TYPE_END_TAG
3000 if node.name.toLowerCase() isnt t.name
# reached the last entry of open_els
3003 if node is open_els[open_els.length - 1]
3005 if node.name.toLowerCase() is t.name
3007 el = open_els.shift()
3012 if node.namespace is NS_HTML
3014 ins_mode t # explicitly call HTML insertion mode
3018 # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
# Body of a tokenizer state handler: reads the next character of `txt` at `cur`
# (advancing `cur`) and dispatches on it. Visible outcomes: a text node built
# from parse_character_reference(), a switch to tok_state_tag_open, a text node
# for the character itself, or an EOF token.
# NOTE(review): the line naming this function (presumably tok_state_data —
# confirm) and the `when` labels are missing from this listing.
3020 switch c = txt.charAt(cur++)
3022 return new_text_node parse_character_reference()
3024 tok_state = tok_state_tag_open
3027 return new_text_node c
3029 return new_eof_token()
3031 return new_text_node c
3034 # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3035 # not needed: tok_state_character_reference_in_data = ->
3036 # just call parse_character_reference()
3038 # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
# Tokenizer state handler for RCDATA: reads the next character of `txt` at
# `cur` (advancing `cur`) and dispatches on it. Visible outcomes: a text node
# from parse_character_reference(), a switch to
# tok_state_rcdata_less_than_sign, a U+FFFD character token, an EOF token, or a
# character token for the character itself.
# NOTE(review): the `when` labels are missing from this listing.
3039 tok_state_rcdata = ->
3040 switch c = txt.charAt(cur++)
3042 return new_text_node parse_character_reference()
3044 tok_state = tok_state_rcdata_less_than_sign
3047 return new_character_token "\ufffd"
3049 return new_eof_token()
3051 return new_character_token c
3054 # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3055 # not needed: tok_state_character_reference_in_rcdata = ->
3056 # just call parse_character_reference()
3058 # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
# Tokenizer state handler for RAWTEXT: reads the next character of `txt` at
# `cur` (advancing `cur`) and dispatches on it. Visible outcomes: a switch to
# tok_state_rawtext_less_than_sign, a U+FFFD character token, an EOF token, or
# a character token for the character itself.
# NOTE(review): the `when` labels are missing from this listing.
3059 tok_state_rawtext = ->
3060 switch c = txt.charAt(cur++)
3062 tok_state = tok_state_rawtext_less_than_sign
3065 return new_character_token "\ufffd"
3067 return new_eof_token()
3069 return new_character_token c
3072 # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
# Tokenizer state handler for script data: reads the next character of `txt`
# at `cur` (advancing `cur`) and dispatches on it. Visible outcomes: a switch
# to tok_state_script_data_less_than_sign, a U+FFFD character token, an EOF
# token, or a character token for the character itself.
# NOTE(review): the `when` labels are missing from this listing.
3073 tok_state_script_data = ->
3074 switch c = txt.charAt(cur++)
3076 tok_state = tok_state_script_data_less_than_sign
3079 return new_character_token "\ufffd"
3081 return new_eof_token()
3083 return new_character_token c
3086 # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
# Tokenizer state handler for PLAINTEXT: reads the next character of `txt` at
# `cur` (advancing `cur`) and dispatches on it. Visible outcomes: a U+FFFD
# character token, an EOF token, or a character token for the character
# itself. No state change is visible in this handler.
# NOTE(review): the `when` labels are missing from this listing.
3087 tok_state_plaintext = ->
3088 switch c = txt.charAt(cur++)
3091 return new_character_token "\ufffd"
3093 return new_eof_token()
3095 return new_character_token c
3099 # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
# Tokenizer state handler entered after "<": reads the next character of `txt`
# at `cur` (advancing `cur`) and decides what kind of markup follows.
# NOTE(review): the conditions selecting between the outcomes below are missing
# from this listing.
3100 tok_state_tag_open = ->
3101 c = txt.charAt(cur++)
# continue with a markup declaration
3103 tok_state = tok_state_markup_declaration_open
# continue with an end tag
3106 tok_state = tok_state_end_tag_open
# start a new open tag named after the lower-cased character
3109 tok_cur_tag = new_open_tag c.toLowerCase()
3110 tok_state = tok_state_tag_name
# start a new open tag named after the character as-is
3113 tok_cur_tag = new_open_tag c
3114 tok_state = tok_state_tag_name
# start a bogus comment whose text begins with '?'
3118 tok_cur_tag = new_comment_token '?' # FIXME right?
3119 tok_state = tok_state_bogus_comment
# otherwise: back to the data state, un-read the character, emit a literal '<'
3123 tok_state = tok_state_data
3124 cur -= 1 # we didn't parse/handle the char after <
3125 return new_text_node '<'
3127 # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
# Tokenizer state handler entered after "</": reads the next character of
# `txt` at `cur` (advancing `cur`) and decides how to continue.
# NOTE(review): the conditions selecting between the outcomes below are missing
# from this listing.
3128 tok_state_end_tag_open = ->
3129 c = txt.charAt(cur++)
# start a new end tag named after the lower-cased character
3131 tok_cur_tag = new_end_tag c.toLowerCase()
3132 tok_state = tok_state_tag_name
# start a new end tag named after the character as-is
3135 tok_cur_tag = new_end_tag c
3136 tok_state = tok_state_tag_name
# back to the data state
3140 tok_state = tok_state_data
# back to the data state, emitting a literal '</'
3144 tok_state = tok_state_data
3145 return new_text_node '</'
# otherwise: start a bogus comment whose text begins with the character
3148 tok_cur_tag = new_comment_token c
3149 tok_state = tok_state_bogus_comment
3152 # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
# Tokenizer state handler that accumulates the name of tok_cur_tag: reads the
# next character of `txt` at `cur` (advancing `cur`) and dispatches on it.
# NOTE(review): most `when` labels are missing from this listing.
3153 tok_state_tag_name = ->
3154 switch c = txt.charAt(cur++)
# tab, newline, form feed or space ends the name; attributes may follow
3155 when "\t", "\n", "\u000c", ' '
3156 tok_state = tok_state_before_attribute_name
3158 tok_state = tok_state_self_closing_start_tag
3160 tok_state = tok_state_data
# append U+FFFD to the name
3166 tok_cur_tag.name += "\ufffd"
3169 tok_state = tok_state_data
# append the lower-cased character to the name
3172 tok_cur_tag.name += c.toLowerCase()
# append the character as-is
3174 tok_cur_tag.name += c
3177 # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
# Tokenizer state handler entered after "<" in RCDATA: reads the next character
# of `txt` at `cur` (advancing `cur`).
# NOTE(review): the conditions selecting between the two outcomes are missing
# from this listing.
3178 tok_state_rcdata_less_than_sign = ->
3179 c = txt.charAt(cur++)
# possible end tag: reset temporary_buffer and continue in the end-tag-open state
3181 temporary_buffer = ''
3182 tok_state = tok_state_rcdata_end_tag_open
# otherwise: back to RCDATA, un-read the character, emit a literal '<'
3185 tok_state = tok_state_rcdata
3186 cur -= 1 # reconsume the input character
3187 return new_character_token '<'
3189 # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
# Tokenizer state handler entered after "</" in RCDATA: reads the next
# character of `txt` at `cur` (advancing `cur`).
# NOTE(review): the conditions selecting between the outcomes are missing from
# this listing.
3190 tok_state_rcdata_end_tag_open = ->
3191 c = txt.charAt(cur++)
# start an end tag from the lower-cased character; the raw character is kept in
# temporary_buffer
3193 tok_cur_tag = new_end_tag c.toLowerCase()
3194 temporary_buffer += c
3195 tok_state = tok_state_rcdata_end_tag_name
# start an end tag from the character as-is
3198 tok_cur_tag = new_end_tag c
3199 temporary_buffer += c
3200 tok_state = tok_state_rcdata_end_tag_name
# otherwise: back to RCDATA, un-read the character, emit a literal "</"
3203 tok_state = tok_state_rcdata
3204 cur -= 1 # reconsume the input character
3205 return new_character_token "</" # fixfull separate these
3207 # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
# Returns true when token `t` is an end tag whose name equals the name of the
# current node (open_els[0]); used by the RCDATA, RAWTEXT and script-data
# end-tag-name states.
3208 is_appropriate_end_tag = (t) ->
3209 # fixfull: this assumes that open_els[0].name is "the tag name of the last
3210 # start tag to have been emitted from this tokenizer"
3211 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3213 # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
# Tokenizer state handler that accumulates a candidate end-tag name inside
# RCDATA: reads the next character of `txt` at `cur` (advancing `cur`). Three
# branches leave RCDATA only when tok_cur_tag passes is_appropriate_end_tag;
# otherwise the buffered text is emitted as characters.
# NOTE(review): this listing has lost its indentation and some lines, so most
# branch conditions are not visible here.
3214 tok_state_rcdata_end_tag_name = ->
3215 c = txt.charAt(cur++)
# tab, newline, form feed or space
3216 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3217 if is_appropriate_end_tag tok_cur_tag
3218 tok_state = tok_state_before_attribute_name
3220 # else fall through to "Anything else"
3222 if is_appropriate_end_tag tok_cur_tag
3223 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3225 # else fall through to "Anything else"
3227 if is_appropriate_end_tag tok_cur_tag
3228 tok_state = tok_state_data
3230 # else fall through to "Anything else"
# extend the tag name (lower-cased) and keep the raw character in the buffer
3232 tok_cur_tag.name += c.toLowerCase()
3233 temporary_buffer += c
# extend the tag name with the character as-is
3236 tok_cur_tag.name += c
3237 temporary_buffer += c
# "Anything else": back to RCDATA, un-read the character, emit "</" + buffer
3240 tok_state = tok_state_rcdata
3241 cur -= 1 # reconsume the input character
3242 return new_character_token '</' + temporary_buffer # fixfull separate these
3244 # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
# Tokenizer state handler entered after "<" in RAWTEXT: reads the next
# character of `txt` at `cur` (advancing `cur`).
# NOTE(review): the conditions selecting between the two outcomes are missing
# from this listing.
3245 tok_state_rawtext_less_than_sign = ->
3246 c = txt.charAt(cur++)
# possible end tag: reset temporary_buffer and continue in the end-tag-open state
3248 temporary_buffer = ''
3249 tok_state = tok_state_rawtext_end_tag_open
# otherwise: back to RAWTEXT, un-read the character, emit a literal '<'
3252 tok_state = tok_state_rawtext
3253 cur -= 1 # reconsume the input character
3254 return new_character_token '<'
3256 # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
# Tokenizer state handler entered after "</" in RAWTEXT: reads the next
# character of `txt` at `cur` (advancing `cur`).
# NOTE(review): the conditions selecting between the outcomes are missing from
# this listing.
3257 tok_state_rawtext_end_tag_open = ->
3258 c = txt.charAt(cur++)
# start an end tag from the lower-cased character; the raw character is kept in
# temporary_buffer
3260 tok_cur_tag = new_end_tag c.toLowerCase()
3261 temporary_buffer += c
3262 tok_state = tok_state_rawtext_end_tag_name
# start an end tag from the character as-is
3265 tok_cur_tag = new_end_tag c
3266 temporary_buffer += c
3267 tok_state = tok_state_rawtext_end_tag_name
# otherwise: back to RAWTEXT, un-read the character, emit a literal "</"
3270 tok_state = tok_state_rawtext
3271 cur -= 1 # reconsume the input character
3272 return new_character_token "</" # fixfull separate these
3274 # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
# Tokenizer state handler that accumulates a candidate end-tag name inside
# RAWTEXT: reads the next character of `txt` at `cur` (advancing `cur`). Three
# branches leave RAWTEXT only when tok_cur_tag passes is_appropriate_end_tag;
# otherwise the buffered text is emitted as characters.
# NOTE(review): this listing has lost its indentation and some lines, so most
# branch conditions are not visible here.
3275 tok_state_rawtext_end_tag_name = ->
3276 c = txt.charAt(cur++)
# tab, newline, form feed or space
3277 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3278 if is_appropriate_end_tag tok_cur_tag
3279 tok_state = tok_state_before_attribute_name
3281 # else fall through to "Anything else"
3283 if is_appropriate_end_tag tok_cur_tag
3284 tok_state = tok_state_self_closing_start_tag
3286 # else fall through to "Anything else"
3288 if is_appropriate_end_tag tok_cur_tag
3289 tok_state = tok_state_data
3291 # else fall through to "Anything else"
# extend the tag name (lower-cased) and keep the raw character in the buffer
3293 tok_cur_tag.name += c.toLowerCase()
3294 temporary_buffer += c
# extend the tag name with the character as-is
3297 tok_cur_tag.name += c
3298 temporary_buffer += c
# "Anything else": back to RAWTEXT, un-read the character, emit "</" + buffer
3301 tok_state = tok_state_rawtext
3302 cur -= 1 # reconsume the input character
3303 return new_character_token '</' + temporary_buffer # fixfull separate these
3305 # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
# Tokenizer state handler entered after "<" in script data: reads the next
# character of `txt` at `cur` (advancing `cur`).
# NOTE(review): the conditions selecting between the outcomes are missing from
# this listing.
3306 tok_state_script_data_less_than_sign = ->
3307 c = txt.charAt(cur++)
# possible end tag: reset temporary_buffer and continue in the end-tag-open state
3309 temporary_buffer = ''
3310 tok_state = tok_state_script_data_end_tag_open
# escape start: emits '<!' as a single character token
3313 tok_state = tok_state_script_data_escape_start
3314 return new_character_token '<!' # fixfull split
# otherwise: back to script data, un-read the character, emit a literal '<'
3316 tok_state = tok_state_script_data
3317 cur -= 1 # Reconsume
3318 return new_character_token '<'
3320 # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
# Tokenizer state handler entered after "</" in script data: reads the next
# character of `txt` at `cur` (advancing `cur`).
# NOTE(review): the conditions selecting between the outcomes are missing from
# this listing.
3321 tok_state_script_data_end_tag_open = ->
3322 c = txt.charAt(cur++)
# start an end tag from the lower-cased character; the raw character is kept in
# temporary_buffer
3324 tok_cur_tag = new_end_tag c.toLowerCase()
3325 temporary_buffer += c
3326 tok_state = tok_state_script_data_end_tag_name
# start an end tag from the character as-is
3329 tok_cur_tag = new_end_tag c
3330 temporary_buffer += c
3331 tok_state = tok_state_script_data_end_tag_name
# otherwise: back to script data, un-read the character, emit a literal '</'
3334 tok_state = tok_state_script_data
3335 cur -= 1 # Reconsume
3336 return new_character_token '</'
3338 # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
# Tokenizer state handler that accumulates a candidate end-tag name inside
# script data: reads the next character of `txt` at `cur` (advancing `cur`).
# Three branches leave script data only when tok_cur_tag passes
# is_appropriate_end_tag; otherwise the buffered text is emitted as characters.
# NOTE(review): this listing has lost its indentation and some lines, so most
# branch conditions are not visible here.
3339 tok_state_script_data_end_tag_name = ->
3340 c = txt.charAt(cur++)
# tab, newline, form feed or space
3341 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3342 if is_appropriate_end_tag tok_cur_tag
3343 tok_state = tok_state_before_attribute_name
3347 if is_appropriate_end_tag tok_cur_tag
3348 tok_state = tok_state_self_closing_start_tag
3352 if is_appropriate_end_tag tok_cur_tag
3353 tok_state = tok_state_data
# extend the tag name (lower-cased) and keep the raw character in the buffer
3357 tok_cur_tag.name += c.toLowerCase()
3358 temporary_buffer += c
# extend the tag name with the character as-is
3361 tok_cur_tag.name += c
3362 temporary_buffer += c
# otherwise: back to script data, un-read the character, emit "</" + buffer
3365 tok_state = tok_state_script_data
3366 cur -= 1 # Reconsume
3367 return new_character_token "</#{temporary_buffer}" # fixfull split
3369 # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
# Tokenizer handler for the "script data escape start" state. Reads one
# character; either advances to the escape-start-dash state and returns a '-'
# text token, or switches back to script data and un-reads the character.
3370 tok_state_script_data_escape_start = ->
3371 c = txt.charAt(cur++)
3373 tok_state = tok_state_script_data_escape_start_dash
3374 return new_character_token '-'
3376 tok_state = tok_state_script_data
3377 cur -= 1 # Reconsume
3380 # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
# Tokenizer handler for the "script data escape start dash" state. Reads one
# character; either advances to the escaped-dash-dash state and returns a '-'
# text token, or switches back to script data and un-reads the character.
3381 tok_state_script_data_escape_start_dash = ->
3382 c = txt.charAt(cur++)
3384 tok_state = tok_state_script_data_escaped_dash_dash
3385 return new_character_token '-'
3387 tok_state = tok_state_script_data
3388 cur -= 1 # Reconsume
3391 # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
# Tokenizer handler for the "script data escaped" state. Reads one character
# and either: moves to escaped-dash (returning '-'), moves to
# escaped-less-than-sign, returns U+FFFD as a replacement text token, bails
# out to the data state with the character un-read, or returns the character
# itself as a text token while staying in this state.
3392 tok_state_script_data_escaped = ->
3393 c = txt.charAt(cur++)
3395 tok_state = tok_state_script_data_escaped_dash
3396 return new_character_token '-'
3398 tok_state = tok_state_script_data_escaped_less_than_sign
3402 return new_character_token "\ufffd"
3404 tok_state = tok_state_data
3406 cur -= 1 # Reconsume
3409 return new_character_token c
3411 # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
# Tokenizer handler for the "script data escaped dash" state. Reads one
# character and either: moves to escaped-dash-dash (returning '-'), moves to
# escaped-less-than-sign, returns to escaped with a U+FFFD text token, bails
# out to the data state with the character un-read, or returns to escaped
# with the character itself as a text token.
3412 tok_state_script_data_escaped_dash = ->
3413 c = txt.charAt(cur++)
3415 tok_state = tok_state_script_data_escaped_dash_dash
3416 return new_character_token '-'
3418 tok_state = tok_state_script_data_escaped_less_than_sign
3422 tok_state = tok_state_script_data_escaped
3423 return new_character_token "\ufffd"
3425 tok_state = tok_state_data
3427 cur -= 1 # Reconsume
3430 tok_state = tok_state_script_data_escaped
3431 return new_character_token c
3433 # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
# Tokenizer handler for the "script data escaped dash dash" state. Reads one
# character and either: returns '-' without changing state, moves to
# escaped-less-than-sign, leaves the escape by switching to script data
# (returning '>'), returns to escaped with U+FFFD, bails out to the data state
# with the character un-read, or returns to escaped with the character itself.
3434 tok_state_script_data_escaped_dash_dash = ->
3435 c = txt.charAt(cur++)
3437 return new_character_token '-'
3439 tok_state = tok_state_script_data_escaped_less_than_sign
3442 tok_state = tok_state_script_data
3443 return new_character_token '>'
3446 tok_state = tok_state_script_data_escaped
3447 return new_character_token "\ufffd"
3450 tok_state = tok_state_data
3451 cur -= 1 # Reconsume
3454 tok_state = tok_state_script_data_escaped
3455 return new_character_token c
3457 # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
# Tokenizer handler for the "script data escaped less-than sign" state. Reads
# one character and either: clears temporary_buffer and moves to
# escaped-end-tag-open; seeds temporary_buffer with the letter (lowercased in
# the first letter path), moves to double-escape-start and returns "<" plus
# the original character as text; or returns to escaped, un-reads the
# character and returns '<' as text.
3458 tok_state_script_data_escaped_less_than_sign = ->
3459 c = txt.charAt(cur++)
3461 temporary_buffer = ''
3462 tok_state = tok_state_script_data_escaped_end_tag_open
3465 temporary_buffer = c.toLowerCase() # yes, really
3466 tok_state = tok_state_script_data_double_escape_start
3467 return new_character_token "<#{c}" # fixfull split
3469 temporary_buffer = c
3470 tok_state = tok_state_script_data_double_escape_start
3471 return new_character_token "<#{c}" # fixfull split
3473 tok_state = tok_state_script_data_escaped
3474 cur -= 1 # Reconsume
3475 return new_character_token '<'
3477 # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
# Tokenizer handler for the "script data escaped end tag open" state. Reads
# one character; the letter paths start a new end-tag token in tok_cur_tag
# (name lowercased in the first path), append the raw character to
# temporary_buffer and move to escaped-end-tag-name. The remaining path
# returns to escaped, un-reads the character and returns '</' as text.
3478 tok_state_script_data_escaped_end_tag_open = ->
3479 c = txt.charAt(cur++)
3481 tok_cur_tag = new_end_tag c.toLowerCase()
3482 temporary_buffer += c
3483 tok_state = tok_state_script_data_escaped_end_tag_name
3486 tok_cur_tag = new_end_tag c
3487 temporary_buffer += c
3488 tok_state = tok_state_script_data_escaped_end_tag_name
3491 tok_state = tok_state_script_data_escaped
3492 cur -= 1 # Reconsume
3493 return new_character_token '</' # fixfull split
3495 # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
# Tokenizer handler for the "script data escaped end tag name" state. Reads
# one character. When is_appropriate_end_tag accepts tok_cur_tag, the
# tokenizer moves to before-attribute-name, self-closing-start-tag or data.
# Letters extend tok_cur_tag.name and temporary_buffer. Any other outcome
# returns to escaped, un-reads the character and returns "</" plus the
# buffered text as a character token.
#
# NOTE(review): both letter paths append c.toLowerCase() to temporary_buffer,
# whereas tok_state_script_data_end_tag_name appends c unchanged. The spec
# section linked above says to append the current input character itself, so
# the "</..." text returned on the fallback path appears to lose the original
# letter case (e.g. "</FOO" would come back as "</foo") - confirm and align.
3496 tok_state_script_data_escaped_end_tag_name = ->
3497 c = txt.charAt(cur++)
3498 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3499 if is_appropriate_end_tag tok_cur_tag
3500 tok_state = tok_state_before_attribute_name
3504 if is_appropriate_end_tag tok_cur_tag
3505 tok_state = tok_state_self_closing_start_tag
3509 if is_appropriate_end_tag tok_cur_tag
3510 tok_state = tok_state_data
3514 tok_cur_tag.name += c.toLowerCase()
3515 temporary_buffer += c.toLowerCase()
3518 tok_cur_tag.name += c
3519 temporary_buffer += c.toLowerCase()
3522 tok_state = tok_state_script_data_escaped
3523 cur -= 1 # Reconsume
3524 return new_character_token "</#{temporary_buffer}" # fixfull split
3526 # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
# Tokenizer handler for the "script data double escape start" state. Reads one
# character. On whitespace, '/' or '>' it enters the double-escaped state when
# temporary_buffer spells 'script' (otherwise the escaped state) and returns
# the character as text. Letters are accumulated in temporary_buffer
# (lowercased in the first letter path) and returned as text unchanged.
# The remaining path returns to escaped and un-reads the character.
3527 tok_state_script_data_double_escape_start = ->
3528 c = txt.charAt(cur++)
3529 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3530 if temporary_buffer is 'script'
3531 tok_state = tok_state_script_data_double_escaped
3533 tok_state = tok_state_script_data_escaped
3534 return new_character_token c
3536 temporary_buffer += c.toLowerCase() # yes, really lowercase
3537 return new_character_token c
3539 temporary_buffer += c
3540 return new_character_token c
3542 tok_state = tok_state_script_data_escaped
3543 cur -= 1 # Reconsume
3546 # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
# Tokenizer handler for the "script data double escaped" state. Reads one
# character and either: moves to double-escaped-dash (returning '-'), moves
# to double-escaped-less-than-sign (returning '<'), returns U+FFFD as a
# replacement text token, bails out to the data state with the character
# un-read, or returns the character itself as text.
3547 tok_state_script_data_double_escaped = ->
3548 c = txt.charAt(cur++)
3550 tok_state = tok_state_script_data_double_escaped_dash
3551 return new_character_token '-'
3553 tok_state = tok_state_script_data_double_escaped_less_than_sign
3554 return new_character_token '<'
3557 return new_character_token "\ufffd"
3560 tok_state = tok_state_data
3561 cur -= 1 # Reconsume
3564 return new_character_token c
3566 # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
# Tokenizer handler for the "script data double escaped dash" state. Reads one
# character and either: moves to double-escaped-dash-dash (returning '-'),
# moves to double-escaped-less-than-sign (returning '<'), returns to
# double-escaped with U+FFFD, bails out to the data state with the character
# un-read, or returns to double-escaped with the character itself as text.
3567 tok_state_script_data_double_escaped_dash = ->
3568 c = txt.charAt(cur++)
3570 tok_state = tok_state_script_data_double_escaped_dash_dash
3571 return new_character_token '-'
3573 tok_state = tok_state_script_data_double_escaped_less_than_sign
3574 return new_character_token '<'
3577 tok_state = tok_state_script_data_double_escaped
3578 return new_character_token "\ufffd"
3581 tok_state = tok_state_data
3582 cur -= 1 # Reconsume
3585 tok_state = tok_state_script_data_double_escaped
3586 return new_character_token c
3588 # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
# Tokenizer handler for the "script data double escaped dash dash" state.
# Reads one character and either: returns '-' without changing state, moves
# to double-escaped-less-than-sign (returning '<'), switches to script data
# (returning '>'), returns to double-escaped with U+FFFD, bails out to the
# data state with the character un-read, or returns to double-escaped with
# the character itself as text.
3589 tok_state_script_data_double_escaped_dash_dash = ->
3590 c = txt.charAt(cur++)
3592 return new_character_token '-'
3594 tok_state = tok_state_script_data_double_escaped_less_than_sign
3595 return new_character_token '<'
3597 tok_state = tok_state_script_data
3598 return new_character_token '>'
3601 tok_state = tok_state_script_data_double_escaped
3602 return new_character_token "\ufffd"
3605 tok_state = tok_state_data
3606 cur -= 1 # Reconsume
3609 tok_state = tok_state_script_data_double_escaped
3610 return new_character_token c
3612 # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
# Tokenizer handler for the "script data double escaped less-than sign"
# state. Reads one character; either clears temporary_buffer, moves to
# double-escape-end and returns '/' as text, or returns to double-escaped and
# un-reads the character.
3613 tok_state_script_data_double_escaped_less_than_sign = ->
3614 c = txt.charAt(cur++)
3616 temporary_buffer = ''
3617 tok_state = tok_state_script_data_double_escape_end
3618 return new_character_token '/'
3620 tok_state = tok_state_script_data_double_escaped
3621 cur -= 1 # Reconsume
3624 # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
# Tokenizer handler for the "script data double escape end" state. Reads one
# character. On whitespace, '/' or '>' it drops back to the (single) escaped
# state when temporary_buffer spells 'script' (otherwise stays double-escaped)
# and returns the character as text. Letters are accumulated in
# temporary_buffer (lowercased in the first letter path) and returned as text
# unchanged. The remaining path returns to double-escaped and un-reads the
# character.
3625 tok_state_script_data_double_escape_end = ->
3626 c = txt.charAt(cur++)
3627 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3628 if temporary_buffer is 'script'
3629 tok_state = tok_state_script_data_escaped
3631 tok_state = tok_state_script_data_double_escaped
3632 return new_character_token c
3634 temporary_buffer += c.toLowerCase() # yes, really lowercase
3635 return new_character_token c
3637 temporary_buffer += c
3638 return new_character_token c
3640 tok_state = tok_state_script_data_double_escaped
3641 cur -= 1 # Reconsume
3644 # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
# Tokenizer handler for the "before attribute name" state. Switches on the
# next character: it may hand off to self-closing-start-tag or data, or pick
# the first character of a new attribute name (U+FFFD replacement, or the
# lowercased character). A new [name, ''] pair is then pushed onto the FRONT
# of tok_cur_tag.attrs_a - so attrs_a[0] is always the attribute currently
# being built - and the tokenizer moves to the attribute-name state.
3645 tok_state_before_attribute_name = ->
3647 switch c = txt.charAt(cur++)
3648 when "\t", "\n", "\u000c", ' '
3651 tok_state = tok_state_self_closing_start_tag
3654 tok_state = tok_state_data
3660 attr_name = "\ufffd"
3661 when '"', "'", '<', '='
3666 tok_state = tok_state_data
3669 attr_name = c.toLowerCase()
3673 tok_cur_tag.attrs_a.unshift [attr_name, '']
3674 tok_state = tok_state_attribute_name
3677 # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
# Tokenizer handler for the "attribute name" state. Switches on the next
# character: whitespace ends the name (after-attribute-name); other paths hand
# off to self-closing-start-tag, before-attribute-value or data; everything
# else extends the name of the attribute under construction, which lives at
# tok_cur_tag.attrs_a[0][0] (U+FFFD replacement, raw character, or lowercased
# character depending on the path).
3678 tok_state_attribute_name = ->
3679 switch c = txt.charAt(cur++)
3680 when "\t", "\n", "\u000c", ' '
3681 tok_state = tok_state_after_attribute_name
3683 tok_state = tok_state_self_closing_start_tag
3685 tok_state = tok_state_before_attribute_value
3687 tok_state = tok_state_data
3693 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3696 tok_cur_tag.attrs_a[0][0] += c
3699 tok_state = tok_state_data
3702 tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3704 tok_cur_tag.attrs_a[0][0] += c
3707 # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
# Tokenizer handler for the "after attribute name" state. Reads one character
# and either hands off to self-closing-start-tag, before-attribute-value or
# data, or starts a NEW attribute by unshifting a [name, ''] pair onto
# tok_cur_tag.attrs_a (name is the lowercased character, U+FFFD, or the raw
# character) and moving to the attribute-name state. One data-state path also
# un-reads the character so it is processed again there.
3708 tok_state_after_attribute_name = ->
3709 c = txt.charAt(cur++)
3710 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3713 tok_state = tok_state_self_closing_start_tag
3716 tok_state = tok_state_before_attribute_value
3719 tok_state = tok_state_data
3722 tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3723 tok_state = tok_state_attribute_name
3727 tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3728 tok_state = tok_state_attribute_name
3732 tok_state = tok_state_data
3733 cur -= 1 # reconsume
3735 if c is '"' or c is "'" or c is '<'
3737 # fall through to Anything else
3739 tok_cur_tag.attrs_a.unshift [c, '']
3740 tok_state = tok_state_attribute_name
3743 # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
# Tokenizer handler for the "before attribute value" state. Switches on the
# next character to choose how the value will be read: double-quoted,
# single-quoted or unquoted. Paths that go straight to the unquoted state may
# first seed the value (tok_cur_tag.attrs_a[0][1]) with U+FFFD or with the
# character itself. Two paths abandon the tag and return to the data state.
3744 tok_state_before_attribute_value = ->
3745 switch c = txt.charAt(cur++)
3746 when "\t", "\n", "\u000c", ' '
3749 tok_state = tok_state_attribute_value_double_quoted
3751 tok_state = tok_state_attribute_value_unquoted
3754 tok_state = tok_state_attribute_value_single_quoted
3757 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3758 tok_state = tok_state_attribute_value_unquoted
3761 tok_state = tok_state_data
3767 tok_state = tok_state_data
3769 tok_cur_tag.attrs_a[0][1] += c
3770 tok_state = tok_state_attribute_value_unquoted
3773 # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
# Tokenizer handler for the "attribute value (double-quoted)" state. Switches
# on the next character: it may finish the value
# (after-attribute-value-quoted), append the result of
# parse_character_reference (called with '"' as the extra allowed character
# and in_attr = true), append U+FFFD, give up and return to the data state,
# or append the character itself to tok_cur_tag.attrs_a[0][1].
3774 tok_state_attribute_value_double_quoted = ->
3775 switch c = txt.charAt(cur++)
3777 tok_state = tok_state_after_attribute_value_quoted
3779 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3782 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3785 tok_state = tok_state_data
3787 tok_cur_tag.attrs_a[0][1] += c
3790 # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
# Tokenizer handler for the "attribute value (single-quoted)" state. Mirrors
# the double-quoted handler, except that parse_character_reference is called
# with "'" as the extra allowed character. Other paths finish the value,
# append U+FFFD, return to the data state, or append the character itself to
# tok_cur_tag.attrs_a[0][1].
3791 tok_state_attribute_value_single_quoted = ->
3792 switch c = txt.charAt(cur++)
3794 tok_state = tok_state_after_attribute_value_quoted
3796 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3799 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3802 tok_state = tok_state_data
3804 tok_cur_tag.attrs_a[0][1] += c
3807 # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
# Tokenizer handler for the "attribute value (unquoted)" state. Whitespace
# ends the value and returns to before-attribute-name. Other paths append the
# result of parse_character_reference (with '>' as the extra allowed
# character and in_attr = true), return to the data state, append U+FFFD, or
# append the character itself to tok_cur_tag.attrs_a[0][1].
3808 tok_state_attribute_value_unquoted = ->
3809 switch c = txt.charAt(cur++)
3810 when "\t", "\n", "\u000c", ' '
3811 tok_state = tok_state_before_attribute_name
3813 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3815 tok_state = tok_state_data
3820 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3823 tok_state = tok_state_data
3825 # Parse Error if ", ', <, = or ` (backtick)
3826 tok_cur_tag.attrs_a[0][1] += c
3829 # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
# Tokenizer handler for the "after attribute value (quoted)" state. Whitespace
# moves to before-attribute-name; other paths hand off to
# self-closing-start-tag or data. The last path switches to
# before-attribute-name and un-reads the character so that state sees it.
3830 tok_state_after_attribute_value_quoted = ->
3831 switch c = txt.charAt(cur++)
3832 when "\t", "\n", "\u000c", ' '
3833 tok_state = tok_state_before_attribute_name
3835 tok_state = tok_state_self_closing_start_tag
3837 tok_state = tok_state_data
3843 tok_state = tok_state_data
3846 tok_state = tok_state_before_attribute_name
3847 cur -= 1 # we didn't handle that char
3850 # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
# Tokenizer handler for the "self-closing start tag" state. Reads one
# character and either: sets the 'self-closing' flag on tok_cur_tag and
# returns to the data state; returns to the data state with the character
# un-read; or goes back to before-attribute-name with the character un-read.
3851 tok_state_self_closing_start_tag = ->
3852 c = txt.charAt(cur++)
3854 tok_cur_tag.flag 'self-closing', true
3855 tok_state = tok_state_data
3859 tok_state = tok_state_data
3860 cur -= 1 # Reconsume
3864 tok_state = tok_state_before_attribute_name
3865 cur -= 1 # Reconsume
3868 # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3869 # WARNING: put a comment token in tok_cur_tag before setting this state
# Consumes input in one step rather than per character: looks for the next
# '>' at or after cur and takes either the rest of txt or the text up to that
# '>' as the comment body. NUL characters in the body are replaced with
# U+FFFD, the body is appended to tok_cur_tag.text, and the tokenizer returns
# to the data state.
3870 tok_state_bogus_comment = ->
3871 next_gt = txt.indexOf '>', cur
3873 val = txt.substr cur
3876 val = txt.substr cur, (next_gt - cur)
3878 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3879 tok_cur_tag.text += val
3880 tok_state = tok_state_data
3883 # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
# Tokenizer handler for the "markup declaration open" state. Peeks at the
# upcoming text (without a per-character read) to decide what follows:
#   - '--'                      -> new empty comment token, comment-start state
#   - 'doctype' (any case)      -> doctype state
#   - '[CDATA[' (exact case), only when the adjusted current node exists and
#     is not in the HTML namespace -> CDATA-section state
#   - otherwise                 -> new empty comment token, bogus-comment state
3884 tok_state_markup_declaration_open = ->
3885 if txt.substr(cur, 2) is '--'
3887 tok_cur_tag = new_comment_token ''
3888 tok_state = tok_state_comment_start
3890 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3892 tok_state = tok_state_doctype
3894 acn = adjusted_current_node()
3895 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3897 tok_state = tok_state_cdata_section
3901 tok_cur_tag = new_comment_token ''
3902 tok_state = tok_state_bogus_comment
3905 # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
# Tokenizer handler for the "comment start" state. Switches on the next
# character and either: moves to comment-start-dash; moves to the comment
# state while returning U+FFFD; returns to the data state (one path also
# un-reads the character); or appends the character to tok_cur_tag.text and
# moves to the comment state.
#
# NOTE(review): the U+FFFD path returns the replacement character as a
# character token rather than appending it to tok_cur_tag.text, unlike the
# other comment states (e.g. comment-start-dash appends "-\ufffd"). The spec
# section linked above appends U+FFFD to the comment's data - this looks like
# a copy/paste slip; confirm.
3906 tok_state_comment_start = ->
3907 switch c = txt.charAt(cur++)
3909 tok_state = tok_state_comment_start_dash
3912 tok_state = tok_state_comment
3913 return new_character_token "\ufffd"
3916 tok_state = tok_state_data
3920 tok_state = tok_state_data
3921 cur -= 1 # Reconsume
3924 tok_cur_tag.text += c
3925 tok_state = tok_state_comment
3928 # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
# Tokenizer handler for the "comment start dash" state. Switches on the next
# character and either: moves to comment-end; appends "-" plus U+FFFD to the
# comment text and moves to the comment state; returns to the data state (one
# path also un-reads the character); or appends "-" plus the character and
# moves to the comment state. The leading "-" restores the dash consumed by
# the previous state.
3929 tok_state_comment_start_dash = ->
3930 switch c = txt.charAt(cur++)
3932 tok_state = tok_state_comment_end
3935 tok_cur_tag.text += "-\ufffd"
3936 tok_state = tok_state_comment
3939 tok_state = tok_state_data
3943 tok_state = tok_state_data
3944 cur -= 1 # Reconsume
3947 tok_cur_tag.text += "-#{c}"
3948 tok_state = tok_state_comment
3951 # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
# Tokenizer handler for the "comment" state. Switches on the next character
# and either: moves to comment-end-dash; appends U+FFFD to tok_cur_tag.text;
# returns to the data state with the character un-read; or appends the
# character itself to the comment text.
3952 tok_state_comment = ->
3953 switch c = txt.charAt(cur++)
3955 tok_state = tok_state_comment_end_dash
3958 tok_cur_tag.text += "\ufffd"
3961 tok_state = tok_state_data
3962 cur -= 1 # Reconsume
3965 tok_cur_tag.text += c
3968 # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
# Tokenizer handler for the "comment end dash" state. Switches on the next
# character and either: moves to comment-end; appends "-" plus U+FFFD and
# returns to the comment state; returns to the data state with the character
# un-read; or appends "-" plus the character and returns to the comment
# state. The leading "-" restores the dash that led into this state.
3969 tok_state_comment_end_dash = ->
3970 switch c = txt.charAt(cur++)
3972 tok_state = tok_state_comment_end
3975 tok_cur_tag.text += "-\ufffd"
3976 tok_state = tok_state_comment
3979 tok_state = tok_state_data
3980 cur -= 1 # Reconsume
3983 tok_cur_tag.text += "-#{c}"
3984 tok_state = tok_state_comment
3987 # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
# Tokenizer handler for the "comment end" state (two dashes already seen).
# Switches on the next character and either: returns to the data state;
# appends "--" plus U+FFFD and returns to the comment state; moves to
# comment-end-bang; appends a single '-' and stays here; returns to the data
# state with the character un-read; or appends "--" plus the character and
# returns to the comment state.
3988 tok_state_comment_end = ->
3989 switch c = txt.charAt(cur++)
3991 tok_state = tok_state_data
3995 tok_cur_tag.text += "--\ufffd"
3996 tok_state = tok_state_comment
3999 tok_state = tok_state_comment_end_bang
4002 tok_cur_tag.text += '-'
4005 tok_state = tok_state_data
4006 cur -= 1 # Reconsume
4010 tok_cur_tag.text += "--#{c}"
4011 tok_state = tok_state_comment
4014 # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
# Tokenizer handler for the "comment end bang" state ("--!" already seen).
# Switches on the next character and either: appends "--!" plus the character
# and moves to comment-end-dash; returns to the data state; appends "--!"
# plus U+FFFD and returns to the comment state; returns to the data state
# with the character un-read; or appends "--!" plus the character and returns
# to the comment state.
#
# NOTE(review): the first path appends "--!#{c}" and then enters
# comment-end-dash. If that path is the '-' case (as the target state
# suggests), the spec section linked above appends only "--!" there, because
# comment-end-dash itself re-adds the dash later; as written the comment text
# appears to gain one extra '-'. Confirm against the spec.
4015 tok_state_comment_end_bang = ->
4016 switch c = txt.charAt(cur++)
4018 tok_cur_tag.text += "--!#{c}"
4019 tok_state = tok_state_comment_end_dash
4021 tok_state = tok_state_data
4025 tok_cur_tag.text += "--!\ufffd"
4026 tok_state = tok_state_comment
4029 tok_state = tok_state_data
4030 cur -= 1 # Reconsume
4033 tok_cur_tag.text += "--!#{c}"
4034 tok_state = tok_state_comment
4037 # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
# Tokenizer handler for the "DOCTYPE" state. Whitespace moves to
# before-doctype-name. One path returns to the data state, builds an
# empty-named doctype token with the 'force-quirks' flag set, and un-reads
# the character. The remaining path moves to before-doctype-name and un-reads
# the character so that state processes it.
4038 tok_state_doctype = ->
4039 switch c = txt.charAt(cur++)
4040 when "\t", "\u000a", "\u000c", ' '
4041 tok_state = tok_state_before_doctype_name
4044 tok_state = tok_state_data
4045 el = new_doctype_token ''
4046 el.flag 'force-quirks', true
4047 cur -= 1 # Reconsume
4051 tok_state = tok_state_before_doctype_name
4052 cur -= 1 # Reconsume
4055 # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
# Tokenizer handler for the "before DOCTYPE name" state. Reads one character.
# The name-starting paths create the doctype token in tok_cur_tag (seeded
# with the lowercased character, U+FFFD, or the raw character) and move to
# the doctype-name state. The two bail-out paths build an empty-named doctype
# token with 'force-quirks' set and return to the data state; the second of
# them also un-reads the character.
4056 tok_state_before_doctype_name = ->
4057 c = txt.charAt(cur++)
4058 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4061 tok_cur_tag = new_doctype_token c.toLowerCase()
4062 tok_state = tok_state_doctype_name
4066 tok_cur_tag = new_doctype_token "\ufffd"
4067 tok_state = tok_state_doctype_name
4071 el = new_doctype_token ''
4072 el.flag 'force-quirks', true
4073 tok_state = tok_state_data
4077 tok_state = tok_state_data
4078 el = new_doctype_token ''
4079 el.flag 'force-quirks', true
4080 cur -= 1 # Reconsume
4083 tok_cur_tag = new_doctype_token c
4084 tok_state = tok_state_doctype_name
4087 # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
# Tokenizer handler for the "DOCTYPE name" state. Whitespace ends the name
# (after-doctype-name). Other paths return to the data state, extend
# tok_cur_tag.name (lowercased character, U+FFFD, or raw character), or -
# on the un-read path - return to the data state with 'force-quirks' set.
4088 tok_state_doctype_name = ->
4089 c = txt.charAt(cur++)
4090 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4091 tok_state = tok_state_after_doctype_name
4094 tok_state = tok_state_data
4097 tok_cur_tag.name += c.toLowerCase()
4101 tok_cur_tag.name += "\ufffd"
4105 tok_state = tok_state_data
4106 tok_cur_tag.flag 'force-quirks', true
4107 cur -= 1 # Reconsume
4110 tok_cur_tag.name += c
4113 # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
# Tokenizer handler for the "after DOCTYPE name" state. Reads one character.
# Besides the data-state exits (one of which sets 'force-quirks' and un-reads
# the character), it checks whether the six characters starting at the one
# just read (cur - 1, since cur was already advanced) spell 'public' or
# 'system', case-insensitively, and moves to the matching keyword state.
# Anything else sets 'force-quirks' and moves to the bogus-doctype state.
4114 tok_state_after_doctype_name = ->
4115 c = txt.charAt(cur++)
4116 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4119 tok_state = tok_state_data
4123 tok_state = tok_state_data
4124 tok_cur_tag.flag 'force-quirks', true
4125 cur -= 1 # Reconsume
4128 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4130 tok_state = tok_state_after_doctype_public_keyword
4132 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4134 tok_state = tok_state_after_doctype_system_keyword
4137 tok_cur_tag.flag 'force-quirks', true
4138 tok_state = tok_state_bogus_doctype
4141 # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
# Tokenizer handler for the "after DOCTYPE public keyword" state. Whitespace
# moves to before-doctype-public-identifier. The quote paths reset
# tok_cur_tag.public_identifier to '' and enter the double- or single-quoted
# identifier state. The error paths set 'force-quirks' and either return to
# the data state (one of them un-reading the character) or fall into the
# bogus-doctype state.
4142 tok_state_after_doctype_public_keyword = ->
4143 c = txt.charAt(cur++)
4144 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4145 tok_state = tok_state_before_doctype_public_identifier
4149 tok_cur_tag.public_identifier = ''
4150 tok_state = tok_state_doctype_public_identifier_double_quoted
4154 tok_cur_tag.public_identifier = ''
4155 tok_state = tok_state_doctype_public_identifier_single_quoted
4159 tok_cur_tag.flag 'force-quirks', true
4160 tok_state = tok_state_data
4164 tok_state = tok_state_data
4165 tok_cur_tag.flag 'force-quirks', true
4166 cur -= 1 # Reconsume
4170 tok_cur_tag.flag 'force-quirks', true
4171 tok_state = tok_state_bogus_doctype
4174 # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
# Tokenizer handler for the "before DOCTYPE public identifier" state. The
# quote paths reset tok_cur_tag.public_identifier to '' and enter the double-
# or single-quoted identifier state. The error paths set 'force-quirks' and
# either return to the data state (one of them un-reading the character) or
# fall into the bogus-doctype state.
4175 tok_state_before_doctype_public_identifier = ->
4176 c = txt.charAt(cur++)
4177 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4181 tok_cur_tag.public_identifier = ''
4182 tok_state = tok_state_doctype_public_identifier_double_quoted
4186 tok_cur_tag.public_identifier = ''
4187 tok_state = tok_state_doctype_public_identifier_single_quoted
4191 tok_cur_tag.flag 'force-quirks', true
4192 tok_state = tok_state_data
4196 tok_state = tok_state_data
4197 tok_cur_tag.flag 'force-quirks', true
4198 cur -= 1 # Reconsume
4202 tok_cur_tag.flag 'force-quirks', true
4203 tok_state = tok_state_bogus_doctype
4207 # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
# Tokenizer handler for the "DOCTYPE public identifier (double-quoted)"
# state. Reads one character and either: finishes the identifier
# (after-doctype-public-identifier); appends U+FFFD; sets 'force-quirks' and
# returns to the data state (one path also un-reads the character); or
# appends the character to tok_cur_tag.public_identifier.
4208 tok_state_doctype_public_identifier_double_quoted = ->
4209 c = txt.charAt(cur++)
4211 tok_state = tok_state_after_doctype_public_identifier
4215 tok_cur_tag.public_identifier += "\ufffd"
4219 tok_cur_tag.flag 'force-quirks', true
4220 tok_state = tok_state_data
4224 tok_state = tok_state_data
4225 tok_cur_tag.flag 'force-quirks', true
4226 cur -= 1 # Reconsume
4229 tok_cur_tag.public_identifier += c
4232 # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
# Tokenizer handler for the "DOCTYPE public identifier (single-quoted)"
# state. Same visible actions as the double-quoted handler: finish the
# identifier, append U+FFFD, set 'force-quirks' and return to the data state
# (one path also un-reads the character), or append the character to
# tok_cur_tag.public_identifier.
4233 tok_state_doctype_public_identifier_single_quoted = ->
4234 c = txt.charAt(cur++)
4236 tok_state = tok_state_after_doctype_public_identifier
4240 tok_cur_tag.public_identifier += "\ufffd"
4244 tok_cur_tag.flag 'force-quirks', true
4245 tok_state = tok_state_data
4249 tok_state = tok_state_data
4250 tok_cur_tag.flag 'force-quirks', true
4251 cur -= 1 # Reconsume
4254 tok_cur_tag.public_identifier += c
4257 # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
# Tokenizer handler for the "after DOCTYPE public identifier" state.
# Whitespace moves to the between-public-and-system-identifiers state. Other
# paths return to the data state, or reset tok_cur_tag.system_identifier to ''
# and enter the double- or single-quoted system identifier state. The error
# paths set 'force-quirks' and either return to the data state with the
# character un-read or fall into the bogus-doctype state.
4258 tok_state_after_doctype_public_identifier = ->
4259 c = txt.charAt(cur++)
4260 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4261 tok_state = tok_state_between_doctype_public_and_system_identifiers
4264 tok_state = tok_state_data
4268 tok_cur_tag.system_identifier = ''
4269 tok_state = tok_state_doctype_system_identifier_double_quoted
4273 tok_cur_tag.system_identifier = ''
4274 tok_state = tok_state_doctype_system_identifier_single_quoted
4278 tok_state = tok_state_data
4279 tok_cur_tag.flag 'force-quirks', true
4280 cur -= 1 # Reconsume
4284 tok_cur_tag.flag 'force-quirks', true
4285 tok_state = tok_state_bogus_doctype
4288 # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
# Tokenizer handler for the "between DOCTYPE public and system identifiers"
# state. Reads one character and either: returns to the data state; resets
# tok_cur_tag.system_identifier to '' and enters the double- or single-quoted
# system identifier state; sets 'force-quirks' and returns to the data state
# with the character un-read; or sets 'force-quirks' and falls into the
# bogus-doctype state.
4289 tok_state_between_doctype_public_and_system_identifiers = ->
4290 c = txt.charAt(cur++)
4291 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4294 tok_state = tok_state_data
4298 tok_cur_tag.system_identifier = ''
4299 tok_state = tok_state_doctype_system_identifier_double_quoted
4303 tok_cur_tag.system_identifier = ''
4304 tok_state = tok_state_doctype_system_identifier_single_quoted
4308 tok_state = tok_state_data
4309 tok_cur_tag.flag 'force-quirks', true
4310 cur -= 1 # Reconsume
4314 tok_cur_tag.flag 'force-quirks', true
4315 tok_state = tok_state_bogus_doctype
4318 # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
# Tokenizer handler for the "after DOCTYPE system keyword" state. Whitespace
# moves to before-doctype-system-identifier. The quote paths reset
# tok_cur_tag.system_identifier to '' and enter the double- or single-quoted
# identifier state. The error paths set 'force-quirks' and either return to
# the data state (one of them un-reading the character) or fall into the
# bogus-doctype state.
4319 tok_state_after_doctype_system_keyword = ->
4320 c = txt.charAt(cur++)
4321 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4322 tok_state = tok_state_before_doctype_system_identifier
4326 tok_cur_tag.system_identifier = ''
4327 tok_state = tok_state_doctype_system_identifier_double_quoted
4331 tok_cur_tag.system_identifier = ''
4332 tok_state = tok_state_doctype_system_identifier_single_quoted
4336 tok_cur_tag.flag 'force-quirks', true
4337 tok_state = tok_state_data
4341 tok_state = tok_state_data
4342 tok_cur_tag.flag 'force-quirks', true
4343 cur -= 1 # Reconsume
4347 tok_cur_tag.flag 'force-quirks', true
4348 tok_state = tok_state_bogus_doctype
4351 # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
# Tokenizer handler for the "before DOCTYPE system identifier" state. The
# quote paths reset tok_cur_tag.system_identifier to '' and enter the double-
# or single-quoted identifier state. The error paths set 'force-quirks' and
# either return to the data state (one of them un-reading the character) or
# fall into the bogus-doctype state.
4352 tok_state_before_doctype_system_identifier = ->
4353 c = txt.charAt(cur++)
4354 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4357 tok_cur_tag.system_identifier = ''
4358 tok_state = tok_state_doctype_system_identifier_double_quoted
4361 tok_cur_tag.system_identifier = ''
4362 tok_state = tok_state_doctype_system_identifier_single_quoted
4366 tok_cur_tag.flag 'force-quirks', true
4367 tok_state = tok_state_data
4371 tok_state = tok_state_data
4372 tok_cur_tag.flag 'force-quirks', true
4373 cur -= 1 # Reconsume
4377 tok_cur_tag.flag 'force-quirks', true
4378 tok_state = tok_state_bogus_doctype
4381 # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
# Tokenizer handler for the "DOCTYPE system identifier (double-quoted)"
# state. Reads one character and either: finishes the identifier
# (after-doctype-system-identifier); appends U+FFFD; sets 'force-quirks' and
# returns to the data state (one path also un-reads the character); or
# appends the character to tok_cur_tag.system_identifier.
4382 tok_state_doctype_system_identifier_double_quoted = ->
4383 c = txt.charAt(cur++)
4385 tok_state = tok_state_after_doctype_system_identifier
4389 tok_cur_tag.system_identifier += "\ufffd"
4393 tok_cur_tag.flag 'force-quirks', true
4394 tok_state = tok_state_data
4398 tok_state = tok_state_data
4399 tok_cur_tag.flag 'force-quirks', true
4400 cur -= 1 # Reconsume
4403 tok_cur_tag.system_identifier += c
4406 # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
# Tokenizer handler for the "DOCTYPE system identifier (single-quoted)"
# state. Same visible actions as the double-quoted handler: finish the
# identifier, append U+FFFD, set 'force-quirks' and return to the data state
# (one path also un-reads the character), or append the character to
# tok_cur_tag.system_identifier.
4407 tok_state_doctype_system_identifier_single_quoted = ->
4408 c = txt.charAt(cur++)
4410 tok_state = tok_state_after_doctype_system_identifier
4414 tok_cur_tag.system_identifier += "\ufffd"
4418 tok_cur_tag.flag 'force-quirks', true
4419 tok_state = tok_state_data
4423 tok_state = tok_state_data
4424 tok_cur_tag.flag 'force-quirks', true
4425 cur -= 1 # Reconsume
4428 tok_cur_tag.system_identifier += c
4431 # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
# Tokenizer handler for the "after DOCTYPE system identifier" state. Reads
# one character and either: returns to the data state; returns to the data
# state with 'force-quirks' set and the character un-read; or moves to the
# bogus-doctype state. Unlike the other doctype states, the bogus-doctype
# path here deliberately leaves 'force-quirks' untouched.
4432 tok_state_after_doctype_system_identifier = ->
4433 c = txt.charAt(cur++)
4434 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4437 tok_state = tok_state_data
4441 tok_state = tok_state_data
4442 tok_cur_tag.flag 'force-quirks', true
4443 cur -= 1 # Reconsume
4447 # do _not_ tok_cur_tag.flag 'force-quirks', true
4448 tok_state = tok_state_bogus_doctype
4451 # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
# Tokenizer handler for the "bogus DOCTYPE" state. Reads one character; two
# paths return to the data state, the second of which also un-reads the
# character. No visible path modifies the doctype token itself.
4452 tok_state_bogus_doctype = ->
4453 c = txt.charAt(cur++)
4455 tok_state = tok_state_data
4458 tok_state = tok_state_data
4459 cur -= 1 # Reconsume
4464 # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
# Tokenizer handler for the "CDATA section" state. Works in one step: the
# state is set back to data up front, then the section body is taken as
# either the rest of txt or the text up to the next ']]>' (found with
# indexOf from cur). NUL characters are replaced with U+FFFD and the whole
# body is returned as a single character token.
# (next_gt is a slight misnomer here: it holds the index of ']]>', not '>'.)
4465 tok_state_cdata_section = ->
4466 tok_state = tok_state_data
4467 next_gt = txt.indexOf ']]>', cur
4469 val = txt.substr cur
4472 val = txt.substr cur, (next_gt - cur)
4474 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4476 return new_character_token val # fixfull split
4479 # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4480 # Don't set this as a state, just call it
4481 # returns a string (NOT a text node)
#
# Parameters:
#   allowed_char - optional extra character (default null) that, like
#                  whitespace, '<', '&' and end of input, means "this is not
#                  a character reference"; attribute-value states pass their
#                  closing delimiter here.
#   in_attr      - default false; attribute-value states pass true.
#                  Presumably enables the attribute-only exceptions in the
#                  legacy (no ';') branch below - confirm.
#
# The function peeks at txt.charAt(cur) without advancing. The visible code
# covers three kinds of reference:
#   - numeric: an 'x' (any case) after the first character selects hex;
#     digits are scanned against charset, leading zeros stripped, and the
#     value parsed with parseInt(…, base). unicode_fixes overrides are applied
#     first, then surrogate / > 0x10FFFF values and control / noncharacter
#     code points are checked before from_code_point builds the result.
#   - named with ';': alphanumerics are scanned and the name (including ';')
#     is passed to decode_named_char_ref.
#   - legacy named without ';': candidates of increasing length are looked up
#     in legacy_char_refs, with special handling when the next character is
#     '=' or alphanumeric.
4482 parse_character_reference = (allowed_char = null, in_attr = false) ->
4483 if cur >= txt.length
4485 switch c = txt.charAt(cur)
4486 when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4487 # explicitly not a parse error
4490 # there has to be "one or more" alnums between & and ; to be a parse error
4493 if cur + 1 >= txt.length
4495 if txt.charAt(cur + 1).toLowerCase() is 'x'
4504 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4509 if txt.charAt(start + i) is ';'
4513 code_point = txt.substr(start, i)
4514 while code_point.charAt(0) is '0' and code_point.length > 1
4515 code_point = code_point.substr 1
4516 code_point = parseInt(code_point, base)
4517 if unicode_fixes[code_point]?
4519 return unicode_fixes[code_point]
4521 if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4525 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4527 return from_code_point code_point
4531 if alnum.indexOf(txt.charAt(cur + i)) is -1
4534 # exit early, because parse_error() below needs at least one alnum
4536 if txt.charAt(cur + i) is ';'
4537 i += 1 # include ';' terminator in value
4538 decoded = decode_named_char_ref txt.substr(cur, i)
4545 # no ';' terminator (only legacy char refs)
4547 for i in [2..max] # no prefix matches, so ok to check shortest first
4548 c = legacy_char_refs[txt.substr(cur, i)]
4551 if txt.charAt(cur + i) is '='
4552 # "because some legacy user agents will
4553 # misinterpret the markup in those cases"
4556 if alnum.indexOf(txt.charAt(cur + i)) > -1
4557 # this makes attributes forgiving about url args
4559 # ok, and besides the weird exceptions for attributes...
4560 # return the matching char
4561 cur += i # consume entity chars
4562 parse_error() # because no terminating ";"
4566 return # never reached
# Inspects the next token (t) and tests whether it is a text token holding a
# newline. The test differs by how the token was produced: when exactly one
# input character was consumed (cur - old_cur is 1) the token came straight
# from the input and either CR or LF counts; otherwise it came from a
# character reference and only LF counts.
# NOTE(review): what happens after the test (dropping the token vs. rewinding
# cur) is not visible in this listing - presumably a newline is swallowed and
# anything else is un-read, as the name suggests; confirm.
4568 eat_next_token_if_newline = ->
4573 if t.type is TYPE_TEXT
4574 # definition of a newline depends on whether it was a character ref or not
4575 if cur - old_cur is 1
4576 # not a character reference
4577 if t.text is "\u000d" or t.text is "\u000a"
4580 if t.text is "\u000a"
4586 # tree constructor initialization
4587 # see comments on TYPE_TAG/etc for the structure of this data
# Everything from here to parse_main_loop sets up the parser state shared by
# the tokenizer and tree-construction code: first the defaults for a
# whole-document parse, then (when a fragment context is supplied) the
# overrides required by the fragment parsing algorithm, and finally newline
# normalization of the input text.
4590 doc = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4591 doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
4592 fragment_root = null # fragment parsing algorithm returns children of this
4594 afe = [] # active formatting elements
4595 template_ins_modes = []
4596 ins_mode = ins_mode_initial
4597 original_ins_mode = ins_mode # TODO check spec
4598 flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4599 flag_frameset_ok = true
4601 flag_foster_parenting = false
4602 form_element_pointer = null
4603 temporary_buffer = null
4604 pending_table_character_tokens = []
4605 head_element_pointer = null
4606 flag_fragment_parsing = false
4607 context_element = null
4608 prev_node_id = 0 # just for debugging
4610 # tokenizer initialization
4611 tok_state = tok_state_data
4614 # fragment parsing (text arg)
4616 # this handles the fragment from the tests in the format described here:
4617 # https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
# a "math " or "svg " prefix on the context string selects a foreign
# namespace for the context element (presumably MathML / SVG - confirm)
4620 if f.substr(0, 5) is 'math '
4623 else if f.substr(0, 4) is 'svg '
4627 context_element = token_to_element t, ns
4628 context_element.document = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4629 context_element.document.flag 'quirks mode', QUIRKS_NO
4630 # fragment parsing (Node arg)
4632 context_element = args.context
4634 # http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4635 # fragment parsing algorithm
4637 flag_fragment_parsing = true
4638 doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4639 # search up the tree from context, to try to find its document,
4640 # because this file only puts a "document" property on the root
4643 el = context_element
4646 old_doc = el.document
# the new document inherits the quirks mode of the context's document
4653 doc.flag 'quirks mode', old_doc.flag 'quirks mode'
# the tokenizer's starting state depends on the (HTML) context element
4655 if context_element.namespace is NS_HTML
4656 switch context_element.name
4657 when 'title', 'textarea'
4658 tok_state = tok_state_rcdata
4659 when 'style', 'xmp', 'iframe', 'noembed', 'noframes'
4660 tok_state = tok_state_rawtext
4662 tok_state = tok_state_script_data
4665 tok_state = tok_state_rawtext
4667 tok_state = tok_state_plaintext
# a synthetic <html> root becomes the only open element; its children are
# what a fragment parse returns
4668 fragment_root = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4669 doc.children.push fragment_root
4670 fragment_root.document = doc
4671 open_els = [fragment_root]
4672 if context_element.name is 'template' and context_element.namespace is NS_HTML
4673 template_ins_modes.unshift ins_mode_in_template
4674 # fixfull create token for context (it should have its original one already)
4676 # set form_element pointer... in the foreign doc?!
4677 el = context_element
4679 if el.name is 'form' and el.namespace is NS_HTML
4680 form_element_pointer = el
4687 # text pre-processing
4688 # FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
# normalize newlines: CRLF pairs first, then any remaining lone CR, become LF
4689 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4690 txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4694 # http://www.w3.org/TR/html5/syntax.html#tree-construction
# Driver for tokenizing and tree construction. The loop body is not visible
# in this listing - presumably it repeatedly pulls a token from tok_state and
# dispatches it to the current insertion mode; confirm.
4695 parse_main_loop = ->
4700 # fixfull parse error if has self-closing flag, but it wasn't acknowledged
# a fragment parse hands back the children of the synthetic <html> root
4705 if flag_fragment_parsing
4706 return fragment_root.children
# Public API: the parser entry point, the debug-log helpers, and the
# node-type, namespace and quirks-mode constants, attached to
# module.exports (the fallback object set up at the top of the file when no
# CommonJS module is present).
4709 module.exports.parse_html = parse_html
4710 module.exports.debug_log_reset = debug_log_reset
4711 module.exports.debug_log_each = debug_log_each
4712 module.exports.TYPE_TAG = TYPE_TAG
4713 module.exports.TYPE_TEXT = TYPE_TEXT
4714 module.exports.TYPE_COMMENT = TYPE_COMMENT
4715 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4716 module.exports.NS_HTML = NS_HTML
4717 module.exports.NS_MATHML = NS_MATHML
4718 module.exports.NS_SVG = NS_SVG
4719 module.exports.QUIRKS_NO = QUIRKS_NO
4720 module.exports.QUIRKS_LIMITED = QUIRKS_LIMITED
4721 module.exports.QUIRKS_YES = QUIRKS_YES