1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 # This file implements a parser for html snippets, meant to be used by a
21 # The implementation is a pretty direct implementation of the parsing algorithm
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
25 # Deviations from that spec:
27 # Purposeful: search this file for "WHATWG"
29 # Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
34 # the spec uses many different words to indicate which ends of lists/stacks
35 # they are talking about (and relative movement within the lists/stacks). This
36 # section explains. I'm implementing "lists" (afe and open_els) the same way
39 # stacks grow downward (current element is index=0)
41 # example: open_els = [a, b, c, d, e, f, g]
43 # "grows downwards" means it's visualized like this: (index: el, names)
45 # 6: g "start of the list", "topmost", "first"
47 # 4: e "previous" (to d), "above", "before"
48 # 3: d (previous/next are relative to this element)
49 # 2: c "next", "after", "lower", "below"
51 # 0: a "end of the list", "current node", "bottommost", "last"
55 # note: to get this to run outside a browser, you'll have to write a native
56 # implementation of decode_named_char_ref()
57 unless module?.exports?
59 module = exports: window.wheic
61 from_code_point = (x) ->
62 if String.fromCodePoint?
63 return String.fromCodePoint x
66 return String.fromCharCode x
68 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
70 # Each node is an object of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
75 # the following types are emitted by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
92 debug_log_each = (cb) ->
93 for str in g_debug_log
98 constructor: (type, args = {}) ->
99 @type = type # one of the TYPE_* constants above
100 @name = args.name ? '' # tag name
101 @text = args.text ? '' # contents for text/comment nodes
102 @attrs = args.attrs ? {}
103 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
104 @children = args.children ? []
105 @namespace = args.namespace ? NS_HTML
106 @parent = args.parent ? null
107 @token = args.token ? null
108 @flags = args.flags ? {}
112 @id = "#{++prev_node_id}"
113 acknowledge_self_closing: ->
115 @token.flag 'did_self_close', true
117 @flag 'did_self_close', true
118 flag: (key, value = null) ->
123 serialize: (shallow = false, show_ids = false) -> # for unit tests
128 ret += JSON.stringify @name
143 ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
149 ret += c.serialize shallow, show_ids
153 ret += JSON.stringify @text
156 ret += JSON.stringify @text
158 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
161 when TYPE_AAA_BOOKMARK
162 ret += 'aaa_bookmark'
165 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
168 # helpers: (only take args that are normally known when parser creates nodes)
# Build a start-tag token: a Node of type TYPE_START_TAG carrying the tag name.
new_open_tag = (name) -> new Node TYPE_START_TAG, {name}
# Build an end-tag token: a Node of type TYPE_END_TAG carrying the tag name.
new_end_tag = (name) -> new Node TYPE_END_TAG, {name}
# Build an element node: a Node of type TYPE_TAG with the given tag name.
new_element = (name) -> new Node TYPE_TAG, {name}
# Build a text node: a Node of type TYPE_TEXT whose text is txt.
new_text_node = (txt) -> new Node TYPE_TEXT, text: txt
# Character tokens are created by the very same function as text nodes.
new_character_token = new_text_node
# Build a comment token: a Node of type TYPE_COMMENT whose text is txt.
new_comment_token = (txt) -> new Node TYPE_COMMENT, text: txt
# Build a doctype token: a Node of type TYPE_DOCTYPE with the given name.
new_doctype_token = (name) -> new Node TYPE_DOCTYPE, {name}
183 return new Node TYPE_EOF
185 return new Node TYPE_AFE_MARKER
# Build a placeholder Node of type TYPE_AAA_BOOKMARK (used by the adoption
# agency algorithm to remember a position in the afe list).
new_aaa_bookmark = -> new Node TYPE_AAA_BOOKMARK
# Character-class tables used by the tokenizer helpers.
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = lc_alpha + uc_alpha + digits
hex_chars = digits + "abcdefABCDEF"

# True only when str is exactly one character and that character is A-Z.
is_uc_alpha = (str) -> str.length is 1 and uc_alpha.indexOf(str) >= 0

# True only when str is exactly one character and that character is a-z.
is_lc_alpha = (str) -> str.length is 1 and lc_alpha.indexOf(str) >= 0
200 # some SVG elements have dashes in them
201 tag_name_chars = alnum + "-"
203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
204 space_chars = "\u0009\u000a\u000c\u000d\u0020"
206 return txt.length is 1 and space_chars.indexOf(txt) > -1
# True when token t is a TYPE_TEXT node whose text is a single character
# found in space_chars.
is_space_tok = (t) -> t.type is TYPE_TEXT and t.text.length is 1 and space_chars.indexOf(t.text) >= 0
210 is_input_hidden_tok = (t) ->
211 return false unless t.type is TYPE_START_TAG
214 if a[1].toLowerCase() is 'hidden'
219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
223 unicode_fixes[0x00] = "\uFFFD"
224 unicode_fixes[0x80] = "\u20AC"
225 unicode_fixes[0x82] = "\u201A"
226 unicode_fixes[0x83] = "\u0192"
227 unicode_fixes[0x84] = "\u201E"
228 unicode_fixes[0x85] = "\u2026"
229 unicode_fixes[0x86] = "\u2020"
230 unicode_fixes[0x87] = "\u2021"
231 unicode_fixes[0x88] = "\u02C6"
232 unicode_fixes[0x89] = "\u2030"
233 unicode_fixes[0x8A] = "\u0160"
234 unicode_fixes[0x8B] = "\u2039"
235 unicode_fixes[0x8C] = "\u0152"
236 unicode_fixes[0x8E] = "\u017D"
237 unicode_fixes[0x91] = "\u2018"
238 unicode_fixes[0x92] = "\u2019"
239 unicode_fixes[0x93] = "\u201C"
240 unicode_fixes[0x94] = "\u201D"
241 unicode_fixes[0x95] = "\u2022"
242 unicode_fixes[0x96] = "\u2013"
243 unicode_fixes[0x97] = "\u2014"
244 unicode_fixes[0x98] = "\u02DC"
245 unicode_fixes[0x99] = "\u2122"
246 unicode_fixes[0x9A] = "\u0161"
247 unicode_fixes[0x9B] = "\u203A"
248 unicode_fixes[0x9C] = "\u0153"
249 unicode_fixes[0x9E] = "\u017E"
250 unicode_fixes[0x9F] = "\u0178"
252 # These are the character references that don't need a terminating semicolon
253 # min length: 2, max: 6, none are a prefix of any other.
255 Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
256 aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
257 aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
258 Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
259 curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
260 ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
261 euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
262 Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
263 igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
264 lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
265 Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
266 Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
267 Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
268 pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
269 shy: '', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
270 times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
271 ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
276 raw_text_elements = ['script', 'style']
277 escapable_raw_text_elements = ['textarea', 'title']
278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
280 'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
281 'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
282 'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
283 'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
284 'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
285 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
286 'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
287 'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
288 'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
289 'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
290 'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
291 'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
292 'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
293 'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
299 'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
300 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
301 'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
302 'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
303 'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
304 'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
305 'determinant', 'diff', 'divergence', 'divide', 'domain',
306 'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
307 'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
308 'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
309 'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
310 'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
311 'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
312 'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
313 'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
314 'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
315 'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
316 'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
317 'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
318 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
319 'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
320 'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
321 'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
322 'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
323 'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
324 'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
325 'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
326 'vectorproduct', 'xor'
328 # foreign_elements = [svg_elements..., mathml_elements...]
329 #normal_elements = All other allowed HTML elements are normal elements.
333 address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
334 aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
335 blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
336 caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
337 details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
338 embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
339 footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
340 h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
341 header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
342 img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
343 listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
345 menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
347 meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
348 noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
349 plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
350 select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
351 table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
352 textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
353 tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
356 mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
357 'annotation-xml':NS_MATHML,
360 foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
363 formatting_elements = {
364 a: true, b: true, big: true, code: true, em: true, font: true, i: true,
365 nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
369 mathml_text_integration = {
370 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
# True when el's name maps, in the mathml_text_integration table, to the very
# namespace el is in.
is_mathml_text_integration_point = (el) -> el.namespace is mathml_text_integration[el.name]
374 is_html_integration = (el) -> # DON'T PASS A TOKEN
375 if el.namespace is NS_MATHML
376 if el.name is 'annotation-xml'
377 if el.attrs.encoding?
378 if el.attrs.encoding.toLowerCase() is 'text/html'
380 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
383 if el.namespace is NS_SVG
384 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
389 h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
392 foster_parenting_targets = {
# True when the special_elements table lists e's name under e's own namespace.
el_is_special = (e) -> e.namespace is special_elements[e.name]
# The three special elements (address, div, p) that el_is_special_not_adp excludes.
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
# True when el is listed in special_elements under its own namespace and is
# not one of the adp_els entries for that namespace.
el_is_special_not_adp = (el) -> el_is_special(el) and adp_els[el.name] isnt el.namespace
422 altglyphdef: 'altGlyphDef'
423 altglyphitem: 'altGlyphItem'
424 animatecolor: 'animateColor'
425 animatemotion: 'animateMotion'
426 animatetransform: 'animateTransform'
429 fecolormatrix: 'feColorMatrix'
430 fecomponenttransfer: 'feComponentTransfer'
431 fecomposite: 'feComposite'
432 feconvolvematrix: 'feConvolveMatrix'
433 fediffuselighting: 'feDiffuseLighting'
434 fedisplacementmap: 'feDisplacementMap'
435 fedistantlight: 'feDistantLight'
436 fedropshadow: 'feDropShadow'
442 fegaussianblur: 'feGaussianBlur'
445 femergenode: 'feMergeNode'
446 femorphology: 'feMorphology'
448 fepointlight: 'fePointLight'
449 fespecularlighting: 'feSpecularLighting'
450 fespotlight: 'feSpotLight'
452 feturbulence: 'feTurbulence'
453 foreignobject: 'foreignObject'
455 lineargradient: 'linearGradient'
456 radialgradient: 'radialGradient'
459 svg_attribute_fixes = {
460 attributename: 'attributeName'
461 attributetype: 'attributeType'
462 basefrequency: 'baseFrequency'
463 baseprofile: 'baseProfile'
465 clippathunits: 'clipPathUnits'
466 contentscripttype: 'contentScriptType'
467 contentstyletype: 'contentStyleType'
468 diffuseconstant: 'diffuseConstant'
470 externalresourcesrequired: 'externalResourcesRequired'
471 # WHATWG removes this: filterres: 'filterRes'
472 filterunits: 'filterUnits'
474 gradienttransform: 'gradientTransform'
475 gradientunits: 'gradientUnits'
476 kernelmatrix: 'kernelMatrix'
477 kernelunitlength: 'kernelUnitLength'
478 keypoints: 'keyPoints'
479 keysplines: 'keySplines'
481 lengthadjust: 'lengthAdjust'
482 limitingconeangle: 'limitingConeAngle'
483 markerheight: 'markerHeight'
484 markerunits: 'markerUnits'
485 markerwidth: 'markerWidth'
486 maskcontentunits: 'maskContentUnits'
487 maskunits: 'maskUnits'
488 numoctaves: 'numOctaves'
489 pathlength: 'pathLength'
490 patterncontentunits: 'patternContentUnits'
491 patterntransform: 'patternTransform'
492 patternunits: 'patternUnits'
493 pointsatx: 'pointsAtX'
494 pointsaty: 'pointsAtY'
495 pointsatz: 'pointsAtZ'
496 preservealpha: 'preserveAlpha'
497 preserveaspectratio: 'preserveAspectRatio'
498 primitiveunits: 'primitiveUnits'
501 repeatcount: 'repeatCount'
502 repeatdur: 'repeatDur'
503 requiredextensions: 'requiredExtensions'
504 requiredfeatures: 'requiredFeatures'
505 specularconstant: 'specularConstant'
506 specularexponent: 'specularExponent'
507 spreadmethod: 'spreadMethod'
508 startoffset: 'startOffset'
509 stddeviation: 'stdDeviation'
510 stitchtiles: 'stitchTiles'
511 surfacescale: 'surfaceScale'
512 systemlanguage: 'systemLanguage'
513 tablevalues: 'tableValues'
516 textlength: 'textLength'
518 viewtarget: 'viewTarget'
519 xchannelselector: 'xChannelSelector'
520 ychannelselector: 'yChannelSelector'
521 zoomandpan: 'zoomAndPan'
523 foreign_attr_fixes = {
524 'xlink:actuate': 'xlink actuate'
525 'xlink:arcrole': 'xlink arcrole'
526 'xlink:href': 'xlink href'
527 'xlink:role': 'xlink role'
528 'xlink:show': 'xlink show'
529 'xlink:title': 'xlink title'
530 'xlink:type': 'xlink type'
531 'xml:base': 'xml base'
532 'xml:lang': 'xml lang'
533 'xml:space': 'xml space'
535 'xmlns:xlink': 'xmlns xlink'
537 adjust_mathml_attributes = (t) ->
539 if a[0] is 'definitionurl'
540 a[0] = 'definitionURL'
542 adjust_svg_attributes = (t) ->
544 if svg_attribute_fixes[a[0]]?
545 a[0] = svg_attribute_fixes[a[0]]
547 adjust_foreign_attributes = (t) ->
550 if foreign_attr_fixes[a[0]]?
551 a[0] = foreign_attr_fixes[a[0]]
554 # decode_named_char_ref()
556 # The list of named character references is _huge_ so ask the browser to decode
557 # for us instead of wasting bandwidth/space on including the table here.
559 # Pass without the "&" but with the ";" examples:
560 # for "&" pass "amp;"
561 # for "′" pass "x2032;"
564 textarea: document.createElement('textarea')
566 # TODO test this in IE8
567 decode_named_char_ref = (txt) ->
569 decoded = g_dncr.cache[txt]
570 return decoded if decoded?
571 g_dncr.textarea.innerHTML = txt
572 decoded = g_dncr.textarea.value
573 return null if decoded is txt
574 return g_dncr.cache[txt] = decoded
576 parse_html = (args) ->
578 cur = null # index of next char in txt to be parsed
579 # declare doc and tokenizer variables so they're in scope below
581 open_els = null # stack of open elements
582 afe = null # active formatting elements
583 template_ins_modes = null
585 original_ins_mode = null
587 tok_cur_tag = null # partially parsed tag
588 flag_scripting = null
589 flag_frameset_ok = null
591 flag_foster_parenting = null
592 form_element_pointer = null
593 temporary_buffer = null
594 pending_table_character_tokens = null
595 head_element_pointer = null
596 flag_fragment_parsing = null
597 context_element = null
606 console.log "Parse error at character #{cur} of #{txt.length}"
608 afe_push = (new_el) ->
611 if el.name is new_el.name and el.namespace is new_el.namespace
613 continue unless new_el.attrs[k] is v
614 for k, v of new_el.attrs
615 continue unless el.attrs[k] is v
622 afe.unshift new_afe_marker()
624 # the functions below implement the Tree Construction algorithm
625 # http://www.w3.org/TR/html5/syntax.html#tree-construction
627 # But first... the helpers
628 template_tag_is_open = ->
630 if t.name is 'template' and t.namespace is NS_HTML
633 is_in_scope_x = (tag_name, scope, namespace) ->
635 if t.name is tag_name and (namespace is null or namespace is t.namespace)
637 if scope[t.name] is t.namespace
640 is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
642 if t.name is tag_name and (namespace is null or namespace is t.namespace)
644 if scope[t.name] is t.namespace
646 if scope2[t.name] is t.namespace
650 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
651 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
654 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
655 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
657 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
659 button_scopers = button: NS_HTML
660 li_scopers = ol: NS_HTML, ul: NS_HTML
661 table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
# Scope test that passes standard_scopers as the scope table to is_in_scope_x.
is_in_scope = (tag_name, namespace = null) -> is_in_scope_x tag_name, standard_scopers, namespace
# Scope test that passes standard_scopers and button_scopers as the two scope
# tables to is_in_scope_x_y.
is_in_button_scope = (tag_name, namespace = null) -> is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
# Scope test that passes table_scopers as the scope table to is_in_scope_x.
is_in_table_scope = (tag_name, namespace = null) -> is_in_scope_x tag_name, table_scopers, namespace
# aka is_in_list_item_scope: scope test that passes standard_scopers and
# li_scopers as the two scope tables to is_in_scope_x_y.
is_in_li_scope = (tag_name, namespace = null) -> is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
671 is_in_select_scope = (tag_name, namespace = null) ->
673 if t.name is tag_name and (namespace is null or namespace is t.namespace)
675 if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
678 # this checks for a particular element, not by name
679 # this requires a namespace match
680 el_is_in_scope = (needle) ->
684 if standard_scopers[el.name] is el.namespace
688 clear_to_table_stopers = {
693 clear_stack_to_table_context = ->
695 if clear_to_table_stopers[open_els[0].name]?
699 clear_to_table_body_stopers = {
706 clear_stack_to_table_body_context = ->
708 if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
712 clear_to_table_row_stopers = {
717 clear_stack_to_table_row_context = ->
719 if clear_to_table_row_stopers[open_els[0].name]?
723 clear_afe_to_marker = ->
725 return unless afe.length > 0 # this happens in fragment case, ?spec error
727 if el.type is TYPE_AFE_MARKER
732 # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
734 # 1. Let last be false.
736 # 2. Let node be the last node in the stack of open elements.
738 node = open_els[node_i]
739 # 3. Loop: If node is the first node in the stack of open elements,
740 # then set last to true, and, if the parser was originally created as
741 # part of the HTML fragment parsing algorithm (fragment case) set node
742 # to the context element.
744 if node_i is open_els.length - 1
746 # fixfull (fragment case)
748 # 4. If node is a select element, run these substeps:
749 if node.name is 'select' and node.namespace is NS_HTML
750 # 1. If last is true, jump to the step below labeled done.
752 # 2. Let ancestor be node.
755 # 3. Loop: If ancestor is the first node in the stack of
756 # open elements, jump to the step below labeled done.
758 if ancestor_i is open_els.length - 1
760 # 4. Let ancestor be the node before ancestor in the stack
763 ancestor = open_els[ancestor_i]
764 # 5. If ancestor is a template node, jump to the step below
766 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
768 # 6. If ancestor is a table node, switch the insertion mode
769 # to "in select in table" and abort these steps.
770 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
771 ins_mode = ins_mode_in_select_in_table
773 # 7. Jump back to the step labeled loop.
774 # 8. Done: Switch the insertion mode to "in select" and abort
776 ins_mode = ins_mode_in_select
778 # 5. If node is a td or th element and last is false, then switch
779 # the insertion mode to "in cell" and abort these steps.
780 if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
781 ins_mode = ins_mode_in_cell
783 # 6. If node is a tr element, then switch the insertion mode to "in
784 # row" and abort these steps.
785 if node.name is 'tr' and node.namespace is NS_HTML
786 ins_mode = ins_mode_in_row
788 # 7. If node is a tbody, thead, or tfoot element, then switch the
789 # insertion mode to "in table body" and abort these steps.
790 if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
791 ins_mode = ins_mode_in_table_body
793 # 8. If node is a caption element, then switch the insertion mode
794 # to "in caption" and abort these steps.
795 if node.name is 'caption' and node.namespace is NS_HTML
796 ins_mode = ins_mode_in_caption
798 # 9. If node is a colgroup element, then switch the insertion mode
799 # to "in column group" and abort these steps.
800 if node.name is 'colgroup' and node.namespace is NS_HTML
801 ins_mode = ins_mode_in_column_group
803 # 10. If node is a table element, then switch the insertion mode to
804 # "in table" and abort these steps.
805 if node.name is 'table' and node.namespace is NS_HTML
806 ins_mode = ins_mode_in_table
808 # 11. If node is a template element, then switch the insertion mode
809 # to the current template insertion mode and abort these steps.
810 if node.name is 'template' and node.namespace is NS_HTML
811 ins_mode = template_ins_modes[0]
813 # 12. If node is a head element and last is true, then switch the
814 # insertion mode to "in body" ("in body"! not "in head"!) and abort
815 # these steps. (fragment case)
816 if node.name is 'head' and node.namespace is NS_HTML and last
817 ins_mode = ins_mode_in_body
819 # 13. If node is a head element and last is false, then switch the
820 # insertion mode to "in head" and abort these steps.
821 if node.name is 'head' and node.namespace is NS_HTML and last is false
822 ins_mode = ins_mode_in_head
824 # 14. If node is a body element, then switch the insertion mode to
825 # "in body" and abort these steps.
826 if node.name is 'body' and node.namespace is NS_HTML
827 ins_mode = ins_mode_in_body
829 # 15. If node is a frameset element, then switch the insertion mode
830 # to "in frameset" and abort these steps. (fragment case)
831 if node.name is 'frameset' and node.namespace is NS_HTML
832 ins_mode = ins_mode_in_frameset
834 # 16. If node is an html element, run these substeps:
835 if node.name is 'html' and node.namespace is NS_HTML
836 # 1. If the head element pointer is null, switch the insertion
837 # mode to "before head" and abort these steps. (fragment case)
838 if head_element_pointer is null
839 ins_mode = ins_mode_before_head
841 # 2. Otherwise, the head element pointer is not null,
842 # switch the insertion mode to "after head" and abort these
844 ins_mode = ins_mode_after_head
846 # 17. If last is true, then switch the insertion mode to "in body"
847 # and abort these steps. (fragment case)
849 ins_mode = ins_mode_in_body
851 # 18. Let node now be the node before node in the stack of open
854 node = open_els[node_i]
855 # 19. Return to the step labeled loop.
859 # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
860 adjusted_current_node = ->
861 if open_els.length is 1 and flag_fragment_parsing
862 return context_element
865 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
866 # this implementation is structured (mostly) as described at the link above.
867 # capitalized comments are the "labels" described at the link above.
869 return if afe.length is 0
870 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
875 if i is afe.length - 1
878 if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
883 el = insert_html_element afe[i].token
888 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
889 # adoption agency algorithm
891 # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
892 # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
893 # http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
894 adoption_agency = (subject) ->
895 debug_log "adoption_agency()"
896 debug_log "tree: #{serialize_els doc.children, false, true}"
897 debug_log "open_els: #{serialize_els open_els, true, true}"
898 debug_log "afe: #{serialize_els afe, true, true}"
899 # this block implements the W3C spec
900 # # 1. If the current node is an HTML element whose tag name is subject,
901 # # then run these substeps:
903 # # 1. Let element be the current node.
905 # # 2. Pop element off the stack of open elements.
907 # # 3. If element is also in the list of active formatting elements,
908 # # remove the element from the list.
910 # # 4. Abort the adoption agency algorithm.
911 # if open_els[0].name is subject and open_els[0].namespace is NS_HTML
912 # el = open_els.shift()
913 # # remove it from the list of active formatting elements (if found)
918 # debug_log "aaa: starting off with subject on top of stack, exiting"
920 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
921 # If the current node is an HTML element whose tag name is subject, and
922 # the current node is not in the list of active formatting elements,
923 # then pop the current node off the stack of open elements, and abort
925 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
926 debug_log "aaa: starting off with subject on top of stack, exiting"
927 # remove it from the list of active formatting elements (if found)
934 debug_log "aaa: ...and not in afe, aaa done"
944 # 5. Let formatting element be the last element in the list of
945 # active formatting elements that: is between the end of the list
946 # and the last scope marker in the list, if any, or the start of
947 # the list otherwise, and has the tag name subject.
949 for t, fe_of_afe in afe
950 if t.type is TYPE_AFE_MARKER
955 # If there is no such element, then abort these steps and instead
956 # act as described in the "any other end tag" entry above.
958 debug_log "aaa: fe not found in afe"
959 in_body_any_other_end_tag subject
961 # 6. If formatting element is not in the stack of open elements,
962 # then this is a parse error; remove the element from the list, and
965 for t, fe_of_open_els in open_els
970 debug_log "aaa: fe not found in open_els"
972 # "remove it from the list" must mean afe, since it's not in open_els
973 afe.splice fe_of_afe, 1
975 # 7. If formatting element is in the stack of open elements, but
976 # the element is not in scope, then this is a parse error; abort
978 unless el_is_in_scope fe
979 debug_log "aaa: fe not in scope"
982 # 8. If formatting element is not the current node, this is a parse
983 # error. (But do not abort these steps.)
984 unless open_els[0] is fe
987 # 9. Let furthest block be the topmost node in the stack of open
988 # elements that is lower in the stack than formatting element, and
989 # is an element in the special category. There might not be one.
991 fb_of_open_els = null
998 # and continue, to see if there's one that's more "topmost"
999 # 10. If there is no furthest block, then the UA must first pop all
1000 # the nodes from the bottom of the stack of open elements, from the
1001 # current node up to and including formatting element, then remove
1002 # formatting element from the list of active formatting elements,
1003 # and finally abort these steps.
1005 debug_log "aaa: no fb"
1007 t = open_els.shift()
1009 afe.splice fe_of_afe, 1
1011 # 11. Let common ancestor be the element immediately above
1012 # formatting element in the stack of open elements.
1013 ca = open_els[fe_of_open_els + 1] # common ancestor
1015 node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1016 # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1017 bookmark = new_aaa_bookmark()
1020 afe.splice i, 0, bookmark
1022 node = last_node = fb
1026 # 3. Let node be the element immediately above node in the
1027 # stack of open elements, or if node is no longer in the stack
1028 # of open elements (e.g. because it got removed by this
1029 # algorithm), the element that was immediately above node in
1030 # the stack of open elements before node was removed.
1032 for t, i in open_els
1034 node_next = open_els[i + 1]
1036 node = node_next ? node_above
1037 debug_log "inner loop #{inner}"
1038 debug_log "tree: #{serialize_els doc.children, false, true}"
1039 debug_log "open_els: #{serialize_els open_els, true, true}"
1040 debug_log "afe: #{serialize_els afe, true, true}"
1041 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1042 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1043 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1044 debug_log "node: #{node.serialize true, true}"
1045 # TODO make sure node_above gets re-set if/when node is removed from open_els
1047 # 4. If node is formatting element, then go to the next step in
1048 # the overall algorithm.
1051 debug_log "the meat"
1052 # 5. If inner loop counter is greater than three and node is in
1053 # the list of active formatting elements, then remove node from
1054 # the list of active formatting elements.
1060 debug_log "max out inner"
1065 # 6. If node is not in the list of active formatting elements,
1066 # then remove node from the stack of open elements and then go
1067 # back to the step labeled inner loop.
1069 debug_log "not in afe"
1070 for t, i in open_els
1072 node_above = open_els[i + 1]
1073 open_els.splice i, 1
1076 debug_log "the bones"
1077 # 7. create an element for the token for which the element node
1078 # was created, in the HTML namespace, with common ancestor as
1079 # the intended parent; replace the entry for node in the list
1080 # of active formatting elements with an entry for the new
1081 # element, replace the entry for node in the stack of open
1082 # elements with an entry for the new element, and let node be
1084 new_node = token_to_element node.token, NS_HTML, ca
1088 debug_log "replaced in afe"
1090 for t, i in open_els
1092 node_above = open_els[i + 1]
1093 open_els[i] = new_node
1094 debug_log "replaced in open_els"
1097 # 8. If last node is furthest block, then move the
1098 # aforementioned bookmark to be immediately after the new node
1099 # in the list of active formatting elements.
1104 debug_log "removed bookmark"
1108 # "after" means lower
1109 afe.splice i, 0, bookmark # "after as <-
1110 debug_log "placed bookmark after node"
1111 debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1113 # 9. Insert last node into node, first removing it from its
1114 # previous parent node if any.
1115 if last_node.parent?
1116 debug_log "last_node has parent"
1117 for c, i in last_node.parent.children
1119 debug_log "removing last_node from parent"
1120 last_node.parent.children.splice i, 1
1122 node.children.push last_node
1123 last_node.parent = node
1124 # 10. Let last node be node.
1127 # 11. Return to the step labeled inner loop.
1128 # 14. Insert whatever last node ended up being in the previous step
1129 # at the appropriate place for inserting a node, but using common
1130 # ancestor as the override target.
1132 # In the case where fe is immediately followed by fb:
1133 # * inner loop exits out early (node==fe)
1135 # * last_node is still in the tree (not a duplicate)
1136 if last_node.parent?
1137 debug_log "FEFIRST? last_node has parent"
1138 for c, i in last_node.parent.children
1140 debug_log "removing last_node from parent"
1141 last_node.parent.children.splice i, 1
1144 debug_log "after aaa inner loop"
1145 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1146 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1147 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1148 debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1149 debug_log "tree: #{serialize_els doc.children, false, true}"
1154 # can't use standard insert token thing, because it's already in
1155 # open_els and must stay at its current position in open_els
1156 dest = adjusted_insertion_location ca
1157 dest[0].children.splice dest[1], 0, last_node
1158 last_node.parent = dest[0]
1161 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1162 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1163 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1164 debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1165 debug_log "tree: #{serialize_els doc.children, false, true}"
1167 # 15. Create an element for the token for which formatting element
1168 # was created, in the HTML namespace, with furthest block as the
1170 new_element = token_to_element fe.token, NS_HTML, fb
1171 # 16. Take all of the child nodes of furthest block and append them
1172 # to the element created in the last step.
1173 while fb.children.length
1174 t = fb.children.shift()
1175 t.parent = new_element
1176 new_element.children.push t
1177 # 17. Append that new element to furthest block.
1178 new_element.parent = fb
1179 fb.children.push new_element
1180 # 18. Remove formatting element from the list of active formatting
1181 # elements, and insert the new element into the list of active
1182 # formatting elements at the position of the aforementioned
1190 afe[i] = new_element
1192 # 19. Remove formatting element from the stack of open elements,
1193 # and insert the new element into the stack of open elements
1194 # immediately below the position of furthest block in that stack.
1195 for t, i in open_els
1197 open_els.splice i, 1
1199 for t, i in open_els
1201 open_els.splice i, 0, new_element
1203 # 20. Jump back to the step labeled outer loop.
1204 debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1205 debug_log "tree: #{serialize_els doc.children, false, true}"
1206 debug_log "open_els: #{serialize_els open_els, true, true}"
1207 debug_log "afe: #{serialize_els afe, true, true}"
1208 debug_log "AAA DONE"
1210 # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
# close_p_element: implements the spec's "close a p element" steps.
# Takes no arguments. Generates implied end tags (with p as the exception),
# then pops entries off the front of open_els (index 0 is the current node),
# testing each popped entry for an HTML-namespace p.
# NOTE(review): the embedded listing numbers skip 1214 and 1218, so the body
# of the `unless` and of the final `if` are not visible here -- presumably a
# parse-error report and the loop exit respectively; confirm in the full file.
1211 close_p_element = ->
1212 generate_implied_end_tags 'p' # arg is exception
# after implied end tags the current node is expected to be an HTML p
1213 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
# the length guard stops popping once a single entry is left on the stack
1215 while open_els.length > 1 # just in case
1216 el = open_els.shift()
1217 if el.name is 'p' and el.namespace is NS_HTML
# close_p_if_in_button_scope: guard used before inserting block-level
# elements; it only acts when is_in_button_scope reports an HTML-namespace p.
# Takes no arguments.
# NOTE(review): the body of the `if` (listing line 1221) is not visible here;
# presumably it calls close_p_element -- confirm in the full file.
1219 close_p_if_in_button_scope = ->
1220 if is_in_button_scope 'p', NS_HTML
1223 # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1224 # aka insert_a_character = (t) ->
# insert_character: places character token t at the adjusted insertion
# location.
#   t - the character (text) token to insert
# dest is the [node, index_within_children] pair returned by
# adjusted_insertion_location.
# NOTE(review): listing lines 1228, 1231-1232 and 1234 are not visible, so
# any guard around `prev` (e.g. for index 0, where prev would be undefined)
# and the action taken when prev is a text node cannot be confirmed from
# here; the linked spec step appends the character to that text node.
1225 insert_character = (t) ->
1226 dest = adjusted_insertion_location()
1227 # fixfull check for Document node
# prev is the child immediately before the insertion index
1229 prev = dest[0].children[dest[1] - 1]
1230 if prev.type is TYPE_TEXT
# splice the token itself into the parent's children at the insertion index
1233 dest[0].children.splice dest[1], 0, t
1236 # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
# process_token: tree-construction entry point for a single token t.
# It looks at the adjusted current node (acn) and at the token's type/name;
# the last visible statement hands t to in_foreign_content.
# NOTE(review): the statements executed when each condition below holds are
# not visible in this listing (the embedded numbering skips them). Per the
# linked spec section those are the cases where the token is processed by the
# current insertion mode rather than the foreign-content rules -- confirm in
# the full file.
1237 process_token = (t) ->
1238 acn = adjusted_current_node()
# case: adjusted current node is in the HTML namespace
1242 if acn.namespace is NS_HTML
# case: MathML text integration point, with a start tag other than
# mglyph/malignmark, or with a text token
1245 if is_mathml_text_integration_point(acn)
1246 if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
1249 if t.type is TYPE_TEXT
# case: an svg start tag directly inside MathML annotation-xml
1252 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
# case: HTML integration point, with a start tag or a text token
1255 if is_html_integration acn
1256 if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
# case: end of file
1259 if t.type is TYPE_EOF
# the foreign-content rules
1262 in_foreign_content t
1266 # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1267 # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
# adjusted_insertion_location: computes where the next node should go.
#   override_target - optional node to use instead of the current node
#                     (open_els[0]); defaults to null
# Returns a two-element array [target, target_i]: the node to insert into and
# the index within target.children at which to insert.
# NOTE(review): the embedded listing numbers skip several lines in this
# function (e.g. 1270-1271, 1289-1291, 1294-1295, 1298-1300, 1310, 1316,
# 1325, 1327-1329), so the test that selects override_target, the loop bodies
# that record last_template/last_table and the `break`s that abort the
# substeps are not visible here; confirm against the full file.
1268 adjusted_insertion_location = (override_target = null) ->
1269 # 1. If there was an override target specified, then let target be the
1272 target = override_target
1273 else # Otherwise, let target be the current node.
1274 target = open_els[0]
1275 # 2. Determine the adjusted insertion location using the first matching
1276 # steps from the following list:
1278 # If foster parenting is enabled and target is a table, tbody, tfoot,
1279 # thead, or tr element Foster parenting happens when content is
1280 # misnested in tables.
# foster_parenting_targets maps element name -> namespace, so this one
# comparison checks both the name and the namespace of target
1281 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1282 loop # once. this is here so we can ``break`` to "abort these substeps"
1283 # 1. Let last template be the last template element in the
1284 # stack of open elements, if any.
1285 last_template = null
1286 last_template_i = null
1287 for el, i in open_els
1288 if el.name is 'template' and el.namespace is NS_HTML
1292 # 2. Let last table be the last table element in the stack of
1293 # open elements, if any.
1296 for el, i in open_els
1297 if el.name is 'table' and el.namespace is NS_HTML
1301 # 3. If there is a last template and either there is no last
1302 # table, or there is one, but last template is lower (more
1303 # recently added) than last table in the stack of open
1304 # elements, then: let adjusted insertion location be inside
1305 # last template's template contents, after its last child (if
1306 # any), and abort these substeps.
# open_els keeps the current node at index 0, so a smaller index means
# "lower" / more recently added
1307 if last_template and (last_table is null or last_template_i < last_table_i)
1308 target = last_template # fixfull should be its contents
1309 target_i = target.children.length
1311 # 4. If there is no last table, then let adjusted insertion
1312 # location be inside the first element in the stack of open
1313 # elements (the html element), after its last child (if any),
1314 # and abort these substeps. (fragment case)
1315 if last_table is null
# the spec's "first element in the stack" is the highest index of open_els
1317 target = open_els[open_els.length - 1]
1318 target_i = target.children.length
1320 # 5. If last table has a parent element, then let adjusted
1321 # insertion location be inside last table's parent element,
1322 # immediately before last table, and abort these substeps.
1323 if last_table.parent?
1324 for c, i in last_table.parent.children
1326 target = last_table.parent
1330 # 6. Let previous element be the element immediately above last
1331 # table in the stack of open elements.
1333 # huh? how could it not have a parent?
# "above" in the stack is the next higher index of open_els
1334 previous_element = open_els[last_table_i + 1]
1335 # 7. Let adjusted insertion location be inside previous
1336 # element, after its last child (if any).
1337 target = previous_element
1338 target_i = target.children.length
1339 # Note: These steps are involved in part because it's possible
1340 # for elements, the table element in this case in particular,
1341 # to have been moved by a script around in the DOM, or indeed
1342 # removed from the DOM entirely, after the element was inserted
1344 break # don't really loop
1346 # Otherwise Let adjusted insertion location be inside target, after
1347 # its last child (if any).
1348 target_i = target.children.length
1350 # 3. If the adjusted insertion location is inside a template element,
1351 # let it instead be inside the template element's template contents,
1352 # after its last child (if any).
1353 # fixfull (template)
1355 # 4. Return the adjusted insertion location.
1356 return [target, target_i]
1358 # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1359 # aka create_an_element_for_token
# token_to_element: builds a TYPE_TAG Node from a tag token.
#   t               - tag token; t.name becomes the element name and t itself
#                     is kept on the node as `token`
#   namespace       - namespace stored on the new node
#   intended_parent - not referenced in any line visible in this listing
# NOTE(review): listing lines 1362-1363 (where `attrs` is initialised and the
# attribute loop starts) and 1374-1375 (presumably where el is returned) are
# not visible here. The assignment below shows each attribute `a` being used
# as a [name, value] pair, with later duplicates overwriting earlier ones.
1360 token_to_element = (t, namespace, intended_parent) ->
1361 # convert attributes into a hash
1364 attrs[a[0]] = a[1] # TODO check what to do with duplicate attrs
1365 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1367 # TODO 2. If the newly created element has an xmlns attribute in the
1368 # XMLNS namespace whose value is not exactly the same as the element's
1369 # namespace, that is a parse error. Similarly, if the newly created
1370 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1371 # value is not the XLink Namespace, that is a parse error.
1373 # fixfull: the spec says stuff about form pointers and ownerDocument
1377 # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
# insert_foreign_element: creates an element for `token` in `namespace` and
# splices it into the children of the node at the adjusted insertion location.
#   token     - tag token to build the element from
#   namespace - namespace for the new element
# NOTE(review): listing lines 1380-1381, 1384 and 1386-1387 are not visible
# here. Presumably ail_el / ail_i are ail[0] / ail[1], and the remaining lines
# set the parent link, push the element onto open_els and return it -- confirm
# in the full file.
1378 insert_foreign_element = (token, namespace) ->
1379 ail = adjusted_insertion_location()
1382 el = token_to_element token, namespace, ail_el
1383 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1385 ail_el.children.splice ail_i, 0, el
1388 # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
# insert_html_element: convenience wrapper that inserts an element for
# `token` in the HTML namespace by delegating to insert_foreign_element.
#   token - tag token to build the element from
# The call is the last expression, so its result is what callers receive.
1389 insert_html_element = (token) ->
1390 insert_foreign_element token, NS_HTML
1392 # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1393 # position should be [node, index_within_children]
# insert_comment: splices comment token t into a node's children.
#   t        - the comment token; it is inserted as-is
#   position - optional [node, index_within_children]; when null or omitted,
#              the adjusted insertion location is used instead
1394 insert_comment = (t, position = null) ->
1395 position ?= adjusted_insertion_location()
1396 position[0].children.splice position[1], 0, t
1399 # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
# parse_generic_raw_text: handles a start tag whose content is raw text.
#   t - the start tag token
# Inserts an HTML element for t, points the tokenizer at tok_state_rawtext,
# saves the current insertion mode in original_ins_mode and then switches to
# ins_mode_text. The save must happen before ins_mode is overwritten.
1400 parse_generic_raw_text = (t) ->
1401 insert_html_element t
1402 tok_state = tok_state_rawtext
1403 original_ins_mode = ins_mode
1404 ins_mode = ins_mode_text
# parse_generic_rcdata_text: handles a start tag whose content is RCDATA.
#   t - the start tag token
# Inserts an HTML element for t, points the tokenizer at tok_state_rcdata,
# saves the current insertion mode in original_ins_mode and then switches to
# ins_mode_text. The save must happen before ins_mode is overwritten.
1405 parse_generic_rcdata_text = (t) ->
1406 insert_html_element t
1407 tok_state = tok_state_rcdata
1408 original_ins_mode = ins_mode
1409 ins_mode = ins_mode_text
1411 # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1412 # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
# generate_implied_end_tags: loops while the current node (open_els[0]) is
# listed in end_tag_implied (a name -> namespace map) and is not the excepted
# name.
#   except - optional element name that must not be closed; with the default
#            null no name is excepted
# NOTE(review): the loop body (listing line 1415) is not visible here;
# presumably it pops the current node off open_els -- confirm in the full file.
# The condition reads open_els[0] without an emptiness check.
1413 generate_implied_end_tags = (except = null) ->
1414 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1417 # 8.2.5.4 The rules for parsing tokens in HTML content
1418 # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1420 # 8.2.5.4.1 The "initial" insertion mode
1421 # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
# ins_mode_initial: handler for token t while in the "initial" insertion mode.
# Both visible assignments move the parser on to ins_mode_before_html: one in
# the doctype branch, one in the trailing ("anything else") part.
# NOTE(review): most statements of this handler are not visible in this
# listing (the embedded numbering skips 1423-1424, 1426-1428, 1431-1432,
# 1434-1435 and 1438-1439), including what is done with comment tokens and
# whether the token is reprocessed after the final mode switch.
1422 ins_mode_initial = (t) ->
1425 if t.type is TYPE_COMMENT
1429 if t.type is TYPE_DOCTYPE
1430 # FIXME check identifiers, set quirks, etc
1433 ins_mode = ins_mode_before_html
1436 #fixfull (iframe, quirks)
1437 ins_mode = ins_mode_before_html
1441 # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
# ins_mode_before_html: handler for token t in the "before html" insertion
# mode. An html start tag becomes the root element (appended to doc.children
# and put on open_els); in the trailing part a synthetic html start tag from
# new_open_tag is used to create the root instead. Both paths switch to
# ins_mode_before_head.
# NOTE(review): several statements are not visible in this listing (embedded
# numbering skips e.g. 1444-1445, 1447-1450, 1457, 1461-1464, 1468,
# 1471-1472), including the doctype/comment handling, the returns that end
# each branch, and whether the synthetic root is also pushed on open_els and
# the token reprocessed -- confirm in the full file.
1442 ins_mode_before_html = (t) ->
1443 if t.type is TYPE_DOCTYPE
1446 if t.type is TYPE_COMMENT
1451 if t.type is TYPE_START_TAG and t.name is 'html'
# the root element is created with doc as its intended parent
1452 el = token_to_element t, NS_HTML, doc
1453 doc.children.push el
# unshift: index 0 of open_els is the current node
1454 open_els.unshift(el)
1455 # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1456 ins_mode = ins_mode_before_head
1458 if t.type is TYPE_END_TAG
1459 if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1460 # fall through to "anything else"
# "anything else": build the root from a synthetic html start tag
1465 html_tok = new_open_tag 'html'
1466 el = token_to_element html_tok, NS_HTML, doc
1467 doc.children.push el
1469 # ?fixfull browsing context
1470 ins_mode = ins_mode_before_head
1474 # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
# ins_mode_before_head: handler for token t in the "before head" insertion
# mode. A head start tag is inserted and remembered in head_element_pointer;
# in the trailing part a synthetic head start tag from new_open_tag is
# inserted and remembered the same way. Both paths switch to
# ins_mode_in_head.
# NOTE(review): several statements are not visible in this listing (embedded
# numbering skips e.g. 1476-1477, 1479-1480, 1482-1483, 1485-1486, 1491,
# 1495-1498, 1503), including the comment/doctype/html handling, the returns
# that end each branch and whether the token is reprocessed after the
# synthetic head is inserted -- confirm in the full file.
1475 ins_mode_before_head = (t) ->
1478 if t.type is TYPE_COMMENT
1481 if t.type is TYPE_DOCTYPE
1484 if t.type is TYPE_START_TAG and t.name is 'html'
1487 if t.type is TYPE_START_TAG and t.name is 'head'
1488 el = insert_html_element t
1489 head_element_pointer = el
1490 ins_mode = ins_mode_in_head
1492 if t.type is TYPE_END_TAG
1493 if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1494 # fall through to Anything else below
# "anything else": act as if a head start tag had been seen
1499 head_tok = new_open_tag 'head'
1500 el = insert_html_element head_tok
1501 head_element_pointer = el
1502 ins_mode = ins_mode_in_head
1505 # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
# ins_mode_in_head_else: the "anything else" branch of the "in head"
# insertion mode. Pops the current node off open_els and switches to
# ins_mode_after_head.
#   t - the token being handled; not referenced in the visible lines
# NOTE(review): listing line 1509 is not visible here; presumably it
# reprocesses t in the new mode -- confirm in the full file.
1506 ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1507 open_els.shift() # spec says this will be a 'head' node
1508 ins_mode = ins_mode_after_head
# ins_mode_in_head: handler for token t in the "in head" insertion mode
# (http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead).
#   t - the token to handle
# The branches are tested in order; tokens matched by none of them end up in
# ins_mode_in_head_else.
# NOTE(review): the embedded listing numbers skip many lines of this handler
# (e.g. 1512-1513, 1515-1516, 1518-1519, 1521-1522, 1525, 1527, 1530, 1533,
# 1573-1574, 1577, 1580-1583, 1585-1586), so the returns that end each
# branch, the handling of whitespace/comment/doctype/html tokens and the
# parse-error reports are not visible here.
1510 ins_mode_in_head = (t) ->
1511 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1514 if t.type is TYPE_COMMENT
1517 if t.type is TYPE_DOCTYPE
1520 if t.type is TYPE_START_TAG and t.name is 'html'
# void head elements: insert, then acknowledge the self-closing flag
1523 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1524 el = insert_html_element t
1526 t.acknowledge_self_closing()
1528 if t.type is TYPE_START_TAG and t.name is 'meta'
1529 el = insert_html_element t
1531 t.acknowledge_self_closing()
1532 # fixfull encoding stuff
1534 if t.type is TYPE_START_TAG and t.name is 'title'
1535 parse_generic_rcdata_text t
# noscript is raw text only when scripting is enabled
1537 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1538 parse_generic_raw_text t
1540 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1541 insert_html_element t
1542 ins_mode = ins_mode_in_head_noscript
1544 if t.type is TYPE_START_TAG and t.name is 'script'
# ail is the [node, index_within_children] pair; the intended parent is the
# node itself (ail[0]), matching how insert_foreign_element passes it
1545 ail = adjusted_insertion_location()
1546 el = token_to_element t, NS_HTML, ail[0]
1547 el.flag 'parser-inserted', true
1548 # fixfull fragment case
1549 ail[0].children.splice ail[1], 0, el
1551 tok_state = tok_state_script_data
1552 original_ins_mode = ins_mode # make sure orig... is defined
1553 ins_mode = ins_mode_text
1555 if t.type is TYPE_END_TAG and t.name is 'head'
1556 open_els.shift() # will be a head element... spec says so
1557 ins_mode = ins_mode_after_head
1559 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1560 ins_mode_in_head_else t
1562 if t.type is TYPE_START_TAG and t.name is 'template'
1563 insert_html_element t
1565 flag_frameset_ok = false
1566 ins_mode = ins_mode_in_template
1567 template_ins_modes.unshift ins_mode_in_template
1569 if t.type is TYPE_END_TAG and t.name is 'template'
1570 if template_tag_is_open()
# the parentheses are required: a bare function name is only a reference in
# CoffeeScript and would not generate the implied end tags
1571 generate_implied_end_tags()
1572 if open_els[0].name isnt 'template'
# pop entries off open_els, testing each for the HTML template element
1575 el = open_els.shift()
1576 if el.name is 'template' and el.namespace is NS_HTML
1578 clear_afe_to_marker()
1579 template_ins_modes.shift()
1584 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
# everything not handled by a branch above
1587 ins_mode_in_head_else t
1589 # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
# ins_mode_in_head_noscript_else: the "anything else" branch of the
# "in head noscript" insertion mode; the visible statement switches back to
# ins_mode_in_head.
#   t - the token being handled; not referenced in the visible line
# NOTE(review): listing lines 1591-1592 and 1594 are not visible here; per
# the linked spec section this branch also reports a parse error, pops the
# noscript element and reprocesses the token -- confirm in the full file.
1590 ins_mode_in_head_noscript_else = (t) ->
1593 ins_mode = ins_mode_in_head
# ins_mode_in_head_noscript: handler for token t in the "in head noscript"
# insertion mode. A noscript end tag switches back to ins_mode_in_head; a br
# end tag and the trailing part both go to ins_mode_in_head_noscript_else.
# NOTE(review): several statements are not visible in this listing (embedded
# numbering skips e.g. 1597-1598, 1600-1601, 1603, 1605, 1607-1608, 1611,
# 1613-1615), including the returns that end each branch, what is done with
# the whitespace/comment/head-element tokens matched on listing line 1606,
# and any popping of the noscript element -- confirm in the full file.
1595 ins_mode_in_head_noscript = (t) ->
1596 if t.type is TYPE_DOCTYPE
1599 if t.type is TYPE_START_TAG and t.name is 'html'
1602 if t.type is TYPE_END_TAG and t.name is 'noscript'
1604 ins_mode = ins_mode_in_head
1606 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1609 if t.type is TYPE_END_TAG and t.name is 'br'
1610 ins_mode_in_head_noscript_else t
1612 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
# everything not handled by a branch above
1616 ins_mode_in_head_noscript_else t
1621 # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
# ins_mode_after_head_else: the "anything else" branch of the "after head"
# insertion mode. Inserts an element for a synthetic body start tag and
# switches to ins_mode_in_body.
#   t - the token being handled; not referenced in the visible lines
# NOTE(review): listing lines 1626-1627 are not visible here; presumably t is
# reprocessed in the new mode -- confirm in the full file.
1622 ins_mode_after_head_else = (t) ->
1623 body_tok = new_open_tag 'body'
1624 insert_html_element body_tok
1625 ins_mode = ins_mode_in_body
# ins_mode_after_head: handler for token t in the "after head" insertion
# mode. A body start tag is inserted (and frameset-ok is cleared) before
# switching to ins_mode_in_body; a frameset start tag is inserted before
# switching to ins_mode_in_frameset. Tokens not handled by a branch end up in
# ins_mode_after_head_else.
# NOTE(review): several statements are not visible in this listing (embedded
# numbering skips e.g. 1629-1631, 1633-1634, 1636-1637, 1639-1640, 1645,
# 1649, 1651, 1653, 1657, 1659, 1661-1662, 1665, 1667-1669), including the
# returns that end each branch and how the head-only start tags are processed
# while the head element is temporarily back on the stack -- confirm in the
# full file.
1628 ins_mode_after_head = (t) ->
1632 if t.type is TYPE_COMMENT
1635 if t.type is TYPE_DOCTYPE
1638 if t.type is TYPE_START_TAG and t.name is 'html'
1641 if t.type is TYPE_START_TAG and t.name is 'body'
1642 insert_html_element t
1643 flag_frameset_ok = false
1644 ins_mode = ins_mode_in_body
1646 if t.type is TYPE_START_TAG and t.name is 'frameset'
1647 insert_html_element t
1648 ins_mode = ins_mode_in_frameset
1650 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
# temporarily make the head element the current node again
1652 open_els.unshift head_element_pointer
# afterwards remove the head element from wherever it now sits in open_els
1654 for el, i in open_els
1655 if el is head_element_pointer
1656 open_els.splice i, 1
1658 console.log "warning: 23904 couldn't find head element in open_els"
1660 if t.type is TYPE_END_TAG and t.name is 'template'
1663 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1664 ins_mode_after_head_else t
1666 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
# everything not handled by a branch above
1670 ins_mode_after_head_else t
1672 # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
# in_body_any_other_end_tag: the "any other end tag" steps of the "in body"
# insertion mode.
#   name - tag name of the end tag being processed
# Walks open_els starting at the current node (index 0). For an
# HTML-namespace entry with a matching name it generates implied end tags
# (excepting `name`) and reports a parse error unless the match was found at
# index 0. Each entry is also tested against special_elements (a name ->
# namespace map).
# NOTE(review): listing lines 1678-1681 and 1683-1684 are not visible here;
# presumably they pop up to and including the matched entry and, for a
# special element, report a parse error and stop -- confirm in the full file.
# NOTE(review): `i` is captured before generate_implied_end_tags runs; if
# that call pops entries, `i is 0` no longer says whether el is the current
# node -- worth checking against `open_els[0] is el`.
1673 in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1674 for el, i in open_els
1675 if el.name is name and el.namespace is NS_HTML
1676 generate_implied_end_tags name # arg is exception
1677 parse_error() unless i is 0
1682 if special_elements[el.name] is el.namespace
1686 ins_mode_in_body = (t) ->
1687 if t.type is TYPE_TEXT and t.text is "\u0000"
1694 if t.type is TYPE_TEXT
1697 flag_frameset_ok = false
1699 if t.type is TYPE_COMMENT
1702 if t.type is TYPE_DOCTYPE
1705 if t.type is TYPE_START_TAG and t.name is 'html'
1707 return if template_tag_is_open()
1708 root_attrs = open_els[open_els.length - 1].attrs
1710 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1713 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1716 if t.type is TYPE_START_TAG and t.name is 'body'
1718 return if open_els.length < 2
1719 second = open_els[open_els.length - 2]
1720 return unless second.namespace is NS_HTML
1721 return unless second.name is 'body'
1722 return if template_tag_is_open()
1723 flag_frameset_ok = false
1725 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1727 if t.type is TYPE_START_TAG and t.name is 'frameset'
1729 return if open_els.length < 2
1730 second_i = open_els.length - 2
1731 second = open_els[second_i]
1732 return unless second.namespace is NS_HTML
1733 return unless second.name is 'body'
1734 if flag_frameset_ok is false
1737 for el, i in second.parent.children
1739 second.parent.children.splice i, 1
1741 open_els.splice second_i, 1
1742 # pop everything except the "root html element"
1743 while open_els.length > 1
1745 insert_html_element t
1746 ins_mode = ins_mode_in_frameset
1748 if t.type is TYPE_EOF
1750 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1751 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1752 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1755 unless ok_tags[t.name] is el.namespace
1758 if template_ins_modes.length > 0
1759 ins_mode_in_template t
1763 if t.type is TYPE_END_TAG and t.name is 'body'
1764 unless is_in_scope 'body', NS_HTML
1768 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1769 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1770 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1771 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1775 unless ok_tags[t.name] is el.namespace
1778 ins_mode = ins_mode_after_body
1780 if t.type is TYPE_END_TAG and t.name is 'html'
1781 unless is_in_scope 'body', NS_HTML
1785 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1786 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1787 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1788 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1792 unless ok_tags[t.name] is el.namespace
1795 ins_mode = ins_mode_after_body
1798 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1799 close_p_if_in_button_scope()
1800 insert_html_element t
1802 if t.type is TYPE_START_TAG and h_tags[t.name]?
1803 close_p_if_in_button_scope()
1804 if h_tags[open_els[0].name] is open_els[0].namespace
1807 insert_html_element t
1809 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1810 close_p_if_in_button_scope()
1811 insert_html_element t
1812 # spec: If the next token is a "LF" (U+000A) character token, then
1813 # ignore that token and move on to the next one. (Newlines at the
1814 # start of pre blocks are ignored as an authoring convenience.)
1815 if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1817 flag_frameset_ok = false
1819 if t.type is TYPE_START_TAG and t.name is 'form'
1820 unless form_element_pointer is null or template_tag_is_open()
1823 close_p_if_in_button_scope()
1824 el = insert_html_element t
1825 unless template_tag_is_open()
1826 form_element_pointer = el
1828 if t.type is TYPE_START_TAG and t.name is 'li'
1829 flag_frameset_ok = false
1830 for node in open_els
1831 if node.name is 'li' and node.namespace is NS_HTML
1832 generate_implied_end_tags 'li' # arg is exception
1833 if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1836 el = open_els.shift()
1837 if el.name is 'li' and el.namespace is NS_HTML
1840 if el_is_special_not_adp node
1842 close_p_if_in_button_scope()
1843 insert_html_element t
1845 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1846 flag_frameset_ok = false
1847 for node in open_els
1848 if node.name is 'dd' and node.namespace is NS_HTML
1849 generate_implied_end_tags 'dd' # arg is exception
1850 if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1853 el = open_els.shift()
1854 if el.name is 'dd' and el.namespace is NS_HTML
1857 if node.name is 'dt' and node.namespace is NS_HTML
1858 generate_implied_end_tags 'dt' # arg is exception
1859 if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1862 el = open_els.shift()
1863 if el.name is 'dt' and el.namespace is NS_HTML
1866 if el_is_special_not_adp node
1868 close_p_if_in_button_scope()
1869 insert_html_element t
1871 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1872 close_p_if_in_button_scope()
1873 insert_html_element t
1874 tok_state = tok_state_plaintext
1876 if t.type is TYPE_START_TAG and t.name is 'button'
1877 if is_in_scope 'button', NS_HTML
1879 generate_implied_end_tags()
1881 el = open_els.shift()
1882 if el.name is 'button' and el.namespace is NS_HTML
1885 insert_html_element t
1886 flag_frameset_ok = false
1888 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1889 unless is_in_scope t.name, NS_HTML
1892 generate_implied_end_tags()
1893 unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1896 el = open_els.shift()
1897 if el.name is t.name and el.namespace is NS_HTML
1900 if t.type is TYPE_END_TAG and t.name is 'form'
1901 unless template_tag_is_open()
1902 node = form_element_pointer
1903 form_element_pointer = null
1904 if node is null or not el_is_in_scope node
1907 generate_implied_end_tags()
1908 if open_els[0] isnt node
1910 for el, i in open_els
1912 open_els.splice i, 1
1915 unless is_in_scope 'form', NS_HTML
1918 generate_implied_end_tags()
1919 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1922 el = open_els.shift()
1923 if el.name is 'form' and el.namespace is NS_HTML
1926 if t.type is TYPE_END_TAG and t.name is 'p'
1927 unless is_in_button_scope 'p', NS_HTML
1929 insert_html_element new_open_tag 'p'
1932 if t.type is TYPE_END_TAG and t.name is 'li'
1933 unless is_in_li_scope 'li', NS_HTML
1936 generate_implied_end_tags 'li' # arg is exception
1937 if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1940 el = open_els.shift()
1941 if el.name is 'li' and el.namespace is NS_HTML
1944 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1945 unless is_in_scope t.name, NS_HTML
1948 generate_implied_end_tags t.name # arg is exception
1949 if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1952 el = open_els.shift()
1953 if el.name is t.name and el.namespace is NS_HTML
1956 if t.type is TYPE_END_TAG and h_tags[t.name]?
1959 if h_tags[el.name] is el.namespace
1962 if standard_scopers[el.name] is el.namespace
1967 generate_implied_end_tags()
1968 if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1971 el = open_els.shift()
1972 if h_tags[el.name] is el.namespace
1976 if t.type is TYPE_START_TAG and t.name is 'a'
1977 # If the list of active formatting elements contains an a element
1978 # between the end of the list and the last marker on the list (or
1979 # the start of the list if there is no marker on the list), then
1980 # this is a parse error; run the adoption agency algorithm for the
1981 # tag name "a", then remove that element from the list of active
1982 # formatting elements and the stack of open elements if the
1983 # adoption agency algorithm didn't already remove it (it might not
1984 # have if the element is not in table scope).
1987 if el.type is TYPE_AFE_MARKER
1989 if el.name is 'a' and el.namespace is NS_HTML
1997 for el, i in open_els
1999 open_els.splice i, 1
2001 el = insert_html_element t
2004 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2006 el = insert_html_element t
2009 if t.type is TYPE_START_TAG and t.name is 'nobr'
2011 el = insert_html_element t
2014 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2015 adoption_agency t.name
2017 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2019 insert_html_element t
2021 flag_frameset_ok = false
2023 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2024 unless is_in_scope t.name, NS_HTML
2027 generate_implied_end_tags()
2028 if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2031 el = open_els.shift()
2032 if el.name is t.name and el.namespace is NS_HTML
2034 clear_afe_to_marker()
2036 if t.type is TYPE_START_TAG and t.name is 'table'
2037 close_p_if_in_button_scope() # fixfull quirksmode thing
2038 insert_html_element t
2039 flag_frameset_ok = false
2040 ins_mode = ins_mode_in_table
2042 if t.type is TYPE_END_TAG and t.name is 'br'
2044 t.type is TYPE_START_TAG
2046 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2048 insert_html_element t
2050 t.acknowledge_self_closing()
2051 flag_frameset_ok = false
2053 if t.type is TYPE_START_TAG and t.name is 'input'
2055 insert_html_element t
2057 t.acknowledge_self_closing()
2058 unless is_input_hidden_tok t
2059 flag_frameset_ok = false
2061 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2062 insert_html_element t
2064 t.acknowledge_self_closing()
2066 if t.type is TYPE_START_TAG and t.name is 'hr'
2067 close_p_if_in_button_scope()
2068 insert_html_element t
2070 t.acknowledge_self_closing()
2071 flag_frameset_ok = false
2073 if t.type is TYPE_START_TAG and t.name is 'image'
2078 if t.type is TYPE_START_TAG and t.name is 'isindex'
2080 if template_tag_is_open() is false and form_element_pointer isnt null
2082 t.acknowledge_self_closing()
2083 flag_frameset_ok = false
2084 close_p_if_in_button_scope()
2085 el = insert_html_element new_open_tag 'form'
2086 unless template_tag_is_open()
2087 form_element_pointer = el
2090 el.attrs['action'] = a[1]
2092 insert_html_element new_open_tag 'hr'
2095 insert_html_element new_open_tag 'label'
2096 # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2097 input_el = new_open_tag 'input'
2102 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2103 input_el.attrs_a.push [a[0], a[1]]
2104 input_el.attrs_a.push ['name', 'isindex']
2105 # fixfull this next bit is in english... internationalize?
2106 prompt ?= "This is a searchable index. Enter search keywords: "
2107 insert_character new_character_token prompt # fixfull split
2108 # TODO submit typo "balue" in spec
2109 insert_html_element input_el
2111 # insert_character '' # you can put chars here if prompt attr missing
2113 insert_html_element new_open_tag 'hr'
2116 unless template_tag_is_open()
2117 form_element_pointer = null
2119 if t.type is TYPE_START_TAG and t.name is 'textarea'
2120 insert_html_element t
2121 if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2123 tok_state = tok_state_rcdata
2124 original_ins_mode = ins_mode
2125 flag_frameset_ok = false
2126 ins_mode = ins_mode_text
2128 if t.type is TYPE_START_TAG and t.name is 'xmp'
2129 close_p_if_in_button_scope()
2131 flag_frameset_ok = false
2132 parse_generic_raw_text t
2134 if t.type is TYPE_START_TAG and t.name is 'iframe'
2135 flag_frameset_ok = false
2136 parse_generic_raw_text t
2138 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2139 parse_generic_raw_text t
2141 if t.type is TYPE_START_TAG and t.name is 'select'
2143 insert_html_element t
2144 flag_frameset_ok = false
2145 if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2146 ins_mode = ins_mode_in_select_in_table
2148 ins_mode = ins_mode_in_select
2150 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2151 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2154 insert_html_element t
2156 # this comment block implements the W3C spec
2157 # if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2158 # if is_in_scope 'ruby', NS_HTML
2159 # generate_implied_end_tags()
2160 # unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2162 # insert_html_element t
2164 # if t.type is TYPE_START_TAG and t.name is 'rt'
2165 # if is_in_scope 'ruby', NS_HTML
2166 # generate_implied_end_tags 'rtc' # arg is exception
2167 # unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2169 # insert_html_element t
2171 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2172 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2173 if is_in_scope 'ruby', NS_HTML
2174 generate_implied_end_tags()
2175 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2177 insert_html_element t
2179 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2180 if is_in_scope 'ruby', NS_HTML
2181 generate_implied_end_tags 'rtc'
2182 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2184 insert_html_element t
2187 if t.type is TYPE_START_TAG and t.name is 'math'
2189 adjust_mathml_attributes t
2190 adjust_foreign_attributes t
2191 insert_foreign_element t, NS_MATHML
2192 if t.flag 'self-closing'
2194 t.acknowledge_self_closing()
2196 if t.type is TYPE_START_TAG and t.name is 'svg'
2198 adjust_svg_attributes t
2199 adjust_foreign_attributes t
2200 insert_foreign_element t, NS_SVG
2201 if t.flag 'self-closing'
2203 t.acknowledge_self_closing()
2205 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2208 if t.type is TYPE_START_TAG # any other start tag
2210 insert_html_element t
2212 if t.type is TYPE_END_TAG # any other end tag
2213 in_body_any_other_end_tag t.name
2217 # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
# Tree-construction handler for the "text" insertion mode.
# Dispatches on the token: text, EOF, the </script> end tag, and any other end
# tag. The EOF and end-tag branches restore ins_mode from original_ins_mode.
# NOTE(review): only some lines of this function are visible in this view
# (the embedded line numbers skip), so each branch may do more than shown.
2218 ins_mode_text = (t) ->
2219 if t.type is TYPE_TEXT
2222 if t.type is TYPE_EOF
# at EOF an open HTML <script> element gets its 'already started' flag set
2224 if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2225 open_els[0].flag 'already started', true
2227 ins_mode = original_ins_mode
2230 if t.type is TYPE_END_TAG and t.name is 'script'
2232 ins_mode = original_ins_mode
2233 # fixfull the spec seems to assume that I'm going to run the script
2234 # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2236 if t.type is TYPE_END_TAG
2238 ins_mode = original_ins_mode
# diagnostic only; presumably reached when no branch above handled the token
2240 console.log 'warning: end of ins_mode_text reached'
2242 # the functions below implement the tokenizer states described here:
2243 # http://www.w3.org/TR/html5/syntax.html#tokenization
2245 # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
# "Anything else" fallback for table content; called from ins_mode_in_table
# and ins_mode_in_table_text.
# Sets flag_foster_parenting to true and afterwards back to false.
# NOTE(review): the statement(s) between the two assignments are not visible
# in this view; presumably the token is processed there while the flag is set.
2246 ins_mode_in_table_else = (t) ->
2248 flag_foster_parenting = true
2250 flag_foster_parenting = false
# Tree-construction handler for the "in table" insertion mode.
# Visible behaviour:
#  * when the current node (open_els[0]) is an HTML table/tbody/tfoot/thead/tr,
#    pending_table_character_tokens is reset, ins_mode is saved in
#    original_ins_mode and the mode becomes ins_mode_in_table_text
#  * table-structure tags call clear_stack_to_table_context(), insert either
#    the token's element or an implied 'colgroup'/'tbody', and switch to the
#    caption / column-group / table-body insertion mode
#  * branches that test is_in_table_scope 'table' shift elements off open_els
#    and test each for an HTML 'table'
#  * remaining tokens are passed to ins_mode_in_table_else
# NOTE(review): several `when`/`else` lines are not visible in this view, so
# the exact tag name guarding each branch cannot be confirmed from here.
2252 ins_mode_in_table = (t) ->
2255 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2256 pending_table_character_tokens = []
2257 original_ins_mode = ins_mode
2258 ins_mode = ins_mode_in_table_text
2261 ins_mode_in_table_else t
2269 clear_stack_to_table_context()
2271 insert_html_element t
2272 ins_mode = ins_mode_in_caption
2274 clear_stack_to_table_context()
2275 insert_html_element t
2276 ins_mode = ins_mode_in_column_group
2278 clear_stack_to_table_context()
# an implied <colgroup> is inserted here rather than the token itself
2279 insert_html_element new_open_tag 'colgroup'
2280 ins_mode = ins_mode_in_column_group
2282 when 'tbody', 'tfoot', 'thead'
2283 clear_stack_to_table_context()
2284 insert_html_element t
2285 ins_mode = ins_mode_in_table_body
2286 when 'td', 'th', 'tr'
2287 clear_stack_to_table_context()
# an implied <tbody> is inserted here rather than the token itself
2288 insert_html_element new_open_tag 'tbody'
2289 ins_mode = ins_mode_in_table_body
2293 if is_in_table_scope 'table', NS_HTML
2295 el = open_els.shift()
2296 if el.name is 'table' and el.namespace is NS_HTML
2300 when 'style', 'script', 'template'
# inputs that are not hidden take the generic fallback path
2303 unless is_input_hidden_tok t
2304 ins_mode_in_table_else t
2307 el = insert_html_element t
2309 t.acknowledge_self_closing()
2312 if form_element_pointer?
2314 if template_tag_is_open()
2316 form_element_pointer = insert_html_element t
2319 ins_mode_in_table_else t
2323 if is_in_table_scope 'table', NS_HTML
2325 el = open_els.shift()
2326 if el.name is 'table' and el.namespace is NS_HTML
2331 when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2336 ins_mode_in_table_else t
2340 ins_mode_in_table_else t
2343 # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
# Tree-construction handler for the "in table text" insertion mode.
# A U+0000 text token has its own branch; other text tokens are appended to
# pending_table_character_tokens. For any other token the buffer is scanned
# with is_space_tok and then flushed, either through insert_character or
# through ins_mode_in_table_else; finally the buffer is emptied and ins_mode
# is restored from original_ins_mode.
# NOTE(review): the lines choosing between the two flush loops are not visible
# in this view; presumably the choice depends on whether a non-space token was
# found by the scan.
2344 ins_mode_in_table_text = (t) ->
2345 if t.type is TYPE_TEXT and t.text is "\u0000"
2349 if t.type is TYPE_TEXT
2350 pending_table_character_tokens.push t
2354 for old in pending_table_character_tokens
2355 unless is_space_tok old
2359 for old in pending_table_character_tokens
2360 insert_character old
2362 for old in pending_table_character_tokens
2363 ins_mode_in_table_else old
2364 pending_table_character_tokens = []
2365 ins_mode = original_ins_mode
2368 # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
# Tree-construction handler for the "in caption" insertion mode.
# For </caption> with a caption in table scope: generates implied end tags,
# shifts elements off open_els testing each for an HTML 'caption' (presumably
# in a loop whose header is not visible here), calls clear_afe_to_marker() and
# switches to ins_mode_in_table. Table-structure start tags and </table> run
# the same shift/clear/switch sequence when a caption is in table scope.
2369 ins_mode_in_caption = (t) ->
2370 if t.type is TYPE_END_TAG and t.name is 'caption'
2371 if is_in_table_scope 'caption', NS_HTML
2372 generate_implied_end_tags()
# NOTE(review): only the name is compared here, not the namespace
2373 if open_els[0].name isnt 'caption'
2376 el = open_els.shift()
2377 if el.name is 'caption' and el.namespace is NS_HTML
2379 clear_afe_to_marker()
2380 ins_mode = ins_mode_in_table
2385 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2387 if is_in_table_scope 'caption', NS_HTML
2389 el = open_els.shift()
2390 if el.name is 'caption' and el.namespace is NS_HTML
2392 clear_afe_to_marker()
2393 ins_mode = ins_mode_in_table
2395 # else fragment case
2397 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2403 # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
# Tree-construction handler for the "in column group" insertion mode.
# Visible branches: comment, doctype, <html>, <col> (inserted and its
# self-closing flag acknowledged), </colgroup>, </col>, <template>/</template>,
# EOF, and a final fallback that switches back to ins_mode_in_table.
# NOTE(review): only some lines of this function are visible in this view
# (the embedded line numbers skip); the lines shown are otherwise unchanged.
2404 ins_mode_in_column_group = (t) ->
2408 if t.type is TYPE_COMMENT
2411 if t.type is TYPE_DOCTYPE
2414 if t.type is TYPE_START_TAG and t.name is 'html'
2417 if t.type is TYPE_START_TAG and t.name is 'col'
2418 el = insert_html_element t
2420 t.acknowledge_self_closing()
2422 if t.type is TYPE_END_TAG and t.name is 'colgroup'
# The namespace must be read from the current node (open_els[0]). The previous
# code read `open_els.namespace`, a property of the array itself, which is
# always undefined, so this </colgroup> branch could never be taken.
2423 if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
2425 ins_mode = ins_mode_in_table
2429 if t.type is TYPE_END_TAG and t.name is 'col'
2432 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2435 if t.type is TYPE_EOF
2439 if open_els[0].name isnt 'colgroup'
2443 ins_mode = ins_mode_in_table
2447 # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
# Tree-construction handler for the "in table body" insertion mode.
# Visible behaviour:
#  * <tr>: clear the stack to a table body context, insert the element,
#    switch to ins_mode_in_row
#  * <th>/<td>: same, but an implied 'tr' element is inserted instead
#  * </tbody>, </tfoot>, </thead>: checked against table scope, then the stack
#    is cleared to a table body context and the mode becomes ins_mode_in_table
#  * table-structure start tags and </table>: inspect elements `el` for an
#    HTML tbody/tfoot/thead or a table scoping element (table_scopers) before
#    clearing the stack and switching to ins_mode_in_table
# NOTE(review): the loop that binds `el` and several else/return lines are not
# visible in this view.
2448 ins_mode_in_table_body = (t) ->
2449 if t.type is TYPE_START_TAG and t.name is 'tr'
2450 clear_stack_to_table_body_context()
2451 insert_html_element t
2452 ins_mode = ins_mode_in_row
2454 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2456 clear_stack_to_table_body_context()
2457 insert_html_element new_open_tag 'tr'
2458 ins_mode = ins_mode_in_row
2461 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2462 unless is_in_table_scope t.name, NS_HTML
2465 clear_stack_to_table_body_context()
2467 ins_mode = ins_mode_in_table
2469 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2472 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2475 if table_scopers[el.name] is el.namespace
2480 clear_stack_to_table_body_context()
2482 ins_mode = ins_mode_in_table
2485 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2491 # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
# Tree-construction handler for the "in row" insertion mode.
# Visible behaviour:
#  * <th>/<td>: clear the stack to a table row context, insert the element,
#    switch to ins_mode_in_cell
#  * </tr>, table-structure start tags, </table>, and </tbody>/</tfoot>/
#    </thead>: when 'tr' is in table scope, clear the stack to a table row
#    context and switch to ins_mode_in_table_body (the last group additionally
#    requires the token's own name to be in table scope)
# NOTE(review): statements between the visible lines (element pops, parse
# errors, reprocessing) are not visible in this view.
2492 ins_mode_in_row = (t) ->
2493 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2494 clear_stack_to_table_row_context()
2495 insert_html_element t
2496 ins_mode = ins_mode_in_cell
2499 if t.type is TYPE_END_TAG and t.name is 'tr'
2500 if is_in_table_scope 'tr', NS_HTML
2501 clear_stack_to_table_row_context()
2503 ins_mode = ins_mode_in_table_body
2507 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2508 if is_in_table_scope 'tr', NS_HTML
2509 clear_stack_to_table_row_context()
2511 ins_mode = ins_mode_in_table_body
2516 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2517 if is_in_table_scope t.name, NS_HTML
2518 if is_in_table_scope 'tr', NS_HTML
2519 clear_stack_to_table_row_context()
2521 ins_mode = ins_mode_in_table_body
2526 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2532 # http://www.w3.org/TR/html5/syntax.html#close-the-cell
# Body of the "close the cell" steps: generate implied end tags, check that
# the current node is an HTML td/th, shift elements off open_els testing each
# for an HTML td/th, call clear_afe_to_marker() and switch to ins_mode_in_row.
# NOTE(review): the function's definition line and some body lines are not
# visible in this view; the lines shown are otherwise unchanged.
2534 generate_implied_end_tags()
# Compare the current node's *name* with 'th'. The previous code compared the
# element object itself (open_els[0] is 'th'), which is never true, so a
# current <th> was wrongly treated as "not a cell".
2535 unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
2538 el = open_els.shift()
2539 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2541 clear_afe_to_marker()
2542 ins_mode = ins_mode_in_row
2544 # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
# Tree-construction handler for the "in cell" insertion mode.
# Visible behaviour:
#  * </td> or </th> whose name is in table scope: generate implied end tags,
#    check the current node against the token name, shift elements off
#    open_els testing each for a matching HTML element, call
#    clear_afe_to_marker() and switch to ins_mode_in_row
#  * table-structure start tags: inspect elements `el` for an HTML td/th or a
#    table scoping element (table_scopers)
#  * </table>, </tbody>, </tfoot>, </thead>, </tr>: checked against table scope
# NOTE(review): the loop that binds `el` and the statements following the
# scope checks are not visible in this view.
2545 ins_mode_in_cell = (t) ->
2546 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2547 if is_in_table_scope t.name, NS_HTML
2548 generate_implied_end_tags()
2549 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2552 el = open_els.shift()
2553 if el.name is t.name and el.namespace is NS_HTML
2555 clear_afe_to_marker()
2556 ins_mode = ins_mode_in_row
2560 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2563 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2566 if table_scopers[el.name] is el.namespace
2574 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2577 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2578 if is_in_table_scope t.name, NS_HTML
2587 # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
# Tree-construction handler for the "in select" insertion mode.
# Visible branches: U+0000 text, other text, comment, doctype, <html>,
# <option>, <optgroup>, </optgroup>, </option>, </select>, a nested <select>,
# <input>/<keygen>/<textarea>, <script>/<template>, and EOF. The <option> and
# <optgroup> branches test the current node (open_els[0]) before inserting the
# new element; the select-closing branches shift elements off open_els and
# test each for an HTML 'select'.
# NOTE(review): only some lines of this function are visible in this view
# (the embedded line numbers skip); the lines shown are otherwise unchanged
# except for the two fixes marked below.
2588 ins_mode_in_select = (t) ->
2589 if t.type is TYPE_TEXT and t.text is "\u0000"
2592 if t.type is TYPE_TEXT
2595 if t.type is TYPE_COMMENT
2598 if t.type is TYPE_DOCTYPE
2601 if t.type is TYPE_START_TAG and t.name is 'html'
2604 if t.type is TYPE_START_TAG and t.name is 'option'
2605 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2607 insert_html_element t
2609 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2610 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2612 if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2614 insert_html_element t
2616 if t.type is TYPE_END_TAG and t.name is 'optgroup'
# Fix: namespace equality uses `is`. The previous code used the membership
# operator (`in NS_HTML`), which is not an equality test.
2617 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
# Fix: the element just above the current <option> is open_els[1], so its own
# namespace is the one to check (previously open_els[0].namespace was re-tested).
2618 if open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
2620 if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2625 if t.type is TYPE_END_TAG and t.name is 'option'
2626 if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2631 if t.type is TYPE_END_TAG and t.name is 'select'
2632 if is_in_select_scope 'select', NS_HTML
2634 el = open_els.shift()
2635 if el.name is 'select' and el.namespace is NS_HTML
2641 if t.type is TYPE_START_TAG and t.name is 'select'
2644 el = open_els.shift()
2645 if el.name is 'select' and el.namespace is NS_HTML
2648 # spec says that this is the same as </select> but it doesn't say
2649 # to check scope first
2651 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2653 if is_in_select_scope 'select', NS_HTML
2656 el = open_els.shift()
2657 if el.name is 'select' and el.namespace is NS_HTML
2662 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2665 if t.type is TYPE_EOF
2672 # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
# Tree-construction handler for the "in select in table" insertion mode.
# Table-related start tags, and table-related end tags (the latter guarded by
# a table-scope check on the token's name), shift elements off open_els and
# test each for an HTML 'select'. The last visible line forwards the token to
# ins_mode_in_select.
# NOTE(review): the loop headers and the statements after the pops are not
# visible in this view.
2673 ins_mode_in_select_in_table = (t) ->
2674 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2677 el = open_els.shift()
2678 if el.name is 'select' and el.namespace is NS_HTML
2683 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2685 unless is_in_table_scope t.name, NS_HTML
2688 el = open_els.shift()
2689 if el.name is 'select' and el.namespace is NS_HTML
2695 ins_mode_in_select t
2698 # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
# Tree-construction handler for the "in template" insertion mode.
# For start tags, the first entry of template_ins_modes is replaced (shift
# followed by unshift) with the insertion mode matching the tag, and ins_mode
# is set to that same mode:
#   caption/colgroup/tbody/tfoot/thead -> ins_mode_in_table
#   col                                -> ins_mode_in_column_group
#   tr                                 -> ins_mode_in_table_body
#   td/th                              -> ins_mode_in_row
#   any other start tag                -> ins_mode_in_body
# At EOF with a template open, elements are shifted off open_els and tested
# for an HTML 'template', clear_afe_to_marker() is called and the first entry
# of template_ins_modes is removed.
# NOTE(review): what happens after each mode switch is not visible here.
2699 ins_mode_in_template = (t) ->
2700 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2703 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2706 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2707 template_ins_modes.shift()
2708 template_ins_modes.unshift ins_mode_in_table
2709 ins_mode = ins_mode_in_table
2712 if t.type is TYPE_START_TAG and t.name is 'col'
2713 template_ins_modes.shift()
2714 template_ins_modes.unshift ins_mode_in_column_group
2715 ins_mode = ins_mode_in_column_group
2718 if t.type is TYPE_START_TAG and t.name is 'tr'
2719 template_ins_modes.shift()
2720 template_ins_modes.unshift ins_mode_in_table_body
2721 ins_mode = ins_mode_in_table_body
2724 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2725 template_ins_modes.shift()
2726 template_ins_modes.unshift ins_mode_in_row
2727 ins_mode = ins_mode_in_row
2730 if t.type is TYPE_START_TAG
2731 template_ins_modes.shift()
2732 template_ins_modes.unshift ins_mode_in_body
2733 ins_mode = ins_mode_in_body
2736 if t.type is TYPE_END_TAG
2739 if t.type is TYPE_EOF
2740 unless template_tag_is_open()
2745 el = open_els.shift()
2746 if el.name is 'template' and el.namespace is NS_HTML
2748 clear_afe_to_marker()
2749 template_ins_modes.shift()
2753 # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
# Tree-construction handler for the "after body" insertion mode.
# Comments are inserted as the last child of the last entry in open_els (the
# top of the stack, since index 0 is the current node). </html> switches to
# ins_mode_after_after_body (a fragment-parsing check precedes it). The final
# visible line switches back to ins_mode_in_body.
# NOTE(review): the first branch and the statements after several conditions
# are not visible in this view.
2754 ins_mode_after_body = (t) ->
2758 if t.type is TYPE_COMMENT
2759 first = open_els[open_els.length - 1]
2760 insert_comment t, [first, first.children.length]
2762 if t.type is TYPE_DOCTYPE
2765 if t.type is TYPE_START_TAG and t.name is 'html'
2768 if t.type is TYPE_END_TAG and t.name is 'html'
2769 if flag_fragment_parsing
2772 ins_mode = ins_mode_after_after_body
2774 if t.type is TYPE_EOF
2779 ins_mode = ins_mode_in_body
2782 # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
# Tree-construction handler for the "in frameset" insertion mode.
# Visible behaviour:
#  * <frameset>: insert the element
#  * </frameset>: returns early when open_els holds a single element (fragment
#    case); when not fragment parsing and the current node is no longer a
#    frameset, the mode becomes ins_mode_after_frameset
#  * <frame>: insert the element and acknowledge its self-closing flag
#  * EOF: checks whether open_els holds exactly one element
# NOTE(review): the first branch and the statements after several conditions
# are not visible in this view.
2783 ins_mode_in_frameset = (t) ->
2787 if t.type is TYPE_COMMENT
2790 if t.type is TYPE_DOCTYPE
2793 if t.type is TYPE_START_TAG and t.name is 'html'
2796 if t.type is TYPE_START_TAG and t.name is 'frameset'
2797 insert_html_element t
2799 if t.type is TYPE_END_TAG and t.name is 'frameset'
2800 if open_els.length is 1
2802 return # fragment case
2804 if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2805 ins_mode = ins_mode_after_frameset
2807 if t.type is TYPE_START_TAG and t.name is 'frame'
2808 insert_html_element t
2810 t.acknowledge_self_closing()
2812 if t.type is TYPE_START_TAG and t.name is 'noframes'
2815 if t.type is TYPE_EOF
2816 if open_els.length isnt 1
2824 # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
# Tree-construction handler for the "after frameset" insertion mode.
# Visible branches: comment, doctype, <html>, </html> (which switches to
# ins_mode_after_after_frameset), <noframes>, and EOF.
# NOTE(review): the first branch and the bodies of most branches are not
# visible in this view.
2825 ins_mode_after_frameset = (t) ->
2829 if t.type is TYPE_COMMENT
2832 if t.type is TYPE_DOCTYPE
2835 if t.type is TYPE_START_TAG and t.name is 'html'
2838 if t.type is TYPE_END_TAG and t.name is 'html'
2839 ins_mode = ins_mode_after_after_frameset
2841 if t.type is TYPE_START_TAG and t.name is 'noframes'
2844 if t.type is TYPE_EOF
2851 # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
# Tree-construction handler for the "after after body" insertion mode.
# Comments are inserted as the last child of doc. Doctype tokens, space
# tokens and <html> start tags share one branch; EOF has its own. The final
# visible line switches back to ins_mode_in_body.
# NOTE(review): the bodies of the shared and EOF branches are not visible here.
2852 ins_mode_after_after_body = (t) ->
2853 if t.type is TYPE_COMMENT
2854 insert_comment t, [doc, doc.children.length]
2856 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2859 if t.type is TYPE_EOF
2864 ins_mode = ins_mode_in_body
2868 # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
# Tree-construction handler for the "after after frameset" insertion mode.
# Comments are inserted as the last child of doc. Doctype tokens, space
# tokens and <html> start tags share one branch; EOF and <noframes> have
# their own.
# NOTE(review): the bodies of those branches are not visible in this view.
2869 ins_mode_after_after_frameset = (t) ->
2870 if t.type is TYPE_COMMENT
2871 insert_comment t, [doc, doc.children.length]
2873 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2876 if t.type is TYPE_EOF
2879 if t.type is TYPE_START_TAG and t.name is 'noframes'
2886 # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
# Tests attribute pairs `a` of token t for the names 'color', 'face' or
# 'size' (a[0] is the attribute name). Used by in_foreign_content when it
# decides how to treat a <font> start tag.
# NOTE(review): the loop that binds `a` and the return statements are not
# visible in this view; presumably it iterates t.attrs_a and returns a boolean.
2887 has_color_face_or_size = (t) ->
2889 if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
# Hook for the end of an SVG <script> element in foreign content; called from
# in_foreign_content and in_foreign_content_other_start.
# NOTE(review): the body of this function is not visible in this view.
2892 in_foreign_content_end_script = ->
# Handles "any other start tag" while in foreign content.
# Based on the namespace of the adjusted current node: MathML attributes are
# adjusted for MathML; for SVG the tag name is mapped through svg_name_fixes
# (when an entry exists) and SVG attributes are adjusted. Foreign attributes
# are then adjusted and the element is inserted in that same namespace.
# A self-closing token is acknowledged; for a 'script' token
# in_foreign_content_end_script() is called as well.
# NOTE(review): lines between 2909 and 2913 are not visible in this view.
2896 in_foreign_content_other_start = (t) ->
2897 acn = adjusted_current_node()
2898 if acn.namespace is NS_MATHML
2899 adjust_mathml_attributes t
2900 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2901 t.name = svg_name_fixes[t.name]
2902 if acn.namespace is NS_SVG
2903 adjust_svg_attributes t
2904 adjust_foreign_attributes t
2905 insert_foreign_element t, acn.namespace
2906 if t.flag 'self-closing'
2907 if t.name is 'script'
2908 t.acknowledge_self_closing()
2909 in_foreign_content_end_script()
2913 t.acknowledge_self_closing()
# Token handler for foreign (MathML/SVG) content.
# Visible behaviour:
#  * U+0000 text is replaced by a U+FFFD character token
#  * other text clears flag_frameset_ok
#  * a fixed list of HTML start tags (plus <font> with a color/face/size
#    attribute) is treated as ordinary foreign start tags when fragment
#    parsing; otherwise a loop examines the current node until it is a MathML
#    text integration point, an HTML integration point or an HTML element
#  * any other start tag goes to in_foreign_content_other_start
#  * </script> with an SVG script as current node calls
#    in_foreign_content_end_script()
#  * other end tags compare lower-cased node names with the token name while
#    examining `node`, and call the current HTML insertion mode (ins_mode)
#    once an HTML-namespace node is met
# NOTE(review): the statements that bind and advance `node`, and the loop
# bodies, are not visible in this view.
2915 in_foreign_content = (t) ->
2916 if t.type is TYPE_TEXT and t.text is "\u0000"
2918 insert_character new_character_token "\ufffd"
2923 if t.type is TYPE_TEXT
2924 flag_frameset_ok = false
2927 if t.type is TYPE_COMMENT
2930 if t.type is TYPE_DOCTYPE
2933 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2935 if flag_fragment_parsing
2936 in_foreign_content_other_start t
2938 loop # is this safe?
2940 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
2944 if t.type is TYPE_START_TAG
2945 in_foreign_content_other_start t
2947 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2948 in_foreign_content_end_script()
2950 if t.type is TYPE_END_TAG
2953 if node.name.toLowerCase() isnt t.name
2956 if node is open_els[open_els.length - 1]
2958 if node.name.toLowerCase() is t.name
2960 el = open_els.shift()
2965 if node.namespace is NS_HTML
2967 ins_mode t # explicitly call HTML insertion mode
2970 # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
# Body of the tokenizer's data state (presumably tok_state_data; the
# definition line is not visible in this view).
# Consumes one character from txt at cur and dispatches on it. Visible
# outcomes: return a text node built from parse_character_reference(), switch
# to tok_state_tag_open, return a U+FFFD text node, return an EOF token, or
# return the character itself as a text node.
# NOTE(review): the `when` lines selecting each outcome are not visible here.
2972 switch c = txt.charAt(cur++)
2974 return new_text_node parse_character_reference()
2976 tok_state = tok_state_tag_open
2979 return new_text_node "\ufffd"
2981 return new_eof_token()
2983 return new_text_node c
2986 # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2987 # not needed: tok_state_character_reference_in_data = ->
2988 # just call parse_character_reference()
2990 # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
# Tokenizer RCDATA state.
# Consumes one character from txt at cur and dispatches on it. Visible
# outcomes: return a text node built from parse_character_reference(), switch
# to tok_state_rcdata_less_than_sign, return a U+FFFD character token, return
# an EOF token, or return the character as a character token.
# NOTE(review): the `when` lines selecting each outcome are not visible here.
2991 tok_state_rcdata = ->
2992 switch c = txt.charAt(cur++)
2994 return new_text_node parse_character_reference()
2996 tok_state = tok_state_rcdata_less_than_sign
2999 return new_character_token "\ufffd"
3001 return new_eof_token()
3003 return new_character_token c
3006 # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3007 # not needed: tok_state_character_reference_in_rcdata = ->
3008 # just call parse_character_reference()
3010 # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
# Tokenizer RAWTEXT state.
# Consumes one character from txt at cur and dispatches on it. Visible
# outcomes: switch to tok_state_rawtext_less_than_sign, return a U+FFFD
# character token, return an EOF token, or return the character as a
# character token.
# NOTE(review): the `when` lines selecting each outcome are not visible here.
3011 tok_state_rawtext = ->
3012 switch c = txt.charAt(cur++)
3014 tok_state = tok_state_rawtext_less_than_sign
3017 return new_character_token "\ufffd"
3019 return new_eof_token()
3021 return new_character_token c
3024 # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
# Tokenizer script data state.
# Consumes one character from txt at cur and dispatches on it. Visible
# outcomes: switch to tok_state_script_data_less_than_sign, return a U+FFFD
# character token, return an EOF token, or return the character as a
# character token.
# NOTE(review): the `when` lines selecting each outcome are not visible here.
3025 tok_state_script_data = ->
3026 switch c = txt.charAt(cur++)
3028 tok_state = tok_state_script_data_less_than_sign
3031 return new_character_token "\ufffd"
3033 return new_eof_token()
3035 return new_character_token c
3038 # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
# Tokenizer PLAINTEXT state.
# Consumes one character from txt at cur and dispatches on it. Visible
# outcomes: return a U+FFFD character token, return an EOF token, or return
# the character as a character token. No visible branch changes tok_state.
# NOTE(review): the `when` lines selecting each outcome are not visible here.
3039 tok_state_plaintext = ->
3040 switch c = txt.charAt(cur++)
3043 return new_character_token "\ufffd"
3045 return new_eof_token()
3047 return new_character_token c
3051 # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
# Tokenizer tag open state (entered after '<' in the data state).
# Consumes one character from txt at cur. Visible outcomes:
#  * switch to tok_state_markup_declaration_open
#  * switch to tok_state_end_tag_open
#  * start a new open-tag token in tok_cur_tag (name seeded with the
#    character, lower-cased in one branch) and switch to tok_state_tag_name
#  * start a comment token seeded with '?' and switch to tok_state_bogus_comment
#  * otherwise return to tok_state_data, step cur back by one so the character
#    is handled again, and return a text node containing '<'
# NOTE(review): the conditions selecting each outcome are not visible here.
3052 tok_state_tag_open = ->
3053 c = txt.charAt(cur++)
3055 tok_state = tok_state_markup_declaration_open
3058 tok_state = tok_state_end_tag_open
3061 tok_cur_tag = new_open_tag c.toLowerCase()
3062 tok_state = tok_state_tag_name
3065 tok_cur_tag = new_open_tag c
3066 tok_state = tok_state_tag_name
3070 tok_cur_tag = new_comment_token '?' # FIXME right?
3071 tok_state = tok_state_bogus_comment
3075 tok_state = tok_state_data
3076 cur -= 1 # we didn't parse/handle the char after <
3077 return new_text_node '<'
3079 # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
# Tokenizer end tag open state (entered after '</').
# Consumes one character from txt at cur. Visible outcomes:
#  * start a new end-tag token in tok_cur_tag (name seeded with the character,
#    lower-cased in one branch) and switch to tok_state_tag_name
#  * switch back to tok_state_data
#  * switch back to tok_state_data and return a text node containing '</'
#  * start a comment token seeded with the character and switch to
#    tok_state_bogus_comment
# NOTE(review): the conditions selecting each outcome are not visible here.
3080 tok_state_end_tag_open = ->
3081 c = txt.charAt(cur++)
3083 tok_cur_tag = new_end_tag c.toLowerCase()
3084 tok_state = tok_state_tag_name
3087 tok_cur_tag = new_end_tag c
3088 tok_state = tok_state_tag_name
3092 tok_state = tok_state_data
3096 tok_state = tok_state_data
3097 return new_text_node '</'
3100 tok_cur_tag = new_comment_token c
3101 tok_state = tok_state_bogus_comment
3104 # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
# Tokenizer tag name state.
# Consumes one character from txt at cur. Tab, newline, form feed and space
# switch to tok_state_before_attribute_name. Other visible outcomes: switch to
# tok_state_self_closing_start_tag, switch to tok_state_data, append U+FFFD to
# the tag name, or append the character (lower-cased in one branch) to
# tok_cur_tag.name.
# NOTE(review): most `when` lines, and whatever is returned when the state
# goes back to tok_state_data, are not visible in this view.
3105 tok_state_tag_name = ->
3106 switch c = txt.charAt(cur++)
3107 when "\t", "\n", "\u000c", ' '
3108 tok_state = tok_state_before_attribute_name
3110 tok_state = tok_state_self_closing_start_tag
3112 tok_state = tok_state_data
3118 tok_cur_tag.name += "\ufffd"
3121 tok_state = tok_state_data
3124 tok_cur_tag.name += c.toLowerCase()
3126 tok_cur_tag.name += c
3129 # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
# Tokenizer RCDATA less-than sign state (entered after '<' in RCDATA).
# Consumes one character from txt at cur. One branch empties temporary_buffer
# and switches to tok_state_rcdata_end_tag_open; the other returns to
# tok_state_rcdata, steps cur back by one and returns a '<' character token.
# NOTE(review): the condition separating the branches is not visible here.
3130 tok_state_rcdata_less_than_sign = ->
3131 c = txt.charAt(cur++)
3133 temporary_buffer = ''
3134 tok_state = tok_state_rcdata_end_tag_open
3137 tok_state = tok_state_rcdata
3138 cur -= 1 # reconsume the input character
3139 return new_character_token '<'
3141 # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
# Tokenizer RCDATA end tag open state (entered after '</' in RCDATA).
# Consumes one character from txt at cur. Two branches start a new end-tag
# token in tok_cur_tag (name lower-cased in the first), append the raw
# character to temporary_buffer and switch to tok_state_rcdata_end_tag_name.
# Otherwise the state returns to tok_state_rcdata, cur steps back by one and
# a single character token containing "</" is returned.
# NOTE(review): the conditions selecting each branch are not visible here.
3142 tok_state_rcdata_end_tag_open = ->
3143 c = txt.charAt(cur++)
3145 tok_cur_tag = new_end_tag c.toLowerCase()
3146 temporary_buffer += c
3147 tok_state = tok_state_rcdata_end_tag_name
3150 tok_cur_tag = new_end_tag c
3151 temporary_buffer += c
3152 tok_state = tok_state_rcdata_end_tag_name
3155 tok_state = tok_state_rcdata
3156 cur -= 1 # reconsume the input character
3157 return new_character_token "</" # fixfull separate these
3159 # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
# Returns true when token t is an end tag whose name equals the name of the
# current node (open_els[0]); used by the RCDATA/RAWTEXT/script-data end tag
# name states. Also writes the token and the open-element stack to debug_log.
3160 is_appropriate_end_tag = (t) ->
3161 # spec says to check against "the tag name of the last start tag to
3162 # have been emitted from this tokenizer", but this is only called from
3163 # the various "raw" states, so it's hopefully ok to assume that
3164 # open_els[0].name will work instead TODO: verify this after the script
3165 # data states are implemented
3166 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3167 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3169 # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
# Tokenizer RCDATA end tag name state.
# Consumes one character from txt at cur. For whitespace, and for two further
# characters whose tests are not visible here, the state changes (to
# before-attribute-name, self-closing-start-tag or data respectively) only if
# tok_cur_tag is an appropriate end tag. Name characters are appended to
# tok_cur_tag.name (lower-cased in one branch) and to temporary_buffer.
# Otherwise the state returns to tok_state_rcdata, cur steps back by one and
# '</' plus temporary_buffer is returned as one character token.
# NOTE(review): several condition and return lines are not visible here.
3170 tok_state_rcdata_end_tag_name = ->
3171 c = txt.charAt(cur++)
3172 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3173 if is_appropriate_end_tag tok_cur_tag
3174 tok_state = tok_state_before_attribute_name
3176 # else fall through to "Anything else"
3178 if is_appropriate_end_tag tok_cur_tag
3179 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3181 # else fall through to "Anything else"
3183 if is_appropriate_end_tag tok_cur_tag
3184 tok_state = tok_state_data
3186 # else fall through to "Anything else"
3188 tok_cur_tag.name += c.toLowerCase()
3189 temporary_buffer += c
3192 tok_cur_tag.name += c
3193 temporary_buffer += c
3196 tok_state = tok_state_rcdata
3197 cur -= 1 # reconsume the input character
3198 return new_character_token '</' + temporary_buffer # fixfull separate these
3200 # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
# Tokenizer RAWTEXT less-than sign state (entered after '<' in RAWTEXT).
# Consumes one character from txt at cur. One branch empties temporary_buffer
# and switches to tok_state_rawtext_end_tag_open; the other returns to
# tok_state_rawtext, steps cur back by one and returns a '<' character token.
# NOTE(review): the condition separating the branches is not visible here.
3201 tok_state_rawtext_less_than_sign = ->
3202 c = txt.charAt(cur++)
3204 temporary_buffer = ''
3205 tok_state = tok_state_rawtext_end_tag_open
3208 tok_state = tok_state_rawtext
3209 cur -= 1 # reconsume the input character
3210 return new_character_token '<'
3212 # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
# Tokenizer RAWTEXT end tag open state (entered after '</' in RAWTEXT).
# Consumes one character from txt at cur. Two branches start a new end-tag
# token in tok_cur_tag (name lower-cased in the first), append the raw
# character to temporary_buffer and switch to tok_state_rawtext_end_tag_name.
# Otherwise the state returns to tok_state_rawtext, cur steps back by one and
# a single character token containing "</" is returned.
# NOTE(review): the conditions selecting each branch are not visible here.
3213 tok_state_rawtext_end_tag_open = ->
3214 c = txt.charAt(cur++)
3216 tok_cur_tag = new_end_tag c.toLowerCase()
3217 temporary_buffer += c
3218 tok_state = tok_state_rawtext_end_tag_name
3221 tok_cur_tag = new_end_tag c
3222 temporary_buffer += c
3223 tok_state = tok_state_rawtext_end_tag_name
3226 tok_state = tok_state_rawtext
3227 cur -= 1 # reconsume the input character
3228 return new_character_token "</" # fixfull separate these
3230 # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
# Tokenizer RAWTEXT end tag name state.
# Consumes one character from txt at cur. For whitespace, and for two further
# characters whose tests are not visible here, the state changes (to
# before-attribute-name, self-closing-start-tag or data respectively) only if
# tok_cur_tag is an appropriate end tag. Name characters are appended to
# tok_cur_tag.name (lower-cased in one branch) and to temporary_buffer.
# Otherwise the state returns to tok_state_rawtext, cur steps back by one and
# '</' plus temporary_buffer is returned as one character token.
# NOTE(review): several condition and return lines are not visible here.
3231 tok_state_rawtext_end_tag_name = ->
3232 c = txt.charAt(cur++)
3233 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3234 if is_appropriate_end_tag tok_cur_tag
3235 tok_state = tok_state_before_attribute_name
3237 # else fall through to "Anything else"
3239 if is_appropriate_end_tag tok_cur_tag
3240 tok_state = tok_state_self_closing_start_tag
3242 # else fall through to "Anything else"
3244 if is_appropriate_end_tag tok_cur_tag
3245 tok_state = tok_state_data
3247 # else fall through to "Anything else"
3249 tok_cur_tag.name += c.toLowerCase()
3250 temporary_buffer += c
3253 tok_cur_tag.name += c
3254 temporary_buffer += c
3257 tok_state = tok_state_rawtext
3258 cur -= 1 # reconsume the input character
3259 return new_character_token '</' + temporary_buffer # fixfull separate these
3261 # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
# Tokenizer script data less-than sign state (entered after '<' in script data).
# Consumes one character from txt at cur. Visible outcomes:
#  * empty temporary_buffer and switch to tok_state_script_data_end_tag_open
#  * switch to tok_state_script_data_escape_start and return '<!' as one
#    character token
#  * otherwise return to tok_state_script_data, step cur back by one and
#    return a '<' character token
# NOTE(review): the conditions selecting each outcome are not visible here.
3262 tok_state_script_data_less_than_sign = ->
3263 c = txt.charAt(cur++)
3265 temporary_buffer = ''
3266 tok_state = tok_state_script_data_end_tag_open
3269 tok_state = tok_state_script_data_escape_start
3270 return new_character_token '<!' # fixfull split
3272 tok_state = tok_state_script_data
3273 cur -= 1 # Reconsume
3274 return new_character_token '<'
3276 # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
# Tokenizer script data end tag open state (entered after '</' in script data).
# Consumes one character from txt at cur. Two branches start a new end-tag
# token in tok_cur_tag (name lower-cased in the first), append the raw
# character to temporary_buffer and switch to
# tok_state_script_data_end_tag_name. Otherwise the state returns to
# tok_state_script_data, cur steps back by one and a single character token
# containing '</' is returned.
# NOTE(review): the conditions selecting each branch are not visible here.
3277 tok_state_script_data_end_tag_open = ->
3278 c = txt.charAt(cur++)
3280 tok_cur_tag = new_end_tag c.toLowerCase()
3281 temporary_buffer += c
3282 tok_state = tok_state_script_data_end_tag_name
3285 tok_cur_tag = new_end_tag c
3286 temporary_buffer += c
3287 tok_state = tok_state_script_data_end_tag_name
3290 tok_state = tok_state_script_data
3291 cur -= 1 # Reconsume
3292 return new_character_token '</'
3294 # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
# Tokenizer script data end tag name state.
# Consumes one character from txt at cur. For whitespace, and for two further
# characters whose tests are not visible here, the state changes (to
# before-attribute-name, self-closing-start-tag or data respectively) only if
# tok_cur_tag is an appropriate end tag. Name characters are appended to
# tok_cur_tag.name (lower-cased in one branch) and to temporary_buffer.
# Otherwise the state returns to tok_state_script_data, cur steps back by one
# and "</" plus temporary_buffer is returned as one character token.
# NOTE(review): several condition and return lines are not visible here.
3295 tok_state_script_data_end_tag_name = ->
3296 c = txt.charAt(cur++)
3297 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3298 if is_appropriate_end_tag tok_cur_tag
3299 tok_state = tok_state_before_attribute_name
3303 if is_appropriate_end_tag tok_cur_tag
3304 tok_state = tok_state_self_closing_start_tag
3308 if is_appropriate_end_tag tok_cur_tag
3309 tok_state = tok_state_data
3313 tok_cur_tag.name += c.toLowerCase()
3314 temporary_buffer += c
3317 tok_cur_tag.name += c
3318 temporary_buffer += c
3321 tok_state = tok_state_script_data
3322 cur -= 1 # Reconsume
3323 return new_character_token "</#{temporary_buffer}" # fixfull split
3325 # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
# Tokenizer state reached after '<!' inside script data.
# One path moves to the escape-start-dash state and emits '-'; the other
# returns to script data and un-consumes the character.
# NOTE(review): the guard for the first path is not visible in this listing
# (presumably c is '-') -- confirm.
3326 tok_state_script_data_escape_start = ->
3327 c = txt.charAt(cur++)
3329 tok_state = tok_state_script_data_escape_start_dash
3330 return new_character_token '-'
3332 tok_state = tok_state_script_data
3333 cur -= 1 # Reconsume
3336 # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
# Tokenizer state reached after '<!-' inside script data.
# One path moves to the escaped-dash-dash state and emits '-'; the other
# returns to script data and un-consumes the character.
# NOTE(review): the guard for the first path is not visible in this listing
# (presumably c is '-') -- confirm.
3337 tok_state_script_data_escape_start_dash = ->
3338 c = txt.charAt(cur++)
3340 tok_state = tok_state_script_data_escaped_dash_dash
3341 return new_character_token '-'
3343 tok_state = tok_state_script_data
3344 cur -= 1 # Reconsume
3347 # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
# Tokenizer state for text inside an escaped ("<!--") region of script data.
# Visible paths, in order:
#   * move to the escaped-dash state and emit '-'
#   * move to the escaped-less-than-sign state
#   * emit U+FFFD
#   * move to the data state and un-consume the character
#   * otherwise emit the consumed character unchanged
# NOTE(review): the guards are not visible in this listing (presumably '-',
# '<', NUL and end of input, in that order) -- confirm.
3348 tok_state_script_data_escaped = ->
3349 c = txt.charAt(cur++)
3351 tok_state = tok_state_script_data_escaped_dash
3352 return new_character_token '-'
3354 tok_state = tok_state_script_data_escaped_less_than_sign
3358 return new_character_token "\ufffd"
3360 tok_state = tok_state_data
3362 cur -= 1 # Reconsume
3365 return new_character_token c
3367 # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
# Tokenizer state reached after a single '-' inside escaped script data.
# Visible paths, in order:
#   * move to the escaped-dash-dash state and emit '-'
#   * move to the escaped-less-than-sign state
#   * return to the escaped state and emit U+FFFD
#   * move to the data state and un-consume the character
#   * otherwise return to the escaped state and emit the character
# NOTE(review): the guards are not visible in this listing (presumably '-',
# '<', NUL and end of input, in that order) -- confirm.
3368 tok_state_script_data_escaped_dash = ->
3369 c = txt.charAt(cur++)
3371 tok_state = tok_state_script_data_escaped_dash_dash
3372 return new_character_token '-'
3374 tok_state = tok_state_script_data_escaped_less_than_sign
3378 tok_state = tok_state_script_data_escaped
3379 return new_character_token "\ufffd"
3381 tok_state = tok_state_data
3383 cur -= 1 # Reconsume
3386 tok_state = tok_state_script_data_escaped
3387 return new_character_token c
3389 # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
# Tokenizer state reached after '--' inside escaped script data.
# Visible paths, in order:
#   * emit '-' (no state change is visible for this path)
#   * move to the escaped-less-than-sign state
#   * leave the escaped region: move to script data and emit '>'
#   * return to the escaped state and emit U+FFFD
#   * move to the data state and un-consume the character
#   * otherwise return to the escaped state and emit the character
# NOTE(review): the guards are not visible in this listing (presumably '-',
# '<', '>', NUL and end of input, in that order) -- confirm.
3390 tok_state_script_data_escaped_dash_dash = ->
3391 c = txt.charAt(cur++)
3393 return new_character_token '-'
3395 tok_state = tok_state_script_data_escaped_less_than_sign
3398 tok_state = tok_state_script_data
3399 return new_character_token '>'
3402 tok_state = tok_state_script_data_escaped
3403 return new_character_token "\ufffd"
3406 tok_state = tok_state_data
3407 cur -= 1 # Reconsume
3410 tok_state = tok_state_script_data_escaped
3411 return new_character_token c
3413 # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
# Tokenizer state reached after a '<' inside escaped script data.
# Visible paths, in order:
#   * reset temporary_buffer and move to the escaped end-tag-open state
#   * seed temporary_buffer with the lowercased character, move to the
#     double-escape-start state and emit '<' plus the original character
#   * same as above, but seeding temporary_buffer with the character as-is
#   * otherwise return to the escaped state, un-consume, and emit '<'
# NOTE(review): the guards are not visible in this listing (presumably '/',
# an uppercase letter and a lowercase letter) -- confirm.
3414 tok_state_script_data_escaped_less_than_sign = ->
3415 c = txt.charAt(cur++)
3417 temporary_buffer = ''
3418 tok_state = tok_state_script_data_escaped_end_tag_open
3421 temporary_buffer = c.toLowerCase() # yes, really
3422 tok_state = tok_state_script_data_double_escape_start
3423 return new_character_token "<#{c}" # fixfull split
3425 temporary_buffer = c
3426 tok_state = tok_state_script_data_double_escape_start
3427 return new_character_token "<#{c}" # fixfull split
3429 tok_state = tok_state_script_data_escaped
3430 cur -= 1 # Reconsume
3431 return new_character_token '<'
3433 # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
# Tokenizer state reached after '</' inside escaped script data.
# Two visible paths start a new end tag token from the consumed character
# (the first lowercases it for the tag name), append the raw character to
# temporary_buffer and move to the escaped end-tag-name state.
# The fallback returns to the escaped state, un-consumes the character and
# emits the text '</'.
# NOTE(review): the letter tests guarding the first two paths are not
# visible in this listing -- confirm against the full file.
3434 tok_state_script_data_escaped_end_tag_open = ->
3435 c = txt.charAt(cur++)
3437 tok_cur_tag = new_end_tag c.toLowerCase()
3438 temporary_buffer += c
3439 tok_state = tok_state_script_data_escaped_end_tag_name
3442 tok_cur_tag = new_end_tag c
3443 temporary_buffer += c
3444 tok_state = tok_state_script_data_escaped_end_tag_name
3447 tok_state = tok_state_script_data_escaped
3448 cur -= 1 # Reconsume
3449 return new_character_token '</' # fixfull split
3451 # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
# Tokenizer state that accumulates the name of a possible end tag inside
# escaped script data.
# Three guarded paths call is_appropriate_end_tag on tok_cur_tag and, when it
# passes, move to before-attribute-name, self-closing-start-tag or data.
# Letter paths extend tok_cur_tag.name and temporary_buffer.
# The fallback returns to the escaped state, un-consumes the character and
# emits "</" followed by everything collected in temporary_buffer.
# NOTE(review): only the whitespace guard is visible in this listing; the
# guards for the other paths (presumably '/', '>' and letter tests) are not.
# NOTE(review): unlike tok_state_script_data_end_tag_name, both letter paths
# here lowercase the character before appending it to temporary_buffer, so
# the fallback text is emitted lowercased. The linked spec section appends
# the current input character unchanged -- confirm whether this is intended.
3452 tok_state_script_data_escaped_end_tag_name = ->
3453 c = txt.charAt(cur++)
3454 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3455 if is_appropriate_end_tag tok_cur_tag
3456 tok_state = tok_state_before_attribute_name
3460 if is_appropriate_end_tag tok_cur_tag
3461 tok_state = tok_state_self_closing_start_tag
3465 if is_appropriate_end_tag tok_cur_tag
3466 tok_state = tok_state_data
3470 tok_cur_tag.name += c.toLowerCase()
3471 temporary_buffer += c.toLowerCase()
3474 tok_cur_tag.name += c
3475 temporary_buffer += c.toLowerCase()
3478 tok_state = tok_state_script_data_escaped
3479 cur -= 1 # Reconsume
3480 return new_character_token "</#{temporary_buffer}" # fixfull split
3482 # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
# Tokenizer state that decides whether a nested "<script" inside escaped
# script data starts a double-escaped region.
# On whitespace, '/' or '>': if temporary_buffer spells 'script' move to the
# double-escaped state, otherwise back to the escaped state; the character
# itself is emitted either way.
# Letter paths append to temporary_buffer (lowercased in the first one) and
# emit the original character.
# The fallback returns to the escaped state and un-consumes the character.
# NOTE(review): the letter guards and the `else` between the two state
# assignments are not visible in this listing -- confirm.
3483 tok_state_script_data_double_escape_start = ->
3484 c = txt.charAt(cur++)
3485 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3486 if temporary_buffer is 'script'
3487 tok_state = tok_state_script_data_double_escaped
3489 tok_state = tok_state_script_data_escaped
3490 return new_character_token c
3492 temporary_buffer += c.toLowerCase() # yes, really lowercase
3493 return new_character_token c
3495 temporary_buffer += c
3496 return new_character_token c
3498 tok_state = tok_state_script_data_escaped
3499 cur -= 1 # Reconsume
3502 # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
# Tokenizer state for text inside a double-escaped region of script data.
# Visible paths, in order:
#   * move to the double-escaped-dash state and emit '-'
#   * move to the double-escaped-less-than-sign state and emit '<'
#   * emit U+FFFD
#   * move to the data state and un-consume the character
#   * otherwise emit the consumed character unchanged
# NOTE(review): the guards are not visible in this listing (presumably '-',
# '<', NUL and end of input, in that order) -- confirm.
3503 tok_state_script_data_double_escaped = ->
3504 c = txt.charAt(cur++)
3506 tok_state = tok_state_script_data_double_escaped_dash
3507 return new_character_token '-'
3509 tok_state = tok_state_script_data_double_escaped_less_than_sign
3510 return new_character_token '<'
3513 return new_character_token "\ufffd"
3516 tok_state = tok_state_data
3517 cur -= 1 # Reconsume
3520 return new_character_token c
3522 # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
# Tokenizer state reached after a single '-' inside double-escaped script data.
# Visible paths, in order:
#   * move to the double-escaped-dash-dash state and emit '-'
#   * move to the double-escaped-less-than-sign state and emit '<'
#   * return to the double-escaped state and emit U+FFFD
#   * move to the data state and un-consume the character
#   * otherwise return to the double-escaped state and emit the character
# NOTE(review): the guards are not visible in this listing (presumably '-',
# '<', NUL and end of input, in that order) -- confirm.
3523 tok_state_script_data_double_escaped_dash = ->
3524 c = txt.charAt(cur++)
3526 tok_state = tok_state_script_data_double_escaped_dash_dash
3527 return new_character_token '-'
3529 tok_state = tok_state_script_data_double_escaped_less_than_sign
3530 return new_character_token '<'
3533 tok_state = tok_state_script_data_double_escaped
3534 return new_character_token "\ufffd"
3537 tok_state = tok_state_data
3538 cur -= 1 # Reconsume
3541 tok_state = tok_state_script_data_double_escaped
3542 return new_character_token c
3544 # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
# Tokenizer state reached after '--' inside double-escaped script data.
# Visible paths, in order:
#   * emit '-' (no state change is visible for this path)
#   * move to the double-escaped-less-than-sign state and emit '<'
#   * leave the escaped region: move to script data and emit '>'
#   * return to the double-escaped state and emit U+FFFD
#   * move to the data state and un-consume the character
#   * otherwise return to the double-escaped state and emit the character
# NOTE(review): the guards are not visible in this listing (presumably '-',
# '<', '>', NUL and end of input, in that order) -- confirm.
3545 tok_state_script_data_double_escaped_dash_dash = ->
3546 c = txt.charAt(cur++)
3548 return new_character_token '-'
3550 tok_state = tok_state_script_data_double_escaped_less_than_sign
3551 return new_character_token '<'
3553 tok_state = tok_state_script_data
3554 return new_character_token '>'
3557 tok_state = tok_state_script_data_double_escaped
3558 return new_character_token "\ufffd"
3561 tok_state = tok_state_data
3562 cur -= 1 # Reconsume
3565 tok_state = tok_state_script_data_double_escaped
3566 return new_character_token c
3568 # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
# Tokenizer state reached after a '<' inside double-escaped script data.
# One path resets temporary_buffer, moves to the double-escape-end state and
# emits '/'; the other returns to the double-escaped state and un-consumes
# the character.
# NOTE(review): the guard for the first path is not visible in this listing
# (presumably c is '/') -- confirm.
3569 tok_state_script_data_double_escaped_less_than_sign = ->
3570 c = txt.charAt(cur++)
3572 temporary_buffer = ''
3573 tok_state = tok_state_script_data_double_escape_end
3574 return new_character_token '/'
3576 tok_state = tok_state_script_data_double_escaped
3577 cur -= 1 # Reconsume
3580 # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
# Tokenizer state that decides whether a nested "</script" ends the
# double-escaped region.
# On whitespace, '/' or '>': if temporary_buffer spells 'script' drop back to
# the (single) escaped state, otherwise stay double-escaped; the character
# itself is emitted either way.
# Letter paths append to temporary_buffer (lowercased in the first one) and
# emit the original character.
# The fallback returns to the double-escaped state and un-consumes the
# character.
# NOTE(review): the letter guards and the `else` between the two state
# assignments are not visible in this listing -- confirm.
3581 tok_state_script_data_double_escape_end = ->
3582 c = txt.charAt(cur++)
3583 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3584 if temporary_buffer is 'script'
3585 tok_state = tok_state_script_data_escaped
3587 tok_state = tok_state_script_data_double_escaped
3588 return new_character_token c
3590 temporary_buffer += c.toLowerCase() # yes, really lowercase
3591 return new_character_token c
3593 temporary_buffer += c
3594 return new_character_token c
3596 tok_state = tok_state_script_data_double_escaped
3597 cur -= 1 # Reconsume
3600 # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
# Tokenizer state between a tag name (or a previous attribute) and the next
# attribute name.
# Switches on the consumed character. Visible outcomes: whitespace case,
# moves to self-closing-start-tag or to data, and three ways of choosing the
# first character of attr_name (U+FFFD, or the lowercased character).
# A new [name, value] pair is then pushed onto the FRONT of
# tok_cur_tag.attrs_a (unshift), so index 0 is the attribute being built,
# and the tokenizer moves to the attribute-name state.
# NOTE(review): several `when` labels and the statements that follow them
# are not visible in this listing -- confirm against the full file.
3601 tok_state_before_attribute_name = ->
3603 switch c = txt.charAt(cur++)
3604 when "\t", "\n", "\u000c", ' '
3607 tok_state = tok_state_self_closing_start_tag
3610 tok_state = tok_state_data
3616 attr_name = "\ufffd"
3617 when '"', "'", '<', '='
3622 tok_state = tok_state_data
3625 attr_name = c.toLowerCase()
3629 tok_cur_tag.attrs_a.unshift [attr_name, '']
3630 tok_state = tok_state_attribute_name
3633 # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
# Tokenizer state that accumulates an attribute name.
# The name being built is tok_cur_tag.attrs_a[0][0] (first element of the
# front-most attribute pair).
# Visible outcomes: whitespace moves to after-attribute-name; other paths
# move to self-closing-start-tag, before-attribute-value or data; the rest
# append U+FFFD, the character, or its lowercase form to the name.
# NOTE(review): most `when` labels are not visible in this listing
# (presumably '/', '=', '>', NUL, quote/'<' characters, end of input and
# uppercase letters) -- confirm.
3634 tok_state_attribute_name = ->
3635 switch c = txt.charAt(cur++)
3636 when "\t", "\n", "\u000c", ' '
3637 tok_state = tok_state_after_attribute_name
3639 tok_state = tok_state_self_closing_start_tag
3641 tok_state = tok_state_before_attribute_value
3643 tok_state = tok_state_data
3649 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3652 tok_cur_tag.attrs_a[0][0] += c
3655 tok_state = tok_state_data
3658 tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3660 tok_cur_tag.attrs_a[0][0] += c
3663 # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
# Tokenizer state after an attribute name and any trailing whitespace.
# Visible outcomes: moves to self-closing-start-tag, before-attribute-value
# or data; or starts a NEW attribute by unshifting a [name, ''] pair onto
# tok_cur_tag.attrs_a (name is the lowercased character, U+FFFD, or the
# character as-is) and moving to the attribute-name state.
# One path moves to data and un-consumes the character.
# NOTE(review): apart from the whitespace test and the quote/'<' test, the
# guards are not visible in this listing (presumably '/', '=', '>',
# uppercase letter, NUL and end of input) -- confirm.
3664 tok_state_after_attribute_name = ->
3665 c = txt.charAt(cur++)
3666 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3669 tok_state = tok_state_self_closing_start_tag
3672 tok_state = tok_state_before_attribute_value
3675 tok_state = tok_state_data
3678 tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3679 tok_state = tok_state_attribute_name
3683 tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3684 tok_state = tok_state_attribute_name
3688 tok_state = tok_state_data
3689 cur -= 1 # reconsume
3691 if c is '"' or c is "'" or c is '<'
3693 # fall through to Anything else
3695 tok_cur_tag.attrs_a.unshift [c, '']
3696 tok_state = tok_state_attribute_name
3698 # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
# Tokenizer state after the '=' of an attribute, before its value.
# The value being built is tok_cur_tag.attrs_a[0][1].
# Visible outcomes: whitespace case; moves to the double-quoted,
# single-quoted or unquoted value states; appends U+FFFD or the character
# to the value before moving to the unquoted state; or moves to data.
# NOTE(review): apart from whitespace, the `when` labels are not visible in
# this listing (presumably '"', '&', "'", NUL, '>', end of input and the
# '<' / '=' / backtick group) -- confirm.
3699 tok_state_before_attribute_value = ->
3700 switch c = txt.charAt(cur++)
3701 when "\t", "\n", "\u000c", ' '
3704 tok_state = tok_state_attribute_value_double_quoted
3706 tok_state = tok_state_attribute_value_unquoted
3709 tok_state = tok_state_attribute_value_single_quoted
3712 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3713 tok_state = tok_state_attribute_value_unquoted
3716 tok_state = tok_state_data
3722 tok_state = tok_state_data
3724 tok_cur_tag.attrs_a[0][1] += c
3725 tok_state = tok_state_attribute_value_unquoted
3728 # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
# Tokenizer state inside a double-quoted attribute value.
# The value being built is tok_cur_tag.attrs_a[0][1].
# Visible outcomes: move to after-attribute-value-quoted; append the result
# of parse_character_reference (called with '"' as the extra allowed
# character and in_attr = true); append U+FFFD; move to data; or append the
# character itself.
# NOTE(review): the `when` labels are not visible in this listing
# (presumably '"', '&', NUL and end of input) -- confirm.
3729 tok_state_attribute_value_double_quoted = ->
3730 switch c = txt.charAt(cur++)
3732 tok_state = tok_state_after_attribute_value_quoted
3734 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3737 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3740 tok_state = tok_state_data
3742 tok_cur_tag.attrs_a[0][1] += c
3745 # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
# Tokenizer state inside a single-quoted attribute value.
# The value being built is tok_cur_tag.attrs_a[0][1].
# Visible outcomes: move to after-attribute-value-quoted; append the result
# of parse_character_reference (called with "'" as the extra allowed
# character and in_attr = true); append U+FFFD; move to data; or append the
# character itself.
# NOTE(review): the `when` labels are not visible in this listing
# (presumably "'", '&', NUL and end of input) -- confirm.
3746 tok_state_attribute_value_single_quoted = ->
3747 switch c = txt.charAt(cur++)
3749 tok_state = tok_state_after_attribute_value_quoted
3751 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3754 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3757 tok_state = tok_state_data
3759 tok_cur_tag.attrs_a[0][1] += c
3762 # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
# Tokenizer state inside an unquoted attribute value.
# The value being built is tok_cur_tag.attrs_a[0][1].
# Visible outcomes: whitespace ends the value and moves to
# before-attribute-name; one path appends the result of
# parse_character_reference (called with '>' as the extra allowed character
# and in_attr = true); two paths move to data; one appends U+FFFD; the
# default appends the character itself.
# NOTE(review): apart from whitespace, the `when` labels are not visible in
# this listing (presumably '&', '>', NUL and end of input) -- confirm.
3763 tok_state_attribute_value_unquoted = ->
3764 switch c = txt.charAt(cur++)
3765 when "\t", "\n", "\u000c", ' '
3766 tok_state = tok_state_before_attribute_name
3768 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3770 tok_state = tok_state_data
3775 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3778 tok_state = tok_state_data
3780 # Parse Error if ', <, = or ` (backtick)
3781 tok_cur_tag.attrs_a[0][1] += c
3784 # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
# Tokenizer state immediately after the closing quote of an attribute value.
# Visible outcomes: whitespace moves to before-attribute-name; other paths
# move to self-closing-start-tag or data; the last path moves to
# before-attribute-name and steps cur back so the character is processed
# again there.
# NOTE(review): apart from whitespace, the `when` labels are not visible in
# this listing (presumably '/', '>' and end of input) -- confirm.
3785 tok_state_after_attribute_value_quoted = ->
3786 switch c = txt.charAt(cur++)
3787 when "\t", "\n", "\u000c", ' '
3788 tok_state = tok_state_before_attribute_name
3790 tok_state = tok_state_self_closing_start_tag
3792 tok_state = tok_state_data
3798 tok_state = tok_state_data
3801 tok_state = tok_state_before_attribute_name
3802 cur -= 1 # we didn't handle that char
3805 # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
# Tokenizer state reached after a '/' inside a tag.
# Visible outcomes: set the 'self-closing' flag on tok_cur_tag and move to
# data; move to data and un-consume the character; or move to
# before-attribute-name and un-consume the character.
# NOTE(review): the guards are not visible in this listing (presumably '>'
# and end of input for the first two paths) -- confirm.
3806 tok_state_self_closing_start_tag = ->
3807 c = txt.charAt(cur++)
3809 tok_cur_tag.flag 'self-closing', true
3810 tok_state = tok_state_data
3814 tok_state = tok_state_data
3815 cur -= 1 # Reconsume
3819 tok_state = tok_state_before_attribute_name
3820 cur -= 1 # Reconsume
3823 # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3824 # WARNING: put a comment token in tok_cur_tag before setting this state
# Tokenizer state that swallows everything up to the next '>' into the
# comment token already stored in tok_cur_tag.
# Looks for the next '>' at or after cur; the comment text is either the
# rest of txt or the slice up to (not including) that '>'.
# NUL characters in the slice are replaced with U+FFFD, the result is
# appended to tok_cur_tag.text, and the tokenizer returns to the data state.
# NOTE(review): the test on next_gt, the updates to cur and the return
# statement are not visible in this listing -- confirm against the full file.
3825 tok_state_bogus_comment = ->
3826 next_gt = txt.indexOf '>', cur
3828 val = txt.substr cur
3831 val = txt.substr cur, (next_gt - cur)
3833 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3834 tok_cur_tag.text += val
3835 tok_state = tok_state_data
3838 # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
# Tokenizer state reached after '<!': decides which construct follows.
# Checks the upcoming text at cur, in order:
#   * '--'       -> start an empty comment token, go to comment-start
#   * 'doctype'  (7 chars, case-insensitive) -> go to the doctype state
#   * '[CDATA['  (case-sensitive), only when there is an adjusted current
#                node whose namespace is not NS_HTML -> go to cdata-section
#   * otherwise  -> start an empty comment token, go to bogus-comment
# NOTE(review): the statements that advance cur past the matched text and
# the return statements are not visible in this listing -- confirm.
3839 tok_state_markup_declaration_open = ->
3840 if txt.substr(cur, 2) is '--'
3842 tok_cur_tag = new_comment_token ''
3843 tok_state = tok_state_comment_start
3845 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3847 tok_state = tok_state_doctype
3849 acn = adjusted_current_node()
3850 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3852 tok_state = tok_state_cdata_section
3856 tok_cur_tag = new_comment_token ''
3857 tok_state = tok_state_bogus_comment
3860 # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
# Tokenizer state right after '<!--'.
# Visible outcomes: move to comment-start-dash; move to the comment state
# and return a U+FFFD character token; move to data (twice, the second time
# un-consuming the character); or append the character to tok_cur_tag.text
# and move to the comment state.
# NOTE(review): the `when` labels are not visible in this listing
# (presumably '-', NUL, '>' and end of input) -- confirm.
# NOTE(review): the U+FFFD path returns a character token rather than
# appending to tok_cur_tag.text, unlike the matching path in
# tok_state_comment_start_dash; the linked spec section appends to the
# comment data -- confirm whether this is intended.
3861 tok_state_comment_start = ->
3862 switch c = txt.charAt(cur++)
3864 tok_state = tok_state_comment_start_dash
3867 tok_state = tok_state_comment
3868 return new_character_token "\ufffd"
3871 tok_state = tok_state_data
3875 tok_state = tok_state_data
3876 cur -= 1 # Reconsume
3879 tok_cur_tag.text += c
3880 tok_state = tok_state_comment
3883 # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
# Tokenizer state right after '<!---' (comment opener plus one dash).
# Visible outcomes: move to comment-end; append '-' plus U+FFFD to
# tok_cur_tag.text and move to the comment state; move to data (twice, the
# second time un-consuming the character); or append '-' plus the character
# and move to the comment state.
# NOTE(review): the `when` labels are not visible in this listing
# (presumably '-', NUL, '>' and end of input) -- confirm.
3884 tok_state_comment_start_dash = ->
3885 switch c = txt.charAt(cur++)
3887 tok_state = tok_state_comment_end
3890 tok_cur_tag.text += "-\ufffd"
3891 tok_state = tok_state_comment
3894 tok_state = tok_state_data
3898 tok_state = tok_state_data
3899 cur -= 1 # Reconsume
3902 tok_cur_tag.text += "-#{c}"
3903 tok_state = tok_state_comment
3906 # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
# Tokenizer state for the body of a comment.
# Visible outcomes: move to comment-end-dash; append U+FFFD to
# tok_cur_tag.text; move to data and un-consume the character; or append
# the character to tok_cur_tag.text.
# NOTE(review): the `when` labels are not visible in this listing
# (presumably '-', NUL and end of input) -- confirm.
3907 tok_state_comment = ->
3908 switch c = txt.charAt(cur++)
3910 tok_state = tok_state_comment_end_dash
3913 tok_cur_tag.text += "\ufffd"
3916 tok_state = tok_state_data
3917 cur -= 1 # Reconsume
3920 tok_cur_tag.text += c
3923 # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
# Tokenizer state after a single '-' inside a comment body.
# Visible outcomes: move to comment-end; append '-' plus U+FFFD to
# tok_cur_tag.text and return to the comment state; move to data and
# un-consume the character; or append '-' plus the character and return to
# the comment state (the pending dash turned out to be ordinary text).
# NOTE(review): the `when` labels are not visible in this listing
# (presumably '-', NUL and end of input) -- confirm.
3924 tok_state_comment_end_dash = ->
3925 switch c = txt.charAt(cur++)
3927 tok_state = tok_state_comment_end
3930 tok_cur_tag.text += "-\ufffd"
3931 tok_state = tok_state_comment
3934 tok_state = tok_state_data
3935 cur -= 1 # Reconsume
3938 tok_cur_tag.text += "-#{c}"
3939 tok_state = tok_state_comment
3942 # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
# Tokenizer state after '--' inside a comment body.
# Visible outcomes: move to data; append '--' plus U+FFFD to
# tok_cur_tag.text and return to the comment state; move to
# comment-end-bang; append a single '-' (no state change visible); move to
# data and un-consume the character; or append '--' plus the character and
# return to the comment state.
# NOTE(review): the `when` labels are not visible in this listing
# (presumably '>', NUL, '!', '-' and end of input) -- confirm.
3943 tok_state_comment_end = ->
3944 switch c = txt.charAt(cur++)
3946 tok_state = tok_state_data
3950 tok_cur_tag.text += "--\ufffd"
3951 tok_state = tok_state_comment
3954 tok_state = tok_state_comment_end_bang
3957 tok_cur_tag.text += '-'
3960 tok_state = tok_state_data
3961 cur -= 1 # Reconsume
3965 tok_cur_tag.text += "--#{c}"
3966 tok_state = tok_state_comment
3969 # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
# Tokenizer state after '--!' inside a comment body.
# Visible outcomes: append '--!' plus the character and move to
# comment-end-dash; move to data; append '--!' plus U+FFFD and return to the
# comment state; move to data and un-consume the character; or append '--!'
# plus the character and return to the comment state.
# NOTE(review): the `when` labels are not visible in this listing
# (presumably '-', '>', NUL and end of input) -- confirm.
# NOTE(review): if the first path is the '-' case, it appends the dash to
# tok_cur_tag.text here AND tok_state_comment_end_dash prepends another '-'
# when it falls back to text, which would duplicate the dash. The linked
# spec section appends only '--!' before switching -- confirm.
3970 tok_state_comment_end_bang = ->
3971 switch c = txt.charAt(cur++)
3973 tok_cur_tag.text += "--!#{c}"
3974 tok_state = tok_state_comment_end_dash
3976 tok_state = tok_state_data
3980 tok_cur_tag.text += "--!\ufffd"
3981 tok_state = tok_state_comment
3984 tok_state = tok_state_data
3985 cur -= 1 # Reconsume
3988 tok_cur_tag.text += "--!#{c}"
3989 tok_state = tok_state_comment
3992 # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
# Tokenizer state right after the 'doctype' keyword.
# Whitespace moves to before-doctype-name.
# One path moves to data, builds a fresh doctype token with an empty name,
# sets its 'force-quirks' flag and un-consumes the character.
# The last path moves to before-doctype-name and un-consumes the character.
# NOTE(review): the label of the second path (presumably end of input) and
# the return of `el` are not visible in this listing -- confirm.
3993 tok_state_doctype = ->
3994 switch c = txt.charAt(cur++)
3995 when "\t", "\u000a", "\u000c", ' '
3996 tok_state = tok_state_before_doctype_name
3999 tok_state = tok_state_data
4000 el = new_doctype_token ''
4001 el.flag 'force-quirks', true
4002 cur -= 1 # Reconsume
4006 tok_state = tok_state_before_doctype_name
4007 cur -= 1 # Reconsume
4010 # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
# Tokenizer state before the name in a doctype declaration.
# Visible outcomes: whitespace test; start a doctype token in tok_cur_tag
# named with the lowercased character, U+FFFD, or the character as-is, then
# move to doctype-name; or build a separate empty-named doctype token `el`
# with 'force-quirks' set and move to data (twice, the second time
# un-consuming the character).
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably uppercase letter, NUL, '>' and end of input), nor is
# what happens to `el` afterwards -- confirm.
4011 tok_state_before_doctype_name = ->
4012 c = txt.charAt(cur++)
4013 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4016 tok_cur_tag = new_doctype_token c.toLowerCase()
4017 tok_state = tok_state_doctype_name
4021 tok_cur_tag = new_doctype_token "\ufffd"
4022 tok_state = tok_state_doctype_name
4026 el = new_doctype_token ''
4027 el.flag 'force-quirks', true
4028 tok_state = tok_state_data
4032 tok_state = tok_state_data
4033 el = new_doctype_token ''
4034 el.flag 'force-quirks', true
4035 cur -= 1 # Reconsume
4038 tok_cur_tag = new_doctype_token c
4039 tok_state = tok_state_doctype_name
4042 # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
# Tokenizer state that accumulates the doctype name in tok_cur_tag.name.
# Whitespace moves to after-doctype-name.
# Other visible outcomes: move to data; append the lowercased character;
# append U+FFFD; move to data with 'force-quirks' set and the character
# un-consumed; or append the character as-is.
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '>', uppercase letter, NUL and end of input) --
# confirm.
4043 tok_state_doctype_name = ->
4044 c = txt.charAt(cur++)
4045 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4046 tok_state = tok_state_after_doctype_name
4049 tok_state = tok_state_data
4052 tok_cur_tag.name += c.toLowerCase()
4056 tok_cur_tag.name += "\ufffd"
4060 tok_state = tok_state_data
4061 tok_cur_tag.flag 'force-quirks', true
4062 cur -= 1 # Reconsume
4065 tok_cur_tag.name += c
4068 # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
# Tokenizer state after the doctype name.
# Visible outcomes: whitespace test; move to data; move to data with
# 'force-quirks' set and the character un-consumed.
# Otherwise the six characters starting at the one just consumed (cur - 1)
# are compared case-insensitively with 'public' and 'system' to pick the
# matching keyword state; anything else sets 'force-quirks' and moves to
# bogus-doctype.
# NOTE(review): the '>' / end-of-input guards and the statements that
# advance cur past the keyword are not visible in this listing -- confirm.
4069 tok_state_after_doctype_name = ->
4070 c = txt.charAt(cur++)
4071 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4074 tok_state = tok_state_data
4078 tok_state = tok_state_data
4079 tok_cur_tag.flag 'force-quirks', true
4080 cur -= 1 # Reconsume
4083 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4085 tok_state = tok_state_after_doctype_public_keyword
4087 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4089 tok_state = tok_state_after_doctype_system_keyword
4092 tok_cur_tag.flag 'force-quirks', true
4093 tok_state = tok_state_bogus_doctype
4096 # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
# Tokenizer state right after the PUBLIC keyword of a doctype.
# Whitespace moves to before-doctype-public-identifier.
# Two paths initialise tok_cur_tag.public_identifier to '' and move to the
# double-quoted / single-quoted public identifier states.
# Two paths set 'force-quirks' and move to data (the second un-consumes the
# character); the fallback sets 'force-quirks' and moves to bogus-doctype.
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '"', "'", '>' and end of input) -- confirm.
4097 tok_state_after_doctype_public_keyword = ->
4098 c = txt.charAt(cur++)
4099 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4100 tok_state = tok_state_before_doctype_public_identifier
4104 tok_cur_tag.public_identifier = ''
4105 tok_state = tok_state_doctype_public_identifier_double_quoted
4109 tok_cur_tag.public_identifier = ''
4110 tok_state = tok_state_doctype_public_identifier_single_quoted
4114 tok_cur_tag.flag 'force-quirks', true
4115 tok_state = tok_state_data
4119 tok_state = tok_state_data
4120 tok_cur_tag.flag 'force-quirks', true
4121 cur -= 1 # Reconsume
4125 tok_cur_tag.flag 'force-quirks', true
4126 tok_state = tok_state_bogus_doctype
4129 # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
# Tokenizer state after PUBLIC plus whitespace, before the public identifier.
# Visible outcomes: whitespace test; initialise
# tok_cur_tag.public_identifier to '' and move to the double-quoted /
# single-quoted public identifier states; set 'force-quirks' and move to
# data (twice, the second time un-consuming the character); or set
# 'force-quirks' and move to bogus-doctype.
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '"', "'", '>' and end of input) -- confirm.
4130 tok_state_before_doctype_public_identifier = ->
4131 c = txt.charAt(cur++)
4132 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4136 tok_cur_tag.public_identifier = ''
4137 tok_state = tok_state_doctype_public_identifier_double_quoted
4141 tok_cur_tag.public_identifier = ''
4142 tok_state = tok_state_doctype_public_identifier_single_quoted
4146 tok_cur_tag.flag 'force-quirks', true
4147 tok_state = tok_state_data
4151 tok_state = tok_state_data
4152 tok_cur_tag.flag 'force-quirks', true
4153 cur -= 1 # Reconsume
4157 tok_cur_tag.flag 'force-quirks', true
4158 tok_state = tok_state_bogus_doctype
4162 # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
# Tokenizer state inside a double-quoted doctype public identifier.
# Visible outcomes: move to after-doctype-public-identifier; append U+FFFD
# to tok_cur_tag.public_identifier; set 'force-quirks' and move to data
# (twice, the second time un-consuming the character); or append the
# character to the identifier.
# NOTE(review): the guards are not visible in this listing (presumably '"',
# NUL, '>' and end of input) -- confirm.
4163 tok_state_doctype_public_identifier_double_quoted = ->
4164 c = txt.charAt(cur++)
4166 tok_state = tok_state_after_doctype_public_identifier
4170 tok_cur_tag.public_identifier += "\ufffd"
4174 tok_cur_tag.flag 'force-quirks', true
4175 tok_state = tok_state_data
4179 tok_state = tok_state_data
4180 tok_cur_tag.flag 'force-quirks', true
4181 cur -= 1 # Reconsume
4184 tok_cur_tag.public_identifier += c
4187 # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
# Tokenizer state inside a single-quoted doctype public identifier.
# Visible outcomes: move to after-doctype-public-identifier; append U+FFFD
# to tok_cur_tag.public_identifier; set 'force-quirks' and move to data
# (twice, the second time un-consuming the character); or append the
# character to the identifier.
# NOTE(review): the guards are not visible in this listing (presumably "'",
# NUL, '>' and end of input) -- confirm.
4188 tok_state_doctype_public_identifier_single_quoted = ->
4189 c = txt.charAt(cur++)
4191 tok_state = tok_state_after_doctype_public_identifier
4195 tok_cur_tag.public_identifier += "\ufffd"
4199 tok_cur_tag.flag 'force-quirks', true
4200 tok_state = tok_state_data
4204 tok_state = tok_state_data
4205 tok_cur_tag.flag 'force-quirks', true
4206 cur -= 1 # Reconsume
4209 tok_cur_tag.public_identifier += c
4212 # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
# Tokenizer state right after the closing quote of the public identifier.
# Whitespace moves to between-doctype-public-and-system-identifiers.
# Other visible outcomes: move to data; initialise
# tok_cur_tag.system_identifier to '' and move to the double-quoted /
# single-quoted system identifier states; move to data with 'force-quirks'
# set and the character un-consumed; or set 'force-quirks' and move to
# bogus-doctype.
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '>', '"', "'" and end of input) -- confirm.
4213 tok_state_after_doctype_public_identifier = ->
4214 c = txt.charAt(cur++)
4215 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4216 tok_state = tok_state_between_doctype_public_and_system_identifiers
4219 tok_state = tok_state_data
4223 tok_cur_tag.system_identifier = ''
4224 tok_state = tok_state_doctype_system_identifier_double_quoted
4228 tok_cur_tag.system_identifier = ''
4229 tok_state = tok_state_doctype_system_identifier_single_quoted
4233 tok_state = tok_state_data
4234 tok_cur_tag.flag 'force-quirks', true
4235 cur -= 1 # Reconsume
4239 tok_cur_tag.flag 'force-quirks', true
4240 tok_state = tok_state_bogus_doctype
4243 # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
# Tokenizer state in the whitespace between the public and system
# identifiers of a doctype.
# Visible outcomes: whitespace test; move to data; initialise
# tok_cur_tag.system_identifier to '' and move to the double-quoted /
# single-quoted system identifier states; move to data with 'force-quirks'
# set and the character un-consumed; or set 'force-quirks' and move to
# bogus-doctype.
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '>', '"', "'" and end of input) -- confirm.
4244 tok_state_between_doctype_public_and_system_identifiers = ->
4245 c = txt.charAt(cur++)
4246 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4249 tok_state = tok_state_data
4253 tok_cur_tag.system_identifier = ''
4254 tok_state = tok_state_doctype_system_identifier_double_quoted
4258 tok_cur_tag.system_identifier = ''
4259 tok_state = tok_state_doctype_system_identifier_single_quoted
4263 tok_state = tok_state_data
4264 tok_cur_tag.flag 'force-quirks', true
4265 cur -= 1 # Reconsume
4269 tok_cur_tag.flag 'force-quirks', true
4270 tok_state = tok_state_bogus_doctype
4273 # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
# Tokenizer state right after the SYSTEM keyword of a doctype.
# Whitespace moves to before-doctype-system-identifier.
# Two paths initialise tok_cur_tag.system_identifier to '' and move to the
# double-quoted / single-quoted system identifier states.
# Two paths set 'force-quirks' and move to data (the second un-consumes the
# character); the fallback sets 'force-quirks' and moves to bogus-doctype.
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '"', "'", '>' and end of input) -- confirm.
4274 tok_state_after_doctype_system_keyword = ->
4275 c = txt.charAt(cur++)
4276 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4277 tok_state = tok_state_before_doctype_system_identifier
4281 tok_cur_tag.system_identifier = ''
4282 tok_state = tok_state_doctype_system_identifier_double_quoted
4286 tok_cur_tag.system_identifier = ''
4287 tok_state = tok_state_doctype_system_identifier_single_quoted
4291 tok_cur_tag.flag 'force-quirks', true
4292 tok_state = tok_state_data
4296 tok_state = tok_state_data
4297 tok_cur_tag.flag 'force-quirks', true
4298 cur -= 1 # Reconsume
4302 tok_cur_tag.flag 'force-quirks', true
4303 tok_state = tok_state_bogus_doctype
4306 # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
# Tokenizer state after SYSTEM plus whitespace, before the system identifier.
# Visible outcomes: whitespace test; initialise
# tok_cur_tag.system_identifier to '' and move to the double-quoted /
# single-quoted system identifier states; set 'force-quirks' and move to
# data (twice, the second time un-consuming the character); or set
# 'force-quirks' and move to bogus-doctype.
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '"', "'", '>' and end of input) -- confirm.
4307 tok_state_before_doctype_system_identifier = ->
4308 c = txt.charAt(cur++)
4309 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4312 tok_cur_tag.system_identifier = ''
4313 tok_state = tok_state_doctype_system_identifier_double_quoted
4316 tok_cur_tag.system_identifier = ''
4317 tok_state = tok_state_doctype_system_identifier_single_quoted
4321 tok_cur_tag.flag 'force-quirks', true
4322 tok_state = tok_state_data
4326 tok_state = tok_state_data
4327 tok_cur_tag.flag 'force-quirks', true
4328 cur -= 1 # Reconsume
4332 tok_cur_tag.flag 'force-quirks', true
4333 tok_state = tok_state_bogus_doctype
4336 # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
# Tokenizer state inside a double-quoted doctype system identifier.
# Visible outcomes: move to after-doctype-system-identifier; append U+FFFD
# to tok_cur_tag.system_identifier; set 'force-quirks' and move to data
# (twice, the second time un-consuming the character); or append the
# character to the identifier.
# NOTE(review): the guards are not visible in this listing (presumably '"',
# NUL, '>' and end of input) -- confirm.
4337 tok_state_doctype_system_identifier_double_quoted = ->
4338 c = txt.charAt(cur++)
4340 tok_state = tok_state_after_doctype_system_identifier
4344 tok_cur_tag.system_identifier += "\ufffd"
4348 tok_cur_tag.flag 'force-quirks', true
4349 tok_state = tok_state_data
4353 tok_state = tok_state_data
4354 tok_cur_tag.flag 'force-quirks', true
4355 cur -= 1 # Reconsume
4358 tok_cur_tag.system_identifier += c
4361 # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
# Tokenizer state inside a single-quoted doctype system identifier.
# Visible outcomes: move to after-doctype-system-identifier; append U+FFFD
# to tok_cur_tag.system_identifier; set 'force-quirks' and move to data
# (twice, the second time un-consuming the character); or append the
# character to the identifier.
# NOTE(review): the guards are not visible in this listing (presumably "'",
# NUL, '>' and end of input) -- confirm.
4362 tok_state_doctype_system_identifier_single_quoted = ->
4363 c = txt.charAt(cur++)
4365 tok_state = tok_state_after_doctype_system_identifier
4369 tok_cur_tag.system_identifier += "\ufffd"
4373 tok_cur_tag.flag 'force-quirks', true
4374 tok_state = tok_state_data
4378 tok_state = tok_state_data
4379 tok_cur_tag.flag 'force-quirks', true
4380 cur -= 1 # Reconsume
4383 tok_cur_tag.system_identifier += c
4386 # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
# Tokenizer state right after the closing quote of the system identifier.
# Visible outcomes: whitespace test; move to data; move to data with
# 'force-quirks' set and the character un-consumed; or move to
# bogus-doctype WITHOUT setting 'force-quirks' (see the inline comment).
# NOTE(review): apart from whitespace, the guards are not visible in this
# listing (presumably '>' and end of input) -- confirm.
4387 tok_state_after_doctype_system_identifier = ->
4388 c = txt.charAt(cur++)
4389 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4392 tok_state = tok_state_data
4396 tok_state = tok_state_data
4397 tok_cur_tag.flag 'force-quirks', true
4398 cur -= 1 # Reconsume
4402 # do _not_ tok_cur_tag.flag 'force-quirks', true
4403 tok_state = tok_state_bogus_doctype
4406 # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
# Tokenizer state that skips the remainder of a malformed doctype.
# Two visible paths move to the data state; the second also un-consumes the
# character.
# NOTE(review): the guards (presumably '>' and end of input), the return
# statements and the "ignore the character" fallback are not visible in
# this listing -- confirm.
4407 tok_state_bogus_doctype = ->
4408 c = txt.charAt(cur++)
4410 tok_state = tok_state_data
4413 tok_state = tok_state_data
4414 cur -= 1 # Reconsume
4419 # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
# Tokenizer state for the body of a CDATA section.
# Switches straight back to the data state, then looks for the ']]>'
# terminator at or after cur (the index is stored in next_gt despite the
# name). The section text is either the rest of txt or the slice up to the
# terminator, and is returned as a single character token.
# NOTE(review): the test on next_gt and the statements that advance cur are
# not visible in this listing -- confirm against the full file.
4420 tok_state_cdata_section = ->
4421 tok_state = tok_state_data
4422 next_gt = txt.indexOf ']]>', cur
4424 val = txt.substr cur
4427 val = txt.substr cur, (next_gt - cur)
4429 return new_character_token val # fixfull split
4431 # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4432 # Don't set this as a state, just call it
4433 # returns a string (NOT a text node)
# Consume a character reference starting at cur (the '&' has already been
# consumed by the caller) and return the decoded text as a string.
#
# allowed_char: extra character that, like whitespace, '<' and '&', means
#               "this is not a character reference" (default null)
# in_attr:      true when called from an attribute value state
#               (default false)
#
# Numeric references: an 'x' after the first character selects the hex
# form; characters found in `charset` are collected, an optional ';' is
# looked for, leading zeros are stripped and the rest is parsed with
# parseInt in `base`. The code point is then mapped through unicode_fixes,
# checked against the surrogate / out-of-range limits and the list of
# disallowed code points, and finally converted with from_code_point.
#
# Named references: characters found in `alnum` are scanned; a name
# terminated by ';' is passed to decode_named_char_ref, otherwise prefixes
# of length 2..max are looked up in legacy_char_refs, with special handling
# when the next character is '=' or is alphanumeric.
#
# NOTE(review): many lines of this function are not visible in this listing
# (the numeric-prefix test, loop bodies, most return statements and the use
# of in_attr), so the return value on the non-matching paths and the exact
# conditions for parse_error() cannot be confirmed from here.
4434 parse_character_reference = (allowed_char = null, in_attr = false) ->
4435 if cur >= txt.length
4437 switch c = txt.charAt(cur)
4438 when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4439 # explicitly not a parse error
4442 # there has to be "one or more" alnums between & and ; to be a parse error
4445 if cur + 1 >= txt.length
4447 if txt.charAt(cur + 1).toLowerCase() is 'x'
4456 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4461 if txt.charAt(start + i) is ';'
4465 code_point = txt.substr(start, i)
4466 while code_point.charAt(0) is '0' and code_point.length > 1
4467 code_point = code_point.substr 1
4468 code_point = parseInt(code_point, base)
4469 if unicode_fixes[code_point]?
4471 return unicode_fixes[code_point]
4473 if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4477 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4479 return from_code_point code_point
4483 if alnum.indexOf(txt.charAt(cur + i)) is -1
4486 # exit early, because parse_error() below needs at least one alnum
4488 if txt.charAt(cur + i) is ';'
4489 i += 1 # include ';' terminator in value
4490 decoded = decode_named_char_ref txt.substr(cur, i)
4497 # no ';' terminator (only legacy char refs)
4499 for i in [2..max] # no prefix matches, so ok to check shortest first
4500 c = legacy_char_refs[txt.substr(cur, i)]
4503 if txt.charAt(cur + i) is '='
4504 # "because some legacy user agents will
4505 # misinterpret the markup in those cases"
4508 if alnum.indexOf(txt.charAt(cur + i)) > -1
4509 # this makes attributes forgiving about url args
4511 # ok, and besides the weird exceptions for attributes...
4512 # return the matching char
4513 cur += i # consume entity chars
4514 parse_error() # because no terminating ";"
4518 return # never reached
4520 # tree constructor initialization
4521 # see comments on TYPE_TAG/etc for the structure of this data
# Per-parse state, reset before the main loop runs.
# `doc` is the root node: a tag-type Node named 'html' in the HTML namespace.
# The remaining assignments set the tree-construction flags, pointers and
# buffers to their starting values, point the tokenizer at the data state
# and normalise the input text.
# NOTE(review): some initialisations are not visible in this listing (for
# example the stack of open elements) -- confirm against the full file.
4524 doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4526 afe = [] # active formatting elements
4527 template_ins_modes = []
4528 ins_mode = ins_mode_initial
4529 original_ins_mode = ins_mode # TODO check spec
4530 flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4531 flag_frameset_ok = true
4533 flag_foster_parenting = false
4534 form_element_pointer = null
4535 temporary_buffer = null
4536 pending_table_character_tokens = []
4537 head_element_pointer = null
4538 flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4539 context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4540 prev_node_id = 0 # just for debugging
4542 # tokenizer initialization
4543 tok_state = tok_state_data
4545 # text pre-processing
4546 # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
# NUL is replaced up front, and CRLF / lone CR are both folded to LF, so the
# tokenizer states only ever see "\n" line endings and no "\u0000".
4547 txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4548 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4549 txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
# NOTE(review): looks like a leftover debugging hook keyed on one test-case
# name; its body is not visible in this listing -- confirm and remove if so.
4551 if args.name is "tests18.dat #17"
4554 # http://www.w3.org/TR/html5/syntax.html#tree-construction
4559 # fixfull parse error if has self-closing flag, but it wasn't acknowledged
# Build one string from a list of nodes by concatenating the result of each
# node's serialize(shallow, show_ids) call.
# NOTE(review): the loop header, the initialisation of `serialized`, any
# separator between items and the return statement are not visible in this
# listing -- confirm against the full file.
4562 serialize_els = (els, shallow, show_ids) ->
4568 serialized += t.serialize shallow, show_ids
# Public API: the parser entry point, two debug-log helpers, the node type
# constants and the namespace constants.
4571 module.exports.parse_html = parse_html
4572 module.exports.debug_log_reset = debug_log_reset
4573 module.exports.debug_log_each = debug_log_each
4574 module.exports.TYPE_TAG = TYPE_TAG
4575 module.exports.TYPE_TEXT = TYPE_TEXT
4576 module.exports.TYPE_COMMENT = TYPE_COMMENT
4577 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4578 module.exports.NS_HTML = NS_HTML
4579 module.exports.NS_MATHML = NS_MATHML
4580 module.exports.NS_SVG = NS_SVG