JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
replace del key hack with one that uses backspace code
[peach-html5-editor.git] / parser.coffee
1 # Copyright 2015 Jason Woofenden
2 # This file implements an HTML5 parser
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a thorough parser for html5, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 #
24 #     http://www.w3.org/TR/html5/syntax.html
25 #
26 # except for some places marked "WHATWG" that are implemented as described here:
27 #
28 #     https://html.spec.whatwg.org/multipage/syntax.html
29 #
30 # This code passes all of the tests in the .dat files at:
31 #
32 #     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
33
34
35 ##################################
36 ## how to use this code
37 ##################################
38 #
39 # See README.md for how to run this file in the browser or in node.js.
40 #
41 # This file exports a single useful function: parse_html, and some constants
42 # (see the bottom of this file for those.)
43 #
44 # Call it like this:
45 #
46 #     peach_parser.parse("<p><b>hi</p>")
47 #
48 # Or, if you don't want <html><head><body>/etc, do this:
49 #
50 #     peach_parser.parse("<p><b>hi</p>", {fragment: "body"})
51 #
52 # return value is an array of Nodes, see "class Node" below.
53
54 # This code is a work in progress, e.g. try searching this file for "fixfull",
55 # "TODO" and "FIXME"
56
57
58 # Notes:  stacks/lists
59 #
60 # Jason was frequently confused by the terminology used to refer to different
61 # parts of the stacks and lists in the spec, so he made this chart to help keep
62 # his head straight:
63 #
64 # stacks grow downward (current element is index=0)
65 #
66 # example: open_els = [a, b, c, d, e, f, g]
67 #
68 # "grows downwards" means it's visualized like this: (index: el "names")
69 #
70 #   6: g "start of the list", "topmost", "first"
71 #   5: f
72 #   4: e "previous" (to d), "above", "before"
73 #   3: d   (previous/next are relative to this element)
74 #   2: c "next", "after", "lower", "below"
75 #   1: b
76 #   0: a "end of the list", "current node", "bottommost", "last"
77
# Work out whether we were loaded as a CommonJS-style module or straight into a
# page, and point `exports` at the object that should receive the public API.
if typeof module is 'undefined' or not module.exports?
        # no module system: publish on window.peach_parser
        context = 'browser'
        exports = window.peach_parser = {}
else
        context = 'module'
        exports = module.exports
85
# Turn a Unicode code point (a number) into a string.
# Uses String.fromCodePoint when the runtime has it, otherwise builds the
# string by hand with String.fromCharCode.
from_code_point = (x) ->
        return String.fromCodePoint(x) if String.fromCodePoint?
        # values up to 0xffff fit in a single UTF-16 code unit
        return String.fromCharCode(x) if x <= 0xffff
        # anything above that is emitted as a high/low surrogate pair
        offset = x - 0x10000
        high = (offset >> 10) + 0xd800
        low = (offset % 0x400) + 0xdc00
        return String.fromCharCode high, low
94
95 # Each node is an object of the Node class. Here are the Node types:
96 TYPE_TAG = 'tag' # name, {attributes}, [children]
97 TYPE_TEXT = 'text' # "text"
98 TYPE_COMMENT = 'comment'
99 TYPE_DOCTYPE = 'doctype'
100 # the following types are emitted by the tokenizer, but shouldn't end up in the tree:
101 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
102 TYPE_END_TAG = 5 # name
103 TYPE_EOF = 6
104 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
105 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
106
107 # namespace constants
108 NS_HTML = 'html'
109 NS_MATHML = 'mathml'
110 NS_SVG = 'svg'
111
112 # quirks mode constants
113 QUIRKS_NO = 'no'
114 QUIRKS_LIMITED = 'limited'
115 QUIRKS_YES = 'yes'
116
117 # queue up debug logs, so eg they can be shown only for tests that fail
# messages queued so far, oldest first
g_debug_log = []

# Forget every queued message (starts over with a fresh array).
debug_log_reset = ->
        g_debug_log = []
        return

# Append one message to the queue.
debug_log = (str) ->
        g_debug_log.push str
        return

# Call cb once per queued message, in the order they were queued.
debug_log_each = (cb) ->
        cb entry for entry in g_debug_log
        return
129
# counter behind the ids given to nodes that are created without an explicit id
prev_node_id = 0

# A Node is used both for tokens and for tree nodes; @type (one of the TYPE_*
# constants) says which kind a given instance is.
class Node
        # type: one of the TYPE_* constants
        # args: optional object; every field left out gets the default shown below
        constructor: (type, args = {}) ->
                @type = type
                @name = args.name ? '' # tag name
                @text = args.text ? '' # contents for text/comment nodes
                @attrs = args.attrs ? {}
                # attrs in progress, TYPE_START_TAG only.
                # The option is read under the same name as the property it fills
                # (attrs_a). The old key "attr_k" is still honoured so that any
                # caller relying on it keeps working.
                @attrs_a = args.attrs_a ? args.attr_k ? []
                @children = args.children ? []
                @namespace = args.namespace ? NS_HTML
                @parent = args.parent ? null
                @token = args.token ? null
                @flags = args.flags ? {}
                if args.id?
                        # explicit id: mark it with a trailing "+"
                        @id = "#{args.id}+"
                else
                        @id = "#{++prev_node_id}"

        # Record that a self-closing flag was acknowledged. The flag is stored on
        # @token when this node has one, otherwise on the node itself.
        acknowledge_self_closing: ->
                if @token?
                        @token.flag 'did_self_close', true
                else
                        @flag 'did_self_close', true
                return

        # Combined getter/setter for @flags:
        #   flag(key, value) stores value (must not be null/undefined), returns nothing
        #   flag(key)        returns the stored value (undefined if never set)
        flag: (key, value = null) ->
                if value?
                        @flags[key] = value
                else
                        return @flags[key]
                return
159
160 # helpers: (only take args that are normally known when parser creates nodes)
# Shorthand constructors: each builds a Node of one fixed type from the single
# piece of data that is known when that kind of node/token gets created.
new_open_tag = (name) -> new Node TYPE_START_TAG, {name}
new_end_tag = (name) -> new Node TYPE_END_TAG, {name}
new_element = (name) -> new Node TYPE_TAG, {name}
new_text_node = (txt) -> new Node TYPE_TEXT, {text: txt}
# character tokens and text nodes are built the same way
new_character_token = new_text_node
new_comment_token = (txt) -> new Node TYPE_COMMENT, {text: txt}
new_doctype_token = (name) -> new Node TYPE_DOCTYPE, {name}
# the remaining kinds carry no data at all
new_eof_token = -> new Node TYPE_EOF
new_afe_marker = -> new Node TYPE_AFE_MARKER
new_aaa_bookmark = -> new Node TYPE_AAA_BOOKMARK
180
# character classes, spelled out as strings so membership is an indexOf() test
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = lc_alpha + uc_alpha + digits
hex_chars = digits + "abcdefABCDEF"

# true only for a one-character string holding an ASCII upper-case letter
is_uc_alpha = (str) ->
        return false unless str.length is 1
        return uc_alpha.indexOf(str) isnt -1

# true only for a one-character string holding an ASCII lower-case letter
is_lc_alpha = (str) ->
        return false unless str.length is 1
        return lc_alpha.indexOf(str) isnt -1
191
192 # some SVG elements have dashes in them
193 tag_name_chars = alnum + "-"
194
195 # http://www.w3.org/TR/html5/infrastructure.html#space-character
# tab, line feed, form feed, carriage return, space
space_chars = "\u0009\u000a\u000c\u000d\u0020"

# true only for a one-character string holding one of the space characters
is_space = (txt) ->
        return false unless txt.length is 1
        return space_chars.indexOf(txt) isnt -1
# true when t is a text token whose text is exactly one space character
is_space_tok = (t) ->
        return false unless t.type is TYPE_TEXT
        return is_space t.text
201
# true when t is a start tag token whose "type" attribute equals "hidden"
# (compared case-insensitively). Only the first "type" entry found in
# t.attrs_a is considered.
is_input_hidden_tok = (t) ->
        if t.type isnt TYPE_START_TAG
                return false
        for [key, value] in t.attrs_a when key is 'type'
                return value.toLowerCase() is 'hidden'
        # no "type" attribute at all
        return false
210
211 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
212 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
213
214 unicode_fixes = {}
215 unicode_fixes[0x00] = "\uFFFD"
216 unicode_fixes[0x80] = "\u20AC"
217 unicode_fixes[0x82] = "\u201A"
218 unicode_fixes[0x83] = "\u0192"
219 unicode_fixes[0x84] = "\u201E"
220 unicode_fixes[0x85] = "\u2026"
221 unicode_fixes[0x86] = "\u2020"
222 unicode_fixes[0x87] = "\u2021"
223 unicode_fixes[0x88] = "\u02C6"
224 unicode_fixes[0x89] = "\u2030"
225 unicode_fixes[0x8A] = "\u0160"
226 unicode_fixes[0x8B] = "\u2039"
227 unicode_fixes[0x8C] = "\u0152"
228 unicode_fixes[0x8E] = "\u017D"
229 unicode_fixes[0x91] = "\u2018"
230 unicode_fixes[0x92] = "\u2019"
231 unicode_fixes[0x93] = "\u201C"
232 unicode_fixes[0x94] = "\u201D"
233 unicode_fixes[0x95] = "\u2022"
234 unicode_fixes[0x96] = "\u2013"
235 unicode_fixes[0x97] = "\u2014"
236 unicode_fixes[0x98] = "\u02DC"
237 unicode_fixes[0x99] = "\u2122"
238 unicode_fixes[0x9A] = "\u0161"
239 unicode_fixes[0x9B] = "\u203A"
240 unicode_fixes[0x9C] = "\u0153"
241 unicode_fixes[0x9E] = "\u017E"
242 unicode_fixes[0x9F] = "\u0178"
243
244 quirks_yes_pi_prefixes = [
245         "+//silmaril//dtd html pro v0r11 19970101//"
246         "-//as//dtd html 3.0 aswedit + extensions//"
247         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
248         "-//ietf//dtd html 2.0 level 1//"
249         "-//ietf//dtd html 2.0 level 2//"
250         "-//ietf//dtd html 2.0 strict level 1//"
251         "-//ietf//dtd html 2.0 strict level 2//"
252         "-//ietf//dtd html 2.0 strict//"
253         "-//ietf//dtd html 2.0//"
254         "-//ietf//dtd html 2.1e//"
255         "-//ietf//dtd html 3.0//"
256         "-//ietf//dtd html 3.2 final//"
257         "-//ietf//dtd html 3.2//"
258         "-//ietf//dtd html 3//"
259         "-//ietf//dtd html level 0//"
260         "-//ietf//dtd html level 1//"
261         "-//ietf//dtd html level 2//"
262         "-//ietf//dtd html level 3//"
263         "-//ietf//dtd html strict level 0//"
264         "-//ietf//dtd html strict level 1//"
265         "-//ietf//dtd html strict level 2//"
266         "-//ietf//dtd html strict level 3//"
267         "-//ietf//dtd html strict//"
268         "-//ietf//dtd html//"
269         "-//metrius//dtd metrius presentational//"
270         "-//microsoft//dtd internet explorer 2.0 html strict//"
271         "-//microsoft//dtd internet explorer 2.0 html//"
272         "-//microsoft//dtd internet explorer 2.0 tables//"
273         "-//microsoft//dtd internet explorer 3.0 html strict//"
274         "-//microsoft//dtd internet explorer 3.0 html//"
275         "-//microsoft//dtd internet explorer 3.0 tables//"
276         "-//netscape comm. corp.//dtd html//"
277         "-//netscape comm. corp.//dtd strict html//"
278         "-//o'reilly and associates//dtd html 2.0//"
279         "-//o'reilly and associates//dtd html extended 1.0//"
280         "-//o'reilly and associates//dtd html extended relaxed 1.0//"
281         "-//sq//dtd html 2.0 hotmetal + extensions//"
282         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
283         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
284         "-//spyglass//dtd html 2.0 extended//"
285         "-//sun microsystems corp.//dtd hotjava html//"
286         "-//sun microsystems corp.//dtd hotjava strict html//"
287         "-//w3c//dtd html 3 1995-03-24//"
288         "-//w3c//dtd html 3.2 draft//"
289         "-//w3c//dtd html 3.2 final//"
290         "-//w3c//dtd html 3.2//"
291         "-//w3c//dtd html 3.2s draft//"
292         "-//w3c//dtd html 4.0 frameset//"
293         "-//w3c//dtd html 4.0 transitional//"
294         "-//w3c//dtd html experimental 19960712//"
295         "-//w3c//dtd html experimental 970421//"
296         "-//w3c//dtd w3 html//"
297         "-//w3o//dtd w3 html 3.0//"
298         "-//webtechs//dtd mozilla html 2.0//"
299         "-//webtechs//dtd mozilla html//"
300 ]
301
302 # These are the character references that don't need a terminating semicolon
303 # min length: 2, max: 6, none are a prefix of any other.
304 legacy_char_refs = {
305         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
306         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
307         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
308         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
309         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
310         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
311         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
312         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
313         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
314         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
315         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
316         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
317         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
318         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
319         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
320         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
321         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
322         yen: '¥', yuml: 'ÿ'
323 }
324
325 #void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
326 #raw_text_elements = ['script', 'style']
327 #escapable_raw_text_elements = ['textarea', 'title']
328 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
329 svg_elements = [
330         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
331         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
332         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
333         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
334         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
335         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
336         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
337         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
338         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
339         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
340         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
341         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
342         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
343         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
344         'view', 'vkern'
345 ]
346
347 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
348 mathml_elements = [
349         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
350         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
351         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
352         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
353         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
354         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
355         'determinant', 'diff', 'divergence', 'divide', 'domain',
356         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
357         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
358         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
359         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
360         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
361         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
362         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
363         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
364         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
365         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
366         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
367         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
368         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
369         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
370         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
371         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
372         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
373         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
374         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
375         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
376         'vectorproduct', 'xor'
377 ]
378 # foreign_elements = [svg_elements..., mathml_elements...]
379 #normal_elements = All other allowed HTML elements are normal elements.
380
381 special_elements = {
382         # HTML:
383         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
384         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
385         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
386         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
387         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
388         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
389         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
390         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
391         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
392         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
393         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
394
395         menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
396
397         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
398         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
399         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
400         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
401         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
402         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
403         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
404
405         # MathML:
406         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
407         'annotation-xml':NS_MATHML,
408
409         # SVG:
410         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
411 }
412
413 formatting_elements = {
414          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
415          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
416          u: true
417 }
418
419 mathml_text_integration = {
420         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
421 }
# true when el's name is listed in mathml_text_integration under el's namespace
is_mathml_text_integration_point = (el) ->
        listed_namespace = mathml_text_integration[el.name]
        return listed_namespace is el.namespace
# true when el is one of:
#   - a MathML annotation-xml element whose "encoding" attribute is "text/html"
#     or "application/xhtml+xml" (compared case-insensitively)
#   - an SVG foreignObject, desc or title element
# Reads el.attrs (an object), not el.attrs_a.
is_html_integration = (el) -> # DON'T PASS A TOKEN
        switch el.namespace
                when NS_MATHML
                        return false unless el.name is 'annotation-xml'
                        return false unless el.attrs.encoding?
                        encoding = el.attrs.encoding.toLowerCase()
                        return encoding is 'text/html' or encoding is 'application/xhtml+xml'
                when NS_SVG
                        return el.name in ['foreignObject', 'desc', 'title']
        return false
437
438 h_tags = {
439         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
440 }
441
442 foster_parenting_targets = {
443         table: NS_HTML
444         tbody: NS_HTML
445         tfoot: NS_HTML
446         thead: NS_HTML
447         tr: NS_HTML
448 }
449
450 end_tag_implied = {
451         dd: NS_HTML
452         dt: NS_HTML
453         li: NS_HTML
454         option: NS_HTML
455         optgroup: NS_HTML
456         p: NS_HTML
457         rb: NS_HTML
458         rp: NS_HTML
459         rt: NS_HTML
460         rtc: NS_HTML
461 }
462
# true when e (an element) is in the "special" category
el_is_special = (e) ->
        return true if special_elements[e.name] is e.namespace
        # special_elements maps each name to a single namespace, and its literal
        # lists "title" twice (once for HTML, once for SVG). The later SVG entry
        # overwrites the HTML one, so the HTML <title> has to be recognised here.
        return e.name is 'title' and e.namespace is NS_HTML

# the three HTML elements that el_is_special_not_adp() leaves out
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }

# true when el is special, but isn't an HTML address, div or p element
el_is_special_not_adp = (el) ->
        return el_is_special(el) and adp_els[el.name] isnt el.namespace
469
470 svg_name_fixes = {
471         altglyph: 'altGlyph'
472         altglyphdef: 'altGlyphDef'
473         altglyphitem: 'altGlyphItem'
474         animatecolor: 'animateColor'
475         animatemotion: 'animateMotion'
476         animatetransform: 'animateTransform'
477         clippath: 'clipPath'
478         feblend: 'feBlend'
479         fecolormatrix: 'feColorMatrix'
480         fecomponenttransfer: 'feComponentTransfer'
481         fecomposite: 'feComposite'
482         feconvolvematrix: 'feConvolveMatrix'
483         fediffuselighting: 'feDiffuseLighting'
484         fedisplacementmap: 'feDisplacementMap'
485         fedistantlight: 'feDistantLight'
486         fedropshadow: 'feDropShadow'
487         feflood: 'feFlood'
488         fefunca: 'feFuncA'
489         fefuncb: 'feFuncB'
490         fefuncg: 'feFuncG'
491         fefuncr: 'feFuncR'
492         fegaussianblur: 'feGaussianBlur'
493         feimage: 'feImage'
494         femerge: 'feMerge'
495         femergenode: 'feMergeNode'
496         femorphology: 'feMorphology'
497         feoffset: 'feOffset'
498         fepointlight: 'fePointLight'
499         fespecularlighting: 'feSpecularLighting'
500         fespotlight: 'feSpotLight'
501         fetile: 'feTile'
502         feturbulence: 'feTurbulence'
503         foreignobject: 'foreignObject'
504         glyphref: 'glyphRef'
505         lineargradient: 'linearGradient'
506         radialgradient: 'radialGradient'
507         textpath: 'textPath'
508 }
509 svg_attribute_fixes = {
510         attributename: 'attributeName'
511         attributetype: 'attributeType'
512         basefrequency: 'baseFrequency'
513         baseprofile: 'baseProfile'
514         calcmode: 'calcMode'
515         clippathunits: 'clipPathUnits'
516         contentscripttype: 'contentScriptType'
517         contentstyletype: 'contentStyleType'
518         diffuseconstant: 'diffuseConstant'
519         edgemode: 'edgeMode'
520         externalresourcesrequired: 'externalResourcesRequired'
521         # WHATWG removes this: filterres: 'filterRes'
522         filterunits: 'filterUnits'
523         glyphref: 'glyphRef'
524         gradienttransform: 'gradientTransform'
525         gradientunits: 'gradientUnits'
526         kernelmatrix: 'kernelMatrix'
527         kernelunitlength: 'kernelUnitLength'
528         keypoints: 'keyPoints'
529         keysplines: 'keySplines'
530         keytimes: 'keyTimes'
531         lengthadjust: 'lengthAdjust'
532         limitingconeangle: 'limitingConeAngle'
533         markerheight: 'markerHeight'
534         markerunits: 'markerUnits'
535         markerwidth: 'markerWidth'
536         maskcontentunits: 'maskContentUnits'
537         maskunits: 'maskUnits'
538         numoctaves: 'numOctaves'
539         pathlength: 'pathLength'
540         patterncontentunits: 'patternContentUnits'
541         patterntransform: 'patternTransform'
542         patternunits: 'patternUnits'
543         pointsatx: 'pointsAtX'
544         pointsaty: 'pointsAtY'
545         pointsatz: 'pointsAtZ'
546         preservealpha: 'preserveAlpha'
547         preserveaspectratio: 'preserveAspectRatio'
548         primitiveunits: 'primitiveUnits'
549         refx: 'refX'
550         refy: 'refY'
551         repeatcount: 'repeatCount'
552         repeatdur: 'repeatDur'
553         requiredextensions: 'requiredExtensions'
554         requiredfeatures: 'requiredFeatures'
555         specularconstant: 'specularConstant'
556         specularexponent: 'specularExponent'
557         spreadmethod: 'spreadMethod'
558         startoffset: 'startOffset'
559         stddeviation: 'stdDeviation'
560         stitchtiles: 'stitchTiles'
561         surfacescale: 'surfaceScale'
562         systemlanguage: 'systemLanguage'
563         tablevalues: 'tableValues'
564         targetx: 'targetX'
565         targety: 'targetY'
566         textlength: 'textLength'
567         viewbox: 'viewBox'
568         viewtarget: 'viewTarget'
569         xchannelselector: 'xChannelSelector'
570         ychannelselector: 'yChannelSelector'
571         zoomandpan: 'zoomAndPan'
572 }
573 foreign_attr_fixes = {
574         'xlink:actuate': 'xlink actuate'
575         'xlink:arcrole': 'xlink arcrole'
576         'xlink:href': 'xlink href'
577         'xlink:role': 'xlink role'
578         'xlink:show': 'xlink show'
579         'xlink:title': 'xlink title'
580         'xlink:type': 'xlink type'
581         'xml:base': 'xml base'
582         'xml:lang': 'xml lang'
583         'xml:space': 'xml space'
584         'xmlns': 'xmlns'
585         'xmlns:xlink': 'xmlns xlink'
586 }
# Rename any "definitionurl" attribute on token t to "definitionURL".
# Edits the [name, value] pairs of t.attrs_a in place; returns nothing.
adjust_mathml_attributes = (t) ->
        for attr in t.attrs_a when attr[0] is 'definitionurl'
                attr[0] = 'definitionURL'
        return
# Replace each attribute name on token t that has an entry in
# svg_attribute_fixes with the mixed-case spelling stored there.
# Edits the [name, value] pairs of t.attrs_a in place; returns nothing.
adjust_svg_attributes = (t) ->
        for a in t.attrs_a
                # Own-property test on purpose: a plain `svg_attribute_fixes[name]?`
                # lookup also matches names inherited from Object.prototype (eg an
                # attribute called "constructor"), which would replace the
                # attribute name with something that isn't a string.
                if Object::hasOwnProperty.call svg_attribute_fixes, a[0]
                        a[0] = svg_attribute_fixes[a[0]]
        return
# Replace each attribute name on token t that has an entry in
# foreign_attr_fixes (the xlink:/xml:/xmlns names) with the replacement
# stored there. Edits the [name, value] pairs of t.attrs_a in place; returns
# nothing.
adjust_foreign_attributes = (t) ->
        # fixfull
        for a in t.attrs_a
                # Own-property test on purpose: a plain `foreign_attr_fixes[name]?`
                # lookup also matches names inherited from Object.prototype (eg an
                # attribute called "constructor"), which would replace the
                # attribute name with something that isn't a string.
                if Object::hasOwnProperty.call foreign_attr_fixes, a[0]
                        a[0] = foreign_attr_fixes[a[0]]
        return
603
604 # decode_named_char_ref()
605 #
606 # The list of named character references is _huge_ so if we're running in a
607 # browser, we get the browser to decode them, rather than increasing the code
608 # size to include the table.
# Pick the implementation of _decode_named_char_ref for this environment.
if context isnt 'module'
        # TODO test this in IE8
        # Write "&name;" into a detached <textarea> as HTML and read back its
        # value; if the text comes back unchanged, the name was not decoded.
        decode_named_char_ref_el = document.createElement('textarea')
        _decode_named_char_ref = (txt) ->
                entity = "&#{txt};"
                decode_named_char_ref_el.innerHTML = entity
                decoded = decode_named_char_ref_el.value
                return if decoded is entity then null else decoded
else
        # no DOM to lean on: use the helper module instead
        _decode_named_char_ref = require './parser_no_browser_helper.coffee'
620 # Pass the name of a named entity _that has a terminating semicolon_
621 # Entities without terminating semicolons should use legacy_char_refs[]
622 # Do not include the "&" or ";" in your argument, eg pass "alpha"
# results of earlier lookups, keyed by entity name (without "&" and ";")
decode_named_char_ref_cache = {}

# Decode a named character reference, remembering the answer.
# txt: the entity name, without the "&" and ";"
# returns whatever _decode_named_char_ref returned for txt, looked up only once
decode_named_char_ref = (txt) ->
        # Own-property test on purpose:
        #  - a plain `cache[txt]` read also finds names inherited from
        #    Object.prototype (eg "constructor") and would return a function
        #  - it lets null results be cached too, so names that are not
        #    entities are not decoded again on every call
        if Object::hasOwnProperty.call decode_named_char_ref_cache, txt
                return decode_named_char_ref_cache[txt]
        return decode_named_char_ref_cache[txt] = _decode_named_char_ref txt
629
630 parse_html = (args_html, args = {}) ->
631         txt = null
632         cur = null # index of next char in txt to be parsed
633         # declare doc and tokenizer variables so they're in scope below
634         doc = null
635         open_els = null # stack of open elements
636         afe = null # active formatting elements
637         template_ins_modes = null
638         ins_mode = null
639         original_ins_mode = null
640         tok_state = null
641         tok_cur_tag = null # partially parsed tag
642         flag_scripting = null
643         flag_frameset_ok = null
644         flag_parsing = null
645         flag_foster_parenting = null
646         form_element_pointer = null
647         temporary_buffer = null
648         pending_table_character_tokens = null
649         head_element_pointer = null
650         flag_fragment_parsing = null
651         context_element = null
652
653         stop_parsing = ->
654                 flag_parsing = false
655                 return
656
657         parse_error = ->
658                 if args.error_cb?
659                         args.error_cb cur
660                 return
661
662         # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
663         # "Noah's Ark clause" but with three
664         afe_push = (new_el) ->
665                 matches = 0
666                 for el, i in afe
667                         if el.type is TYPE_AFE_MARKER
668                                 break
669                         if el.name is new_el.name and el.namespace is new_el.namespace
670                                 attrs_match = true
671                                 for k, v of el.attrs
672                                         unless new_el.attrs[k] is v
673                                                 attrs_match = false
674                                                 break
675                                 if attrs_match
676                                         for k, v of new_el.attrs
677                                                 unless el.attrs[k] is v
678                                                         attrs_match = false
679                                                         break
680                                 if attrs_match
681                                         matches += 1
682                                         if matches is 3
683                                                 afe.splice i, 1
684                                                 break
685                 afe.unshift new_el
686                 return
687
688         afe_push_marker = ->
689                 afe.unshift new_afe_marker()
690                 return
691
692         # the functions below implement the Tree Construction algorithm
693         # http://www.w3.org/TR/html5/syntax.html#tree-construction
694
695         # But first... the helpers
696         template_tag_is_open = ->
697                 for el in open_els
698                         if el.name is 'template' and el.namespace is NS_HTML
699                                 return true
700                 return false
701         is_in_scope_x = (tag_name, scope, namespace) ->
702                 for el in open_els
703                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
704                                 return true
705                         if scope[el.name] is el.namespace
706                                 return false
707                 return false
708         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
709                 for el in open_els
710                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
711                                 return true
712                         if scope[el.name] is el.namespace
713                                 return false
714                         if scope2[el.name] is el.namespace
715                                 return false
716                 return false
717         standard_scopers = {
718                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
719                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
720                 template: NS_HTML,
721
722                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
723                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
724
725                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
726         }
727         button_scopers = button: NS_HTML
728         li_scopers = ol: NS_HTML, ul: NS_HTML
729         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
730         is_in_scope = (tag_name, namespace = null) ->
731                 return is_in_scope_x tag_name, standard_scopers, namespace
732         is_in_button_scope = (tag_name, namespace = null) ->
733                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
734         is_in_table_scope = (tag_name, namespace = null) ->
735                 return is_in_scope_x tag_name, table_scopers, namespace
736         # aka is_in_list_item_scope
737         is_in_li_scope = (tag_name, namespace = null) ->
738                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
739         is_in_select_scope = (tag_name, namespace = null) ->
740                 for t in open_els
741                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
742                                 return true
743                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
744                                 return false
745                 return false
746         # this checks for a particular element, not by name
747         # this requires a namespace match
748         el_is_in_scope = (needle) ->
749                 for el in open_els
750                         if el is needle
751                                 return true
752                         if standard_scopers[el.name] is el.namespace
753                                 return false
754                 return false
755
756         clear_to_table_stopers = {
757                 'table': true
758                 'template': true
759                 'html': true
760         }
761         clear_stack_to_table_context = ->
762                 loop
763                         if clear_to_table_stopers[open_els[0].name]?
764                                 break
765                         open_els.shift()
766                 return
767         clear_to_table_body_stopers = {
768                 tbody: NS_HTML
769                 tfoot: NS_HTML
770                 thead: NS_HTML
771                 template: NS_HTML
772                 html: NS_HTML
773         }
774         clear_stack_to_table_body_context = ->
775                 loop
776                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
777                                 break
778                         open_els.shift()
779                 return
780         clear_to_table_row_stopers = {
781                 'tr': true
782                 'template': true
783                 'html': true
784         }
785         clear_stack_to_table_row_context = ->
786                 loop
787                         if clear_to_table_row_stopers[open_els[0].name]?
788                                 break
789                         open_els.shift()
790                 return
791         clear_afe_to_marker = ->
792                 loop
793                         return unless afe.length > 0 # this happens in fragment case, ?spec error
794                         el = afe.shift()
795                         if el.type is TYPE_AFE_MARKER
796                                 return
797                 return
798
799         # 8.2.3.1 ...
800         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
801         reset_ins_mode = ->
802                 # 1. Let last be false.
803                 last = false
804                 # 2. Let node be the last node in the stack of open elements.
805                 node_i = 0
806                 node = open_els[node_i]
807                 # 3. Loop: If node is the first node in the stack of open elements,
808                 # then set last to true, and, if the parser was originally created as
809                 # part of the HTML fragment parsing algorithm (fragment case) set node
810                 # to the context element.
811                 loop
812                         if node_i is open_els.length - 1
813                                 last = true
814                                 if flag_fragment_parsing
815                                         node = context_element
816                         # 4. If node is a select element, run these substeps:
817                         if node.name is 'select' and node.namespace is NS_HTML
818                                 # 1. If last is true, jump to the step below labeled done.
819                                 unless last
820                                         # 2. Let ancestor be node.
821                                         ancestor_i = node_i
822                                         ancestor = node
823                                         # 3. Loop: If ancestor is the first node in the stack of
824                                         # open elements, jump to the step below labeled done.
825                                         loop
826                                                 if ancestor_i is open_els.length - 1
827                                                         break
828                                                 # 4. Let ancestor be the node before ancestor in the stack
829                                                 # of open elements.
830                                                 ancestor_i += 1
831                                                 ancestor = open_els[ancestor_i]
832                                                 # 5. If ancestor is a template node, jump to the step below
833                                                 # labeled done.
834                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
835                                                         break
836                                                 # 6. If ancestor is a table node, switch the insertion mode
837                                                 # to "in select in table" and abort these steps.
838                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
839                                                         ins_mode = ins_mode_in_select_in_table
840                                                         return
841                                                 # 7. Jump back to the step labeled loop.
842                                 # 8. Done: Switch the insertion mode to "in select" and abort
843                                 # these steps.
844                                 ins_mode = ins_mode_in_select
845                                 return
846                         # 5. If node is a td or th element and last is false, then switch
847                         # the insertion mode to "in cell" and abort these steps.
848                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
849                                 ins_mode = ins_mode_in_cell
850                                 return
851                         # 6. If node is a tr element, then switch the insertion mode to "in
852                         # row" and abort these steps.
853                         if node.name is 'tr' and node.namespace is NS_HTML
854                                 ins_mode = ins_mode_in_row
855                                 return
856                         # 7. If node is a tbody, thead, or tfoot element, then switch the
857                         # insertion mode to "in table body" and abort these steps.
858                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
859                                 ins_mode = ins_mode_in_table_body
860                                 return
861                         # 8. If node is a caption element, then switch the insertion mode
862                         # to "in caption" and abort these steps.
863                         if node.name is 'caption' and node.namespace is NS_HTML
864                                 ins_mode = ins_mode_in_caption
865                                 return
866                         # 9. If node is a colgroup element, then switch the insertion mode
867                         # to "in column group" and abort these steps.
868                         if node.name is 'colgroup' and node.namespace is NS_HTML
869                                 ins_mode = ins_mode_in_column_group
870                                 return
871                         # 10. If node is a table element, then switch the insertion mode to
872                         # "in table" and abort these steps.
873                         if node.name is 'table' and node.namespace is NS_HTML
874                                 ins_mode = ins_mode_in_table
875                                 return
876                         # 11. If node is a template element, then switch the insertion mode
877                         # to the current template insertion mode and abort these steps.
878                         if node.name is 'template' and node.namespace is NS_HTML
879                                 ins_mode = template_ins_modes[0]
880                                 return
881                         # 12. If node is a head element and last is true, then switch the
882                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
883                         # these steps. (fragment case)
884                         if node.name is 'head' and node.namespace is NS_HTML and last
885                                 ins_mode = ins_mode_in_body
886                                 return
887                         # 13. If node is a head element and last is false, then switch the
888                         # insertion mode to "in head" and abort these steps.
889                         if node.name is 'head' and node.namespace is NS_HTML and last is false
890                                 ins_mode = ins_mode_in_head
891                                 return
892                         # 14. If node is a body element, then switch the insertion mode to
893                         # "in body" and abort these steps.
894                         if node.name is 'body' and node.namespace is NS_HTML
895                                 ins_mode = ins_mode_in_body
896                                 return
897                         # 15. If node is a frameset element, then switch the insertion mode
898                         # to "in frameset" and abort these steps. (fragment case)
899                         if node.name is 'frameset' and node.namespace is NS_HTML
900                                 ins_mode = ins_mode_in_frameset
901                                 return
902                         # 16. If node is an html element, run these substeps:
903                         if node.name is 'html' and node.namespace is NS_HTML
904                                 # 1. If the head element pointer is null, switch the insertion
905                                 # mode to "before head" and abort these steps. (fragment case)
906                                 if head_element_pointer is null
907                                         ins_mode = ins_mode_before_head
908                                 else
909                                         # 2. Otherwise, the head element pointer is not null,
910                                         # switch the insertion mode to "after head" and abort these
911                                         # steps.
912                                         ins_mode = ins_mode_after_head
913                                 return
914                         # 17. If last is true, then switch the insertion mode to "in body"
915                         # and abort these steps. (fragment case)
916                         if last
917                                 ins_mode = ins_mode_in_body
918                                 return
919                         # 18. Let node now be the node before node in the stack of open
920                         # elements.
921                         node_i += 1
922                         node = open_els[node_i]
923                         # 19. Return to the step labeled loop.
924                 return
925
926         # 8.2.3.2
927
928         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
929         adjusted_current_node = ->
930                 if open_els.length is 1 and flag_fragment_parsing
931                         return context_element
932                 return open_els[0]
933
934         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
935         # this implementation is structured (mostly) as described at the link above.
936         # capitalized comments are the "labels" described at the link above.
937         reconstruct_afe = ->
938                 return if afe.length is 0
939                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
940                         return
941                 # Rewind
942                 i = 0
943                 loop
944                         if i is afe.length - 1
945                                 break
946                         i += 1
947                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
948                                 i -= 1 # Advance
949                                 break
950                 # Create
951                 loop
952                         el = insert_html_element afe[i].token
953                         afe[i] = el
954                         break if i is 0
955                         i -= 1 # Advance
956                 return
957
958         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
959         # adoption agency algorithm
960         # overview here:
961         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
962         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
963         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
964         adoption_agency = (subject) ->
965 # this block implements tha W3C spec
966 #               # 1. If the current node is an HTML element whose tag name is subject,
967 #               # then run these substeps:
968 #               #
969 #               # 1. Let element be the current node.
970 #               #
971 #               # 2. Pop element off the stack of open elements.
972 #               #
973 #               # 3. If element is also in the list of active formatting elements,
974 #               # remove the element from the list.
975 #               #
976 #               # 4. Abort the adoption agency algorithm.
977 #               if open_els[0].name is subject and open_els[0].namespace is NS_HTML
978 #                       el = open_els.shift()
979 #                       # remove it from the list of active formatting elements (if found)
980 #                       for t, i in afe
981 #                               if t is el
982 #                                       afe.splice i, 1
983 #                                       break
984 #                       return
985 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
986                 # If the current node is an HTML element whose tag name is subject, and
987                 # the current node is not in the list of active formatting elements,
988                 # then pop the current node off the stack of open elements, and abort
989                 # these steps.
990                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
991                         # remove it from the list of active formatting elements (if found)
992                         in_afe = false
993                         for el, i in afe
994                                 if el is open_els[0]
995                                         in_afe = true
996                                         break
997                         unless in_afe
998                                 open_els.shift()
999                                 return
1000                         # fall through
1001 # END WHATWG
1002                 outer = 0
1003                 loop
1004                         if outer >= 8
1005                                 return
1006                         outer += 1
1007                         # 5. Let formatting element be the last element in the list of
1008                         # active formatting elements that: is between the end of the list
1009                         # and the last scope marker in the list, if any, or the start of
1010                         # the list otherwise, and  has the tag name subject.
1011                         fe = null
1012                         for t, fe_of_afe in afe
1013                                 if t.type is TYPE_AFE_MARKER
1014                                         break
1015                                 if t.name is subject
1016                                         fe = t
1017                                         break
1018                         # If there is no such element, then abort these steps and instead
1019                         # act as described in the "any other end tag" entry above.
1020                         if fe is null
1021                                 in_body_any_other_end_tag subject
1022                                 return
1023                         # 6. If formatting element is not in the stack of open elements,
1024                         # then this is a parse error; remove the element from the list, and
1025                         # abort these steps.
1026                         in_open_els = false
1027                         for t, fe_of_open_els in open_els
1028                                 if t is fe
1029                                         in_open_els = true
1030                                         break
1031                         unless in_open_els
1032                                 parse_error()
1033                                 # "remove it from the list" must mean afe, since it's not in open_els
1034                                 afe.splice fe_of_afe, 1
1035                                 return
1036                         # 7. If formatting element is in the stack of open elements, but
1037                         # the element is not in scope, then this is a parse error; abort
1038                         # these steps.
1039                         unless el_is_in_scope fe
1040                                 parse_error()
1041                                 return
1042                         # 8. If formatting element is not the current node, this is a parse
1043                         # error. (But do not abort these steps.)
1044                         unless open_els[0] is fe
1045                                 parse_error()
1046                                 # continue
1047                         # 9. Let furthest block be the topmost node in the stack of open
1048                         # elements that is lower in the stack than formatting element, and
1049                         # is an element in the special category. There might not be one.
1050                         fb = null
1051                         fb_of_open_els = null
1052                         for t, i in open_els
1053                                 if t is fe
1054                                         break
1055                                 if el_is_special t
1056                                         fb = t
1057                                         fb_of_open_els = i
1058                                         # and continue, to see if there's one that's more "topmost"
1059                         # 10. If there is no furthest block, then the UA must first pop all
1060                         # the nodes from the bottom of the stack of open elements, from the
1061                         # current node up to and including formatting element, then remove
1062                         # formatting element from the list of active formatting elements,
1063                         # and finally abort these steps.
1064                         if fb is null
1065                                 loop
1066                                         t = open_els.shift()
1067                                         if t is fe
1068                                                 afe.splice fe_of_afe, 1
1069                                                 return
1070                         # 11. Let common ancestor be the element immediately above
1071                         # formatting element in the stack of open elements.
1072                         ca = open_els[fe_of_open_els + 1] # common ancestor
1073
1074                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1075                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1076                         bookmark = new_aaa_bookmark()
1077                         for t, i in afe
1078                                 if t is fe
1079                                         afe.splice i, 0, bookmark
1080                                         break
1081                         node = last_node = fb
1082                         inner = 0
1083                         loop
1084                                 inner += 1
1085                                 # 3. Let node be the element immediately above node in the
1086                                 # stack of open elements, or if node is no longer in the stack
1087                                 # of open elements (e.g. because it got removed by this
1088                                 # algorithm), the element that was immediately above node in
1089                                 # the stack of open elements before node was removed.
1090                                 node_next = null
1091                                 for t, i in open_els
1092                                         if t is node
1093                                                 node_next = open_els[i + 1]
1094                                                 break
1095                                 node = node_next ? node_above
1096                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1097
1098                                 # 4. If node is formatting element, then go to the next step in
1099                                 # the overall algorithm.
1100                                 if node is fe
1101                                         break
1102                                 # 5. If inner loop counter is greater than three and node is in
1103                                 # the list of active formatting elements, then remove node from
1104                                 # the list of active formatting elements.
1105                                 node_in_afe = false
1106                                 for t, i in afe
1107                                         if t is node
1108                                                 if inner > 3
1109                                                         afe.splice i, 1
1110                                                 else
1111                                                         node_in_afe = true
1112                                                 break
1113                                 # 6. If node is not in the list of active formatting elements,
1114                                 # then remove node from the stack of open elements and then go
1115                                 # back to the step labeled inner loop.
1116                                 unless node_in_afe
1117                                         for t, i in open_els
1118                                                 if t is node
1119                                                         node_above = open_els[i + 1]
1120                                                         open_els.splice i, 1
1121                                                         break
1122                                         continue
1123                                 # 7. create an element for the token for which the element node
1124                                 # was created, in the HTML namespace, with common ancestor as
1125                                 # the intended parent; replace the entry for node in the list
1126                                 # of active formatting elements with an entry for the new
1127                                 # element, replace the entry for node in the stack of open
1128                                 # elements with an entry for the new element, and let node be
1129                                 # the new element.
1130                                 new_node = token_to_element node.token, NS_HTML, ca
1131                                 for t, i in afe
1132                                         if t is node
1133                                                 afe[i] = new_node
1134                                                 break
1135                                 for t, i in open_els
1136                                         if t is node
1137                                                 node_above = open_els[i + 1]
1138                                                 open_els[i] = new_node
1139                                                 break
1140                                 node = new_node
1141                                 # 8. If last node is furthest block, then move the
1142                                 # aforementioned bookmark to be immediately after the new node
1143                                 # in the list of active formatting elements.
1144                                 if last_node is fb
1145                                         for t, i in afe
1146                                                 if t is bookmark
1147                                                         afe.splice i, 1
1148                                                         break
1149                                         for t, i in afe
1150                                                 if t is node
1151                                                         # "after" means lower
1152                                                         afe.splice i, 0, bookmark # "after as <-
1153                                                         break
1154                                 # 9. Insert last node into node, first removing it from its
1155                                 # previous parent node if any.
1156                                 if last_node.parent?
1157                                         for c, i in last_node.parent.children
1158                                                 if c is last_node
1159                                                         last_node.parent.children.splice i, 1
1160                                                         break
1161                                 node.children.push last_node
1162                                 last_node.parent = node
1163                                 # 10. Let last node be node.
1164                                 last_node = node
1165                                 # 11. Return to the step labeled inner loop.
1166                         # 14. Insert whatever last node ended up being in the previous step
1167                         # at the appropriate place for inserting a node, but using common
1168                         # ancestor as the override target.
1169
1170                         # In the case where fe is immediately followed by fb:
1171                         #   * inner loop exits out early (node==fe)
1172                         #   * last_node is fb
1173                         #   * last_node is still in the tree (not a duplicate)
1174                         if last_node.parent?
1175                                 for c, i in last_node.parent.children
1176                                         if c is last_node
1177                                                 last_node.parent.children.splice i, 1
1178                                                 break
1179                         # can't use standard insert token thing, because it's already in
1180                         # open_els and must stay at it's current position in open_els
1181                         dest = adjusted_insertion_location ca
1182                         dest[0].children.splice dest[1], 0, last_node
1183                         last_node.parent = dest[0]
1184                         # 15. Create an element for the token for which formatting element
1185                         # was created, in the HTML namespace, with furthest block as the
1186                         # intended parent.
1187                         new_element = token_to_element fe.token, NS_HTML, fb
1188                         # 16. Take all of the child nodes of furthest block and append them
1189                         # to the element created in the last step.
1190                         while fb.children.length
1191                                 t = fb.children.shift()
1192                                 t.parent = new_element
1193                                 new_element.children.push t
1194                         # 17. Append that new element to furthest block.
1195                         new_element.parent = fb
1196                         fb.children.push new_element
1197                         # 18. Remove formatting element from the list of active formatting
1198                         # elements, and insert the new element into the list of active
1199                         # formatting elements at the position of the aforementioned
1200                         # bookmark.
1201                         for t, i in afe
1202                                 if t is fe
1203                                         afe.splice i, 1
1204                                         break
1205                         for t, i in afe
1206                                 if t is bookmark
1207                                         afe[i] = new_element
1208                                         break
1209                         # 19. Remove formatting element from the stack of open elements,
1210                         # and insert the new element into the stack of open elements
1211                         # immediately below the position of furthest block in that stack.
1212                         for t, i in open_els
1213                                 if t is fe
1214                                         open_els.splice i, 1
1215                                         break
1216                         for t, i in open_els
1217                                 if t is fb
1218                                         open_els.splice i, 0, new_element
1219                                         break
1220                         # 20. Jump back to the step labeled outer loop.
1221                 return
1222
1223         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1224         close_p_element = ->
1225                 generate_implied_end_tags 'p' # 'p' is the one tag name exempt from popping
1226                 cur_el = open_els[0]
1227                 parse_error() if cur_el.name isnt 'p' or cur_el.namespace isnt NS_HTML
1228                 # Pop until an HTML <p> has been removed, always leaving one entry.
1229                 while open_els.length > 1
1230                         popped_el = open_els.shift()
1231                         return if popped_el.name is 'p' and popped_el.namespace is NS_HTML
1232                 return
1233         close_p_if_in_button_scope = ->
1234                 # Nothing to do unless an HTML <p> is open within button scope.
1235                 close_p_element() if is_in_button_scope 'p', NS_HTML
1236                 return
1237
1238         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1239         # aka insert_a_character = (t) ->
1240         insert_character = (t) ->
1241                 [ins_parent, ins_i] = adjusted_insertion_location()
1242                 # fixfull check for Document node
1243                 # Append to the text node just before the insertion point, if there is one.
1244                 prev_sib = ins_parent.children[ins_i - 1] if ins_i > 0
1245                 if ins_i > 0 and prev_sib.type is TYPE_TEXT
1246                         prev_sib.text += t.text
1247                         return
1248                 ins_parent.children.splice ins_i, 0, t
1249                 t.parent = ins_parent
1250                 return
1251
1252         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1253         process_token = (t) ->
1254                 # Route a token either to the current insertion mode or to the rules
1255                 # for foreign content, depending on the adjusted current node.
1256                 acn = adjusted_current_node()
1257                 tok_is_start = t.type is TYPE_START_TAG
1258                 tok_is_text = t.type is TYPE_TEXT
1259                 use_ins_mode = switch
1260                         # No adjusted current node, or it is in the HTML namespace.
1261                         when not acn? then true
1262                         when acn.namespace is NS_HTML then true
1263                         # MathML text integration point: text, or any start tag other
1264                         # than mglyph / malignmark.
1265                         when is_mathml_text_integration_point(acn) and ((tok_is_start and not (t.name is 'mglyph' or t.name is 'malignmark')) or tok_is_text) then true
1266                         # An svg start tag while the node is MathML annotation-xml.
1267                         when acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and tok_is_start and t.name is 'svg' then true
1268                         # HTML integration point: start tags and text.
1269                         when is_html_integration(acn) and (tok_is_start or tok_is_text) then true
1270                         # End of file always goes to the insertion mode.
1271                         when t.type is TYPE_EOF then true
1272                         # Everything else is foreign content.
1273                         else false
1274                 # Exactly one of the two handlers sees the token.
1275                 if use_ins_mode
1276                         ins_mode t
1277                 else
1278                         in_foreign_content t
1279                 return
1280
1281         # 8.2.5.1
1282         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1283         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1284         adjusted_insertion_location = (override_target = null) -> # returns [node, index_within_children]
1285                 # 1. If there was an override target specified, then let target be the
1286                 # override target.
1287                 if override_target?
1288                         target = override_target
1289                 else # Otherwise, let target be the current node.
1290                         target = open_els[0]
1291                 # 2. Determine the adjusted insertion location using the first matching
1292                 # steps from the following list:
1293                 #
1294                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1295                 # thead, or tr element Foster parenting happens when content is
1296                 # misnested in tables.
1297                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1298                         loop # once. this is here so we can ``break`` to "abort these substeps"
1299                                 # 1. Let last template be the last template element in the
1300                                 # stack of open elements, if any.
1301                                 last_template = null
1302                                 last_template_i = null
1303                                 for el, i in open_els
1304                                         if el.name is 'template' and el.namespace is NS_HTML
1305                                                 last_template = el
1306                                                 last_template_i = i
1307                                                 break
1308                                 # 2. Let last table be the last table element in the stack of
1309                                 # open elements, if any.
1310                                 last_table = null
1311                                 last_table_i = null
1312                                 for el, i in open_els
1313                                         if el.name is 'table' and el.namespace is NS_HTML
1314                                                 last_table = el
1315                                                 last_table_i = i
1316                                                 break
1317                                 # 3. If there is a last template and either there is no last
1318                                 # table, or there is one, but last template is lower (more
1319                                 # recently added) than last table in the stack of open
1320                                 # elements, then: let adjusted insertion location be inside
1321                                 # last template's template contents, after its last child (if
1322                                 # any), and abort these substeps.
1323                                 if last_template and (last_table is null or last_template_i < last_table_i)
1324                                         target = last_template # fixfull should be its contents
1325                                         target_i = target.children.length
1326                                         break
1327                                 # 4. If there is no last table, then let adjusted insertion
1328                                 # location be inside the first element in the stack of open
1329                                 # elements (the html element), after its last child (if any),
1330                                 # and abort these substeps. (fragment case)
1331                                 if last_table is null
1332                                         # this is odd
1333                                         target = open_els[open_els.length - 1]
1334                                         target_i = target.children.length
1335                                         break
1336                                 # 5. If last table has a parent element, then let adjusted
1337                                 # insertion location be inside last table's parent element,
1338                                 # immediately before last table, and abort these substeps.
1339                                 if last_table.parent?
1340                                         for c, i in last_table.parent.children
1341                                                 if c is last_table
1342                                                         target = last_table.parent
1343                                                         target_i = i
1344                                                         break
1345                                         break
1346                                 # 6. Let previous element be the element immediately above last
1347                                 # table in the stack of open elements.
1348                                 #
1349                                 # huh? how could it not have a parent?
1350                                 previous_element = open_els[last_table_i + 1] # open_els[0] is the current node, so "above" is the next index
1351                                 # 7. Let adjusted insertion location be inside previous
1352                                 # element, after its last child (if any).
1353                                 target = previous_element
1354                                 target_i = target.children.length
1355                                 # Note: These steps are involved in part because it's possible
1356                                 # for elements, the table element in this case in particular,
1357                                 # to have been moved by a script around in the DOM, or indeed
1358                                 # removed from the DOM entirely, after the element was inserted
1359                                 # by the parser.
1360                                 break # don't really loop
1361                 else
1362                         # Otherwise Let adjusted insertion location be inside target, after
1363                         # its last child (if any).
1364                         target_i = target.children.length
1365
1366                 # 3. If the adjusted insertion location is inside a template element,
1367                 # let it instead be inside the template element's template contents,
1368                 # after its last child (if any).
1369                 # fixfull (template)
1370
1371                 # 4. Return the adjusted insertion location.
1372                 return [target, target_i]
1373
1374         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1375         # aka create_an_element_for_token
1376         token_to_element = (t, namespace, intended_parent) ->
1377                 # Build a name -> value map from the token's [name, value] pairs.
1378                 # TODO check what to do with duplicate attrs (currently the last one wins)
1379                 attr_map = {}
1380                 attr_map[attr_name] = attr_value for [attr_name, attr_value] in t.attrs_a
1381                 # intended_parent is accepted but not used in this function.
1382
1383                 # TODO 2. If the newly created element has an xmlns attribute in the
1384                 # XMLNS namespace whose value is not exactly the same as the element's
1385                 # namespace, that is a parse error. Similarly, if the newly created
1386                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1387                 # value is not the XLink Namespace, that is a parse error.
1388
1389                 # fixfull: the spec says stuff about form pointers and ownerDocument
1390
1391                 return new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attr_map, token: t
1392
1393         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1394         insert_foreign_element = (token, namespace) ->
1395                 # Create the element, link it into the tree at the adjusted insertion
1396                 # location, and make it the current node. Returns the new element.
1397                 [ail_el, ail_i] = adjusted_insertion_location()
1398                 new_el = token_to_element token, namespace, ail_el
1399                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1400                 ail_el.children.splice ail_i, 0, new_el
1401                 new_el.parent = ail_el
1402                 open_els.unshift new_el
1403                 return new_el
1404         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1405         insert_html_element = (token) -> # insert_foreign_element, fixed to the HTML namespace
1406                 insert_foreign_element token, NS_HTML
1407
1408         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1409         # position should be [node, index_within_children]
1410         insert_comment = (t, position = null) ->
1411                 [cmt_parent, cmt_i] = position ? adjusted_insertion_location()
1412                 cmt_parent.children.splice cmt_i, 0, t # NOTE(review): t.parent is not set here, unlike insert_character; confirm intended
1413                 return
1414
1415         # 8.2.5.2
1416         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1417         parse_generic_raw_text = (t) ->
1418                 insert_html_element t
1419                 # Switch the tokenizer to RAWTEXT, save the current insertion mode in
1420                 # original_ins_mode, and continue in ins_mode_text.
1421                 [tok_state, original_ins_mode, ins_mode] = [tok_state_rawtext, ins_mode, ins_mode_text]
1422                 return
1423         parse_generic_rcdata_text = (t) ->
1424                 insert_html_element t
1425                 # Switch the tokenizer to RCDATA, save the current insertion mode in
1426                 # original_ins_mode, and continue in ins_mode_text.
1427                 [tok_state, original_ins_mode, ins_mode] = [tok_state_rcdata, ins_mode, ins_mode_text]
1428                 return
1429
1430         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1431         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1432         generate_implied_end_tags = (except = null) ->
1433                 # `except` names the one element type that must not be popped.
1434                 open_els.shift() while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1435                 return
1436
1437         # 8.2.5.4 The rules for parsing tokens in HTML content
1438         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1439
1440         # 8.2.5.4.1 The "initial" insertion mode
1441         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1442         is_quirks_yes_doctype = (t) ->
1443                 # Does this DOCTYPE token force full quirks mode?
1444                 return true if t.flag('force-quirks') or t.name isnt 'html'
1445                 public_lc = t.public_identifier.toLowerCase() if t.public_identifier?
1446                 if public_lc?
1447                         # quirks-triggering public identifiers: by prefix, then exact match
1448                         for pi_prefix in quirks_yes_pi_prefixes
1449                                 return true if public_lc.substr(0, pi_prefix.length) is pi_prefix
1450                         exact_matches = ['-//w3o//dtd w3 html strict 3.0//en//', '-/w3c/dtd html 4.0 transitional/en', 'html']
1451                         return true if public_lc in exact_matches
1452                 if t.system_identifier?
1453                         # with a system identifier present, only this one DTD means quirks
1454                         ibm_dtd = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
1455                         return t.system_identifier.toLowerCase() is ibm_dtd
1456                 # No system identifier: the HTML 4.01 frameset/transitional public
1457                 # identifiers mean quirks mode.
1458                 return false unless public_lc?
1459                 is_401_frameset = public_lc.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//'
1460                 is_401_transitional = public_lc.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1461                 return is_401_frameset or is_401_transitional
1462         is_quirks_limited_doctype = (t) ->
1463                 # Does this DOCTYPE token select limited-quirks mode?
1464                 return false unless t.public_identifier?
1465                 public_lc = t.public_identifier.toLowerCase()
1466                 starts_with = (pi_prefix) -> public_lc.substr(0, pi_prefix.length) is pi_prefix
1467                 return true if starts_with('-//w3c//dtd xhtml 1.0 frameset//') or starts_with('-//w3c//dtd xhtml 1.0 transitional//')
1468                 # the HTML 4.01 identifiers only count when a system identifier is present
1469                 return false unless t.system_identifier?
1470                 return starts_with('-//w3c//dtd html 4.01 frameset//') or starts_with('-//w3c//dtd html 4.01 transitional//')
1471         ins_mode_initial = (t) ->
1472                 return if is_space_tok t # whitespace is ignored in this mode
1473                 switch t.type
1474                         when TYPE_COMMENT
1475                                 # ?fixfull
1476                                 doc.children.push t
1477                         when TYPE_DOCTYPE
1478                                 # fixfull syntax error from first paragraph and following bullets
1479                                 # fixfull set doc.doctype
1480                                 # fixfull is the "not an iframe srcdoc" thing relevant?
1481                                 if is_quirks_yes_doctype t
1482                                         doc.flag 'quirks mode', QUIRKS_YES
1483                                 else if is_quirks_limited_doctype t
1484                                         doc.flag 'quirks mode', QUIRKS_LIMITED
1485                                 doc.children.push t
1486                                 ins_mode = ins_mode_before_html
1487                         else
1488                                 # Anything else: flag quirks mode, switch to "before html"
1489                                 # and hand the same token to process_token again.
1490                                 # fixfull not iframe srcdoc?
1491                                 parse_error()
1492                                 doc.flag 'quirks mode', QUIRKS_YES
1493                                 ins_mode = ins_mode_before_html
1494                                 process_token t
1495                 return
1496
1497         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1498         ins_mode_before_html = (t) ->
1499                 # Create the root element for an html start tag token, attach it to
1500                 # the document, push it on the stack and move to "before head".
1501                 open_root = (html_tok) ->
1502                         root_el = token_to_element html_tok, NS_HTML, doc
1503                         doc.children.push root_el
1504                         root_el.document = doc
1505                         open_els.unshift root_el
1506                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1507                         # ?fixfull browsing context
1508                         ins_mode = ins_mode_before_head
1509                         return
1510                 # The first matching case wins.
1511                 switch
1512                         when t.type is TYPE_DOCTYPE
1513                                 parse_error() # ignore the token
1514                         when t.type is TYPE_COMMENT
1515                                 doc.children.push t
1516                         when is_space_tok t
1517                                 null # whitespace is ignored
1518                         when t.type is TYPE_START_TAG and t.name is 'html'
1519                                 open_root t
1520                         when t.type is TYPE_END_TAG and t.name not in ['head', 'body', 'html', 'br']
1521                                 # Only </head>, </body>, </html> and </br> reach the
1522                                 # "anything else" branch; other end tags are ignored.
1523                                 parse_error()
1524                         else
1525                                 # Anything else: imply an html start tag, then reprocess
1526                                 # the current token in the new mode.
1527                                 open_root new_open_tag('html')
1528                                 process_token t
1529                 return
1530
1531         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1532         ins_mode_before_head = (t) ->
1533                 # Insert a head element for the given start tag token, record it in
1534                 # head_element_pointer, and switch to "in head".
1535                 open_head = (head_tok) ->
1536                         head_element_pointer = insert_html_element head_tok
1537                         ins_mode = ins_mode_in_head
1538                         return
1539                 # The first matching case wins.
1540                 switch
1541                         when is_space_tok t
1542                                 null # whitespace is ignored
1543                         when t.type is TYPE_COMMENT
1544                                 insert_comment t
1545                         when t.type is TYPE_DOCTYPE
1546                                 parse_error() # ignore the token
1547                         when t.type is TYPE_START_TAG and t.name is 'html'
1548                                 ins_mode_in_body t # handled by the "in body" rules
1549                         when t.type is TYPE_START_TAG and t.name is 'head'
1550                                 open_head t
1551                         when t.type is TYPE_END_TAG and t.name not in ['head', 'body', 'html', 'br']
1552                                 # Only </head>, </body>, </html> and </br> reach the
1553                                 # "anything else" branch; other end tags are ignored.
1554                                 parse_error()
1555                         else
1556                                 # Anything else: imply a head start tag, then reprocess
1557                                 # the current token in the new mode.
1558                                 open_head new_open_tag 'head'
1559                                 process_token t
1560                 return
1561
1562         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1563         ins_mode_in_head_else = (t) -> # shared "anything else" branch of the "in head" mode
1564                 ins_mode = ins_mode_after_head
1565                 open_els.shift() # pops the current node; per the spec that is the head element
1566                 process_token t # hand the same token to the new mode
1567                 return
1568         ins_mode_in_head = (t) -> # handles one token in the "in head" insertion mode
1569                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1570                         insert_character t
1571                         return
1572                 if t.type is TYPE_COMMENT
1573                         insert_comment t
1574                         return
1575                 if t.type is TYPE_DOCTYPE
1576                         parse_error()
1577                         return
1578                 if t.type is TYPE_START_TAG and t.name is 'html'
1579                         ins_mode_in_body t
1580                         return
1581                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1582                         el = insert_html_element t
1583                         open_els.shift()
1584                         t.acknowledge_self_closing()
1585                         return
1586                 if t.type is TYPE_START_TAG and t.name is 'meta'
1587                         el = insert_html_element t
1588                         open_els.shift()
1589                         t.acknowledge_self_closing()
1590                         # fixfull encoding stuff
1591                         return
1592                 if t.type is TYPE_START_TAG and t.name is 'title'
1593                         parse_generic_rcdata_text t
1594                         return
1595                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1596                         parse_generic_raw_text t
1597                         return
1598                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1599                         insert_html_element t
1600                         ins_mode = ins_mode_in_head_noscript
1601                         return
1602                 if t.type is TYPE_START_TAG and t.name is 'script'
1603                         ail = adjusted_insertion_location()
1604                         el = token_to_element t, NS_HTML, ail[0]
1605                         el.flag 'parser-inserted', true
1606                         # fixfull fragment case
1607                         el.parent = ail[0] # keep the parent link in step with the children list, as insert_foreign_element does
1608                         ail[0].children.splice ail[1], 0, el
1609                         open_els.unshift el
1610                         tok_state = tok_state_script_data
1611                         [original_ins_mode, ins_mode] = [ins_mode, ins_mode_text] # save the current mode, then switch to text
1612                         return
1613                 if t.type is TYPE_END_TAG and t.name is 'head'
1614                         open_els.shift() # will be a head element... spec says so
1615                         ins_mode = ins_mode_after_head
1616                         return
1617                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1618                         ins_mode_in_head_else t
1619                         return
1620                 if t.type is TYPE_START_TAG and t.name is 'template'
1621                         insert_html_element t
1622                         afe_push_marker()
1623                         flag_frameset_ok = false
1624                         ins_mode = ins_mode_in_template
1625                         template_ins_modes.unshift ins_mode_in_template
1626                         return
1627                 if t.type is TYPE_END_TAG and t.name is 'template'
1628                         if template_tag_is_open()
1629                                 generate_implied_end_tags() # parentheses required: without them this is only a reference, not a call
1630                                 if open_els[0].name isnt 'template'
1631                                         parse_error()
1632                                 loop
1633                                         el = open_els.shift()
1634                                         if el.name is 'template' and el.namespace is NS_HTML
1635                                                 break
1636                                 clear_afe_to_marker()
1637                                 template_ins_modes.shift()
1638                                 reset_ins_mode()
1639                         else
1640                                 parse_error()
1641                         return
1642                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1643                         parse_error()
1644                         return
1645                 ins_mode_in_head_else t
1646                 return
1647
1648         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1649         ins_mode_in_head_noscript_else = (t) -> # shared "anything else" branch of "in head noscript"
1650                 parse_error()
1651                 ins_mode = ins_mode_in_head
1652                 open_els.shift() # pops the current node (presumably the noscript element)
1653                 process_token t # hand the same token to the "in head" mode
1654                 return
1655         ins_mode_in_head_noscript = (t) ->
1656                 tok_is_start = t.type is TYPE_START_TAG
1657                 tok_is_end = t.type is TYPE_END_TAG
1658                 # The first matching case wins.
1659                 switch
1660                         when t.type is TYPE_DOCTYPE
1661                                 parse_error() # ignore the token
1662                         when tok_is_start and t.name is 'html'
1663                                 ins_mode_in_body t
1664                         when tok_is_end and t.name is 'noscript'
1665                                 open_els.shift()
1666                                 ins_mode = ins_mode_in_head
1667                         when is_space_tok(t) or t.type is TYPE_COMMENT or (tok_is_start and t.name in ['basefont', 'bgsound', 'link', 'meta', 'noframes', 'style'])
1668                                 ins_mode_in_head t # delegated to the "in head" rules
1669                         when tok_is_end and t.name is 'br'
1670                                 ins_mode_in_head_noscript_else t
1671                         when (tok_is_start and (t.name is 'head' or t.name is 'noscript')) or tok_is_end
1672                                 parse_error() # ignore the token
1673                         else
1674                                 # Anything else
1675                                 ins_mode_in_head_noscript_else t
1676                 # every case falls out of the switch to this single return
1677                 return
1678
1679         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1680         ins_mode_after_head_else = (t) ->
1681                 # Imply a body start tag, then hand the token to the "in body" mode.
1682                 insert_html_element new_open_tag 'body'
1683                 ins_mode = ins_mode_in_body
1684                 process_token t
1685                 return
1686         ins_mode_after_head = (t) -> # handles one token in the "after head" insertion mode
1687                 if is_space_tok t
1688                         insert_character t
1689                         return
1690                 if t.type is TYPE_COMMENT
1691                         insert_comment t
1692                         return
1693                 if t.type is TYPE_DOCTYPE
1694                         parse_error()
1695                         return
1696                 if t.type is TYPE_START_TAG and t.name is 'html'
1697                         ins_mode_in_body t # handled by the "in body" rules
1698                         return
1699                 if t.type is TYPE_START_TAG and t.name is 'body'
1700                         insert_html_element t
1701                         flag_frameset_ok = false
1702                         ins_mode = ins_mode_in_body
1703                         return
1704                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1705                         insert_html_element t
1706                         ins_mode = ins_mode_in_frameset
1707                         return
1708                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1709                         parse_error()
1710                         open_els.unshift head_element_pointer # temporarily make the head element the current node
1711                         ins_mode_in_head t # process the token with the "in head" rules
1712                         for el, i in open_els # then take the head element back off the stack, wherever it now sits
1713                                 if el is head_element_pointer
1714                                         open_els.splice i, 1
1715                                         return
1716                         return
1717                 if t.type is TYPE_END_TAG and t.name is 'template'
1718                         ins_mode_in_head t
1719                         return
1720                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1721                         ins_mode_after_head_else t
1722                         return
1723                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG # a head start tag or any remaining end tag: error, token ignored
1724                         parse_error()
1725                         return
1726                 # Anything else
1727                 ins_mode_after_head_else t
1728                 return
1729
1730         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1731         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1732                 node = open_els[0] # start at the current node
1733                 loop # walk the stack from the current node towards higher indexes
1734                         if node.name is name and node.namespace is NS_HTML
1735                                 generate_implied_end_tags name # arg is exception
1736                                 unless node is open_els[0] # the matched element is not the current node
1737                                         parse_error()
1738                                 loop # pop up to and including node
1739                                         el = open_els.shift()
1740                                         if el is node
1741                                                 return
1742                         if special_elements[node.name] is node.namespace # reached a special element first: error, end tag ignored
1743                                 parse_error()
1744                                 return
1745                         for el, i in open_els # advance node to the entry after it in open_els
1746                                 if node is el
1747                                         node = open_els[i + 1]
1748                                         break
1749                 return # not reached: the loop above only exits via return
1750         ins_mode_in_body = (t) ->
                     # Handles one token under the "in body" insertion mode (see the WHATWG
                     # #parsing-main-inbody link further down in this function).
                     #
                     # Each top-level `if` matches one kind of token (by t.type and, for
                     # tags, t.name), performs the steps for that case and returns. The only
                     # deliberate fall-through is the `</br>` case, which replaces t with a
                     # `<br>` start tag and continues into the void-element case below it.
                     #
                     # t: the token to process. Every path returns undefined.
                     #
                     # Throughout, open_els[0] is treated as the current node: elements are
                     # popped with open_els.shift() and the root html element is the last
                     # entry of open_els.
1751                 if t.type is TYPE_TEXT and t.text is "\u0000"
1752                         parse_error()
1753                         return
1754                 if is_space_tok t
1755                         reconstruct_afe()
1756                         insert_character t
1757                         return
1758                 if t.type is TYPE_TEXT
1759                         reconstruct_afe()
1760                         insert_character t
1761                         flag_frameset_ok = false
1762                         return
1763                 if t.type is TYPE_COMMENT
1764                         insert_comment t
1765                         return
1766                 if t.type is TYPE_DOCTYPE
1767                         parse_error()
1768                         return
1769                 if t.type is TYPE_START_TAG and t.name is 'html'
1770                         parse_error()
1771                         return if template_tag_is_open()
                             # copy over only the attributes the root element doesn't have yet
1772                         root_attrs = open_els[open_els.length - 1].attrs
1773                         for a in t.attrs_a
1774                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1775                         return
1776
1777                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1778                         ins_mode_in_head t
1779                         return
1780                 if t.type is TYPE_START_TAG and t.name is 'body'
1781                         parse_error()
1782                         return if open_els.length < 2
1783                         second = open_els[open_els.length - 2]
1784                         return unless second.namespace is NS_HTML
1785                         return unless second.name is 'body'
1786                         return if template_tag_is_open()
1787                         flag_frameset_ok = false
1788                         for a in t.attrs_a
1789                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1790                         return
1791                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1792                         parse_error()
1793                         return if open_els.length < 2
1794                         second_i = open_els.length - 2
1795                         second = open_els[second_i]
1796                         return unless second.namespace is NS_HTML
1797                         return unless second.name is 'body'
1798                         if flag_frameset_ok is false
1799                                 return
                             # detach the body element from its parent before replacing it
1800                         if second.parent?
1801                                 for el, i in second.parent.children
1802                                         if el is second
1803                                                 second.parent.children.splice i, 1
1804                                                 break
1805                         open_els.splice second_i, 1
1806                         # pop everything except the "root html element"
1807                         while open_els.length > 1
1808                                 open_els.shift()
1809                         insert_html_element t
1810                         ins_mode = ins_mode_in_frameset
1811                         return
1812                 if t.type is TYPE_EOF
1813                         ok_tags = {
1814                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1815                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1816                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1817                         }
                             # any open element not in ok_tags is a parse error; the lookup must
                             # use the element's own name (the EOF token has no name)
1818                         for el in open_els
1819                                 unless ok_tags[el.name] is el.namespace
1820                                         parse_error()
1821                                         break
1822                         if template_ins_modes.length > 0
1823                                 ins_mode_in_template t
1824                         else
1825                                 stop_parsing()
1826                         return
1827                 if t.type is TYPE_END_TAG and t.name is 'body'
1828                         unless is_in_scope 'body', NS_HTML
1829                                 parse_error()
1830                                 return
1831                         ok_tags = {
1832                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1833                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1834                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1835                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1836                                 html:NS_HTML
1837                         }
                             # report (once) if any open element is not one of ok_tags
1838                         for el in open_els
1839                                 unless ok_tags[el.name] is el.namespace
1840                                         parse_error()
1841                                         break
1842                         ins_mode = ins_mode_after_body
1843                         return
1844                 if t.type is TYPE_END_TAG and t.name is 'html'
1845                         unless is_in_scope 'body', NS_HTML
1846                                 parse_error()
1847                                 return
1848                         ok_tags = {
1849                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1850                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1851                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1852                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1853                                 html:NS_HTML
1854                         }
                             # report (once) if any open element is not one of ok_tags
1855                         for el in open_els
1856                                 unless ok_tags[el.name] is el.namespace
1857                                         parse_error()
1858                                         break
                             # same as </body>, then the token is reprocessed in the new mode
1859                         ins_mode = ins_mode_after_body
1860                         process_token t
1861                         return
1862                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1863                         close_p_if_in_button_scope()
1864                         insert_html_element t
1865                         return
1866                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1867                         close_p_if_in_button_scope()
                             # a heading directly inside another heading: drop the outer one
1868                         if h_tags[open_els[0].name] is open_els[0].namespace
1869                                 parse_error()
1870                                 open_els.shift()
1871                         insert_html_element t
1872                         return
1873                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1874                         close_p_if_in_button_scope()
1875                         insert_html_element t
1876                         eat_next_token_if_newline()
1877                         flag_frameset_ok = false
1878                         return
1879                 if t.type is TYPE_START_TAG and t.name is 'form'
1880                         unless form_element_pointer is null or template_tag_is_open()
1881                                 parse_error()
1882                                 return
1883                         close_p_if_in_button_scope()
1884                         el = insert_html_element t
1885                         unless template_tag_is_open()
1886                                 form_element_pointer = el
1887                         return
1888                 if t.type is TYPE_START_TAG and t.name is 'li'
1889                         flag_frameset_ok = false
                             # walk from the current node outward: close an open li if one is
                             # reached before el_is_special_not_adp says to stop
1890                         for node in open_els
1891                                 if node.name is 'li' and node.namespace is NS_HTML
1892                                         generate_implied_end_tags 'li' # arg is exception
1893                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1894                                                 parse_error()
1895                                         loop
1896                                                 el = open_els.shift()
1897                                                 if el.name is 'li' and el.namespace is NS_HTML
1898                                                         break
1899                                         break
1900                                 if el_is_special_not_adp node
1901                                         break
1902                         close_p_if_in_button_scope()
1903                         insert_html_element t
1904                         return
1905                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1906                         flag_frameset_ok = false
                             # same walk as for li, but an open dd or dt is what gets closed
1907                         for node in open_els
1908                                 if node.name is 'dd' and node.namespace is NS_HTML
1909                                         generate_implied_end_tags 'dd' # arg is exception
1910                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1911                                                 parse_error()
1912                                         loop
1913                                                 el = open_els.shift()
1914                                                 if el.name is 'dd' and el.namespace is NS_HTML
1915                                                         break
1916                                         break
1917                                 if node.name is 'dt' and node.namespace is NS_HTML
1918                                         generate_implied_end_tags 'dt' # arg is exception
1919                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1920                                                 parse_error()
1921                                         loop
1922                                                 el = open_els.shift()
1923                                                 if el.name is 'dt' and el.namespace is NS_HTML
1924                                                         break
1925                                         break
1926                                 if el_is_special_not_adp node
1927                                         break
1928                         close_p_if_in_button_scope()
1929                         insert_html_element t
1930                         return
1931                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1932                         close_p_if_in_button_scope()
1933                         insert_html_element t
1934                         tok_state = tok_state_plaintext
1935                         return
1936                 if t.type is TYPE_START_TAG and t.name is 'button'
1937                         if is_in_scope 'button', NS_HTML
1938                                 parse_error()
1939                                 generate_implied_end_tags()
1940                                 loop
1941                                         el = open_els.shift()
1942                                         if el.name is 'button' and el.namespace is NS_HTML
1943                                                 break
1944                         reconstruct_afe()
1945                         insert_html_element t
1946                         flag_frameset_ok = false
1947                         return
1948                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1949                         unless is_in_scope t.name, NS_HTML
1950                                 parse_error()
1951                                 return
1952                         generate_implied_end_tags()
1953                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1954                                 parse_error()
                             # pop up to and including the matching element
1955                         loop
1956                                 el = open_els.shift()
1957                                 if el.name is t.name and el.namespace is NS_HTML
1958                                         return
1959                         return
1960                 if t.type is TYPE_END_TAG and t.name is 'form'
1961                         unless template_tag_is_open()
1962                                 node = form_element_pointer
1963                                 form_element_pointer = null
1964                                 if node is null or not el_is_in_scope node
1965                                         parse_error()
1966                                         return
1967                                 generate_implied_end_tags()
1968                                 if open_els[0] isnt node
1969                                         parse_error()
                                     # remove just the form element, wherever it sits in the stack
1970                                 for el, i in open_els
1971                                         if el is node
1972                                                 open_els.splice i, 1
1973                                                 break
1974                         else
1975                                 unless is_in_scope 'form', NS_HTML
1976                                         parse_error()
1977                                         return
1978                                 generate_implied_end_tags()
1979                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1980                                         parse_error()
1981                                 loop
1982                                         el = open_els.shift()
1983                                         if el.name is 'form' and el.namespace is NS_HTML
1984                                                 break
1985                         return
1986                 if t.type is TYPE_END_TAG and t.name is 'p'
1987                         unless is_in_button_scope 'p', NS_HTML
1988                                 parse_error()
                                     # no p to close: insert an empty one so there is something to close
1989                                 insert_html_element new_open_tag 'p'
1990                         close_p_element()
1991                         return
1992                 if t.type is TYPE_END_TAG and t.name is 'li'
1993                         unless is_in_li_scope 'li', NS_HTML
1994                                 parse_error()
1995                                 return
1996                         generate_implied_end_tags 'li' # arg is exception
1997                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1998                                 parse_error()
1999                         loop
2000                                 el = open_els.shift()
2001                                 if el.name is 'li' and el.namespace is NS_HTML
2002                                         break
2003                         return
2004                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
2005                         unless is_in_scope t.name, NS_HTML
2006                                 parse_error()
2007                                 return
2008                         generate_implied_end_tags t.name # arg is exception
2009                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2010                                 parse_error()
2011                         loop
2012                                 el = open_els.shift()
2013                                 if el.name is t.name and el.namespace is NS_HTML
2014                                         break
2015                         return
2016                 if t.type is TYPE_END_TAG and h_tags[t.name]?
                             # any heading level in scope counts, not just the one named by t
2017                         h_in_scope = false
2018                         for el in open_els
2019                                 if h_tags[el.name] is el.namespace
2020                                         h_in_scope = true
2021                                         break
2022                                 if standard_scopers[el.name] is el.namespace
2023                                         break
2024                         unless h_in_scope
2025                                 parse_error()
2026                                 return
2027                         generate_implied_end_tags()
2028                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2029                                 parse_error()
2030                         loop
2031                                 el = open_els.shift()
2032                                 if h_tags[el.name] is el.namespace
2033                                         break
2034                         return
2035                 # deep breath!
2036                 if t.type is TYPE_START_TAG and t.name is 'a'
2037                         # If the list of active formatting elements contains an a element
2038                         # between the end of the list and the last marker on the list (or
2039                         # the start of the list if there is no marker on the list), then
2040                         # this is a parse error; run the adoption agency algorithm for the
2041                         # tag name "a", then remove that element from the list of active
2042                         # formatting elements and the stack of open elements if the
2043                         # adoption agency algorithm didn't already remove it (it might not
2044                         # have if the element is not in table scope).
                             #
                             # found starts as null (not false) so that the `found?` existence
                             # check below is only true when an `a` element was actually found.
2045                         found = null
2046                         for el in afe
2047                                 if el.type is TYPE_AFE_MARKER
2048                                         break
2049                                 if el.name is 'a' and el.namespace is NS_HTML
2050                                         found = el
2051                         if found?
2052                                 parse_error()
2053                                 adoption_agency 'a'
2054                                 for el, i in afe
2055                                         if el is found
2056                                                 afe.splice i, 1
                                                     break
2057                                 for el, i in open_els
2058                                         if el is found
2059                                                 open_els.splice i, 1
                                                     break
2060                         reconstruct_afe()
2061                         el = insert_html_element t
2062                         afe_push el
2063                         return
2064                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2065                         reconstruct_afe()
2066                         el = insert_html_element t
2067                         afe_push el
2068                         return
2069                 if t.type is TYPE_START_TAG and t.name is 'nobr'
2070                         reconstruct_afe()
2071                         if is_in_scope 'nobr', NS_HTML
2072                                 parse_error()
2073                                 adoption_agency 'nobr'
2074                                 reconstruct_afe()
2075                         el = insert_html_element t
2076                         afe_push el
2077                         return
2078                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2079                         adoption_agency t.name
2080                         return
2081                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2082                         reconstruct_afe()
2083                         insert_html_element t
2084                         afe_push_marker()
2085                         flag_frameset_ok = false
2086                         return
2087                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2088                         unless is_in_scope t.name, NS_HTML
2089                                 parse_error()
2090                                 return
2091                         generate_implied_end_tags()
2092                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2093                                 parse_error()
2094                         loop
2095                                 el = open_els.shift()
2096                                 if el.name is t.name and el.namespace is NS_HTML
2097                                         break
2098                         clear_afe_to_marker()
2099                         return
2100                 if t.type is TYPE_START_TAG and t.name is 'table'
2101                         unless doc.flag('quirks mode') is QUIRKS_YES
2102                                 close_p_if_in_button_scope() # test
2103                         insert_html_element t
2104                         flag_frameset_ok = false
2105                         ins_mode = ins_mode_in_table
2106                         return
2107                 if t.type is TYPE_END_TAG and t.name is 'br'
2108                         parse_error()
2109                         # W3C: t.type = TYPE_START_TAG
2110                         t = new_open_tag 'br' # WHATWG
2111                         # fall through
2112                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2113                         reconstruct_afe()
2114                         insert_html_element t
                             # these elements take no content: pop right after inserting
2115                         open_els.shift()
2116                         t.acknowledge_self_closing()
2117                         flag_frameset_ok = false
2118                         return
2119                 if t.type is TYPE_START_TAG and t.name is 'input'
2120                         reconstruct_afe()
2121                         insert_html_element t
2122                         open_els.shift()
2123                         t.acknowledge_self_closing()
2124                         unless is_input_hidden_tok t
2125                                 flag_frameset_ok = false
2126                         return
2127                 if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
2128                         # WHATWG adds 'menuitem' for this block
2129                         insert_html_element t
2130                         open_els.shift()
2131                         t.acknowledge_self_closing()
2132                         return
2133                 if t.type is TYPE_START_TAG and t.name is 'hr'
2134                         close_p_if_in_button_scope()
2135                         insert_html_element t
2136                         open_els.shift()
2137                         t.acknowledge_self_closing()
2138                         flag_frameset_ok = false
2139                         return
2140                 if t.type is TYPE_START_TAG and t.name is 'image'
2141                         parse_error()
                             # treat <image> as <img> and reprocess the renamed token
2142                         t.name = 'img'
2143                         process_token t
2144                         return
2145                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2146                         parse_error()
2147                         if template_tag_is_open() is false and form_element_pointer isnt null
2148                                 return
2149                         t.acknowledge_self_closing()
2150                         flag_frameset_ok = false
2151                         close_p_if_in_button_scope()
                             # builds form > hr, label > (prompt text, input), hr in place of isindex
2152                         el = insert_html_element new_open_tag 'form'
2153                         unless template_tag_is_open()
2154                                 form_element_pointer = el
2155                         for a in t.attrs_a
2156                                 if a[0] is 'action'
2157                                         el.attrs['action'] = a[1]
2158                                         break
2159                         insert_html_element new_open_tag 'hr'
2160                         open_els.shift()
2161                         reconstruct_afe()
2162                         insert_html_element new_open_tag 'label'
2163                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2164                         input_el = new_open_tag 'input'
2165                         prompt = null
2166                         for a in t.attrs_a
2167                                 if a[0] is 'prompt'
2168                                         prompt = a[1]
2169                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2170                                         input_el.attrs_a.push [a[0], a[1]]
2171                         input_el.attrs_a.push ['name', 'isindex']
2172                         # fixfull this next bit is in english... internationalize?
2173                         prompt ?= "This is a searchable index. Enter search keywords: "
2174                         insert_character new_character_token prompt # fixfull split
2175                         # TODO submit typo "balue" in spec
2176                         insert_html_element input_el
2177                         open_els.shift()
2178                         # insert_character '' # you can put chars here if prompt attr missing
2179                         open_els.shift()
2180                         insert_html_element new_open_tag 'hr'
2181                         open_els.shift()
2182                         open_els.shift()
2183                         unless template_tag_is_open()
2184                                 form_element_pointer = null
2185                         return
2186                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2187                         insert_html_element t
2188                         eat_next_token_if_newline()
2189                         tok_state = tok_state_rcdata
                             # remember the mode to come back to once the text mode is done
2190                         original_ins_mode = ins_mode
2191                         flag_frameset_ok = false
2192                         ins_mode = ins_mode_text
2193                         return
2194                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2195                         close_p_if_in_button_scope()
2196                         reconstruct_afe()
2197                         flag_frameset_ok = false
2198                         parse_generic_raw_text t
2199                         return
2200                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2201                         flag_frameset_ok = false
2202                         parse_generic_raw_text t
2203                         return
2204                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2205                         parse_generic_raw_text t
2206                         return
2207                 if t.type is TYPE_START_TAG and t.name is 'select'
2208                         reconstruct_afe()
2209                         insert_html_element t
2210                         flag_frameset_ok = false
2211                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2212                                 ins_mode = ins_mode_in_select_in_table
2213                         else
2214                                 ins_mode = ins_mode_in_select
2215                         return
2216                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2217                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2218                                 open_els.shift()
2219                         reconstruct_afe()
2220                         insert_html_element t
2221                         return
2222 # this comment block implements the W3C spec
2223 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2224 #                       if is_in_scope 'ruby', NS_HTML
2225 #                               generate_implied_end_tags()
2226 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2227 #                                       parse_error()
2228 #                       insert_html_element t
2229 #                       return
2230 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2231 #                       if is_in_scope 'ruby', NS_HTML
2232 #                               generate_implied_end_tags 'rtc' # arg is exception
2233 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2234 #                                       parse_error()
2235 #                       insert_html_element t
2236 #                       return
2237 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2238                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2239                         if is_in_scope 'ruby', NS_HTML
2240                                 generate_implied_end_tags()
2241                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2242                                         parse_error()
2243                         insert_html_element t
2244                         return
2245                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2246                         if is_in_scope 'ruby', NS_HTML
2247                                 generate_implied_end_tags 'rtc'
2248                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2249                                         parse_error()
2250                         insert_html_element t
2251                         return
2252 # end WHATWG chunk
2253                 if t.type is TYPE_START_TAG and t.name is 'math'
2254                         reconstruct_afe()
2255                         adjust_mathml_attributes t
2256                         adjust_foreign_attributes t
2257                         insert_foreign_element t, NS_MATHML
2258                         if t.flag 'self-closing'
2259                                 open_els.shift()
2260                                 t.acknowledge_self_closing()
2261                         return
2262                 if t.type is TYPE_START_TAG and t.name is 'svg'
2263                         reconstruct_afe()
2264                         adjust_svg_attributes t
2265                         adjust_foreign_attributes t
2266                         insert_foreign_element t, NS_SVG
2267                         if t.flag 'self-closing'
2268                                 open_els.shift()
2269                                 t.acknowledge_self_closing()
2270                         return
2271                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2272                         parse_error()
2273                         return
2274                 if t.type is TYPE_START_TAG # any other start tag
2275                         reconstruct_afe()
2276                         insert_html_element t
2277                         return
2278                 if t.type is TYPE_END_TAG # any other end tag
2279                         in_body_any_other_end_tag t.name
2280                         return
2281                 return
2282
2283         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
ins_mode_text = (t) ->
        # Handler for the "text" insertion mode.
        #
        # t: the token to process. Token types other than text, EOF and end tag
        # are ignored. Always returns undefined.
        switch t.type
                when TYPE_TEXT
                        insert_character t
                when TYPE_EOF
                        # Input ended inside the element: report it, mark an HTML
                        # <script> current node as 'already started', pop the
                        # current node and hand the EOF to process_token again
                        # with the saved insertion mode restored.
                        parse_error()
                        if open_els[0].namespace is NS_HTML and open_els[0].name is 'script'
                                open_els[0].flag 'already started', true
                        open_els.shift()
                        ins_mode = original_ins_mode
                        process_token t
                when TYPE_END_TAG
                        # fixfull the spec seems to assume that I'm going to run the script
                        # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
                        # For now </script> is treated exactly like any other end
                        # tag: pop the current node and restore the saved mode.
                        open_els.shift()
                        ins_mode = original_ins_mode
        return
2307
2308         # the functions below implement the tokenizer states described here:
2309         # http://www.w3.org/TR/html5/syntax.html#tokenization
2310
2311         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
ins_mode_in_table_else = (t) ->
        # "Anything else" rule shared by the table insertion modes: report a
        # parse error, then process the token with the "in body" rules while
        # the foster-parenting flag is switched on.
        #
        # t: the token to process. Always returns undefined.
        parse_error()
        flag_foster_parenting = yes
        ins_mode_in_body(t)
        # the flag is only meant to be set for the duration of the call above
        flag_foster_parenting = no
        return
ins_mode_in_table = (t) ->
        # Handler for the "in table" insertion mode.
        #
        # t: the token to process. Tokens without a table-specific rule are
        # passed to ins_mode_in_table_else. Always returns undefined.
        if t.type is TYPE_COMMENT
                insert_comment t
                return
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_EOF
                ins_mode_in_body t
                return
        if t.type is TYPE_TEXT
                # When the current node is an HTML table/tbody/tfoot/thead/tr,
                # start a fresh pending-character list, remember the current
                # mode, switch to "in table text" and reprocess the token.
                if open_els[0].namespace is NS_HTML and open_els[0].name in ['table', 'tbody', 'tfoot', 'thead', 'tr']
                        pending_table_character_tokens = []
                        original_ins_mode = ins_mode
                        ins_mode = ins_mode_in_table_text
                        process_token t
                else
                        ins_mode_in_table_else t
                return
        if t.type is TYPE_START_TAG
                switch t.name
                        when 'caption'
                                clear_stack_to_table_context()
                                afe_push_marker()
                                insert_html_element t
                                ins_mode = ins_mode_in_caption
                        when 'colgroup'
                                clear_stack_to_table_context()
                                insert_html_element t
                                ins_mode = ins_mode_in_column_group
                        when 'col'
                                # insert an implied <colgroup>, then reprocess the <col>
                                clear_stack_to_table_context()
                                insert_html_element new_open_tag 'colgroup'
                                ins_mode = ins_mode_in_column_group
                                process_token t
                        when 'tbody', 'tfoot', 'thead'
                                clear_stack_to_table_context()
                                insert_html_element t
                                ins_mode = ins_mode_in_table_body
                        when 'td', 'th', 'tr'
                                # insert an implied <tbody>, then reprocess the token
                                clear_stack_to_table_context()
                                insert_html_element new_open_tag 'tbody'
                                ins_mode = ins_mode_in_table_body
                                process_token t
                        when 'table'
                                # A nested <table> start tag is an error. If a table
                                # is in table scope, pop through it and reprocess the
                                # token; otherwise the token is dropped.
                                parse_error()
                                if is_in_table_scope 'table', NS_HTML
                                        loop
                                                el = open_els.shift()
                                                break if el.namespace is NS_HTML and el.name is 'table'
                                        reset_ins_mode()
                                        process_token t
                        when 'style', 'script', 'template'
                                ins_mode_in_head t
                        when 'input'
                                if is_input_hidden_tok t
                                        # inserted and immediately popped again
                                        parse_error()
                                        el = insert_html_element t
                                        open_els.shift()
                                        t.acknowledge_self_closing()
                                else
                                        ins_mode_in_table_else t
                        when 'form'
                                # Only inserted (and immediately popped) when no form
                                # element pointer is set and no template is open.
                                parse_error()
                                unless form_element_pointer? or template_tag_is_open()
                                        form_element_pointer = insert_html_element t
                                        open_els.shift()
                        else
                                ins_mode_in_table_else t
                return
        if t.type is TYPE_END_TAG
                switch t.name
                        when 'table'
                                unless is_in_table_scope 'table', NS_HTML
                                        parse_error()
                                        return
                                loop
                                        el = open_els.shift()
                                        break if el.namespace is NS_HTML and el.name is 'table'
                                reset_ins_mode()
                        when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
                                parse_error()
                        when 'template'
                                ins_mode_in_head t
                        else
                                ins_mode_in_table_else t
                return
        # any other token type
        ins_mode_in_table_else t
        return
2408
2409
2410         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
ins_mode_in_table_text = (t) ->
        # Handler for the "in table text" insertion mode: text tokens are
        # collected in pending_table_character_tokens until some other token
        # arrives, at which point the collected tokens are flushed.
        #
        # t: the token to process. Always returns undefined.
        if t.type is TYPE_TEXT
                if t.text is "\u0000"
                        # from javascript?
                        parse_error()
                else
                        pending_table_character_tokens.push t
                return
        # Anything else: decide whether everything collected is whitespace
        all_space = true
        for old in pending_table_character_tokens when not is_space_tok old
                all_space = false
                break
        if all_space
                # whitespace-only runs are inserted normally
                insert_character old for old in pending_table_character_tokens
        else
                # otherwise every collected token goes through the "anything
                # else" rule of the "in table" mode
                ins_mode_in_table_else old for old in pending_table_character_tokens
        pending_table_character_tokens = []
        # restore the saved mode and reprocess the token that ended the run
        ins_mode = original_ins_mode
        process_token t
        return
2435
2436         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
ins_mode_in_caption = (t) ->
        # Handler for the "in caption" insertion mode.
        #
        # t: the token to process. Tokens without a caption-specific rule are
        # processed with the "in body" rules. Always returns undefined.
        if t.type is TYPE_END_TAG and t.name is 'caption'
                unless is_in_table_scope 'caption', NS_HTML
                        # fragment case
                        parse_error()
                        return
                generate_implied_end_tags()
                parse_error() unless open_els[0].name is 'caption'
                loop
                        el = open_els.shift()
                        break if el.namespace is NS_HTML and el.name is 'caption'
                clear_afe_to_marker()
                ins_mode = ins_mode_in_table
                return
        if (t.type is TYPE_END_TAG and t.name is 'table') or (t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'])
                # These tokens close the caption implicitly (always a parse
                # error) and are then reprocessed with the "in table" mode set.
                parse_error()
                # nothing more to do in the fragment case
                return unless is_in_table_scope 'caption', NS_HTML
                loop
                        el = open_els.shift()
                        break if el.namespace is NS_HTML and el.name is 'caption'
                clear_afe_to_marker()
                ins_mode = ins_mode_in_table
                process_token t
                return
        if t.type is TYPE_END_TAG and t.name in ['body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
                parse_error()
                return
        # Anything else
        ins_mode_in_body t
        return
2471
2472         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
ins_mode_in_column_group = (t) ->
        # Handler for the "in column group" insertion mode.
        #
        # t: the token to process. Tokens without a specific rule pop the
        # current <colgroup> and are reprocessed with the "in table" mode set.
        # Always returns undefined.
        if is_space_tok t
                insert_character t
                return
        if t.type is TYPE_COMMENT
                insert_comment t
                return
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_START_TAG and t.name is 'html'
                ins_mode_in_body t
                return
        if t.type is TYPE_START_TAG and t.name is 'col'
                # <col> is inserted and immediately popped again
                el = insert_html_element t
                open_els.shift()
                t.acknowledge_self_closing()
                return
        if t.type is TYPE_END_TAG and t.name is 'colgroup'
                # Both tests must look at the current node, open_els[0]. Reading
                # .namespace from the stack array itself never equals NS_HTML,
                # which made every </colgroup> a parse error that was ignored.
                if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
                        open_els.shift()
                        ins_mode = ins_mode_in_table
                else
                        parse_error()
                return
        if t.type is TYPE_END_TAG and t.name is 'col'
                parse_error()
                return
        if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
                ins_mode_in_head t
                return
        if t.type is TYPE_EOF
                ins_mode_in_body t
                return
        # Anything else
        if open_els[0].name isnt 'colgroup'
                parse_error()
                return
        # close the colgroup implicitly and reprocess the token
        open_els.shift()
        ins_mode = ins_mode_in_table
        process_token t
        return
2515
2516         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
ins_mode_in_table_body = (t) ->
        # Handler for the "in table body" insertion mode.
        #
        # t: the token to process. Tokens without a specific rule are handled
        # by ins_mode_in_table. Always returns undefined.
        if t.type is TYPE_START_TAG
                switch t.name
                        when 'tr'
                                clear_stack_to_table_body_context()
                                insert_html_element t
                                ins_mode = ins_mode_in_row
                                return
                        when 'th', 'td'
                                # A cell without a row: insert an implied <tr> and
                                # reprocess the cell token.
                                parse_error()
                                clear_stack_to_table_body_context()
                                insert_html_element new_open_tag 'tr'
                                ins_mode = ins_mode_in_row
                                process_token t
                                return
        if t.type is TYPE_END_TAG and t.name in ['tbody', 'tfoot', 'thead']
                if is_in_table_scope t.name, NS_HTML
                        clear_stack_to_table_body_context()
                        open_els.shift()
                        ins_mode = ins_mode_in_table
                else
                        parse_error()
                return
        if (t.type is TYPE_END_TAG and t.name is 'table') or (t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'])
                # Walk the stack from the current node looking for an HTML
                # tbody/tfoot/thead, stopping at the first entry listed in
                # table_scopers.
                has = false
                for el in open_els
                        if el.namespace is NS_HTML and el.name in ['tbody', 'tfoot', 'thead']
                                has = true
                                break
                        break if table_scopers[el.name] is el.namespace
                unless has
                        parse_error()
                        return
                # close the section implicitly and reprocess the token
                clear_stack_to_table_body_context()
                open_els.shift()
                ins_mode = ins_mode_in_table
                process_token t
                return
        if t.type is TYPE_END_TAG and t.name in ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr']
                parse_error()
                return
        # Anything else
        ins_mode_in_table t
        return
2560
2561         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
ins_mode_in_row = (t) ->
        # Handler for the "in row" insertion mode.
        #
        # t: the token to process. Tokens without a specific rule are handled
        # by ins_mode_in_table. Always returns undefined.
        if t.type is TYPE_START_TAG and t.name in ['th', 'td']
                clear_stack_to_table_row_context()
                insert_html_element t
                ins_mode = ins_mode_in_cell
                afe_push_marker()
                return
        if t.type is TYPE_END_TAG and t.name is 'tr'
                unless is_in_table_scope 'tr', NS_HTML
                        parse_error()
                        return
                clear_stack_to_table_row_context()
                open_els.shift()
                ins_mode = ins_mode_in_table_body
                return
        if (t.type is TYPE_END_TAG and t.name is 'table') or (t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'])
                # These tokens close the row implicitly and are then reprocessed.
                unless is_in_table_scope 'tr', NS_HTML
                        parse_error()
                        return
                clear_stack_to_table_row_context()
                open_els.shift()
                ins_mode = ins_mode_in_table_body
                process_token t
                return
        if t.type is TYPE_END_TAG and t.name in ['tbody', 'tfoot', 'thead']
                unless is_in_table_scope t.name, NS_HTML
                        parse_error()
                        return
                # with no <tr> in table scope the token is dropped silently
                return unless is_in_table_scope 'tr', NS_HTML
                clear_stack_to_table_row_context()
                open_els.shift()
                ins_mode = ins_mode_in_table_body
                process_token t
                return
        if t.type is TYPE_END_TAG and t.name in ['body', 'caption', 'col', 'colgroup', 'html', 'td', 'th']
                parse_error()
                return
        # Anything else
        ins_mode_in_table t
        return
2602
2603         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
close_the_cell = ->
        # "Close the cell" steps: generate implied end tags, pop the stack of
        # open elements through the nearest HTML <td>/<th>, clear the active
        # formatting elements up to the last marker and switch to "in row".
        #
        # Takes no arguments. Always returns undefined.
        generate_implied_end_tags()
        # The current node is expected to be an HTML <td> or <th> at this point.
        # Both alternatives must compare the element's .name; comparing the
        # element object itself to 'th' is never true and reported an error
        # for every <th>.
        unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
                parse_error()
        loop
                el = open_els.shift()
                if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
                        break
        clear_afe_to_marker()
        ins_mode = ins_mode_in_row
        return
2615
2616         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
ins_mode_in_cell = (t) ->
        # Handler for the "in cell" insertion mode.
        #
        # t: the token to process. Tokens without a cell-specific rule are
        # processed with the "in body" rules. Always returns undefined.
        if t.type is TYPE_END_TAG
                switch t.name
                        when 'td', 'th'
                                unless is_in_table_scope t.name, NS_HTML
                                        parse_error()
                                        return
                                generate_implied_end_tags()
                                parse_error() unless open_els[0].namespace is NS_HTML and open_els[0].name is t.name
                                # pop through the cell that this end tag names
                                loop
                                        el = open_els.shift()
                                        break if el.namespace is NS_HTML and el.name is t.name
                                clear_afe_to_marker()
                                ins_mode = ins_mode_in_row
                                return
                        when 'body', 'caption', 'col', 'colgroup', 'html'
                                parse_error()
                                return
                        when 'table', 'tbody', 'tfoot', 'thead', 'tr'
                                # closes the cell implicitly, then is reprocessed
                                if is_in_table_scope t.name, NS_HTML
                                        close_the_cell()
                                        process_token t
                                else
                                        parse_error()
                                return
        if t.type is TYPE_START_TAG and t.name in ['caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']
                # Walk the stack from the current node looking for an HTML
                # td/th, stopping at the first entry listed in table_scopers.
                has = false
                for el in open_els
                        if el.namespace is NS_HTML and el.name in ['td', 'th']
                                has = true
                                break
                        break if table_scopers[el.name] is el.namespace
                unless has
                        parse_error()
                        return
                close_the_cell()
                process_token t
                return
        # Anything Else
        ins_mode_in_body t
        return
2659
2660         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
ins_mode_in_select = (t) ->
        # Handler for the "in select" insertion mode.
        #
        # t: the token to process. Tokens without a specific rule are a parse
        # error and are dropped. Always returns undefined.
        if t.type is TYPE_TEXT and t.text is "\u0000"
                parse_error()
                return
        if t.type is TYPE_TEXT
                insert_character t
                return
        if t.type is TYPE_COMMENT
                insert_comment t
                return
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_START_TAG and t.name is 'html'
                ins_mode_in_body t
                return
        if t.type is TYPE_START_TAG and t.name is 'option'
                # a new <option> implicitly closes an open one
                if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
                        open_els.shift()
                insert_html_element t
                return
        if t.type is TYPE_START_TAG and t.name is 'optgroup'
                # a new <optgroup> implicitly closes an open <option> and then
                # an open <optgroup>
                if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
                        open_els.shift()
                if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
                        open_els.shift()
                insert_html_element t
                return
        if t.type is TYPE_END_TAG and t.name is 'optgroup'
                # An <option> directly inside an <optgroup> is closed first.
                # The name and namespace tests for the parent must both look at
                # open_els[1]; the namespace of open_els[0] was already checked
                # on the line before.
                if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
                        if open_els[1]? and open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
                                open_els.shift()
                if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
                        open_els.shift()
                else
                        parse_error()
                return
        if t.type is TYPE_END_TAG and t.name is 'option'
                if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
                        open_els.shift()
                else
                        parse_error()
                return
        if t.type is TYPE_END_TAG and t.name is 'select'
                if is_in_select_scope 'select', NS_HTML
                        loop
                                el = open_els.shift()
                                if el.name is 'select' and el.namespace is NS_HTML
                                        break
                        reset_ins_mode()
                else
                        parse_error()
                return
        if t.type is TYPE_START_TAG and t.name is 'select'
                parse_error()
                loop
                        el = open_els.shift()
                        if el.name is 'select' and el.namespace is NS_HTML
                                break
                reset_ins_mode()
                # spec says that this is the same as </select> but it doesn't say
                # to check scope first
                return
        if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
                # these close the select (when one is in select scope) and are
                # then reprocessed
                parse_error()
                unless is_in_select_scope 'select', NS_HTML
                        return
                loop
                        el = open_els.shift()
                        if el.name is 'select' and el.namespace is NS_HTML
                                break
                reset_ins_mode()
                process_token t
                return
        if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
                ins_mode_in_head t
                return
        if t.type is TYPE_EOF
                ins_mode_in_body t
                return
        # Anything else
        parse_error()
        return
2744
2745         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
ins_mode_in_select_in_table = (t) ->
        # Handler for the "in select in table" insertion mode: table-structure
        # start and end tags close the <select> and are reprocessed; every
        # other token is handled by ins_mode_in_select.
        #
        # t: the token to process. Always returns undefined.
        if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name in ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th']
                parse_error()
                # an end tag is only acted on when the element it names is in
                # table scope; a start tag needs no such check
                if t.type is TYPE_END_TAG
                        return unless is_in_table_scope t.name, NS_HTML
                loop
                        el = open_els.shift()
                        break if el.namespace is NS_HTML and el.name is 'select'
                reset_ins_mode()
                process_token t
                return
        # Anything else
        ins_mode_in_select t
        return
2770
2771         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
ins_mode_in_template = (t) ->
        # Handler for the "in template" insertion mode.
        #
        # t: the token to process. Most start tags replace the first entry of
        # template_ins_modes with a mode chosen from the tag name, make that
        # the current mode and reprocess the token. Always returns undefined.
        switch t.type
                when TYPE_TEXT, TYPE_COMMENT, TYPE_DOCTYPE
                        ins_mode_in_body t
                when TYPE_START_TAG
                        switch t.name
                                when 'base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title'
                                        ins_mode_in_head t
                                        return
                                when 'caption', 'colgroup', 'tbody', 'tfoot', 'thead'
                                        ins_mode = ins_mode_in_table
                                when 'col'
                                        ins_mode = ins_mode_in_column_group
                                when 'tr'
                                        ins_mode = ins_mode_in_table_body
                                when 'td', 'th'
                                        ins_mode = ins_mode_in_row
                                else
                                        ins_mode = ins_mode_in_body
                        # swap the first template insertion mode for the one
                        # just selected, then reprocess the token under it
                        template_ins_modes.shift()
                        template_ins_modes.unshift ins_mode
                        process_token t
                when TYPE_END_TAG
                        if t.name is 'template'
                                ins_mode_in_head t
                        else
                                parse_error()
                when TYPE_EOF
                        unless template_tag_is_open()
                                stop_parsing()
                                return
                        # EOF with a template still open: pop through it, clean
                        # up its bookkeeping and reprocess the EOF.
                        parse_error()
                        loop
                                el = open_els.shift()
                                break if el.namespace is NS_HTML and el.name is 'template'
                        clear_afe_to_marker()
                        template_ins_modes.shift()
                        reset_ins_mode()
                        process_token t
        return
2826
2827         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2828         ins_mode_after_body = (t) ->
2829                 if is_space_tok t
2830                         ins_mode_in_body t
2831                         return
2832                 if t.type is TYPE_COMMENT
2833                         first = open_els[open_els.length - 1]
2834                         insert_comment t, [first, first.children.length]
2835                         return
2836                 if t.type is TYPE_DOCTYPE
2837                         parse_error()
2838                         return
2839                 if t.type is TYPE_START_TAG and t.name is 'html'
2840                         ins_mode_in_body t
2841                         return
2842                 if t.type is TYPE_END_TAG and t.name is 'html'
2843                         if flag_fragment_parsing
2844                                 parse_error()
2845                                 return
2846                         ins_mode = ins_mode_after_after_body
2847                         return
2848                 if t.type is TYPE_EOF
2849                         stop_parsing()
2850                         return
2851                 # Anything ELse
2852                 parse_error()
2853                 ins_mode = ins_mode_in_body
2854                 process_token t
2855                 return
2856
2857         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2858         ins_mode_in_frameset = (t) ->
2859                 if is_space_tok t
2860                         insert_character t
2861                         return
2862                 if t.type is TYPE_COMMENT
2863                         insert_comment t
2864                         return
2865                 if t.type is TYPE_DOCTYPE
2866                         parse_error()
2867                         return
2868                 if t.type is TYPE_START_TAG and t.name is 'html'
2869                         ins_mode_in_body t
2870                         return
2871                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2872                         insert_html_element t
2873                         return
2874                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2875                         if open_els.length is 1
2876                                 parse_error()
2877                                 return # fragment case
2878                         open_els.shift()
2879                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2880                                 ins_mode = ins_mode_after_frameset
2881                         return
2882                 if t.type is TYPE_START_TAG and t.name is 'frame'
2883                         insert_html_element t
2884                         open_els.shift()
2885                         t.acknowledge_self_closing()
2886                         return
2887                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2888                         ins_mode_in_head t
2889                         return
2890                 if t.type is TYPE_EOF
2891                         if open_els.length isnt 1
2892                                 parse_error()
2893                         stop_parsing()
2894                         return
2895                 # Anything else
2896                 parse_error()
2897                 return
2898
2899         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2900         ins_mode_after_frameset = (t) ->
2901                 if is_space_tok t
2902                         insert_character t
2903                         return
2904                 if t.type is TYPE_COMMENT
2905                         insert_comment t
2906                         return
2907                 if t.type is TYPE_DOCTYPE
2908                         parse_error()
2909                         return
2910                 if t.type is TYPE_START_TAG and t.name is 'html'
2911                         ins_mode_in_body t
2912                         return
2913                 if t.type is TYPE_END_TAG and t.name is 'html'
2914                         ins_mode = ins_mode_after_after_frameset
2915                         return
2916                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2917                         ins_mode_in_head t
2918                         return
2919                 if t.type is TYPE_EOF
2920                         stop_parsing()
2921                         return
2922                 # Anything else
2923                 parse_error()
2924                 return
2925
2926         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2927         ins_mode_after_after_body = (t) ->
2928                 if t.type is TYPE_COMMENT
2929                         insert_comment t, [doc, doc.children.length]
2930                         return
2931                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2932                         ins_mode_in_body t
2933                         return
2934                 if t.type is TYPE_EOF
2935                         stop_parsing()
2936                         return
2937                 # Anything else
2938                 parse_error()
2939                 ins_mode = ins_mode_in_body
2940                 process_token t
2941                 return
2942
2943         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2944         ins_mode_after_after_frameset = (t) ->
2945                 if t.type is TYPE_COMMENT
2946                         insert_comment t, [doc, doc.children.length]
2947                         return
2948                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2949                         ins_mode_in_body t
2950                         return
2951                 if t.type is TYPE_EOF
2952                         stop_parsing()
2953                         return
2954                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2955                         ins_mode_in_head t
2956                         return
2957                 # Anything else
2958                 parse_error()
2959                 return
2960
2961         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2962         has_color_face_or_size = (t) ->
2963                 for a in t.attrs_a
2964                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2965                                 return true
2966                 return false
2967         in_foreign_content_end_script = ->
2968                 open_els.shift()
2969                 # fixfull
2970                 return
2971         in_foreign_content_other_start = (t) ->
2972                 acn = adjusted_current_node()
2973                 if acn.namespace is NS_MATHML
2974                         adjust_mathml_attributes t
2975                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2976                         t.name = svg_name_fixes[t.name]
2977                 if acn.namespace is NS_SVG
2978                         adjust_svg_attributes t
2979                 adjust_foreign_attributes t
2980                 insert_foreign_element t, acn.namespace
2981                 if t.flag 'self-closing'
2982                         if t.name is 'script'
2983                                 t.acknowledge_self_closing()
2984                                 in_foreign_content_end_script()
2985                                 # fixfull
2986                         else
2987                                 open_els.shift()
2988                                 t.acknowledge_self_closing()
2989                 return
2990         in_foreign_content = (t) ->
2991                 if t.type is TYPE_TEXT and t.text is "\u0000"
2992                         parse_error()
2993                         insert_character new_character_token "\ufffd"
2994                         return
2995                 if is_space_tok t
2996                         insert_character t
2997                         return
2998                 if t.type is TYPE_TEXT
2999                         flag_frameset_ok = false
3000                         insert_character t
3001                         return
3002                 if t.type is TYPE_COMMENT
3003                         insert_comment t
3004                         return
3005                 if t.type is TYPE_DOCTYPE
3006                         parse_error()
3007                         return
3008                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
3009                         parse_error()
3010                         if flag_fragment_parsing
3011                                 in_foreign_content_other_start t
3012                                 return
3013                         loop # is this safe?
3014                                 open_els.shift()
3015                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
3016                                         break
3017                         process_token t
3018                         return
3019                 if t.type is TYPE_START_TAG
3020                         in_foreign_content_other_start t
3021                         return
3022                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
3023                         in_foreign_content_end_script()
3024                         return
3025                 if t.type is TYPE_END_TAG
3026                         i = 0
3027                         node = open_els[i]
3028                         if node.name.toLowerCase() isnt t.name
3029                                 parse_error()
3030                         loop
3031                                 if node is open_els[open_els.length - 1]
3032                                         return
3033                                 if node.name.toLowerCase() is t.name
3034                                         loop
3035                                                 el = open_els.shift()
3036                                                 if el is node
3037                                                         return
3038                                 i += 1
3039                                 node = open_els[i]
3040                                 if node.namespace is NS_HTML
3041                                         break
3042                         ins_mode t # explicitly call HTML insertion mode
3043                 return
3044
3045
3046         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
3047         tok_state_data = ->
3048                 switch c = txt.charAt(cur++)
3049                         when '&'
3050                                 return new_text_node parse_character_reference()
3051                         when '<'
3052                                 tok_state = tok_state_tag_open
3053                         when "\u0000"
3054                                 parse_error()
3055                                 return new_text_node c
3056                         when '' # EOF
3057                                 return new_eof_token()
3058                         else
3059                                 return new_text_node c
3060                 return null
3061
3062         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3063         # not needed: tok_state_character_reference_in_data = ->
3064         # just call parse_character_reference()
3065
3066         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
3067         tok_state_rcdata = ->
3068                 switch c = txt.charAt(cur++)
3069                         when '&'
3070                                 return new_text_node parse_character_reference()
3071                         when '<'
3072                                 tok_state = tok_state_rcdata_less_than_sign
3073                         when "\u0000"
3074                                 parse_error()
3075                                 return new_character_token "\ufffd"
3076                         when '' # EOF
3077                                 return new_eof_token()
3078                         else
3079                                 return new_character_token c
3080                 return null
3081
3082         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3083         # not needed: tok_state_character_reference_in_rcdata = ->
3084         # just call parse_character_reference()
3085
3086         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3087         tok_state_rawtext = ->
3088                 switch c = txt.charAt(cur++)
3089                         when '<'
3090                                 tok_state = tok_state_rawtext_less_than_sign
3091                         when "\u0000"
3092                                 parse_error()
3093                                 return new_character_token "\ufffd"
3094                         when '' # EOF
3095                                 return new_eof_token()
3096                         else
3097                                 return new_character_token c
3098                 return null
3099
3100         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3101         tok_state_script_data = ->
3102                 switch c = txt.charAt(cur++)
3103                         when '<'
3104                                 tok_state = tok_state_script_data_less_than_sign
3105                         when "\u0000"
3106                                 parse_error()
3107                                 return new_character_token "\ufffd"
3108                         when '' # EOF
3109                                 return new_eof_token()
3110                         else
3111                                 return new_character_token c
3112                 return null
3113
3114         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3115         tok_state_plaintext = ->
3116                 switch c = txt.charAt(cur++)
3117                         when "\u0000"
3118                                 parse_error()
3119                                 return new_character_token "\ufffd"
3120                         when '' # EOF
3121                                 return new_eof_token()
3122                         else
3123                                 return new_character_token c
3124                 return null
3125
3126
3127         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3128         tok_state_tag_open = ->
3129                 c = txt.charAt(cur++)
3130                 if c is '!'
3131                         tok_state = tok_state_markup_declaration_open
3132                         return
3133                 if c is '/'
3134                         tok_state = tok_state_end_tag_open
3135                         return
3136                 if is_uc_alpha(c)
3137                         tok_cur_tag = new_open_tag c.toLowerCase()
3138                         tok_state = tok_state_tag_name
3139                         return
3140                 if is_lc_alpha(c)
3141                         tok_cur_tag = new_open_tag c
3142                         tok_state = tok_state_tag_name
3143                         return
3144                 if c is '?'
3145                         parse_error()
3146                         tok_cur_tag = new_comment_token '?' # FIXME right?
3147                         tok_state = tok_state_bogus_comment
3148                         return
3149                 # Anything else
3150                 parse_error()
3151                 tok_state = tok_state_data
3152                 cur -= 1 # we didn't parse/handle the char after <
3153                 return new_text_node '<'
3154
3155         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3156         tok_state_end_tag_open = ->
3157                 c = txt.charAt(cur++)
3158                 if is_uc_alpha(c)
3159                         tok_cur_tag = new_end_tag c.toLowerCase()
3160                         tok_state = tok_state_tag_name
3161                         return
3162                 if is_lc_alpha(c)
3163                         tok_cur_tag = new_end_tag c
3164                         tok_state = tok_state_tag_name
3165                         return
3166                 if c is '>'
3167                         parse_error()
3168                         tok_state = tok_state_data
3169                         return
3170                 if c is '' # EOF
3171                         parse_error()
3172                         tok_state = tok_state_data
3173                         return new_text_node '</'
3174                 # Anything else
3175                 parse_error()
3176                 tok_cur_tag = new_comment_token c
3177                 tok_state = tok_state_bogus_comment
3178                 return null
3179
3180         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3181         tok_state_tag_name = ->
3182                 switch c = txt.charAt(cur++)
3183                         when "\t", "\n", "\u000c", ' '
3184                                 tok_state = tok_state_before_attribute_name
3185                         when '/'
3186                                 tok_state = tok_state_self_closing_start_tag
3187                         when '>'
3188                                 tok_state = tok_state_data
3189                                 tmp = tok_cur_tag
3190                                 tok_cur_tag = null
3191                                 return tmp
3192                         when "\u0000"
3193                                 parse_error()
3194                                 tok_cur_tag.name += "\ufffd"
3195                         when '' # EOF
3196                                 parse_error()
3197                                 tok_state = tok_state_data
3198                         else
3199                                 if is_uc_alpha(c)
3200                                         tok_cur_tag.name += c.toLowerCase()
3201                                 else
3202                                         tok_cur_tag.name += c
3203                 return null
3204
3205         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3206         tok_state_rcdata_less_than_sign = ->
3207                 c = txt.charAt(cur++)
3208                 if c is '/'
3209                         temporary_buffer = ''
3210                         tok_state = tok_state_rcdata_end_tag_open
3211                         return null
3212                 # Anything else
3213                 tok_state = tok_state_rcdata
3214                 cur -= 1 # reconsume the input character
3215                 return new_character_token '<'
3216
3217         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3218         tok_state_rcdata_end_tag_open = ->
3219                 c = txt.charAt(cur++)
3220                 if is_uc_alpha(c)
3221                         tok_cur_tag = new_end_tag c.toLowerCase()
3222                         temporary_buffer += c
3223                         tok_state = tok_state_rcdata_end_tag_name
3224                         return null
3225                 if is_lc_alpha(c)
3226                         tok_cur_tag = new_end_tag c
3227                         temporary_buffer += c
3228                         tok_state = tok_state_rcdata_end_tag_name
3229                         return null
3230                 # Anything else
3231                 tok_state = tok_state_rcdata
3232                 cur -= 1 # reconsume the input character
3233                 return new_character_token "</" # fixfull separate these
3234
3235         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3236         is_appropriate_end_tag = (t) ->
3237                 # fixfull: this assumes that open_els[0].name is "the tag name of the last
3238                 # start tag to have been emitted from this tokenizer"
3239                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3240
3241         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3242         tok_state_rcdata_end_tag_name = ->
3243                 c = txt.charAt(cur++)
3244                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3245                         if is_appropriate_end_tag tok_cur_tag
3246                                 tok_state = tok_state_before_attribute_name
3247                                 return
3248                         # else fall through to "Anything else"
3249                 if c is '/'
3250                         if is_appropriate_end_tag tok_cur_tag
3251                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3252                                 return
3253                         # else fall through to "Anything else"
3254                 if c is '>'
3255                         if is_appropriate_end_tag tok_cur_tag
3256                                 tok_state = tok_state_data
3257                                 return tok_cur_tag
3258                         # else fall through to "Anything else"
3259                 if is_uc_alpha(c)
3260                         tok_cur_tag.name += c.toLowerCase()
3261                         temporary_buffer += c
3262                         return null
3263                 if is_lc_alpha(c)
3264                         tok_cur_tag.name += c
3265                         temporary_buffer += c
3266                         return null
3267                 # Anything else
3268                 tok_state = tok_state_rcdata
3269                 cur -= 1 # reconsume the input character
3270                 return new_character_token '</' + temporary_buffer # fixfull separate these
3271
3272         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3273         tok_state_rawtext_less_than_sign = ->
3274                 c = txt.charAt(cur++)
3275                 if c is '/'
3276                         temporary_buffer = ''
3277                         tok_state = tok_state_rawtext_end_tag_open
3278                         return null
3279                 # Anything else
3280                 tok_state = tok_state_rawtext
3281                 cur -= 1 # reconsume the input character
3282                 return new_character_token '<'
3283
3284         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3285         tok_state_rawtext_end_tag_open = ->
3286                 c = txt.charAt(cur++)
3287                 if is_uc_alpha(c)
3288                         tok_cur_tag = new_end_tag c.toLowerCase()
3289                         temporary_buffer += c
3290                         tok_state = tok_state_rawtext_end_tag_name
3291                         return null
3292                 if is_lc_alpha(c)
3293                         tok_cur_tag = new_end_tag c
3294                         temporary_buffer += c
3295                         tok_state = tok_state_rawtext_end_tag_name
3296                         return null
3297                 # Anything else
3298                 tok_state = tok_state_rawtext
3299                 cur -= 1 # reconsume the input character
3300                 return new_character_token "</" # fixfull separate these
3301
3302         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3303         tok_state_rawtext_end_tag_name = ->
3304                 c = txt.charAt(cur++)
3305                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3306                         if is_appropriate_end_tag tok_cur_tag
3307                                 tok_state = tok_state_before_attribute_name
3308                                 return
3309                         # else fall through to "Anything else"
3310                 if c is '/'
3311                         if is_appropriate_end_tag tok_cur_tag
3312                                 tok_state = tok_state_self_closing_start_tag
3313                                 return
3314                         # else fall through to "Anything else"
3315                 if c is '>'
3316                         if is_appropriate_end_tag tok_cur_tag
3317                                 tok_state = tok_state_data
3318                                 return tok_cur_tag
3319                         # else fall through to "Anything else"
3320                 if is_uc_alpha(c)
3321                         tok_cur_tag.name += c.toLowerCase()
3322                         temporary_buffer += c
3323                         return null
3324                 if is_lc_alpha(c)
3325                         tok_cur_tag.name += c
3326                         temporary_buffer += c
3327                         return null
3328                 # Anything else
3329                 tok_state = tok_state_rawtext
3330                 cur -= 1 # reconsume the input character
3331                 return new_character_token '</' + temporary_buffer # fixfull separate these
3332
3333         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3334         tok_state_script_data_less_than_sign = ->
3335                 c = txt.charAt(cur++)
3336                 if c is '/'
3337                         temporary_buffer = ''
3338                         tok_state = tok_state_script_data_end_tag_open
3339                         return
3340                 if c is '!'
3341                         tok_state = tok_state_script_data_escape_start
3342                         return new_character_token '<!' # fixfull split
3343                 # Anything else
3344                 tok_state = tok_state_script_data
3345                 cur -= 1 # Reconsume
3346                 return new_character_token '<'
3347
3348         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3349         tok_state_script_data_end_tag_open = ->
3350                 c = txt.charAt(cur++)
3351                 if is_uc_alpha(c)
3352                         tok_cur_tag = new_end_tag c.toLowerCase()
3353                         temporary_buffer += c
3354                         tok_state = tok_state_script_data_end_tag_name
3355                         return
3356                 if is_lc_alpha(c)
3357                         tok_cur_tag = new_end_tag c
3358                         temporary_buffer += c
3359                         tok_state = tok_state_script_data_end_tag_name
3360                         return
3361                 # Anything else
3362                 tok_state = tok_state_script_data
3363                 cur -= 1 # Reconsume
3364                 return new_character_token '</'
3365
3366         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3367         tok_state_script_data_end_tag_name = ->
3368                 c = txt.charAt(cur++)
3369                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3370                         if is_appropriate_end_tag tok_cur_tag
3371                                 tok_state = tok_state_before_attribute_name
3372                                 return
3373                         # fall through
3374                 if c is '/'
3375                         if is_appropriate_end_tag tok_cur_tag
3376                                 tok_state = tok_state_self_closing_start_tag
3377                                 return
3378                         # fall through
3379                 if c is '>'
3380                         if is_appropriate_end_tag tok_cur_tag
3381                                 tok_state = tok_state_data
3382                                 return tok_cur_tag
3383                         # fall through
3384                 if is_uc_alpha(c)
3385                         tok_cur_tag.name += c.toLowerCase()
3386                         temporary_buffer += c
3387                         return
3388                 if is_lc_alpha(c)
3389                         tok_cur_tag.name += c
3390                         temporary_buffer += c
3391                         return
3392                 # Anything else
3393                 tok_state = tok_state_script_data
3394                 cur -= 1 # Reconsume
3395                 return new_character_token "</#{temporary_buffer}" # fixfull split
3396
3397         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3398         tok_state_script_data_escape_start = ->
3399                 c = txt.charAt(cur++)
3400                 if c is '-'
3401                         tok_state = tok_state_script_data_escape_start_dash
3402                         return new_character_token '-'
3403                 # Anything else
3404                 tok_state = tok_state_script_data
3405                 cur -= 1 # Reconsume
3406                 return
3407
3408         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3409         tok_state_script_data_escape_start_dash = ->
3410                 c = txt.charAt(cur++)
3411                 if c is '-'
3412                         tok_state = tok_state_script_data_escaped_dash_dash
3413                         return new_character_token '-'
3414                 # Anything else
3415                 tok_state = tok_state_script_data
3416                 cur -= 1 # Reconsume
3417                 return
3418
3419         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3420         tok_state_script_data_escaped = ->
3421                 c = txt.charAt(cur++)
3422                 if c is '-'
3423                         tok_state = tok_state_script_data_escaped_dash
3424                         return new_character_token '-'
3425                 if c is '<'
3426                         tok_state = tok_state_script_data_escaped_less_than_sign
3427                         return
3428                 if c is "\u0000"
3429                         parse_error()
3430                         return new_character_token "\ufffd"
3431                 if c is '' # EOF
3432                         tok_state = tok_state_data
3433                         parse_error()
3434                         cur -= 1 # Reconsume
3435                         return
3436                 # Anything else
3437                 return new_character_token c
3438
3439         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3440         tok_state_script_data_escaped_dash = ->
3441                 c = txt.charAt(cur++)
3442                 if c is '-'
3443                         tok_state = tok_state_script_data_escaped_dash_dash
3444                         return new_character_token '-'
3445                 if c is '<'
3446                         tok_state = tok_state_script_data_escaped_less_than_sign
3447                         return
3448                 if c is "\u0000"
3449                         parse_error()
3450                         tok_state = tok_state_script_data_escaped
3451                         return new_character_token "\ufffd"
3452                 if c is '' # EOF
3453                         tok_state = tok_state_data
3454                         parse_error()
3455                         cur -= 1 # Reconsume
3456                         return
3457                 # Anything else
3458                 tok_state = tok_state_script_data_escaped
3459                 return new_character_token c
3460
3461         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3462         tok_state_script_data_escaped_dash_dash = ->
3463                 c = txt.charAt(cur++)
3464                 if c is '-'
3465                         return new_character_token '-'
3466                 if c is '<'
3467                         tok_state = tok_state_script_data_escaped_less_than_sign
3468                         return
3469                 if c is '>'
3470                         tok_state = tok_state_script_data
3471                         return new_character_token '>'
3472                 if c is "\u0000"
3473                         parse_error()
3474                         tok_state = tok_state_script_data_escaped
3475                         return new_character_token "\ufffd"
3476                 if c is '' # EOF
3477                         parse_error()
3478                         tok_state = tok_state_data
3479                         cur -= 1 # Reconsume
3480                         return
3481                 # Anything else
3482                 tok_state = tok_state_script_data_escaped
3483                 return new_character_token c
3484
3485         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3486         tok_state_script_data_escaped_less_than_sign = ->
3487                 c = txt.charAt(cur++)
3488                 if c is '/'
3489                         temporary_buffer = ''
3490                         tok_state = tok_state_script_data_escaped_end_tag_open
3491                         return
3492                 if is_uc_alpha(c)
3493                         temporary_buffer = c.toLowerCase() # yes, really
3494                         tok_state = tok_state_script_data_double_escape_start
3495                         return new_character_token "<#{c}" # fixfull split
3496                 if is_lc_alpha(c)
3497                         temporary_buffer = c
3498                         tok_state = tok_state_script_data_double_escape_start
3499                         return new_character_token "<#{c}" # fixfull split
3500                 # Anything else
3501                 tok_state = tok_state_script_data_escaped
3502                 cur -= 1 # Reconsume
3503                 return new_character_token '<'
3504
3505         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3506         tok_state_script_data_escaped_end_tag_open = ->
3507                 c = txt.charAt(cur++)
3508                 if is_uc_alpha(c)
3509                         tok_cur_tag = new_end_tag c.toLowerCase()
3510                         temporary_buffer += c
3511                         tok_state = tok_state_script_data_escaped_end_tag_name
3512                         return
3513                 if is_lc_alpha(c)
3514                         tok_cur_tag = new_end_tag c
3515                         temporary_buffer += c
3516                         tok_state = tok_state_script_data_escaped_end_tag_name
3517                         return
3518                 # Anything else
3519                 tok_state = tok_state_script_data_escaped
3520                 cur -= 1 # Reconsume
3521                 return new_character_token '</' # fixfull split
3522
3523         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3524         tok_state_script_data_escaped_end_tag_name = ->
3525                 c = txt.charAt(cur++)
3526                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3527                         if is_appropriate_end_tag tok_cur_tag
3528                                 tok_state = tok_state_before_attribute_name
3529                                 return
3530                         # fall through
3531                 if c is '/'
3532                         if is_appropriate_end_tag tok_cur_tag
3533                                 tok_state = tok_state_self_closing_start_tag
3534                                 return
3535                         # fall through
3536                 if c is '>'
3537                         if is_appropriate_end_tag tok_cur_tag
3538                                 tok_state = tok_state_data
3539                                 return tok_cur_tag
3540                         # fall through
3541                 if is_uc_alpha(c)
3542                         tok_cur_tag.name += c.toLowerCase()
3543                         temporary_buffer += c.toLowerCase()
3544                         return
3545                 if is_lc_alpha(c)
3546                         tok_cur_tag.name += c
3547                         temporary_buffer += c.toLowerCase()
3548                         return
3549                 # Anything else
3550                 tok_state = tok_state_script_data_escaped
3551                 cur -= 1 # Reconsume
3552                 return new_character_token "</#{temporary_buffer}" # fixfull split
3553
3554         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3555         tok_state_script_data_double_escape_start = ->
3556                 c = txt.charAt(cur++)
3557                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3558                         if temporary_buffer is 'script'
3559                                 tok_state = tok_state_script_data_double_escaped
3560                         else
3561                                 tok_state = tok_state_script_data_escaped
3562                         return new_character_token c
3563                 if is_uc_alpha(c)
3564                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3565                         return new_character_token c
3566                 if is_lc_alpha(c)
3567                         temporary_buffer += c
3568                         return new_character_token c
3569                 # Anything else
3570                 tok_state = tok_state_script_data_escaped
3571                 cur -= 1 # Reconsume
3572                 return
3573
3574         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3575         tok_state_script_data_double_escaped = ->
3576                 c = txt.charAt(cur++)
3577                 if c is '-'
3578                         tok_state = tok_state_script_data_double_escaped_dash
3579                         return new_character_token '-'
3580                 if c is '<'
3581                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3582                         return new_character_token '<'
3583                 if c is "\u0000"
3584                         parse_error()
3585                         return new_character_token "\ufffd"
3586                 if c is '' # EOF
3587                         parse_error()
3588                         tok_state = tok_state_data
3589                         cur -= 1 # Reconsume
3590                         return
3591                 # Anything else
3592                 return new_character_token c
3593
3594         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3595         tok_state_script_data_double_escaped_dash = ->
3596                 c = txt.charAt(cur++)
3597                 if c is '-'
3598                         tok_state = tok_state_script_data_double_escaped_dash_dash
3599                         return new_character_token '-'
3600                 if c is '<'
3601                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3602                         return new_character_token '<'
3603                 if c is "\u0000"
3604                         parse_error()
3605                         tok_state = tok_state_script_data_double_escaped
3606                         return new_character_token "\ufffd"
3607                 if c is '' # EOF
3608                         parse_error()
3609                         tok_state = tok_state_data
3610                         cur -= 1 # Reconsume
3611                         return
3612                 # Anything else
3613                 tok_state = tok_state_script_data_double_escaped
3614                 return new_character_token c
3615
3616         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3617         tok_state_script_data_double_escaped_dash_dash = ->
3618                 c = txt.charAt(cur++)
3619                 if c is '-'
3620                         return new_character_token '-'
3621                 if c is '<'
3622                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3623                         return new_character_token '<'
3624                 if c is '>'
3625                         tok_state = tok_state_script_data
3626                         return new_character_token '>'
3627                 if c is "\u0000"
3628                         parse_error()
3629                         tok_state = tok_state_script_data_double_escaped
3630                         return new_character_token "\ufffd"
3631                 if c is '' # EOF
3632                         parse_error()
3633                         tok_state = tok_state_data
3634                         cur -= 1 # Reconsume
3635                         return
3636                 # Anything else
3637                 tok_state = tok_state_script_data_double_escaped
3638                 return new_character_token c
3639
3640         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3641         tok_state_script_data_double_escaped_less_than_sign = ->
3642                 c = txt.charAt(cur++)
3643                 if c is '/'
3644                         temporary_buffer = ''
3645                         tok_state = tok_state_script_data_double_escape_end
3646                         return new_character_token '/'
3647                 # Anything else
3648                 tok_state = tok_state_script_data_double_escaped
3649                 cur -= 1 # Reconsume
3650                 return
3651
3652         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3653         tok_state_script_data_double_escape_end = ->
3654                 c = txt.charAt(cur++)
3655                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3656                         if temporary_buffer is 'script'
3657                                 tok_state = tok_state_script_data_escaped
3658                         else
3659                                 tok_state = tok_state_script_data_double_escaped
3660                         return new_character_token c
3661                 if is_uc_alpha(c)
3662                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3663                         return new_character_token c
3664                 if is_lc_alpha(c)
3665                         temporary_buffer += c
3666                         return new_character_token c
3667                 # Anything else
3668                 tok_state = tok_state_script_data_double_escaped
3669                 cur -= 1 # Reconsume
3670                 return
3671
3672         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3673         tok_state_before_attribute_name = ->
3674                 attr_name = null
3675                 switch c = txt.charAt(cur++)
3676                         when "\t", "\n", "\u000c", ' '
3677                                 return null
3678                         when '/'
3679                                 tok_state = tok_state_self_closing_start_tag
3680                                 return null
3681                         when '>'
3682                                 tok_state = tok_state_data
3683                                 tmp = tok_cur_tag
3684                                 tok_cur_tag = null
3685                                 return tmp
3686                         when "\u0000"
3687                                 parse_error()
3688                                 attr_name = "\ufffd"
3689                         when '"', "'", '<', '='
3690                                 parse_error()
3691                                 attr_name = c
3692                         when '' # EOF
3693                                 parse_error()
3694                                 tok_state = tok_state_data
3695                         else
3696                                 if is_uc_alpha(c)
3697                                         attr_name = c.toLowerCase()
3698                                 else
3699                                         attr_name = c
3700                 if attr_name?
3701                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3702                         tok_state = tok_state_attribute_name
3703                 return null
3704
3705         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3706         tok_state_attribute_name = ->
3707                 switch c = txt.charAt(cur++)
3708                         when "\t", "\n", "\u000c", ' '
3709                                 tok_state = tok_state_after_attribute_name
3710                         when '/'
3711                                 tok_state = tok_state_self_closing_start_tag
3712                         when '='
3713                                 tok_state = tok_state_before_attribute_value
3714                         when '>'
3715                                 tok_state = tok_state_data
3716                                 tmp = tok_cur_tag
3717                                 tok_cur_tag = null
3718                                 return tmp
3719                         when "\u0000"
3720                                 parse_error()
3721                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3722                         when '"', "'", '<'
3723                                 parse_error()
3724                                 tok_cur_tag.attrs_a[0][0] += c
3725                         when '' # EOF
3726                                 parse_error()
3727                                 tok_state = tok_state_data
3728                         else
3729                                 if is_uc_alpha(c)
3730                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3731                                 else
3732                                         tok_cur_tag.attrs_a[0][0] += c
3733                 return null
3734
3735         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3736         tok_state_after_attribute_name = ->
3737                 c = txt.charAt(cur++)
3738                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3739                         return
3740                 if c is '/'
3741                         tok_state = tok_state_self_closing_start_tag
3742                         return
3743                 if c is '='
3744                         tok_state = tok_state_before_attribute_value
3745                         return
3746                 if c is '>'
3747                         tok_state = tok_state_data
3748                         return tok_cur_tag
3749                 if is_uc_alpha(c)
3750                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3751                         tok_state = tok_state_attribute_name
3752                         return
3753                 if c is "\u0000"
3754                         parse_error()
3755                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3756                         tok_state = tok_state_attribute_name
3757                         return
3758                 if c is '' # EOF
3759                         parse_error()
3760                         tok_state = tok_state_data
3761                         cur -= 1 # reconsume
3762                         return
3763                 if c is '"' or c is "'" or c is '<'
3764                         parse_error()
3765                         # fall through to Anything else
3766                 # Anything else
3767                 tok_cur_tag.attrs_a.unshift [c, '']
3768                 tok_state = tok_state_attribute_name
3769                 return
3770
3771         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3772         tok_state_before_attribute_value = ->
3773                 switch c = txt.charAt(cur++)
3774                         when "\t", "\n", "\u000c", ' '
3775                                 return null
3776                         when '"'
3777                                 tok_state = tok_state_attribute_value_double_quoted
3778                         when '&'
3779                                 tok_state = tok_state_attribute_value_unquoted
3780                                 cur -= 1
3781                         when "'"
3782                                 tok_state = tok_state_attribute_value_single_quoted
3783                         when "\u0000"
3784                                 # Parse error
3785                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3786                                 tok_state = tok_state_attribute_value_unquoted
3787                         when '>'
3788                                 # Parse error
3789                                 tok_state = tok_state_data
3790                                 tmp = tok_cur_tag
3791                                 tok_cur_tag = null
3792                                 return tmp
3793                         when '' # EOF
3794                                 parse_error()
3795                                 tok_state = tok_state_data
3796                         else
3797                                 tok_cur_tag.attrs_a[0][1] += c
3798                                 tok_state = tok_state_attribute_value_unquoted
3799                 return null
3800
3801         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3802         tok_state_attribute_value_double_quoted = ->
3803                 switch c = txt.charAt(cur++)
3804                         when '"'
3805                                 tok_state = tok_state_after_attribute_value_quoted
3806                         when '&'
3807                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3808                         when "\u0000"
3809                                 # Parse error
3810                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3811                         when '' # EOF
3812                                 parse_error()
3813                                 tok_state = tok_state_data
3814                         else
3815                                 tok_cur_tag.attrs_a[0][1] += c
3816                 return null
3817
3818         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3819         tok_state_attribute_value_single_quoted = ->
3820                 switch c = txt.charAt(cur++)
3821                         when "'"
3822                                 tok_state = tok_state_after_attribute_value_quoted
3823                         when '&'
3824                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3825                         when "\u0000"
3826                                 # Parse error
3827                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3828                         when '' # EOF
3829                                 parse_error()
3830                                 tok_state = tok_state_data
3831                         else
3832                                 tok_cur_tag.attrs_a[0][1] += c
3833                 return null
3834
3835         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3836         tok_state_attribute_value_unquoted = ->
3837                 switch c = txt.charAt(cur++)
3838                         when "\t", "\n", "\u000c", ' '
3839                                 tok_state = tok_state_before_attribute_name
3840                         when '&'
3841                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3842                         when '>'
3843                                 tok_state = tok_state_data
3844                                 tmp = tok_cur_tag
3845                                 tok_cur_tag = null
3846                                 return tmp
3847                         when "\u0000"
3848                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3849                         when '' # EOF
3850                                 parse_error()
3851                                 tok_state = tok_state_data
3852                         else
3853                                 # Parse Error if ', <, = or ` (backtick)
3854                                 tok_cur_tag.attrs_a[0][1] += c
3855                 return null
3856
3857         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3858         tok_state_after_attribute_value_quoted = ->
3859                 switch c = txt.charAt(cur++)
3860                         when "\t", "\n", "\u000c", ' '
3861                                 tok_state = tok_state_before_attribute_name
3862                         when '/'
3863                                 tok_state = tok_state_self_closing_start_tag
3864                         when '>'
3865                                 tok_state = tok_state_data
3866                                 tmp = tok_cur_tag
3867                                 tok_cur_tag = null
3868                                 return tmp
3869                         when '' # EOF
3870                                 parse_error()
3871                                 tok_state = tok_state_data
3872                         else
3873                                 # Parse Error
3874                                 tok_state = tok_state_before_attribute_name
3875                                 cur -= 1 # we didn't handle that char
3876                 return null
3877
3878         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3879         tok_state_self_closing_start_tag = ->
3880                 c = txt.charAt(cur++)
3881                 if c is '>'
3882                         tok_cur_tag.flag 'self-closing', true
3883                         tok_state = tok_state_data
3884                         return tok_cur_tag
3885                 if c is ''
3886                         parse_error()
3887                         tok_state = tok_state_data
3888                         cur -= 1 # Reconsume
3889                         return
3890                 # Anything else
3891                 parse_error()
3892                 tok_state = tok_state_before_attribute_name
3893                 cur -= 1 # Reconsume
3894                 return
3895
3896         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3897         # WARNING: put a comment token in tok_cur_tag before setting this state
3898         tok_state_bogus_comment = ->
3899                 next_gt = txt.indexOf '>', cur
3900                 if next_gt is -1
3901                         val = txt.substr cur
3902                         cur = txt.length
3903                 else
3904                         val = txt.substr cur, (next_gt - cur)
3905                         cur = next_gt + 1
3906                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3907                 tok_cur_tag.text += val
3908                 tok_state = tok_state_data
3909                 return tok_cur_tag
3910
3911         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3912         tok_state_markup_declaration_open = ->
3913                 if txt.substr(cur, 2) is '--'
3914                         cur += 2
3915                         tok_cur_tag = new_comment_token ''
3916                         tok_state = tok_state_comment_start
3917                         return
3918                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3919                         cur += 7
3920                         tok_state = tok_state_doctype
3921                         return
3922                 acn = adjusted_current_node()
3923                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3924                         cur += 7
3925                         tok_state = tok_state_cdata_section
3926                         return
3927                 # Otherwise
3928                 parse_error()
3929                 tok_cur_tag = new_comment_token ''
3930                 tok_state = tok_state_bogus_comment
3931                 return
3932
3933         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3934         tok_state_comment_start = ->
3935                 switch c = txt.charAt(cur++)
3936                         when '-'
3937                                 tok_state = tok_state_comment_start_dash
3938                         when "\u0000"
3939                                 parse_error()
3940                                 tok_state = tok_state_comment
3941                                 return new_character_token "\ufffd"
3942                         when '>'
3943                                 parse_error()
3944                                 tok_state = tok_state_data
3945                                 return tok_cur_tag
3946                         when '' # EOF
3947                                 parse_error()
3948                                 tok_state = tok_state_data
3949                                 cur -= 1 # Reconsume
3950                                 return tok_cur_tag
3951                         else
3952                                 tok_cur_tag.text += c
3953                                 tok_state = tok_state_comment
3954                 return null
3955
3956         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3957         tok_state_comment_start_dash = ->
3958                 switch c = txt.charAt(cur++)
3959                         when '-'
3960                                 tok_state = tok_state_comment_end
3961                         when "\u0000"
3962                                 parse_error()
3963                                 tok_cur_tag.text += "-\ufffd"
3964                                 tok_state = tok_state_comment
3965                         when '>'
3966                                 parse_error()
3967                                 tok_state = tok_state_data
3968                                 return tok_cur_tag
3969                         when '' # EOF
3970                                 parse_error()
3971                                 tok_state = tok_state_data
3972                                 cur -= 1 # Reconsume
3973                                 return tok_cur_tag
3974                         else
3975                                 tok_cur_tag.text += "-#{c}"
3976                                 tok_state = tok_state_comment
3977                 return null
3978
3979         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3980         tok_state_comment = ->
3981                 switch c = txt.charAt(cur++)
3982                         when '-'
3983                                 tok_state = tok_state_comment_end_dash
3984                         when "\u0000"
3985                                 parse_error()
3986                                 tok_cur_tag.text += "\ufffd"
3987                         when '' # EOF
3988                                 parse_error()
3989                                 tok_state = tok_state_data
3990                                 cur -= 1 # Reconsume
3991                                 return tok_cur_tag
3992                         else
3993                                 tok_cur_tag.text += c
3994                 return null
3995
3996         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3997         tok_state_comment_end_dash = ->
3998                 switch c = txt.charAt(cur++)
3999                         when '-'
4000                                 tok_state = tok_state_comment_end
4001                         when "\u0000"
4002                                 parse_error()
4003                                 tok_cur_tag.text += "-\ufffd"
4004                                 tok_state = tok_state_comment
4005                         when '' # EOF
4006                                 parse_error()
4007                                 tok_state = tok_state_data
4008                                 cur -= 1 # Reconsume
4009                                 return tok_cur_tag
4010                         else
4011                                 tok_cur_tag.text += "-#{c}"
4012                                 tok_state = tok_state_comment
4013                 return null
4014
4015         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
tok_state_comment_end = ->
        # Comment end state (HTML5 8.2.4.50).
        # Consumes one character. Returns tok_cur_tag when the comment is emitted
        # ('>' or EOF), otherwise null.
        c = txt.charAt(cur++)
        if c is '>'
                tok_state = tok_state_data
                return tok_cur_tag
        if c is '' # EOF
                parse_error()
                tok_state = tok_state_data
                cur -= 1 # Reconsume
                return tok_cur_tag
        # every other character is a parse error and keeps the comment open
        parse_error()
        if c is '!'
                tok_state = tok_state_comment_end_bang
        else if c is '-'
                # stay in this state, one more dash becomes comment text
                tok_cur_tag.text += '-'
        else if c is "\u0000"
                tok_cur_tag.text += "--\ufffd"
                tok_state = tok_state_comment
        else
                tok_cur_tag.text += "--" + c
                tok_state = tok_state_comment
        return null
4041
4042         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
tok_state_comment_end_bang = ->
        # Comment end bang state (HTML5 8.2.4.51).
        # Consumes one character. Returns tok_cur_tag when the comment is emitted
        # ('>' or EOF), otherwise null.
        switch c = txt.charAt(cur++)
                when '-'
                        # The spec appends only "--!" here. The dash just consumed
                        # is not comment text yet: the comment end dash state
                        # either adds it later or treats it as part of the closing
                        # "-->". Appending it here as well would put one extra "-"
                        # in the comment data (e.g. "<!--a--!-b-->").
                        tok_cur_tag.text += "--!"
                        tok_state = tok_state_comment_end_dash
                when '>'
                        tok_state = tok_state_data
                        return tok_cur_tag
                when "\u0000"
                        parse_error()
                        tok_cur_tag.text += "--!\ufffd"
                        tok_state = tok_state_comment
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        cur -= 1 # Reconsume
                        return tok_cur_tag
                else
                        tok_cur_tag.text += "--!#{c}"
                        tok_state = tok_state_comment
        return null
4064
4065         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
tok_state_doctype = ->
        # DOCTYPE state (HTML5 8.2.4.52).
        # Returns a fresh force-quirks doctype token at EOF, otherwise null.
        c = txt.charAt(cur++)
        if c in ["\t", "\u000a", "\u000c", ' ']
                tok_state = tok_state_before_doctype_name
                return null
        if c is '' # EOF
                parse_error()
                tok_state = tok_state_data
                el = new_doctype_token ''
                el.flag 'force-quirks', true
                cur -= 1 # Reconsume
                return el
        # anything else: missing whitespace, let the next state look at c again
        parse_error()
        tok_state = tok_state_before_doctype_name
        cur -= 1 # Reconsume
        return null
4082
4083         # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
tok_state_before_doctype_name = ->
        # Before DOCTYPE name state.
        # Returns a force-quirks doctype token on '>' or EOF; in every other
        # case nothing is emitted (undefined or null).
        c = txt.charAt(cur++)
        switch
                when c in ["\t", "\u000a", "\u000c", ' ']
                        return
                when is_uc_alpha(c)
                        # names are stored lower-cased
                        tok_cur_tag = new_doctype_token c.toLowerCase()
                        tok_state = tok_state_doctype_name
                        return
                when c is "\u0000"
                        parse_error()
                        tok_cur_tag = new_doctype_token "\ufffd"
                        tok_state = tok_state_doctype_name
                        return
                when c is '>'
                        parse_error()
                        el = new_doctype_token ''
                        el.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return el
                when c is '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        el = new_doctype_token ''
                        el.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return el
        # Anything else starts the name
        tok_cur_tag = new_doctype_token c
        tok_state = tok_state_doctype_name
        return null
4114
4115         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
tok_state_doctype_name = ->
        # DOCTYPE name state (HTML5 8.2.4.54).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        c = txt.charAt(cur++)
        switch
                when c in ["\t", "\u000a", "\u000c", ' ']
                        tok_state = tok_state_after_doctype_name
                        return
                when c is '>'
                        tok_state = tok_state_data
                        return tok_cur_tag
                when is_uc_alpha(c)
                        tok_cur_tag.name += c.toLowerCase()
                        return
                when c is "\u0000"
                        parse_error()
                        tok_cur_tag.name += "\ufffd"
                        return
                when c is '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else is appended to the name unchanged
        tok_cur_tag.name += c
        return null
4140
4141         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
tok_state_after_doctype_name = ->
        # After DOCTYPE name state (HTML5 8.2.4.55).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "\t", "\u000a", "\u000c", ' '
                        return
                when '>'
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else: look for a case-insensitive keyword that starts at the
        # character just consumed (cur - 1); on a match skip its other 5 letters
        switch txt.substr(cur - 1, 6).toLowerCase()
                when 'public'
                        cur += 5
                        tok_state = tok_state_after_doctype_public_keyword
                        return
                when 'system'
                        cur += 5
                        tok_state = tok_state_after_doctype_system_keyword
                        return
        parse_error()
        tok_cur_tag.flag 'force-quirks', true
        tok_state = tok_state_bogus_doctype
        return null
4168
4169         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
tok_state_after_doctype_public_keyword = ->
        # After DOCTYPE public keyword state (HTML5 8.2.4.56).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "\t", "\u000a", "\u000c", ' '
                        tok_state = tok_state_before_doctype_public_identifier
                        return
                when '"'
                        # quote directly after the keyword: reported, then parsed
                        parse_error()
                        tok_cur_tag.public_identifier = ''
                        tok_state = tok_state_doctype_public_identifier_double_quoted
                        return
                when "'"
                        parse_error()
                        tok_cur_tag.public_identifier = ''
                        tok_state = tok_state_doctype_public_identifier_single_quoted
                        return
                when '>'
                        parse_error()
                        tok_cur_tag.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else
        parse_error()
        tok_cur_tag.flag 'force-quirks', true
        tok_state = tok_state_bogus_doctype
        return null
4201
4202         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
tok_state_before_doctype_public_identifier = ->
        # Before DOCTYPE public identifier state (HTML5 8.2.4.57).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        c = txt.charAt(cur++)
        if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
                return
        if c is '"'
                # An opening quote after whitespace is the normal form
                # (PUBLIC "..."); the spec lists no parse error for it in this
                # state, so none is reported.
                tok_cur_tag.public_identifier = ''
                tok_state = tok_state_doctype_public_identifier_double_quoted
                return
        if c is "'"
                # same as above, no parse error
                tok_cur_tag.public_identifier = ''
                tok_state = tok_state_doctype_public_identifier_single_quoted
                return
        if c is '>'
                parse_error()
                tok_cur_tag.flag 'force-quirks', true
                tok_state = tok_state_data
                return tok_cur_tag
        if c is '' # EOF
                parse_error()
                tok_state = tok_state_data
                tok_cur_tag.flag 'force-quirks', true
                cur -= 1 # Reconsume
                return tok_cur_tag
        # Anything else
        parse_error()
        tok_cur_tag.flag 'force-quirks', true
        tok_state = tok_state_bogus_doctype
        return null
4233
4234
4235         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
tok_state_doctype_public_identifier_double_quoted = ->
        # DOCTYPE public identifier (double-quoted) state (HTML5 8.2.4.58).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when '"'
                        # closing quote
                        tok_state = tok_state_after_doctype_public_identifier
                        return
                when "\u0000"
                        parse_error()
                        tok_cur_tag.public_identifier += "\ufffd"
                        return
                when '>'
                        parse_error()
                        tok_cur_tag.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
                else
                        tok_cur_tag.public_identifier += c
        return null
4259
4260         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
tok_state_doctype_public_identifier_single_quoted = ->
        # DOCTYPE public identifier (single-quoted) state (HTML5 8.2.4.59).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "'"
                        # closing quote
                        tok_state = tok_state_after_doctype_public_identifier
                        return
                when "\u0000"
                        parse_error()
                        tok_cur_tag.public_identifier += "\ufffd"
                        return
                when '>'
                        parse_error()
                        tok_cur_tag.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
                else
                        tok_cur_tag.public_identifier += c
        return null
4284
4285         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
tok_state_after_doctype_public_identifier = ->
        # After DOCTYPE public identifier state (HTML5 8.2.4.60).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "\t", "\u000a", "\u000c", ' '
                        tok_state = tok_state_between_doctype_public_and_system_identifiers
                        return
                when '>'
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '"'
                        # system identifier starts with no whitespace before it
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_double_quoted
                        return
                when "'"
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_single_quoted
                        return
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else
        parse_error()
        tok_cur_tag.flag 'force-quirks', true
        tok_state = tok_state_bogus_doctype
        return null
4315
4316         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
tok_state_between_doctype_public_and_system_identifiers = ->
        # Between DOCTYPE public and system identifiers state (HTML5 8.2.4.61).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        c = txt.charAt(cur++)
        if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
                return
        if c is '>'
                tok_state = tok_state_data
                return tok_cur_tag
        if c is '"'
                # Whitespace has already separated the two identifiers, so an
                # opening quote here is well-formed; the spec lists no parse
                # error for it in this state, so none is reported.
                tok_cur_tag.system_identifier = ''
                tok_state = tok_state_doctype_system_identifier_double_quoted
                return
        if c is "'"
                # same as above, no parse error
                tok_cur_tag.system_identifier = ''
                tok_state = tok_state_doctype_system_identifier_single_quoted
                return
        if c is '' # EOF
                parse_error()
                tok_state = tok_state_data
                tok_cur_tag.flag 'force-quirks', true
                cur -= 1 # Reconsume
                return tok_cur_tag
        # Anything else
        parse_error()
        tok_cur_tag.flag 'force-quirks', true
        tok_state = tok_state_bogus_doctype
        return null
4345
4346         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
tok_state_after_doctype_system_keyword = ->
        # After DOCTYPE system keyword state (HTML5 8.2.4.62).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "\t", "\u000a", "\u000c", ' '
                        tok_state = tok_state_before_doctype_system_identifier
                        return
                when '"'
                        # quote directly after the keyword: reported, then parsed
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_double_quoted
                        return
                when "'"
                        parse_error()
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_single_quoted
                        return
                when '>'
                        parse_error()
                        tok_cur_tag.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else
        parse_error()
        tok_cur_tag.flag 'force-quirks', true
        tok_state = tok_state_bogus_doctype
        return null
4378
4379         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
tok_state_before_doctype_system_identifier = ->
        # Before DOCTYPE system identifier state (HTML5 8.2.4.63).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "\t", "\u000a", "\u000c", ' '
                        return
                when '"'
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_double_quoted
                        return
                when "'"
                        tok_cur_tag.system_identifier = ''
                        tok_state = tok_state_doctype_system_identifier_single_quoted
                        return
                when '>'
                        parse_error()
                        tok_cur_tag.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else
        parse_error()
        tok_cur_tag.flag 'force-quirks', true
        tok_state = tok_state_bogus_doctype
        return null
4408
4409         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
tok_state_doctype_system_identifier_double_quoted = ->
        # DOCTYPE system identifier (double-quoted) state (HTML5 8.2.4.64).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when '"'
                        # closing quote
                        tok_state = tok_state_after_doctype_system_identifier
                        return
                when "\u0000"
                        parse_error()
                        tok_cur_tag.system_identifier += "\ufffd"
                        return
                when '>'
                        parse_error()
                        tok_cur_tag.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
                else
                        tok_cur_tag.system_identifier += c
        return null
4433
4434         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
tok_state_doctype_system_identifier_single_quoted = ->
        # DOCTYPE system identifier (single-quoted) state (HTML5 8.2.4.65).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "'"
                        # closing quote
                        tok_state = tok_state_after_doctype_system_identifier
                        return
                when "\u0000"
                        parse_error()
                        tok_cur_tag.system_identifier += "\ufffd"
                        return
                when '>'
                        parse_error()
                        tok_cur_tag.flag 'force-quirks', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
                else
                        tok_cur_tag.system_identifier += c
        return null
4458
4459         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
tok_state_after_doctype_system_identifier = ->
        # After DOCTYPE system identifier state (HTML5 8.2.4.66).
        # Returns tok_cur_tag when the doctype is emitted ('>' or EOF);
        # otherwise nothing is emitted (undefined or null).
        switch c = txt.charAt(cur++)
                when "\t", "\u000a", "\u000c", ' '
                        return
                when '>'
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        parse_error()
                        tok_state = tok_state_data
                        tok_cur_tag.flag 'force-quirks', true
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else: reported, but unlike the other doctype states the
        # force-quirks flag is deliberately _not_ set here
        parse_error()
        tok_state = tok_state_bogus_doctype
        return null
4478
4479         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
tok_state_bogus_doctype = ->
        # Bogus DOCTYPE state (HTML5 8.2.4.67): skip characters until '>' or
        # EOF, then emit tok_cur_tag. Returns null while skipping.
        switch c = txt.charAt(cur++)
                when '>'
                        tok_state = tok_state_data
                        return tok_cur_tag
                when '' # EOF
                        tok_state = tok_state_data
                        cur -= 1 # Reconsume
                        return tok_cur_tag
        # Anything else is ignored
        return null
4491
4492         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
tok_state_cdata_section = ->
        # CDATA section state (HTML5 8.2.4.68): take everything up to the next
        # "]]>" (or to the end of the input when there is none) in one go.
        # Returns a character token with that text, or null when it is empty.
        tok_state = tok_state_data
        next_gt = txt.indexOf ']]>', cur
        if next_gt >= 0
                val = txt.substring cur, next_gt
                cur = next_gt + 3 # continue after the "]]>"
        else
                val = txt.substring cur
                cur = txt.length
        # NUL characters become U+FFFD
        val = val.replace(/\u0000/g, "\ufffd")
        return null if val.length is 0
        return new_character_token val # fixfull split
4506
4507         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4508         # Don't set this as a state, just call it
4509         # returns a string (NOT a text node)
parse_character_reference = (allowed_char = null, in_attr = false) ->
        # Consume a character reference (HTML5 8.2.4.69), starting at txt[cur].
        # Presumably the caller has already consumed the "&" (nothing here
        # checks for it).
        #
        # allowed_char: an extra character that, when it is the first character
        #               looked at, means "not a reference"
        # in_attr:      true enables the two attribute-value exceptions in the
        #               legacy (no ";") branch
        #
        # Returns the decoded string with cur moved past the reference, or '&'
        # with cur left untouched when no reference is recognised.

        # True when there is a character at pos and it is alphanumeric. The
        # bounds check matters: charAt() past the end gives '', and if alnum is
        # a string then alnum.indexOf('') is 0 rather than -1.
        alnum_at = (pos) ->
                return pos < txt.length and alnum.indexOf(txt.charAt(pos)) > -1

        # Position of the first non-alphanumeric character at or after pos.
        skip_alnum = (pos) ->
                pos += 1 while alnum_at(pos)
                return pos

        # Value for name in legacy_char_refs, own properties only, so that text
        # such as "&constructor" cannot pick up an inherited property if the
        # table is a plain object. Returns null when there is no entry.
        own_legacy_ref = (name) ->
                return null unless Object::hasOwnProperty.call(legacy_char_refs, name)
                return legacy_char_refs[name]

        if cur >= txt.length
                return '&'
        switch c = txt.charAt(cur)
                when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
                        # explicitly not a parse error
                        return '&'
                when ';'
                        # there has to be "one or more" alnums between & and ; to be a parse error
                        return '&'
                when '#'
                        # numeric reference: "#x" / "#X" selects hexadecimal
                        if txt.charAt(cur + 1).toLowerCase() is 'x'
                                base = 16
                                charset = hex_chars
                                start = cur + 2
                        else
                                base = 10
                                charset = digits
                                start = cur + 1
                        i = 0
                        while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
                                i += 1
                        if i is 0
                                # no digits (this includes "&#" or "&#x" at the end
                                # of the input): the spec makes this a parse error
                                # and consumes nothing
                                parse_error()
                                return '&'
                        cur = start + i
                        if txt.charAt(cur) is ';'
                                cur += 1
                        else
                                parse_error() # missing ";"
                        code_point = txt.substr(start, i)
                        while code_point.charAt(0) is '0' and code_point.length > 1
                                code_point = code_point.substr 1
                        code_point = parseInt(code_point, base)
                        if unicode_fixes[code_point]?
                                parse_error()
                                return unicode_fixes[code_point]
                        if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
                                # surrogates and out-of-range values
                                parse_error()
                                return "\ufffd"
                        # Control characters and noncharacters are reported but
                        # still returned. The last test covers U+FFFE/U+FFFF and
                        # the matching pair at the end of every plane up to
                        # U+10FFFF.
                        if (0x0001 <= code_point <= 0x0008) or (0x000D <= code_point <= 0x001F) or (0x007F <= code_point <= 0x009F) or (0xFDD0 <= code_point <= 0xFDEF) or code_point is 0x000B or (code_point & 0xFFFF) >= 0xFFFE
                                parse_error()
                        return from_code_point code_point
                else
                        # named reference: length of the alphanumeric run,
                        # scanning at most 31 characters
                        i = 0
                        while i < 31 and alnum_at(cur + i)
                                i += 1
                        if i is 0
                                # exit early, because the parse error at the end needs at least one alnum
                                return '&'
                        max = i
                        if txt.charAt(cur + i) is ';'
                                decoded = decode_named_char_ref txt.substr(cur, i)
                                if decoded?
                                        cur += i + 1 # the name and its ";"
                                        return decoded
                                # else FALL THROUGH (check for match without last char(s) or ";")
                                max = i + 1 # as before, the probe range also covers the ";"
                        # no ";" terminator (only legacy char refs)
                        # "by 1" keeps the range from counting downwards when max < 2
                        for i in [2..max] by 1 # no prefix matches, so ok to check shortest first
                                c = own_legacy_ref txt.substr(cur, i)
                                if c?
                                        if in_attr
                                                if txt.charAt(cur + i) is '='
                                                        # "because some legacy user agents will
                                                        # misinterpret the markup in those cases"
                                                        parse_error()
                                                        return '&'
                                                if alnum_at(cur + i)
                                                        # this makes attributes forgiving about url args
                                                        return '&'
                                        # ok, and besides the weird exceptions for attributes...
                                        # return the matching char
                                        cur += i # consume entity chars
                                        parse_error() # because no terminating ";"
                                        return c
                        # Nothing matched. Per the spec this is a parse error only
                        # when the alphanumerics after "&" are followed by ";".
                        if txt.charAt(skip_alnum(cur)) is ';'
                                parse_error()
                        return '&'
        return # never reached
4593
eat_next_token_if_newline = ->
        # Run the tokenizer until it produces a token. If that token is a
        # "newline" text token it stays consumed; otherwise cur is put back to
        # where it was (only cur is restored).
        old_cur = cur
        loop
                t = tok_state()
                break if t?
        if t.type is TYPE_TEXT
                # a line feed always counts
                if t.text is "\u000a"
                        return
                # a carriage return only counts when exactly one input
                # character was consumed, i.e. it did not come from a
                # character reference
                if cur - old_cur is 1 and t.text is "\u000d"
                        return
        # not a "newline"
        cur = old_cur
        return
4611
4612         # tree constructor initialization
4613         # see comments on TYPE_TAG/etc for the structure of this data
4614         txt = args_html # input text; parse_init normalizes its line endings
4615         cur = 0 # current offset into txt
4616         doc = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4617         doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
4618         fragment_root = null # fragment parsing algorithm returns children of this
4619         open_els = [] # presumably the spec's "stack of open elements" -- TODO confirm
4620         afe = [] # active formatting elements
4621         template_ins_modes = [] # parse_init unshifts ins_mode_in_template for a <template> context
4622         ins_mode = ins_mode_initial
4623         original_ins_mode = ins_mode # TODO check spec
4624         flag_scripting = args.scripting ? true # defaults to true when args.scripting is absent; TODO might need an extra flag to get <noscript> to parse correctly
4625         flag_frameset_ok = true
4626         flag_parsing = true # parse_main_loop keeps running while this is true
4627         flag_foster_parenting = false
4628         form_element_pointer = null # parse_init sets this when the fragment context is inside an HTML <form>
4629         temporary_buffer = null
4630         pending_table_character_tokens = []
4631         head_element_pointer = null
4632         flag_fragment_parsing = false # parse_init sets this to true when a context element exists
4633         context_element = null # set by parse_init from args.fragment / args.context
4634         prev_node_id = 0 # just for debugging
4635
4636         # tokenizer initialization
             # tok_state holds the current tokenizer state function; parse_init may
             # replace this starting value depending on the fragment context element
4637         tok_state = tok_state_data
4638
4639         parse_init = ->
                     # Prepare parser state before the main loop runs:
                     #   1. build a context element from args.fragment (string) or take
                     #      args.context (Node); args.context wins if both are given
                     #   2. if there is a context element, set up fragment parsing
                     #   3. normalize line endings in txt
                     # Returns nothing; works by assigning the shared parser variables.
4640                 # fragment parsing (text arg)
4641                 if args.fragment?
4642                         # this handles the fragment from the tests in the format described here:
4643                         # https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
                             # i.e. a tag name, optionally prefixed with "math " or "svg " to pick the namespace
4644                         f = args.fragment
4645                         ns = NS_HTML
4646                         if f.substr(0, 5) is 'math '
4647                                 f = f.substr 5
4648                                 ns = NS_MATHML
4649                         else if f.substr(0, 4) is 'svg '
4650                                 f = f.substr 4
4651                                 ns = NS_SVG
4652                         t = new_open_tag f
4653                         context_element = token_to_element t, ns
                             # give the synthetic context element its own no-quirks document
4654                         context_element.document = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4655                         context_element.document.flag 'quirks mode', QUIRKS_NO
4656                 # fragment parsing (Node arg)
                     # overrides any context element built from args.fragment above
4657                 if args.context?
4658                         context_element = args.context
4659
4660                 # http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4661                 # fragment parsing algorithm
4662                 if context_element?
4663                         flag_fragment_parsing = true
                             # replaces the 'document' node created during initialization
4664                         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4665                         # search up the tree from context, to try to find its document,
4666                         # because this file only puts a "document" property on the root
4667                         # element.
4668                         old_doc = null
4669                         el = context_element
4670                         loop
4671                                 if el.document?
4672                                         old_doc = el.document
4673                                         break
4674                                 if el.parent
4675                                         el = el.parent
4676                                 else
4677                                         break
                             # carry the context document's quirks mode over to the new doc, if one was found
4678                         if old_doc
4679                                 doc.flag 'quirks mode', old_doc.flag 'quirks mode'
4680                         # set tok_state
                             # (only HTML-namespace context elements change the initial tokenizer state)
4681                         if context_element.namespace is NS_HTML
4682                                 switch context_element.name
4683                                         when 'title', 'textarea'
4684                                                 tok_state = tok_state_rcdata
4685                                         when 'style', 'xmp', 'iframe', 'noembed', 'noframes'
4686                                                 tok_state = tok_state_rawtext
4687                                         when 'script'
4688                                                 tok_state = tok_state_script_data
4689                                         when 'noscript'
                                                     # with scripting off, tok_state keeps its initial value
4690                                                 if flag_scripting
4691                                                         tok_state = tok_state_rawtext
4692                                         when 'plaintext'
4693                                                 tok_state = tok_state_plaintext
                             # root 'html' element: the only child of doc and the only open element
4694                         fragment_root = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4695                         doc.children.push fragment_root
4696                         fragment_root.document = doc
4697                         open_els = [fragment_root]
4698                         if context_element.name is 'template' and context_element.namespace is NS_HTML
4699                                 template_ins_modes.unshift ins_mode_in_template
4700                         # fixfull create token for context (it should have its original one already)
4701                         reset_ins_mode()
4702                         # set form_element pointer... in the foreign doc?!
                             # walk from the context element up through its parents; the first HTML <form> found wins
4703                         el = context_element
4704                         loop
4705                                 if el.name is 'form' and el.namespace is NS_HTML
4706                                         form_element_pointer = el
4707                                         break
4708                                 if el.parent
4709                                         el = el.parent
4710                                 else
4711                                         break
4712
4713                 # text pre-processing
4714                 # FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
                     # NOTE(review): the section linked above appears to require converting CR and
                     # CRLF to LF, which is what these two lines do -- confirm, then drop the "fixfull" notes.
                     # Order matters: CRLF pairs are collapsed first so they do not become two LFs.
4715                 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4716                 txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4717
4718                 return
4719
4720         # http://www.w3.org/TR/html5/syntax.html#tree-construction
             # Main loop: repeatedly run the current tokenizer state function and hand
             # every token it produces to process_token. Runs until flag_parsing is
             # false (presumably cleared elsewhere when parsing stops). Returns nothing.
4721         parse_main_loop = ->
4722                 while flag_parsing
4723                         t = tok_state()
                             # tok_state() may return null/undefined (no token produced); skip those
4724                         if t?
4725                                 process_token t
4726                                 # fixfull parse error if has self-closing flag, but it wasn't acknowledged
4727                 return
4728         parse_init() # set up state (and the fragment context, if any) before tokenizing
4729         parse_main_loop() # tokenize and process tokens until flag_parsing is false
4730
             # result: when parsing a fragment, the children of the synthetic 'html' root
             # created in parse_init; otherwise the children of the document node
4731         if flag_fragment_parsing
4732                 return fragment_root.children
4733         return doc.children
4734
4735 exports.parse = parse_html # main entry point, e.g. parse("<p><b>hi</p>", {fragment: "body"})
4736 exports.Node = Node
     # debugging helpers
4737 exports.debug_log_reset = debug_log_reset
4738 exports.debug_log_each = debug_log_each
     # node type constants (first argument to `new Node`)
4739 exports.TYPE_TAG = TYPE_TAG
4740 exports.TYPE_TEXT = TYPE_TEXT
4741 exports.TYPE_COMMENT = TYPE_COMMENT
4742 exports.TYPE_DOCTYPE = TYPE_DOCTYPE
     # namespace constants (the `namespace` property of a Node)
4743 exports.NS_HTML = NS_HTML
4744 exports.NS_MATHML = NS_MATHML
4745 exports.NS_SVG = NS_SVG
     # values for a document's 'quirks mode' flag
4746 exports.QUIRKS_NO = QUIRKS_NO
4747 exports.QUIRKS_LIMITED = QUIRKS_LIMITED
4748 exports.QUIRKS_YES = QUIRKS_YES