JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
update .gitignore
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a thorough parser for html5, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 #
24 #     http://www.w3.org/TR/html5/syntax.html
25 #
26 # except for some places marked "WHATWG" that are implemented as described here:
27 #
28 #     https://html.spec.whatwg.org/multipage/syntax.html
29 #
30 # This code passes all of the tests in the .dat files at:
31 #
32 #     https://github.com/JasonWoof/html5lib-tests/tree/patch-1/tree-construction
33
34
35 ##################################
36 ## how to use this code
37 ##################################
38 #
39 # See README.md for how to run this file in the browser or in node.js.
40 #
41 # This file exports a single useful function: parse_tml, and some constants
42 # (see the bottom of this file for those.)
43 #
44 # Call it like this:
45 #
46 #     wheic.parse_html("<p><b>hi</p>")
47 #
48 # Or, if you don't want <html><head><body>/etc, do this:
49 #
50 #     wheic.parse_html("<p><b>hi</p>", {fragment: "body"})
51 #
52 # return value is an array of Nodes, see "class Node" below.
53
54 # This code is a work in progress, eg try search this file for "fixfull",
55 # "TODO" and "FIXME"
56
57
58 # Notes:  stacks/lists
59 #
60 # Jason was frequently confused by the terminology used to refer to different
61 # parts of the stacks and lists in the spec, so he made this chart to help keep
62 # his head straight:
63 #
64 # stacks grow downward (current element is index=0)
65 #
66 # example: open_els = [a, b, c, d, e, f, g]
67 #
68 # "grows downwards" means it's visualized like this: (index: el "names")
69 #
70 #   6: g "start of the list", "topmost", "first"
71 #   5: f
72 #   4: e "previous" (to d), "above", "before"
73 #   3: d   (previous/next are relative to this element)
74 #   2: c "next", "after", "lower", "below"
75 #   1: b
76 #   0: a "end of the list", "current node", "bottommost", "last"
77
78 if (typeof module) isnt 'undefined' and module.exports?
79         context = 'module'
80         exports = module.exports
81 else
82         context = 'browser'
83         window.wheic = {}
84         exports = window.wheic
85
86 from_code_point = (x) ->
87         if String.fromCodePoint?
88                 return String.fromCodePoint x
89         else
90                 if x <= 0xffff
91                         return String.fromCharCode x
92                 x -= 0x10000
93                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
94
95 # Each node is an obect of the Node class. Here are the Node types:
96 TYPE_TAG = 0 # name, {attributes}, [children]
97 TYPE_TEXT = 1 # "text"
98 TYPE_COMMENT = 2
99 TYPE_DOCTYPE = 3
100 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
101 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
102 TYPE_END_TAG = 5 # name
103 TYPE_EOF = 6
104 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
105 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
106
107 # namespace constants
108 NS_HTML = 1
109 NS_MATHML = 2
110 NS_SVG = 3
111
112 # quirks mode constants
113 QUIRKS_NO = 1
114 QUIRKS_LIMITED = 2
115 QUIRKS_YES = 3
116
117 # queue up debug logs, so eg they can be shown only for tests that fail
118 g_debug_log = []
119 debug_log_reset = ->
120         g_debug_log = []
121         return
122 debug_log = (str) ->
123         g_debug_log.push str
124         return
125 debug_log_each = (cb) ->
126         for str in g_debug_log
127                 cb str
128         return
129
130 prev_node_id = 0
131 class Node
132         constructor: (type, args = {}) ->
133                 @type = type # one of the TYPE_* constants above
134                 @name = args.name ? '' # tag name
135                 @text = args.text ? '' # contents for text/comment nodes
136                 @attrs = args.attrs ? {}
137                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
138                 @children = args.children ? []
139                 @namespace = args.namespace ? NS_HTML
140                 @parent = args.parent ? null
141                 @token = args.token ? null
142                 @flags = args.flags ? {}
143                 if args.id?
144                         @id = "#{args.id}+"
145                 else
146                         @id = "#{++prev_node_id}"
147         acknowledge_self_closing: ->
148                 if @token?
149                         @token.flag 'did_self_close', true
150                 else
151                         @flag 'did_self_close', true
152                 return
153         flag: (key, value = null) ->
154                 if value?
155                         @flags[key] = value
156                 else
157                         return @flags[key]
158                 return
159
160 # helpers: (only take args that are normally known when parser creates nodes)
161 new_open_tag = (name) ->
162         return new Node TYPE_START_TAG, name: name
163 new_end_tag = (name) ->
164         return new Node TYPE_END_TAG, name: name
165 new_element = (name) ->
166         return new Node TYPE_TAG, name: name
167 new_text_node = (txt) ->
168         return new Node TYPE_TEXT, text: txt
169 new_character_token = new_text_node
170 new_comment_token = (txt) ->
171         return new Node TYPE_COMMENT, text: txt
172 new_doctype_token = (name) ->
173         return new Node TYPE_DOCTYPE, name: name
174 new_eof_token = ->
175         return new Node TYPE_EOF
176 new_afe_marker = ->
177         return new Node TYPE_AFE_MARKER
178 new_aaa_bookmark = ->
179         return new Node TYPE_AAA_BOOKMARK
180
181 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
182 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
183 digits = "0123456789"
184 alnum = lc_alpha + uc_alpha + digits
185 hex_chars = digits + "abcdefABCDEF"
186
187 is_uc_alpha = (str) ->
188         return str.length is 1 and uc_alpha.indexOf(str) > -1
189 is_lc_alpha = (str) ->
190         return str.length is 1 and lc_alpha.indexOf(str) > -1
191
192 # some SVG elements have dashes in them
193 tag_name_chars = alnum + "-"
194
195 # http://www.w3.org/TR/html5/infrastructure.html#space-character
196 space_chars = "\u0009\u000a\u000c\u000d\u0020"
197 is_space = (txt) ->
198         return txt.length is 1 and space_chars.indexOf(txt) > -1
199 is_space_tok = (t) ->
200         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
201
202 is_input_hidden_tok = (t) ->
203         return false unless t.type is TYPE_START_TAG
204         for a in t.attrs_a
205                 if a[0] is 'type'
206                         if a[1].toLowerCase() is 'hidden'
207                                 return true
208                         return false
209         return false
210
211 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
212 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
213
214 unicode_fixes = {}
215 unicode_fixes[0x00] = "\uFFFD"
216 unicode_fixes[0x80] = "\u20AC"
217 unicode_fixes[0x82] = "\u201A"
218 unicode_fixes[0x83] = "\u0192"
219 unicode_fixes[0x84] = "\u201E"
220 unicode_fixes[0x85] = "\u2026"
221 unicode_fixes[0x86] = "\u2020"
222 unicode_fixes[0x87] = "\u2021"
223 unicode_fixes[0x88] = "\u02C6"
224 unicode_fixes[0x89] = "\u2030"
225 unicode_fixes[0x8A] = "\u0160"
226 unicode_fixes[0x8B] = "\u2039"
227 unicode_fixes[0x8C] = "\u0152"
228 unicode_fixes[0x8E] = "\u017D"
229 unicode_fixes[0x91] = "\u2018"
230 unicode_fixes[0x92] = "\u2019"
231 unicode_fixes[0x93] = "\u201C"
232 unicode_fixes[0x94] = "\u201D"
233 unicode_fixes[0x95] = "\u2022"
234 unicode_fixes[0x96] = "\u2013"
235 unicode_fixes[0x97] = "\u2014"
236 unicode_fixes[0x98] = "\u02DC"
237 unicode_fixes[0x99] = "\u2122"
238 unicode_fixes[0x9A] = "\u0161"
239 unicode_fixes[0x9B] = "\u203A"
240 unicode_fixes[0x9C] = "\u0153"
241 unicode_fixes[0x9E] = "\u017E"
242 unicode_fixes[0x9F] = "\u0178"
243
244 quirks_yes_pi_prefixes = [
245         "+//silmaril//dtd html pro v0r11 19970101//"
246         "-//as//dtd html 3.0 aswedit + extensions//"
247         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
248         "-//ietf//dtd html 2.0 level 1//"
249         "-//ietf//dtd html 2.0 level 2//"
250         "-//ietf//dtd html 2.0 strict level 1//"
251         "-//ietf//dtd html 2.0 strict level 2//"
252         "-//ietf//dtd html 2.0 strict//"
253         "-//ietf//dtd html 2.0//"
254         "-//ietf//dtd html 2.1e//"
255         "-//ietf//dtd html 3.0//"
256         "-//ietf//dtd html 3.2 final//"
257         "-//ietf//dtd html 3.2//"
258         "-//ietf//dtd html 3//"
259         "-//ietf//dtd html level 0//"
260         "-//ietf//dtd html level 1//"
261         "-//ietf//dtd html level 2//"
262         "-//ietf//dtd html level 3//"
263         "-//ietf//dtd html strict level 0//"
264         "-//ietf//dtd html strict level 1//"
265         "-//ietf//dtd html strict level 2//"
266         "-//ietf//dtd html strict level 3//"
267         "-//ietf//dtd html strict//"
268         "-//ietf//dtd html//"
269         "-//metrius//dtd metrius presentational//"
270         "-//microsoft//dtd internet explorer 2.0 html strict//"
271         "-//microsoft//dtd internet explorer 2.0 html//"
272         "-//microsoft//dtd internet explorer 2.0 tables//"
273         "-//microsoft//dtd internet explorer 3.0 html strict//"
274         "-//microsoft//dtd internet explorer 3.0 html//"
275         "-//microsoft//dtd internet explorer 3.0 tables//"
276         "-//netscape comm. corp.//dtd html//"
277         "-//netscape comm. corp.//dtd strict html//"
278         "-//o'reilly and associates//dtd html 2.0//"
279         "-//o'reilly and associates//dtd html extended 1.0//"
280         "-//o'reilly and associates//dtd html extended relaxed 1.0//"
281         "-//sq//dtd html 2.0 hotmetal + extensions//"
282         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
283         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
284         "-//spyglass//dtd html 2.0 extended//"
285         "-//sun microsystems corp.//dtd hotjava html//"
286         "-//sun microsystems corp.//dtd hotjava strict html//"
287         "-//w3c//dtd html 3 1995-03-24//"
288         "-//w3c//dtd html 3.2 draft//"
289         "-//w3c//dtd html 3.2 final//"
290         "-//w3c//dtd html 3.2//"
291         "-//w3c//dtd html 3.2s draft//"
292         "-//w3c//dtd html 4.0 frameset//"
293         "-//w3c//dtd html 4.0 transitional//"
294         "-//w3c//dtd html experimental 19960712//"
295         "-//w3c//dtd html experimental 970421//"
296         "-//w3c//dtd w3 html//"
297         "-//w3o//dtd w3 html 3.0//"
298         "-//webtechs//dtd mozilla html 2.0//"
299         "-//webtechs//dtd mozilla html//"
300 ]
301
302 # These are the character references that don't need a terminating semicolon
303 # min length: 2, max: 6, none are a prefix of any other.
304 legacy_char_refs = {
305         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
306         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
307         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
308         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
309         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
310         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
311         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
312         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
313         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
314         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
315         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
316         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
317         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
318         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
319         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
320         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
321         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
322         yen: '¥', yuml: 'ÿ'
323 }
324
325 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
326 raw_text_elements = ['script', 'style']
327 escapable_raw_text_elements = ['textarea', 'title']
328 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
329 svg_elements = [
330         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
331         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
332         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
333         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
334         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
335         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
336         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
337         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
338         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
339         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
340         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
341         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
342         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
343         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
344         'view', 'vkern'
345 ]
346
347 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
348 mathml_elements = [
349         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
350         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
351         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
352         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
353         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
354         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
355         'determinant', 'diff', 'divergence', 'divide', 'domain',
356         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
357         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
358         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
359         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
360         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
361         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
362         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
363         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
364         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
365         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
366         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
367         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
368         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
369         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
370         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
371         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
372         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
373         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
374         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
375         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
376         'vectorproduct', 'xor'
377 ]
378 # foreign_elements = [svg_elements..., mathml_elements...]
379 #normal_elements = All other allowed HTML elements are normal elements.
380
381 special_elements = {
382         # HTML:
383         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
384         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
385         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
386         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
387         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
388         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
389         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
390         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
391         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
392         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
393         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
394
395         menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
396
397         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
398         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
399         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
400         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
401         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
402         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
403         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
404
405         # MathML:
406         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
407         'annotation-xml':NS_MATHML,
408
409         # SVG:
410         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
411 }
412
413 formatting_elements = {
414          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
415          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
416          u: true
417 }
418
419 mathml_text_integration = {
420         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
421 }
422 is_mathml_text_integration_point = (el) ->
423         return mathml_text_integration[el.name] is el.namespace
424 is_html_integration = (el) -> # DON'T PASS A TOKEN
425         if el.namespace is NS_MATHML
426                 if el.name is 'annotation-xml'
427                         if el.attrs.encoding?
428                                 if el.attrs.encoding.toLowerCase() is 'text/html'
429                                         return true
430                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
431                                         return true
432                 return false
433         if el.namespace is NS_SVG
434                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
435                         return true
436         return false
437
438 h_tags = {
439         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
440 }
441
442 foster_parenting_targets = {
443         table: NS_HTML
444         tbody: NS_HTML
445         tfoot: NS_HTML
446         thead: NS_HTML
447         tr: NS_HTML
448 }
449
450 end_tag_implied = {
451         dd: NS_HTML
452         dt: NS_HTML
453         li: NS_HTML
454         option: NS_HTML
455         optgroup: NS_HTML
456         p: NS_HTML
457         rb: NS_HTML
458         rp: NS_HTML
459         rt: NS_HTML
460         rtc: NS_HTML
461 }
462
463 el_is_special = (e) ->
464         return special_elements[e.name] is e.namespace
465
466 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
467 el_is_special_not_adp = (el) ->
468         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
469
470 svg_name_fixes = {
471         altglyph: 'altGlyph'
472         altglyphdef: 'altGlyphDef'
473         altglyphitem: 'altGlyphItem'
474         animatecolor: 'animateColor'
475         animatemotion: 'animateMotion'
476         animatetransform: 'animateTransform'
477         clippath: 'clipPath'
478         feblend: 'feBlend'
479         fecolormatrix: 'feColorMatrix'
480         fecomponenttransfer: 'feComponentTransfer'
481         fecomposite: 'feComposite'
482         feconvolvematrix: 'feConvolveMatrix'
483         fediffuselighting: 'feDiffuseLighting'
484         fedisplacementmap: 'feDisplacementMap'
485         fedistantlight: 'feDistantLight'
486         fedropshadow: 'feDropShadow'
487         feflood: 'feFlood'
488         fefunca: 'feFuncA'
489         fefuncb: 'feFuncB'
490         fefuncg: 'feFuncG'
491         fefuncr: 'feFuncR'
492         fegaussianblur: 'feGaussianBlur'
493         feimage: 'feImage'
494         femerge: 'feMerge'
495         femergenode: 'feMergeNode'
496         femorphology: 'feMorphology'
497         feoffset: 'feOffset'
498         fepointlight: 'fePointLight'
499         fespecularlighting: 'feSpecularLighting'
500         fespotlight: 'feSpotLight'
501         fetile: 'feTile'
502         feturbulence: 'feTurbulence'
503         foreignobject: 'foreignObject'
504         glyphref: 'glyphRef'
505         lineargradient: 'linearGradient'
506         radialgradient: 'radialGradient'
507         textpath: 'textPath'
508 }
509 svg_attribute_fixes = {
510         attributename: 'attributeName'
511         attributetype: 'attributeType'
512         basefrequency: 'baseFrequency'
513         baseprofile: 'baseProfile'
514         calcmode: 'calcMode'
515         clippathunits: 'clipPathUnits'
516         contentscripttype: 'contentScriptType'
517         contentstyletype: 'contentStyleType'
518         diffuseconstant: 'diffuseConstant'
519         edgemode: 'edgeMode'
520         externalresourcesrequired: 'externalResourcesRequired'
521         # WHATWG removes this: filterres: 'filterRes'
522         filterunits: 'filterUnits'
523         glyphref: 'glyphRef'
524         gradienttransform: 'gradientTransform'
525         gradientunits: 'gradientUnits'
526         kernelmatrix: 'kernelMatrix'
527         kernelunitlength: 'kernelUnitLength'
528         keypoints: 'keyPoints'
529         keysplines: 'keySplines'
530         keytimes: 'keyTimes'
531         lengthadjust: 'lengthAdjust'
532         limitingconeangle: 'limitingConeAngle'
533         markerheight: 'markerHeight'
534         markerunits: 'markerUnits'
535         markerwidth: 'markerWidth'
536         maskcontentunits: 'maskContentUnits'
537         maskunits: 'maskUnits'
538         numoctaves: 'numOctaves'
539         pathlength: 'pathLength'
540         patterncontentunits: 'patternContentUnits'
541         patterntransform: 'patternTransform'
542         patternunits: 'patternUnits'
543         pointsatx: 'pointsAtX'
544         pointsaty: 'pointsAtY'
545         pointsatz: 'pointsAtZ'
546         preservealpha: 'preserveAlpha'
547         preserveaspectratio: 'preserveAspectRatio'
548         primitiveunits: 'primitiveUnits'
549         refx: 'refX'
550         refy: 'refY'
551         repeatcount: 'repeatCount'
552         repeatdur: 'repeatDur'
553         requiredextensions: 'requiredExtensions'
554         requiredfeatures: 'requiredFeatures'
555         specularconstant: 'specularConstant'
556         specularexponent: 'specularExponent'
557         spreadmethod: 'spreadMethod'
558         startoffset: 'startOffset'
559         stddeviation: 'stdDeviation'
560         stitchtiles: 'stitchTiles'
561         surfacescale: 'surfaceScale'
562         systemlanguage: 'systemLanguage'
563         tablevalues: 'tableValues'
564         targetx: 'targetX'
565         targety: 'targetY'
566         textlength: 'textLength'
567         viewbox: 'viewBox'
568         viewtarget: 'viewTarget'
569         xchannelselector: 'xChannelSelector'
570         ychannelselector: 'yChannelSelector'
571         zoomandpan: 'zoomAndPan'
572 }
573 foreign_attr_fixes = {
574         'xlink:actuate': 'xlink actuate'
575         'xlink:arcrole': 'xlink arcrole'
576         'xlink:href': 'xlink href'
577         'xlink:role': 'xlink role'
578         'xlink:show': 'xlink show'
579         'xlink:title': 'xlink title'
580         'xlink:type': 'xlink type'
581         'xml:base': 'xml base'
582         'xml:lang': 'xml lang'
583         'xml:space': 'xml space'
584         'xmlns': 'xmlns'
585         'xmlns:xlink': 'xmlns xlink'
586 }
587 adjust_mathml_attributes = (t) ->
588         for a in t.attrs_a
589                 if a[0] is 'definitionurl'
590                         a[0] = 'definitionURL'
591         return
592 adjust_svg_attributes = (t) ->
593         for a in t.attrs_a
594                 if svg_attribute_fixes[a[0]]?
595                         a[0] = svg_attribute_fixes[a[0]]
596         return
597 adjust_foreign_attributes = (t) ->
598         # fixfull
599         for a in t.attrs_a
600                 if foreign_attr_fixes[a[0]]?
601                         a[0] = foreign_attr_fixes[a[0]]
602         return
603
604 # decode_named_char_ref()
605 #
606 # The list of named character references is _huge_ so if we're running in a
607 # browser, we get the browser to decode them, rather than increasing the code
608 # size to include the table.
609 if context is 'module'
610         _decode_named_char_ref = require './html5-named-entities.coffee'
611 else
612         # TODO test this in IE8
613         decode_named_char_ref_el = document.createElement('textarea')
614         _decode_named_char_ref = (txt) ->
615                 txt = "&#{txt};"
616                 decode_named_char_ref_el.innerHTML = txt
617                 decoded = decode_named_char_ref_el.value
618                 return null if decoded is txt
619                 return decoded
620 # Pass the name of a named entity _that has a terminating semicolon_
621 # Entities without terminating semicolons should use legacy_char_refs[]
622 # Do not include the "&" or ";" in your argument, eg pass "alpha"
623 decode_named_char_ref_cache = {}
624 decode_named_char_ref = (txt) ->
625         decoded = decode_named_char_ref_cache[txt]
626         return decoded if decoded?
627         decoded = _decode_named_char_ref txt
628         return decode_named_char_ref_cache[txt] = decoded
629
630 parse_html = (args_html, args = {}) ->
631         txt = null
632         cur = null # index of next char in txt to be parsed
633         # declare doc and tokenizer variables so they're in scope below
634         doc = null
635         open_els = null # stack of open elements
636         afe = null # active formatting elements
637         template_ins_modes = null
638         ins_mode = null
639         original_ins_mode = null
640         tok_state = null
641         tok_cur_tag = null # partially parsed tag
642         flag_scripting = null
643         flag_frameset_ok = null
644         flag_parsing = null
645         flag_foster_parenting = null
646         form_element_pointer = null
647         temporary_buffer = null
648         pending_table_character_tokens = null
649         head_element_pointer = null
650         flag_fragment_parsing = null
651         context_element = null
652
653         stop_parsing = ->
654                 flag_parsing = false
655                 return
656
657         parse_error = ->
658                 if args.error_cb?
659                         args.error_cb cur
660                 else
661                         console.log "Parse error at character #{cur} of #{txt.length}"
662                 return
663
664         # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
665         # "Noah's Ark clause" but with three
666         afe_push = (new_el) ->
667                 matches = 0
668                 for el, i in afe
669                         if el.type is TYPE_AFE_MARKER
670                                 break
671                         if el.name is new_el.name and el.namespace is new_el.namespace
672                                 attrs_match = true
673                                 for k, v of el.attrs
674                                         unless new_el.attrs[k] is v
675                                                 attrs_match = false
676                                                 break
677                                 if attrs_match
678                                         for k, v of new_el.attrs
679                                                 unless el.attrs[k] is v
680                                                         attrs_match = false
681                                                         break
682                                 if attrs_match
683                                         matches += 1
684                                         if matches is 3
685                                                 afe.splice i, 1
686                                                 break
687                 afe.unshift new_el
688                 return
689
690         afe_push_marker = ->
691                 afe.unshift new_afe_marker()
692                 return
693
694         # the functions below impliment the Tree Contstruction algorithm
695         # http://www.w3.org/TR/html5/syntax.html#tree-construction
696
697         # But first... the helpers
698         template_tag_is_open = ->
699                 for el in open_els
700                         if el.name is 'template' and el.namespace is NS_HTML
701                                 return true
702                 return false
703         is_in_scope_x = (tag_name, scope, namespace) ->
704                 for el in open_els
705                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
706                                 return true
707                         if scope[el.name] is el.namespace
708                                 return false
709                 return false
710         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
711                 for el in open_els
712                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
713                                 return true
714                         if scope[el.name] is el.namespace
715                                 return false
716                         if scope2[el.name] is el.namespace
717                                 return false
718                 return false
719         standard_scopers = {
720                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
721                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
722                 template: NS_HTML,
723
724                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
725                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
726
727                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
728         }
729         button_scopers = button: NS_HTML
730         li_scopers = ol: NS_HTML, ul: NS_HTML
731         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
732         is_in_scope = (tag_name, namespace = null) ->
733                 return is_in_scope_x tag_name, standard_scopers, namespace
734         is_in_button_scope = (tag_name, namespace = null) ->
735                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
736         is_in_table_scope = (tag_name, namespace = null) ->
737                 return is_in_scope_x tag_name, table_scopers, namespace
738         # aka is_in_list_item_scope
739         is_in_li_scope = (tag_name, namespace = null) ->
740                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
741         is_in_select_scope = (tag_name, namespace = null) ->
742                 for t in open_els
743                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
744                                 return true
745                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
746                                 return false
747                 return false
748         # this checks for a particular element, not by name
749         # this requires a namespace match
750         el_is_in_scope = (needle) ->
751                 for el in open_els
752                         if el is needle
753                                 return true
754                         if standard_scopers[el.name] is el.namespace
755                                 return false
756                 return false
757
758         clear_to_table_stopers = {
759                 'table': true
760                 'template': true
761                 'html': true
762         }
763         clear_stack_to_table_context = ->
764                 loop
765                         if clear_to_table_stopers[open_els[0].name]?
766                                 break
767                         open_els.shift()
768                 return
769         clear_to_table_body_stopers = {
770                 tbody: NS_HTML
771                 tfoot: NS_HTML
772                 thead: NS_HTML
773                 template: NS_HTML
774                 html: NS_HTML
775         }
776         clear_stack_to_table_body_context = ->
777                 loop
778                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
779                                 break
780                         open_els.shift()
781                 return
782         clear_to_table_row_stopers = {
783                 'tr': true
784                 'template': true
785                 'html': true
786         }
787         clear_stack_to_table_row_context = ->
788                 loop
789                         if clear_to_table_row_stopers[open_els[0].name]?
790                                 break
791                         open_els.shift()
792                 return
793         clear_afe_to_marker = ->
794                 loop
795                         return unless afe.length > 0 # this happens in fragment case, ?spec error
796                         el = afe.shift()
797                         if el.type is TYPE_AFE_MARKER
798                                 return
799                 return
800
801         # 8.2.3.1 ...
802         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
803         reset_ins_mode = ->
804                 # 1. Let last be false.
805                 last = false
806                 # 2. Let node be the last node in the stack of open elements.
807                 node_i = 0
808                 node = open_els[node_i]
809                 # 3. Loop: If node is the first node in the stack of open elements,
810                 # then set last to true, and, if the parser was originally created as
811                 # part of the HTML fragment parsing algorithm (fragment case) set node
812                 # to the context element.
813                 loop
814                         if node_i is open_els.length - 1
815                                 last = true
816                                 if flag_fragment_parsing
817                                         node = context_element
818                         # 4. If node is a select element, run these substeps:
819                         if node.name is 'select' and node.namespace is NS_HTML
820                                 # 1. If last is true, jump to the step below labeled done.
821                                 unless last
822                                         # 2. Let ancestor be node.
823                                         ancestor_i = node_i
824                                         ancestor = node
825                                         # 3. Loop: If ancestor is the first node in the stack of
826                                         # open elements, jump to the step below labeled done.
827                                         loop
828                                                 if ancestor_i is open_els.length - 1
829                                                         break
830                                                 # 4. Let ancestor be the node before ancestor in the stack
831                                                 # of open elements.
832                                                 ancestor_i += 1
833                                                 ancestor = open_els[ancestor_i]
834                                                 # 5. If ancestor is a template node, jump to the step below
835                                                 # labeled done.
836                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
837                                                         break
838                                                 # 6. If ancestor is a table node, switch the insertion mode
839                                                 # to "in select in table" and abort these steps.
840                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
841                                                         ins_mode = ins_mode_in_select_in_table
842                                                         return
843                                                 # 7. Jump back to the step labeled loop.
844                                 # 8. Done: Switch the insertion mode to "in select" and abort
845                                 # these steps.
846                                 ins_mode = ins_mode_in_select
847                                 return
848                         # 5. If node is a td or th element and last is false, then switch
849                         # the insertion mode to "in cell" and abort these steps.
850                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
851                                 ins_mode = ins_mode_in_cell
852                                 return
853                         # 6. If node is a tr element, then switch the insertion mode to "in
854                         # row" and abort these steps.
855                         if node.name is 'tr' and node.namespace is NS_HTML
856                                 ins_mode = ins_mode_in_row
857                                 return
858                         # 7. If node is a tbody, thead, or tfoot element, then switch the
859                         # insertion mode to "in table body" and abort these steps.
860                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
861                                 ins_mode = ins_mode_in_table_body
862                                 return
863                         # 8. If node is a caption element, then switch the insertion mode
864                         # to "in caption" and abort these steps.
865                         if node.name is 'caption' and node.namespace is NS_HTML
866                                 ins_mode = ins_mode_in_caption
867                                 return
868                         # 9. If node is a colgroup element, then switch the insertion mode
869                         # to "in column group" and abort these steps.
870                         if node.name is 'colgroup' and node.namespace is NS_HTML
871                                 ins_mode = ins_mode_in_column_group
872                                 return
873                         # 10. If node is a table element, then switch the insertion mode to
874                         # "in table" and abort these steps.
875                         if node.name is 'table' and node.namespace is NS_HTML
876                                 ins_mode = ins_mode_in_table
877                                 return
878                         # 11. If node is a template element, then switch the insertion mode
879                         # to the current template insertion mode and abort these steps.
880                         if node.name is 'template' and node.namespace is NS_HTML
881                                 ins_mode = template_ins_modes[0]
882                                 return
883                         # 12. If node is a head element and last is true, then switch the
884                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
885                         # these steps. (fragment case)
886                         if node.name is 'head' and node.namespace is NS_HTML and last
887                                 ins_mode = ins_mode_in_body
888                                 return
889                         # 13. If node is a head element and last is false, then switch the
890                         # insertion mode to "in head" and abort these steps.
891                         if node.name is 'head' and node.namespace is NS_HTML and last is false
892                                 ins_mode = ins_mode_in_head
893                                 return
894                         # 14. If node is a body element, then switch the insertion mode to
895                         # "in body" and abort these steps.
896                         if node.name is 'body' and node.namespace is NS_HTML
897                                 ins_mode = ins_mode_in_body
898                                 return
899                         # 15. If node is a frameset element, then switch the insertion mode
900                         # to "in frameset" and abort these steps. (fragment case)
901                         if node.name is 'frameset' and node.namespace is NS_HTML
902                                 ins_mode = ins_mode_in_frameset
903                                 return
904                         # 16. If node is an html element, run these substeps:
905                         if node.name is 'html' and node.namespace is NS_HTML
906                                 # 1. If the head element pointer is null, switch the insertion
907                                 # mode to "before head" and abort these steps. (fragment case)
908                                 if head_element_pointer is null
909                                         ins_mode = ins_mode_before_head
910                                 else
911                                         # 2. Otherwise, the head element pointer is not null,
912                                         # switch the insertion mode to "after head" and abort these
913                                         # steps.
914                                         ins_mode = ins_mode_after_head
915                                 return
916                         # 17. If last is true, then switch the insertion mode to "in body"
917                         # and abort these steps. (fragment case)
918                         if last
919                                 ins_mode = ins_mode_in_body
920                                 return
921                         # 18. Let node now be the node before node in the stack of open
922                         # elements.
923                         node_i += 1
924                         node = open_els[node_i]
925                         # 19. Return to the step labeled loop.
926                 return
927
928         # 8.2.3.2
929
930         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
931         adjusted_current_node = ->
932                 if open_els.length is 1 and flag_fragment_parsing
933                         return context_element
934                 return open_els[0]
935
936         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
937         # this implementation is structured (mostly) as described at the link above.
938         # capitalized comments are the "labels" described at the link above.
939         reconstruct_afe = ->
940                 return if afe.length is 0
941                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
942                         return
943                 # Rewind
944                 i = 0
945                 loop
946                         if i is afe.length - 1
947                                 break
948                         i += 1
949                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
950                                 i -= 1 # Advance
951                                 break
952                 # Create
953                 loop
954                         el = insert_html_element afe[i].token
955                         afe[i] = el
956                         break if i is 0
957                         i -= 1 # Advance
958                 return
959
960         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
961         # adoption agency algorithm
962         # overview here:
963         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
964         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
965         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
966         adoption_agency = (subject) ->
967 # this block implements tha W3C spec
968 #               # 1. If the current node is an HTML element whose tag name is subject,
969 #               # then run these substeps:
970 #               #
971 #               # 1. Let element be the current node.
972 #               #
973 #               # 2. Pop element off the stack of open elements.
974 #               #
975 #               # 3. If element is also in the list of active formatting elements,
976 #               # remove the element from the list.
977 #               #
978 #               # 4. Abort the adoption agency algorithm.
979 #               if open_els[0].name is subject and open_els[0].namespace is NS_HTML
980 #                       el = open_els.shift()
981 #                       # remove it from the list of active formatting elements (if found)
982 #                       for t, i in afe
983 #                               if t is el
984 #                                       afe.splice i, 1
985 #                                       break
986 #                       return
987 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
988                 # If the current node is an HTML element whose tag name is subject, and
989                 # the current node is not in the list of active formatting elements,
990                 # then pop the current node off the stack of open elements, and abort
991                 # these steps.
992                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
993                         # remove it from the list of active formatting elements (if found)
994                         in_afe = false
995                         for el, i in afe
996                                 if el is open_els[0]
997                                         in_afe = true
998                                         break
999                         unless in_afe
1000                                 open_els.shift()
1001                                 return
1002                         # fall through
1003 # END WHATWG
1004                 outer = 0
1005                 loop
1006                         if outer >= 8
1007                                 return
1008                         outer += 1
1009                         # 5. Let formatting element be the last element in the list of
1010                         # active formatting elements that: is between the end of the list
1011                         # and the last scope marker in the list, if any, or the start of
1012                         # the list otherwise, and  has the tag name subject.
1013                         fe = null
1014                         for t, fe_of_afe in afe
1015                                 if t.type is TYPE_AFE_MARKER
1016                                         break
1017                                 if t.name is subject
1018                                         fe = t
1019                                         break
1020                         # If there is no such element, then abort these steps and instead
1021                         # act as described in the "any other end tag" entry above.
1022                         if fe is null
1023                                 in_body_any_other_end_tag subject
1024                                 return
1025                         # 6. If formatting element is not in the stack of open elements,
1026                         # then this is a parse error; remove the element from the list, and
1027                         # abort these steps.
1028                         in_open_els = false
1029                         for t, fe_of_open_els in open_els
1030                                 if t is fe
1031                                         in_open_els = true
1032                                         break
1033                         unless in_open_els
1034                                 parse_error()
1035                                 # "remove it from the list" must mean afe, since it's not in open_els
1036                                 afe.splice fe_of_afe, 1
1037                                 return
1038                         # 7. If formatting element is in the stack of open elements, but
1039                         # the element is not in scope, then this is a parse error; abort
1040                         # these steps.
1041                         unless el_is_in_scope fe
1042                                 parse_error()
1043                                 return
1044                         # 8. If formatting element is not the current node, this is a parse
1045                         # error. (But do not abort these steps.)
1046                         unless open_els[0] is fe
1047                                 parse_error()
1048                                 # continue
1049                         # 9. Let furthest block be the topmost node in the stack of open
1050                         # elements that is lower in the stack than formatting element, and
1051                         # is an element in the special category. There might not be one.
1052                         fb = null
1053                         fb_of_open_els = null
1054                         for t, i in open_els
1055                                 if t is fe
1056                                         break
1057                                 if el_is_special t
1058                                         fb = t
1059                                         fb_of_open_els = i
1060                                         # and continue, to see if there's one that's more "topmost"
1061                         # 10. If there is no furthest block, then the UA must first pop all
1062                         # the nodes from the bottom of the stack of open elements, from the
1063                         # current node up to and including formatting element, then remove
1064                         # formatting element from the list of active formatting elements,
1065                         # and finally abort these steps.
1066                         if fb is null
1067                                 loop
1068                                         t = open_els.shift()
1069                                         if t is fe
1070                                                 afe.splice fe_of_afe, 1
1071                                                 return
1072                         # 11. Let common ancestor be the element immediately above
1073                         # formatting element in the stack of open elements.
1074                         ca = open_els[fe_of_open_els + 1] # common ancestor
1075
1076                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1077                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1078                         bookmark = new_aaa_bookmark()
1079                         for t, i in afe
1080                                 if t is fe
1081                                         afe.splice i, 0, bookmark
1082                                         break
1083                         node = last_node = fb
1084                         inner = 0
1085                         loop
1086                                 inner += 1
1087                                 # 3. Let node be the element immediately above node in the
1088                                 # stack of open elements, or if node is no longer in the stack
1089                                 # of open elements (e.g. because it got removed by this
1090                                 # algorithm), the element that was immediately above node in
1091                                 # the stack of open elements before node was removed.
1092                                 node_next = null
1093                                 for t, i in open_els
1094                                         if t is node
1095                                                 node_next = open_els[i + 1]
1096                                                 break
1097                                 node = node_next ? node_above
1098                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1099
1100                                 # 4. If node is formatting element, then go to the next step in
1101                                 # the overall algorithm.
1102                                 if node is fe
1103                                         break
1104                                 # 5. If inner loop counter is greater than three and node is in
1105                                 # the list of active formatting elements, then remove node from
1106                                 # the list of active formatting elements.
1107                                 node_in_afe = false
1108                                 for t, i in afe
1109                                         if t is node
1110                                                 if inner > 3
1111                                                         afe.splice i, 1
1112                                                 else
1113                                                         node_in_afe = true
1114                                                 break
1115                                 # 6. If node is not in the list of active formatting elements,
1116                                 # then remove node from the stack of open elements and then go
1117                                 # back to the step labeled inner loop.
1118                                 unless node_in_afe
1119                                         for t, i in open_els
1120                                                 if t is node
1121                                                         node_above = open_els[i + 1]
1122                                                         open_els.splice i, 1
1123                                                         break
1124                                         continue
1125                                 # 7. create an element for the token for which the element node
1126                                 # was created, in the HTML namespace, with common ancestor as
1127                                 # the intended parent; replace the entry for node in the list
1128                                 # of active formatting elements with an entry for the new
1129                                 # element, replace the entry for node in the stack of open
1130                                 # elements with an entry for the new element, and let node be
1131                                 # the new element.
1132                                 new_node = token_to_element node.token, NS_HTML, ca
1133                                 for t, i in afe
1134                                         if t is node
1135                                                 afe[i] = new_node
1136                                                 break
1137                                 for t, i in open_els
1138                                         if t is node
1139                                                 node_above = open_els[i + 1]
1140                                                 open_els[i] = new_node
1141                                                 break
1142                                 node = new_node
1143                                 # 8. If last node is furthest block, then move the
1144                                 # aforementioned bookmark to be immediately after the new node
1145                                 # in the list of active formatting elements.
1146                                 if last_node is fb
1147                                         for t, i in afe
1148                                                 if t is bookmark
1149                                                         afe.splice i, 1
1150                                                         break
1151                                         for t, i in afe
1152                                                 if t is node
1153                                                         # "after" means lower
1154                                                         afe.splice i, 0, bookmark # "after as <-
1155                                                         break
1156                                 # 9. Insert last node into node, first removing it from its
1157                                 # previous parent node if any.
1158                                 if last_node.parent?
1159                                         for c, i in last_node.parent.children
1160                                                 if c is last_node
1161                                                         last_node.parent.children.splice i, 1
1162                                                         break
1163                                 node.children.push last_node
1164                                 last_node.parent = node
1165                                 # 10. Let last node be node.
1166                                 last_node = node
1167                                 # 11. Return to the step labeled inner loop.
1168                         # 14. Insert whatever last node ended up being in the previous step
1169                         # at the appropriate place for inserting a node, but using common
1170                         # ancestor as the override target.
1171
1172                         # In the case where fe is immediately followed by fb:
1173                         #   * inner loop exits out early (node==fe)
1174                         #   * last_node is fb
1175                         #   * last_node is still in the tree (not a duplicate)
1176                         if last_node.parent?
1177                                 for c, i in last_node.parent.children
1178                                         if c is last_node
1179                                                 last_node.parent.children.splice i, 1
1180                                                 break
1181                         # can't use standard insert token thing, because it's already in
1182                         # open_els and must stay at it's current position in open_els
1183                         dest = adjusted_insertion_location ca
1184                         dest[0].children.splice dest[1], 0, last_node
1185                         last_node.parent = dest[0]
1186                         # 15. Create an element for the token for which formatting element
1187                         # was created, in the HTML namespace, with furthest block as the
1188                         # intended parent.
1189                         new_element = token_to_element fe.token, NS_HTML, fb
1190                         # 16. Take all of the child nodes of furthest block and append them
1191                         # to the element created in the last step.
1192                         while fb.children.length
1193                                 t = fb.children.shift()
1194                                 t.parent = new_element
1195                                 new_element.children.push t
1196                         # 17. Append that new element to furthest block.
1197                         new_element.parent = fb
1198                         fb.children.push new_element
1199                         # 18. Remove formatting element from the list of active formatting
1200                         # elements, and insert the new element into the list of active
1201                         # formatting elements at the position of the aforementioned
1202                         # bookmark.
1203                         for t, i in afe
1204                                 if t is fe
1205                                         afe.splice i, 1
1206                                         break
1207                         for t, i in afe
1208                                 if t is bookmark
1209                                         afe[i] = new_element
1210                                         break
1211                         # 19. Remove formatting element from the stack of open elements,
1212                         # and insert the new element into the stack of open elements
1213                         # immediately below the position of furthest block in that stack.
1214                         for t, i in open_els
1215                                 if t is fe
1216                                         open_els.splice i, 1
1217                                         break
1218                         for t, i in open_els
1219                                 if t is fb
1220                                         open_els.splice i, 0, new_element
1221                                         break
1222                         # 20. Jump back to the step labeled outer loop.
1223                 return
1224
1225         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1226         close_p_element = ->
1227                 generate_implied_end_tags 'p' # arg is exception
1228                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1229                         parse_error()
1230                 while open_els.length > 1 # just in case
1231                         el = open_els.shift()
1232                         if el.name is 'p' and el.namespace is NS_HTML
1233                                 return
1234                 return
1235         close_p_if_in_button_scope = ->
1236                 if is_in_button_scope 'p', NS_HTML
1237                         close_p_element()
1238                 return
1239
1240         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1241         # aka insert_a_character = (t) ->
1242         insert_character = (t) ->
1243                 dest = adjusted_insertion_location()
1244                 # fixfull check for Document node
1245                 if dest[1] > 0
1246                         prev = dest[0].children[dest[1] - 1]
1247                         if prev.type is TYPE_TEXT
1248                                 prev.text += t.text
1249                                 return
1250                 dest[0].children.splice dest[1], 0, t
1251                 return
1252
1253         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1254         process_token = (t) ->
1255                 acn = adjusted_current_node()
1256                 unless acn?
1257                         ins_mode t
1258                         return
1259                 if acn.namespace is NS_HTML
1260                         ins_mode t
1261                         return
1262                 if is_mathml_text_integration_point(acn)
1263                         if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
1264                                 ins_mode t
1265                                 return
1266                         if t.type is TYPE_TEXT
1267                                 ins_mode t
1268                                 return
1269                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1270                         ins_mode t
1271                         return
1272                 if is_html_integration acn
1273                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1274                                 ins_mode t
1275                                 return
1276                 if t.type is TYPE_EOF
1277                         ins_mode t
1278                         return
1279                 in_foreign_content t
1280                 return
1281
1282         # 8.2.5.1
1283         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1284         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1285         adjusted_insertion_location = (override_target = null) ->
1286                 # 1. If there was an override target specified, then let target be the
1287                 # override target.
1288                 if override_target?
1289                         target = override_target
1290                 else # Otherwise, let target be the current node.
1291                         target = open_els[0]
1292                 # 2. Determine the adjusted insertion location using the first matching
1293                 # steps from the following list:
1294                 #
1295                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1296                 # thead, or tr element Foster parenting happens when content is
1297                 # misnested in tables.
1298                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1299                         loop # once. this is here so we can ``break`` to "abort these substeps"
1300                                 # 1. Let last template be the last template element in the
1301                                 # stack of open elements, if any.
1302                                 last_template = null
1303                                 last_template_i = null
1304                                 for el, i in open_els
1305                                         if el.name is 'template' and el.namespace is NS_HTML
1306                                                 last_template = el
1307                                                 last_template_i = i
1308                                                 break
1309                                 # 2. Let last table be the last table element in the stack of
1310                                 # open elements, if any.
1311                                 last_table = null
1312                                 last_table_i
1313                                 for el, i in open_els
1314                                         if el.name is 'table' and el.namespace is NS_HTML
1315                                                 last_table = el
1316                                                 last_table_i = i
1317                                                 break
1318                                 # 3. If there is a last template and either there is no last
1319                                 # table, or there is one, but last template is lower (more
1320                                 # recently added) than last table in the stack of open
1321                                 # elements, then: let adjusted insertion location be inside
1322                                 # last template's template contents, after its last child (if
1323                                 # any), and abort these substeps.
1324                                 if last_template and (last_table is null or last_template_i < last_table_i)
1325                                         target = last_template # fixfull should be it's contents
1326                                         target_i = target.children.length
1327                                         break
1328                                 # 4. If there is no last table, then let adjusted insertion
1329                                 # location be inside the first element in the stack of open
1330                                 # elements (the html element), after its last child (if any),
1331                                 # and abort these substeps. (fragment case)
1332                                 if last_table is null
1333                                         # this is odd
1334                                         target = open_els[open_els.length - 1]
1335                                         target_i = target.children.length
1336                                         break
1337                                 # 5. If last table has a parent element, then let adjusted
1338                                 # insertion location be inside last table's parent element,
1339                                 # immediately before last table, and abort these substeps.
1340                                 if last_table.parent?
1341                                         for c, i in last_table.parent.children
1342                                                 if c is last_table
1343                                                         target = last_table.parent
1344                                                         target_i = i
1345                                                         break
1346                                         break
1347                                 # 6. Let previous element be the element immediately above last
1348                                 # table in the stack of open elements.
1349                                 #
1350                                 # huh? how could it not have a parent?
1351                                 previous_element = open_els[last_table_i + 1]
1352                                 # 7. Let adjusted insertion location be inside previous
1353                                 # element, after its last child (if any).
1354                                 target = previous_element
1355                                 target_i = target.children.length
1356                                 # Note: These steps are involved in part because it's possible
1357                                 # for elements, the table element in this case in particular,
1358                                 # to have been moved by a script around in the DOM, or indeed
1359                                 # removed from the DOM entirely, after the element was inserted
1360                                 # by the parser.
1361                                 break # don't really loop
1362                 else
1363                         # Otherwise Let adjusted insertion location be inside target, after
1364                         # its last child (if any).
1365                         target_i = target.children.length
1366
1367                 # 3. If the adjusted insertion location is inside a template element,
1368                 # let it instead be inside the template element's template contents,
1369                 # after its last child (if any).
1370                 # fixfull (template)
1371
1372                 # 4. Return the adjusted insertion location.
1373                 return [target, target_i]
1374
1375         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1376         # aka create_an_element_for_token
1377         token_to_element = (t, namespace, intended_parent) ->
1378                 # convert attributes into a hash
1379                 attrs = {}
1380                 for a in t.attrs_a
1381                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1382                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1383
1384                 # TODO 2. If the newly created element has an xmlns attribute in the
1385                 # XMLNS namespace whose value is not exactly the same as the element's
1386                 # namespace, that is a parse error. Similarly, if the newly created
1387                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1388                 # value is not the XLink Namespace, that is a parse error.
1389
1390                 # fixfull: the spec says stuff about form pointers and ownerDocument
1391
1392                 return el
1393
1394         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1395         insert_foreign_element = (token, namespace) ->
1396                 ail = adjusted_insertion_location()
1397                 ail_el = ail[0]
1398                 ail_i = ail[1]
1399                 el = token_to_element token, namespace, ail_el
1400                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1401                 el.parent = ail_el
1402                 ail_el.children.splice ail_i, 0, el
1403                 open_els.unshift el
1404                 return el
1405         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1406         insert_html_element = (token) ->
1407                 return insert_foreign_element token, NS_HTML
1408
1409         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1410         # position should be [node, index_within_children]
1411         insert_comment = (t, position = null) ->
1412                 position ?= adjusted_insertion_location()
1413                 position[0].children.splice position[1], 0, t
1414                 return
1415
1416         # 8.2.5.2
1417         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1418         parse_generic_raw_text = (t) ->
1419                 insert_html_element t
1420                 tok_state = tok_state_rawtext
1421                 original_ins_mode = ins_mode
1422                 ins_mode = ins_mode_text
1423                 return
1424         parse_generic_rcdata_text = (t) ->
1425                 insert_html_element t
1426                 tok_state = tok_state_rcdata
1427                 original_ins_mode = ins_mode
1428                 ins_mode = ins_mode_text
1429                 return
1430
1431         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1432         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1433         generate_implied_end_tags = (except = null) ->
1434                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1435                         open_els.shift()
1436                 return
1437
1438         # 8.2.5.4 The rules for parsing tokens in HTML content
1439         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1440
1441         # 8.2.5.4.1 The "initial" insertion mode
1442         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1443         is_quirks_yes_doctype = (t) ->
1444                 if t.flag 'force-quirks'
1445                         return true
1446                 if t.name isnt 'html'
1447                         return true
1448                 if t.public_identifier?
1449                         pi = t.public_identifier.toLowerCase()
1450                         for p in quirks_yes_pi_prefixes
1451                                 if pi.substr(0, p.length) is p
1452                                         return true
1453                         if pi is '-//w3o//dtd w3 html strict 3.0//en//' or pi is '-/w3c/dtd html 4.0 transitional/en' or pi is 'html'
1454                                 return true
1455                 if t.system_identifier?
1456                         if t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
1457                                 return true
1458                 else if t.public_identifier?
1459                         # already did this: pi = t.public_identifier.toLowerCase()
1460                         if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1461                                 return true
1462                 return false
1463         is_quirks_limited_doctype = (t) ->
1464                 if t.public_identifier?
1465                         pi = t.public_identifier.toLowerCase()
1466                         if pi.substr(0, 32) is '-//w3c//dtd xhtml 1.0 frameset//' or pi.substr(0, 36) is '-//w3c//dtd xhtml 1.0 transitional//'
1467                                 return true
1468                         if t.system_identifier?
1469                                 if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1470                                         return true
1471                 return false
1472         ins_mode_initial = (t) ->
1473                 if is_space_tok t
1474                         return
1475                 if t.type is TYPE_COMMENT
1476                         # ?fixfull
1477                         doc.children.push t
1478                         return
1479                 if t.type is TYPE_DOCTYPE
1480                         # fixfull syntax error from first paragraph and following bullets
1481                         # fixfull set doc.doctype
1482                         # fixfull is the "not an iframe srcdoc" thing relevant?
1483                         if is_quirks_yes_doctype t
1484                                 doc.flag 'quirks mode', QUIRKS_YES
1485                         else if is_quirks_limited_doctype t
1486                                 doc.flag 'quirks mode', QUIRKS_LIMITED
1487                         doc.children.push t
1488                         ins_mode = ins_mode_before_html
1489                         return
1490                 # Anything else
1491                 # fixfull not iframe srcdoc?
1492                 parse_error()
1493                 doc.flag 'quirks mode', QUIRKS_YES
1494                 ins_mode = ins_mode_before_html
1495                 process_token t
1496                 return
1497
1498         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1499         ins_mode_before_html = (t) ->
1500                 if t.type is TYPE_DOCTYPE
1501                         parse_error()
1502                         return
1503                 if t.type is TYPE_COMMENT
1504                         doc.children.push t
1505                         return
1506                 if is_space_tok t
1507                         return
1508                 if t.type is TYPE_START_TAG and t.name is 'html'
1509                         el = token_to_element t, NS_HTML, doc
1510                         doc.children.push el
1511                         el.document = doc
1512                         open_els.unshift(el)
1513                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1514                         ins_mode = ins_mode_before_head
1515                         return
1516                 if t.type is TYPE_END_TAG
1517                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1518                                 # fall through to "anything else"
1519                         else
1520                                 parse_error()
1521                                 return
1522                 # Anything else
1523                 el = token_to_element new_open_tag('html'), NS_HTML, doc
1524                 doc.children.push el
1525                 el.document = doc
1526                 open_els.unshift el
1527                 # ?fixfull browsing context
1528                 ins_mode = ins_mode_before_head
1529                 process_token t
1530                 return
1531
1532         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1533         ins_mode_before_head = (t) ->
1534                 if is_space_tok t
1535                         return
1536                 if t.type is TYPE_COMMENT
1537                         insert_comment t
1538                         return
1539                 if t.type is TYPE_DOCTYPE
1540                         parse_error()
1541                         return
1542                 if t.type is TYPE_START_TAG and t.name is 'html'
1543                         ins_mode_in_body t
1544                         return
1545                 if t.type is TYPE_START_TAG and t.name is 'head'
1546                         el = insert_html_element t
1547                         head_element_pointer = el
1548                         ins_mode = ins_mode_in_head
1549                         return
1550                 if t.type is TYPE_END_TAG
1551                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1552                                 # fall through to Anything else below
1553                         else
1554                                 parse_error()
1555                                 return
1556                 # Anything else
1557                 el = insert_html_element new_open_tag 'head'
1558                 head_element_pointer = el
1559                 ins_mode = ins_mode_in_head
1560                 process_token t
1561                 return
1562
1563         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1564         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1565                 open_els.shift() # spec says this will be a 'head' node
1566                 ins_mode = ins_mode_after_head
1567                 process_token t
1568                 return
1569         ins_mode_in_head = (t) ->
1570                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1571                         insert_character t
1572                         return
1573                 if t.type is TYPE_COMMENT
1574                         insert_comment t
1575                         return
1576                 if t.type is TYPE_DOCTYPE
1577                         parse_error()
1578                         return
1579                 if t.type is TYPE_START_TAG and t.name is 'html'
1580                         ins_mode_in_body t
1581                         return
1582                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1583                         el = insert_html_element t
1584                         open_els.shift()
1585                         t.acknowledge_self_closing()
1586                         return
1587                 if t.type is TYPE_START_TAG and t.name is 'meta'
1588                         el = insert_html_element t
1589                         open_els.shift()
1590                         t.acknowledge_self_closing()
1591                         # fixfull encoding stuff
1592                         return
1593                 if t.type is TYPE_START_TAG and t.name is 'title'
1594                         parse_generic_rcdata_text t
1595                         return
1596                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1597                         parse_generic_raw_text t
1598                         return
1599                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1600                         insert_html_element t
1601                         ins_mode = ins_mode_in_head_noscript
1602                         return
1603                 if t.type is TYPE_START_TAG and t.name is 'script'
1604                         ail = adjusted_insertion_location()
1605                         el = token_to_element t, NS_HTML, ail
1606                         el.flag 'parser-inserted', true
1607                         # fixfull frament case
1608                         ail[0].children.splice ail[1], 0, el
1609                         open_els.unshift el
1610                         tok_state = tok_state_script_data
1611                         original_ins_mode = ins_mode # make sure orig... is defined
1612                         ins_mode = ins_mode_text
1613                         return
1614                 if t.type is TYPE_END_TAG and t.name is 'head'
1615                         open_els.shift() # will be a head element... spec says so
1616                         ins_mode = ins_mode_after_head
1617                         return
1618                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1619                         ins_mode_in_head_else t
1620                         return
1621                 if t.type is TYPE_START_TAG and t.name is 'template'
1622                         insert_html_element t
1623                         afe_push_marker()
1624                         flag_frameset_ok = false
1625                         ins_mode = ins_mode_in_template
1626                         template_ins_modes.unshift ins_mode_in_template
1627                         return
1628                 if t.type is TYPE_END_TAG and t.name is 'template'
1629                         if template_tag_is_open()
1630                                 generate_implied_end_tags
1631                                 if open_els[0].name isnt 'template'
1632                                         parse_error()
1633                                 loop
1634                                         el = open_els.shift()
1635                                         if el.name is 'template' and el.namespace is NS_HTML
1636                                                 break
1637                                 clear_afe_to_marker()
1638                                 template_ins_modes.shift()
1639                                 reset_ins_mode()
1640                         else
1641                                 parse_error()
1642                         return
1643                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1644                         parse_error()
1645                         return
1646                 ins_mode_in_head_else t
1647                 return
1648
1649         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1650         ins_mode_in_head_noscript_else = (t) ->
1651                 parse_error()
1652                 open_els.shift()
1653                 ins_mode = ins_mode_in_head
1654                 process_token t
1655                 return
1656         ins_mode_in_head_noscript = (t) ->
1657                 if t.type is TYPE_DOCTYPE
1658                         parse_error()
1659                         return
1660                 if t.type is TYPE_START_TAG and t.name is 'html'
1661                         ins_mode_in_body t
1662                         return
1663                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1664                         open_els.shift()
1665                         ins_mode = ins_mode_in_head
1666                         return
1667                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1668                         ins_mode_in_head t
1669                         return
1670                 if t.type is TYPE_END_TAG and t.name is 'br'
1671                         ins_mode_in_head_noscript_else t
1672                         return
1673                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1674                         parse_error()
1675                         return
1676                 # Anything else
1677                 ins_mode_in_head_noscript_else t
1678                 return
1679
1680         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1681         ins_mode_after_head_else = (t) ->
1682                 body_tok = new_open_tag 'body'
1683                 insert_html_element body_tok
1684                 ins_mode = ins_mode_in_body
1685                 process_token t
1686                 return
1687         ins_mode_after_head = (t) ->
1688                 if is_space_tok t
1689                         insert_character t
1690                         return
1691                 if t.type is TYPE_COMMENT
1692                         insert_comment t
1693                         return
1694                 if t.type is TYPE_DOCTYPE
1695                         parse_error()
1696                         return
1697                 if t.type is TYPE_START_TAG and t.name is 'html'
1698                         ins_mode_in_body t
1699                         return
1700                 if t.type is TYPE_START_TAG and t.name is 'body'
1701                         insert_html_element t
1702                         flag_frameset_ok = false
1703                         ins_mode = ins_mode_in_body
1704                         return
1705                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1706                         insert_html_element t
1707                         ins_mode = ins_mode_in_frameset
1708                         return
1709                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1710                         parse_error()
1711                         open_els.unshift head_element_pointer
1712                         ins_mode_in_head t
1713                         for el, i in open_els
1714                                 if el is head_element_pointer
1715                                         open_els.splice i, 1
1716                                         return
1717                         return
1718                 if t.type is TYPE_END_TAG and t.name is 'template'
1719                         ins_mode_in_head t
1720                         return
1721                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1722                         ins_mode_after_head_else t
1723                         return
1724                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1725                         parse_error()
1726                         return
1727                 # Anything else
1728                 ins_mode_after_head_else t
1729                 return
1730
1731         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1732         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1733                 node = open_els[0]
1734                 loop
1735                         if node.name is name and node.namespace is NS_HTML
1736                                 generate_implied_end_tags name # arg is exception
1737                                 unless node is open_els[0]
1738                                         parse_error()
1739                                 loop
1740                                         el = open_els.shift()
1741                                         if el is node
1742                                                 return
1743                         if special_elements[node.name] is node.namespace
1744                                 parse_error()
1745                                 return
1746                         for el, i in open_els
1747                                 if node is el
1748                                         node = open_els[i + 1]
1749                                         break
1750                 return
1751         ins_mode_in_body = (t) ->
1752                 if t.type is TYPE_TEXT and t.text is "\u0000"
1753                         parse_error()
1754                         return
1755                 if is_space_tok t
1756                         reconstruct_afe()
1757                         insert_character t
1758                         return
1759                 if t.type is TYPE_TEXT
1760                         reconstruct_afe()
1761                         insert_character t
1762                         flag_frameset_ok = false
1763                         return
1764                 if t.type is TYPE_COMMENT
1765                         insert_comment t
1766                         return
1767                 if t.type is TYPE_DOCTYPE
1768                         parse_error()
1769                         return
1770                 if t.type is TYPE_START_TAG and t.name is 'html'
1771                         parse_error()
1772                         return if template_tag_is_open()
1773                         root_attrs = open_els[open_els.length - 1].attrs
1774                         for a in t.attrs_a
1775                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1776                         return
1777
1778                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1779                         ins_mode_in_head t
1780                         return
1781                 if t.type is TYPE_START_TAG and t.name is 'body'
1782                         parse_error()
1783                         return if open_els.length < 2
1784                         second = open_els[open_els.length - 2]
1785                         return unless second.namespace is NS_HTML
1786                         return unless second.name is 'body'
1787                         return if template_tag_is_open()
1788                         flag_frameset_ok = false
1789                         for a in t.attrs_a
1790                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1791                         return
1792                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1793                         parse_error()
1794                         return if open_els.length < 2
1795                         second_i = open_els.length - 2
1796                         second = open_els[second_i]
1797                         return unless second.namespace is NS_HTML
1798                         return unless second.name is 'body'
1799                         if flag_frameset_ok is false
1800                                 return
1801                         if second.parent?
1802                                 for el, i in second.parent.children
1803                                         if el is second
1804                                                 second.parent.children.splice i, 1
1805                                                 break
1806                         open_els.splice second_i, 1
1807                         # pop everything except the "root html element"
1808                         while open_els.length > 1
1809                                 open_els.shift()
1810                         insert_html_element t
1811                         ins_mode = ins_mode_in_frameset
1812                         return
1813                 if t.type is TYPE_EOF
1814                         ok_tags = {
1815                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1816                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1817                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1818                         }
1819                         for el in open_els
1820                                 unless ok_tags[t.name] is el.namespace
1821                                         parse_error()
1822                                         break
1823                         if template_ins_modes.length > 0
1824                                 ins_mode_in_template t
1825                         else
1826                                 stop_parsing()
1827                         return
1828                 if t.type is TYPE_END_TAG and t.name is 'body'
1829                         unless is_in_scope 'body', NS_HTML
1830                                 parse_error()
1831                                 return
1832                         ok_tags = {
1833                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1834                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1835                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1836                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1837                                 html:NS_HTML
1838                         }
1839                         for el in open_els
1840                                 unless ok_tags[t.name] is el.namespace
1841                                         parse_error()
1842                                         break
1843                         ins_mode = ins_mode_after_body
1844                         return
1845                 if t.type is TYPE_END_TAG and t.name is 'html'
1846                         unless is_in_scope 'body', NS_HTML
1847                                 parse_error()
1848                                 return
1849                         ok_tags = {
1850                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1851                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1852                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1853                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1854                                 html:NS_HTML
1855                         }
1856                         for el in open_els
1857                                 unless ok_tags[t.name] is el.namespace
1858                                         parse_error()
1859                                         break
1860                         ins_mode = ins_mode_after_body
1861                         process_token t
1862                         return
1863                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1864                         close_p_if_in_button_scope()
1865                         insert_html_element t
1866                         return
1867                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1868                         close_p_if_in_button_scope()
1869                         if h_tags[open_els[0].name] is open_els[0].namespace
1870                                 parse_error()
1871                                 open_els.shift()
1872                         insert_html_element t
1873                         return
1874                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1875                         close_p_if_in_button_scope()
1876                         insert_html_element t
1877                         eat_next_token_if_newline()
1878                         flag_frameset_ok = false
1879                         return
1880                 if t.type is TYPE_START_TAG and t.name is 'form'
1881                         unless form_element_pointer is null or template_tag_is_open()
1882                                 parse_error()
1883                                 return
1884                         close_p_if_in_button_scope()
1885                         el = insert_html_element t
1886                         unless template_tag_is_open()
1887                                 form_element_pointer = el
1888                         return
1889                 if t.type is TYPE_START_TAG and t.name is 'li'
1890                         flag_frameset_ok = false
1891                         for node in open_els
1892                                 if node.name is 'li' and node.namespace is NS_HTML
1893                                         generate_implied_end_tags 'li' # arg is exception
1894                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1895                                                 parse_error()
1896                                         loop
1897                                                 el = open_els.shift()
1898                                                 if el.name is 'li' and el.namespace is NS_HTML
1899                                                         break
1900                                         break
1901                                 if el_is_special_not_adp node
1902                                                 break
1903                         close_p_if_in_button_scope()
1904                         insert_html_element t
1905                         return
1906                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1907                         flag_frameset_ok = false
1908                         for node in open_els
1909                                 if node.name is 'dd' and node.namespace is NS_HTML
1910                                         generate_implied_end_tags 'dd' # arg is exception
1911                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1912                                                 parse_error()
1913                                         loop
1914                                                 el = open_els.shift()
1915                                                 if el.name is 'dd' and el.namespace is NS_HTML
1916                                                         break
1917                                         break
1918                                 if node.name is 'dt' and node.namespace is NS_HTML
1919                                         generate_implied_end_tags 'dt' # arg is exception
1920                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1921                                                 parse_error()
1922                                         loop
1923                                                 el = open_els.shift()
1924                                                 if el.name is 'dt' and el.namespace is NS_HTML
1925                                                         break
1926                                         break
1927                                 if el_is_special_not_adp node
1928                                         break
1929                         close_p_if_in_button_scope()
1930                         insert_html_element t
1931                         return
1932                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1933                         close_p_if_in_button_scope()
1934                         insert_html_element t
1935                         tok_state = tok_state_plaintext
1936                         return
1937                 if t.type is TYPE_START_TAG and t.name is 'button'
1938                         if is_in_scope 'button', NS_HTML
1939                                 parse_error()
1940                                 generate_implied_end_tags()
1941                                 loop
1942                                         el = open_els.shift()
1943                                         if el.name is 'button' and el.namespace is NS_HTML
1944                                                 break
1945                         reconstruct_afe()
1946                         insert_html_element t
1947                         flag_frameset_ok = false
1948                         return
1949                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1950                         unless is_in_scope t.name, NS_HTML
1951                                 parse_error()
1952                                 return
1953                         generate_implied_end_tags()
1954                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1955                                 parse_error()
1956                         loop
1957                                 el = open_els.shift()
1958                                 if el.name is t.name and el.namespace is NS_HTML
1959                                         return
1960                         return
1961                 if t.type is TYPE_END_TAG and t.name is 'form'
1962                         unless template_tag_is_open()
1963                                 node = form_element_pointer
1964                                 form_element_pointer = null
1965                                 if node is null or not el_is_in_scope node
1966                                         parse_error()
1967                                         return
1968                                 generate_implied_end_tags()
1969                                 if open_els[0] isnt node
1970                                         parse_error()
1971                                 for el, i in open_els
1972                                         if el is node
1973                                                 open_els.splice i, 1
1974                                                 break
1975                         else
1976                                 unless is_in_scope 'form', NS_HTML
1977                                         parse_error()
1978                                         return
1979                                 generate_implied_end_tags()
1980                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1981                                         parse_error()
1982                                 loop
1983                                         el = open_els.shift()
1984                                         if el.name is 'form' and el.namespace is NS_HTML
1985                                                 break
1986                         return
1987                 if t.type is TYPE_END_TAG and t.name is 'p'
1988                         unless is_in_button_scope 'p', NS_HTML
1989                                 parse_error()
1990                                 insert_html_element new_open_tag 'p'
1991                         close_p_element()
1992                         return
1993                 if t.type is TYPE_END_TAG and t.name is 'li'
1994                         unless is_in_li_scope 'li', NS_HTML
1995                                 parse_error()
1996                                 return
1997                         generate_implied_end_tags 'li' # arg is exception
1998                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1999                                 parse_error()
2000                         loop
2001                                 el = open_els.shift()
2002                                 if el.name is 'li' and el.namespace is NS_HTML
2003                                         break
2004                         return
2005                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
2006                         unless is_in_scope t.name, NS_HTML
2007                                 parse_error()
2008                                 return
2009                         generate_implied_end_tags t.name # arg is exception
2010                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2011                                 parse_error()
2012                         loop
2013                                 el = open_els.shift()
2014                                 if el.name is t.name and el.namespace is NS_HTML
2015                                         break
2016                         return
2017                 if t.type is TYPE_END_TAG and h_tags[t.name]?
2018                         h_in_scope = false
2019                         for el in open_els
2020                                 if h_tags[el.name] is el.namespace
2021                                         h_in_scope = true
2022                                         break
2023                                 if standard_scopers[el.name] is el.namespace
2024                                         break
2025                         unless h_in_scope
2026                                 parse_error()
2027                                 return
2028                         generate_implied_end_tags()
2029                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2030                                 parse_error()
2031                         loop
2032                                 el = open_els.shift()
2033                                 if h_tags[el.name] is el.namespace
2034                                         break
2035                         return
2036                 # deep breath!
2037                 if t.type is TYPE_START_TAG and t.name is 'a'
2038                         # If the list of active formatting elements contains an a element
2039                         # between the end of the list and the last marker on the list (or
2040                         # the start of the list if there is no marker on the list), then
2041                         # this is a parse error; run the adoption agency algorithm for the
2042                         # tag name "a", then remove that element from the list of active
2043                         # formatting elements and the stack of open elements if the
2044                         # adoption agency algorithm didn't already remove it (it might not
2045                         # have if the element is not in table scope).
2046                         found = false
2047                         for el in afe
2048                                 if el.type is TYPE_AFE_MARKER
2049                                         break
2050                                 if el.name is 'a' and el.namespace is NS_HTML
2051                                         found = el
2052                         if found?
2053                                 parse_error()
2054                                 adoption_agency 'a'
2055                                 for el, i in afe
2056                                         if el is found
2057                                                 afe.splice i, 1
2058                                 for el, i in open_els
2059                                         if el is found
2060                                                 open_els.splice i, 1
2061                         reconstruct_afe()
2062                         el = insert_html_element t
2063                         afe_push el
2064                         return
2065                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2066                         reconstruct_afe()
2067                         el = insert_html_element t
2068                         afe_push el
2069                         return
2070                 if t.type is TYPE_START_TAG and t.name is 'nobr'
2071                         reconstruct_afe()
2072                         if is_in_scope 'nobr', NS_HTML
2073                                 parse_error()
2074                                 adoption_agency 'nobr'
2075                                 reconstruct_afe()
2076                         el = insert_html_element t
2077                         afe_push el
2078                         return
2079                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2080                         adoption_agency t.name
2081                         return
2082                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2083                         reconstruct_afe()
2084                         insert_html_element t
2085                         afe_push_marker()
2086                         flag_frameset_ok = false
2087                         return
2088                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2089                         unless is_in_scope t.name, NS_HTML
2090                                 parse_error()
2091                                 return
2092                         generate_implied_end_tags()
2093                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2094                                 parse_error()
2095                         loop
2096                                 el = open_els.shift()
2097                                 if el.name is t.name and el.namespace is NS_HTML
2098                                         break
2099                         clear_afe_to_marker()
2100                         return
2101                 if t.type is TYPE_START_TAG and t.name is 'table'
2102                         unless doc.flag('quirks mode') is QUIRKS_YES
2103                                 close_p_if_in_button_scope() # test
2104                         insert_html_element t
2105                         flag_frameset_ok = false
2106                         ins_mode = ins_mode_in_table
2107                         return
2108                 if t.type is TYPE_END_TAG and t.name is 'br'
2109                         parse_error()
2110                         # W3C: t.type = TYPE_START_TAG
2111                         t = new_open_tag 'br' # WHATWG
2112                         # fall through
2113                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2114                         reconstruct_afe()
2115                         insert_html_element t
2116                         open_els.shift()
2117                         t.acknowledge_self_closing()
2118                         flag_frameset_ok = false
2119                         return
2120                 if t.type is TYPE_START_TAG and t.name is 'input'
2121                         reconstruct_afe()
2122                         insert_html_element t
2123                         open_els.shift()
2124                         t.acknowledge_self_closing()
2125                         unless is_input_hidden_tok t
2126                                 flag_frameset_ok = false
2127                         return
2128                 if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
2129                         # WHATWG adds 'menuitem' for this block
2130                         insert_html_element t
2131                         open_els.shift()
2132                         t.acknowledge_self_closing()
2133                         return
2134                 if t.type is TYPE_START_TAG and t.name is 'hr'
2135                         close_p_if_in_button_scope()
2136                         insert_html_element t
2137                         open_els.shift()
2138                         t.acknowledge_self_closing()
2139                         flag_frameset_ok = false
2140                         return
2141                 if t.type is TYPE_START_TAG and t.name is 'image'
2142                         parse_error()
2143                         t.name = 'img'
2144                         process_token t
2145                         return
2146                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2147                         parse_error()
2148                         if template_tag_is_open() is false and form_element_pointer isnt null
2149                                 return
2150                         t.acknowledge_self_closing()
2151                         flag_frameset_ok = false
2152                         close_p_if_in_button_scope()
2153                         el = insert_html_element new_open_tag 'form'
2154                         unless template_tag_is_open()
2155                                 form_element_pointer = el
2156                         for a in t.attrs_a
2157                                 if a[0] is 'action'
2158                                         el.attrs['action'] = a[1]
2159                                         break
2160                         insert_html_element new_open_tag 'hr'
2161                         open_els.shift()
2162                         reconstruct_afe()
2163                         insert_html_element new_open_tag 'label'
2164                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2165                         input_el = new_open_tag 'input'
2166                         prompt = null
2167                         for a in t.attrs_a
2168                                 if a[0] is 'prompt'
2169                                         prompt = a[1]
2170                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2171                                         input_el.attrs_a.push [a[0], a[1]]
2172                         input_el.attrs_a.push ['name', 'isindex']
2173                         # fixfull this next bit is in english... internationalize?
2174                         prompt ?= "This is a searchable index. Enter search keywords: "
2175                         insert_character new_character_token prompt # fixfull split
2176                         # TODO submit typo "balue" in spec
2177                         insert_html_element input_el
2178                         open_els.shift()
2179                         # insert_character '' # you can put chars here if promt attr missing
2180                         open_els.shift()
2181                         insert_html_element new_open_tag 'hr'
2182                         open_els.shift()
2183                         open_els.shift()
2184                         unless template_tag_is_open()
2185                                 form_element_pointer = null
2186                         return
2187                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2188                         insert_html_element t
2189                         eat_next_token_if_newline()
2190                         tok_state = tok_state_rcdata
2191                         original_ins_mode = ins_mode
2192                         flag_frameset_ok = false
2193                         ins_mode = ins_mode_text
2194                         return
2195                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2196                         close_p_if_in_button_scope()
2197                         reconstruct_afe()
2198                         flag_frameset_ok = false
2199                         parse_generic_raw_text t
2200                         return
2201                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2202                         flag_frameset_ok = false
2203                         parse_generic_raw_text t
2204                         return
2205                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2206                         parse_generic_raw_text t
2207                         return
2208                 if t.type is TYPE_START_TAG and t.name is 'select'
2209                         reconstruct_afe()
2210                         insert_html_element t
2211                         flag_frameset_ok = false
2212                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2213                                 ins_mode = ins_mode_in_select_in_table
2214                         else
2215                                 ins_mode = ins_mode_in_select
2216                         return
2217                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2218                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2219                                 open_els.shift()
2220                         reconstruct_afe()
2221                         insert_html_element t
2222                         return
2223 # this comment block implements the W3C spec
2224 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2225 #                       if is_in_scope 'ruby', NS_HTML
2226 #                               generate_implied_end_tags()
2227 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2228 #                                       parse_error()
2229 #                       insert_html_element t
2230 #                       return
2231 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2232 #                       if is_in_scope 'ruby', NS_HTML
2233 #                               generate_implied_end_tags 'rtc' # arg is exception
2234 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2235 #                                       parse_error()
2236 #                       insert_html_element t
2237 #                       return
2238 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2239                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2240                         if is_in_scope 'ruby', NS_HTML
2241                                 generate_implied_end_tags()
2242                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2243                                         parse_error()
2244                         insert_html_element t
2245                         return
2246                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2247                         if is_in_scope 'ruby', NS_HTML
2248                                 generate_implied_end_tags 'rtc'
2249                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2250                                         parse_error()
2251                         insert_html_element t
2252                         return
2253 # end WHATWG chunk
2254                 if t.type is TYPE_START_TAG and t.name is 'math'
2255                         reconstruct_afe()
2256                         adjust_mathml_attributes t
2257                         adjust_foreign_attributes t
2258                         insert_foreign_element t, NS_MATHML
2259                         if t.flag 'self-closing'
2260                                 open_els.shift()
2261                                 t.acknowledge_self_closing()
2262                         return
2263                 if t.type is TYPE_START_TAG and t.name is 'svg'
2264                         reconstruct_afe()
2265                         adjust_svg_attributes t
2266                         adjust_foreign_attributes t
2267                         insert_foreign_element t, NS_SVG
2268                         if t.flag 'self-closing'
2269                                 open_els.shift()
2270                                 t.acknowledge_self_closing()
2271                         return
2272                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2273                         parse_error()
2274                         return
2275                 if t.type is TYPE_START_TAG # any other start tag
2276                         reconstruct_afe()
2277                         insert_html_element t
2278                         return
2279                 if t.type is TYPE_END_TAG # any other end tag
2280                         in_body_any_other_end_tag t.name
2281                         return
2282                 return
2283
2284         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2285         ins_mode_text = (t) ->
2286                 if t.type is TYPE_TEXT
2287                         insert_character t
2288                         return
2289                 if t.type is TYPE_EOF
2290                         parse_error()
2291                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2292                                 open_els[0].flag 'already started', true
2293                         open_els.shift()
2294                         ins_mode = original_ins_mode
2295                         process_token t
2296                         return
2297                 if t.type is TYPE_END_TAG and t.name is 'script'
2298                         open_els.shift()
2299                         ins_mode = original_ins_mode
2300                         # fixfull the spec seems to assume that I'm going to run the script
2301                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2302                         return
2303                 if t.type is TYPE_END_TAG
2304                         open_els.shift()
2305                         ins_mode = original_ins_mode
2306                         return
2307                 return
2308
2309         # the functions below implement the tokenizer stats described here:
2310         # http://www.w3.org/TR/html5/syntax.html#tokenization
2311
2312         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2313         ins_mode_in_table_else = (t) ->
2314                 parse_error()
2315                 flag_foster_parenting = true
2316                 ins_mode_in_body t
2317                 flag_foster_parenting = false
2318                 return
2319         ins_mode_in_table = (t) ->
2320                 switch t.type
2321                         when TYPE_TEXT
2322                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2323                                         pending_table_character_tokens = []
2324                                         original_ins_mode = ins_mode
2325                                         ins_mode = ins_mode_in_table_text
2326                                         process_token t
2327                                 else
2328                                         ins_mode_in_table_else t
2329                         when TYPE_COMMENT
2330                                 insert_comment t
2331                         when TYPE_DOCTYPE
2332                                 parse_error()
2333                         when TYPE_START_TAG
2334                                 switch t.name
2335                                         when 'caption'
2336                                                 clear_stack_to_table_context()
2337                                                 afe_push_marker()
2338                                                 insert_html_element t
2339                                                 ins_mode = ins_mode_in_caption
2340                                         when 'colgroup'
2341                                                 clear_stack_to_table_context()
2342                                                 insert_html_element t
2343                                                 ins_mode = ins_mode_in_column_group
2344                                         when 'col'
2345                                                 clear_stack_to_table_context()
2346                                                 insert_html_element new_open_tag 'colgroup'
2347                                                 ins_mode = ins_mode_in_column_group
2348                                                 process_token t
2349                                         when 'tbody', 'tfoot', 'thead'
2350                                                 clear_stack_to_table_context()
2351                                                 insert_html_element t
2352                                                 ins_mode = ins_mode_in_table_body
2353                                         when 'td', 'th', 'tr'
2354                                                 clear_stack_to_table_context()
2355                                                 insert_html_element new_open_tag 'tbody'
2356                                                 ins_mode = ins_mode_in_table_body
2357                                                 process_token t
2358                                         when 'table'
2359                                                 parse_error()
2360                                                 if is_in_table_scope 'table', NS_HTML
2361                                                         loop
2362                                                                 el = open_els.shift()
2363                                                                 if el.name is 'table' and el.namespace is NS_HTML
2364                                                                         break
2365                                                         reset_ins_mode()
2366                                                         process_token t
2367                                         when 'style', 'script', 'template'
2368                                                 ins_mode_in_head t
2369                                         when 'input'
2370                                                 unless is_input_hidden_tok t
2371                                                         ins_mode_in_table_else t
2372                                                 else
2373                                                         parse_error()
2374                                                         el = insert_html_element t
2375                                                         open_els.shift()
2376                                                         t.acknowledge_self_closing()
2377                                         when 'form'
2378                                                 parse_error()
2379                                                 if form_element_pointer?
2380                                                         return
2381                                                 if template_tag_is_open()
2382                                                         return
2383                                                 form_element_pointer = insert_html_element t
2384                                                 open_els.shift()
2385                                         else
2386                                                 ins_mode_in_table_else t
2387                         when TYPE_END_TAG
2388                                 switch t.name
2389                                         when 'table'
2390                                                 if is_in_table_scope 'table', NS_HTML
2391                                                         loop
2392                                                                 el = open_els.shift()
2393                                                                 if el.name is 'table' and el.namespace is NS_HTML
2394                                                                         break
2395                                                         reset_ins_mode()
2396                                                 else
2397                                                         parse_error()
2398                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2399                                                 parse_error()
2400                                         when 'template'
2401                                                 ins_mode_in_head t
2402                                         else
2403                                                 ins_mode_in_table_else t
2404                         when TYPE_EOF
2405                                 ins_mode_in_body t
2406                         else
2407                                 ins_mode_in_table_else t
2408                 return
2409
2410
2411         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2412         ins_mode_in_table_text = (t) ->
2413                 if t.type is TYPE_TEXT and t.text is "\u0000"
2414                         # from javascript?
2415                         parse_error()
2416                         return
2417                 if t.type is TYPE_TEXT
2418                         pending_table_character_tokens.push t
2419                         return
2420                 # Anything else
2421                 all_space = true
2422                 for old in pending_table_character_tokens
2423                         unless is_space_tok old
2424                                 all_space = false
2425                                 break
2426                 if all_space
2427                         for old in pending_table_character_tokens
2428                                 insert_character old
2429                 else
2430                         for old in pending_table_character_tokens
2431                                 ins_mode_in_table_else old
2432                 pending_table_character_tokens = []
2433                 ins_mode = original_ins_mode
2434                 process_token t
2435                 return
2436
2437         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2438         ins_mode_in_caption = (t) ->
2439                 if t.type is TYPE_END_TAG and t.name is 'caption'
2440                         if is_in_table_scope 'caption', NS_HTML
2441                                 generate_implied_end_tags()
2442                                 if open_els[0].name isnt 'caption'
2443                                         parse_error()
2444                                 loop
2445                                         el = open_els.shift()
2446                                         if el.name is 'caption' and el.namespace is NS_HTML
2447                                                 break
2448                                 clear_afe_to_marker()
2449                                 ins_mode = ins_mode_in_table
2450                         else
2451                                 parse_error()
2452                                 # fragment case
2453                         return
2454                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2455                         parse_error()
2456                         if is_in_table_scope 'caption', NS_HTML
2457                                 loop
2458                                         el = open_els.shift()
2459                                         if el.name is 'caption' and el.namespace is NS_HTML
2460                                                 break
2461                                 clear_afe_to_marker()
2462                                 ins_mode = ins_mode_in_table
2463                                 process_token t
2464                         # else fragment case
2465                         return
2466                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2467                         parse_error()
2468                         return
2469                 # Anything else
2470                 ins_mode_in_body t
2471                 return
2472
2473         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2474         ins_mode_in_column_group = (t) ->
2475                 if is_space_tok t
2476                         insert_character t
2477                         return
2478                 if t.type is TYPE_COMMENT
2479                         insert_comment t
2480                         return
2481                 if t.type is TYPE_DOCTYPE
2482                         parse_error()
2483                         return
2484                 if t.type is TYPE_START_TAG and t.name is 'html'
2485                         ins_mode_in_body t
2486                         return
2487                 if t.type is TYPE_START_TAG and t.name is 'col'
2488                         el = insert_html_element t
2489                         open_els.shift()
2490                         t.acknowledge_self_closing()
2491                         return
2492                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2493                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2494                                 open_els.shift()
2495                                 ins_mode = ins_mode_in_table
2496                         else
2497                                 parse_error()
2498                         return
2499                 if t.type is TYPE_END_TAG and t.name is 'col'
2500                         parse_error()
2501                         return
2502                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2503                         ins_mode_in_head t
2504                         return
2505                 if t.type is TYPE_EOF
2506                         ins_mode_in_body t
2507                         return
2508                 # Anything else
2509                 if open_els[0].name isnt 'colgroup'
2510                         parse_error()
2511                         return
2512                 open_els.shift()
2513                 ins_mode = ins_mode_in_table
2514                 process_token t
2515                 return
2516
2517         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2518         ins_mode_in_table_body = (t) ->
2519                 if t.type is TYPE_START_TAG and t.name is 'tr'
2520                         clear_stack_to_table_body_context()
2521                         insert_html_element t
2522                         ins_mode = ins_mode_in_row
2523                         return
2524                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2525                         parse_error()
2526                         clear_stack_to_table_body_context()
2527                         insert_html_element new_open_tag 'tr'
2528                         ins_mode = ins_mode_in_row
2529                         process_token t
2530                         return
2531                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2532                         unless is_in_table_scope t.name, NS_HTML
2533                                 parse_error()
2534                                 return
2535                         clear_stack_to_table_body_context()
2536                         open_els.shift()
2537                         ins_mode = ins_mode_in_table
2538                         return
2539                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2540                         has = false
2541                         for el in open_els
2542                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2543                                         has = true
2544                                         break
2545                                 if table_scopers[el.name] is el.namespace
2546                                         break
2547                         if !has
2548                                 parse_error()
2549                                 return
2550                         clear_stack_to_table_body_context()
2551                         open_els.shift()
2552                         ins_mode = ins_mode_in_table
2553                         process_token t
2554                         return
2555                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2556                         parse_error()
2557                         return
2558                 # Anything else
2559                 ins_mode_in_table t
2560                 return
2561
2562         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2563         ins_mode_in_row = (t) ->
2564                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2565                         clear_stack_to_table_row_context()
2566                         insert_html_element t
2567                         ins_mode = ins_mode_in_cell
2568                         afe_push_marker()
2569                         return
2570                 if t.type is TYPE_END_TAG and t.name is 'tr'
2571                         if is_in_table_scope 'tr', NS_HTML
2572                                 clear_stack_to_table_row_context()
2573                                 open_els.shift()
2574                                 ins_mode = ins_mode_in_table_body
2575                         else
2576                                 parse_error()
2577                         return
2578                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2579                         if is_in_table_scope 'tr', NS_HTML
2580                                 clear_stack_to_table_row_context()
2581                                 open_els.shift()
2582                                 ins_mode = ins_mode_in_table_body
2583                                 process_token t
2584                         else
2585                                 parse_error()
2586                         return
2587                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2588                         if is_in_table_scope t.name, NS_HTML
2589                                 if is_in_table_scope 'tr', NS_HTML
2590                                         clear_stack_to_table_row_context()
2591                                         open_els.shift()
2592                                         ins_mode = ins_mode_in_table_body
2593                                         process_token t
2594                         else
2595                                 parse_error()
2596                         return
2597                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2598                         parse_error()
2599                         return
2600                 # Anything else
2601                 ins_mode_in_table t
2602                 return
2603
2604         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2605         close_the_cell = ->
2606                 generate_implied_end_tags()
2607                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2608                         parse_error()
2609                 loop
2610                         el = open_els.shift()
2611                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2612                                 break
2613                 clear_afe_to_marker()
2614                 ins_mode = ins_mode_in_row
2615                 return
2616
2617         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2618         ins_mode_in_cell = (t) ->
2619                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2620                         if is_in_table_scope t.name, NS_HTML
2621                                 generate_implied_end_tags()
2622                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2623                                         parse_error()
2624                                 loop
2625                                         el = open_els.shift()
2626                                         if el.name is t.name and el.namespace is NS_HTML
2627                                                 break
2628                                 clear_afe_to_marker()
2629                                 ins_mode = ins_mode_in_row
2630                         else
2631                                 parse_error()
2632                         return
2633                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2634                         has = false
2635                         for el in open_els
2636                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2637                                         has = true
2638                                         break
2639                                 if table_scopers[el.name] is el.namespace
2640                                         break
2641                         if !has
2642                                 parse_error()
2643                                 return
2644                         close_the_cell()
2645                         process_token t
2646                         return
2647                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2648                         parse_error()
2649                         return
2650                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2651                         if is_in_table_scope t.name, NS_HTML
2652                                 close_the_cell()
2653                                 process_token t
2654                         else
2655                                 parse_error()
2656                         return
2657                 # Anything Else
2658                 ins_mode_in_body t
2659                 return
2660
2661         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2662         ins_mode_in_select = (t) ->
2663                 if t.type is TYPE_TEXT and t.text is "\u0000"
2664                         parse_error()
2665                         return
2666                 if t.type is TYPE_TEXT
2667                         insert_character t
2668                         return
2669                 if t.type is TYPE_COMMENT
2670                         insert_comment t
2671                         return
2672                 if t.type is TYPE_DOCTYPE
2673                         parse_error()
2674                         return
2675                 if t.type is TYPE_START_TAG and t.name is 'html'
2676                         ins_mode_in_body t
2677                         return
2678                 if t.type is TYPE_START_TAG and t.name is 'option'
2679                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2680                                 open_els.shift()
2681                         insert_html_element t
2682                         return
2683                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2684                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2685                                 open_els.shift()
2686                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2687                                 open_els.shift()
2688                         insert_html_element t
2689                         return
2690                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2691                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2692                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2693                                         open_els.shift()
2694                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2695                                 open_els.shift()
2696                         else
2697                                 parse_error()
2698                         return
2699                 if t.type is TYPE_END_TAG and t.name is 'option'
2700                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2701                                 open_els.shift()
2702                         else
2703                                 parse_error()
2704                         return
2705                 if t.type is TYPE_END_TAG and t.name is 'select'
2706                         if is_in_select_scope 'select', NS_HTML
2707                                 loop
2708                                         el = open_els.shift()
2709                                         if el.name is 'select' and el.namespace is NS_HTML
2710                                                 break
2711                                 reset_ins_mode()
2712                         else
2713                                 parse_error()
2714                         return
2715                 if t.type is TYPE_START_TAG and t.name is 'select'
2716                         parse_error()
2717                         loop
2718                                 el = open_els.shift()
2719                                 if el.name is 'select' and el.namespace is NS_HTML
2720                                         break
2721                         reset_ins_mode()
2722                         # spec says that this is the same as </select> but it doesn't say
2723                         # to check scope first
2724                         return
2725                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2726                         parse_error()
2727                         unless is_in_select_scope 'select', NS_HTML
2728                                 return
2729                         loop
2730                                 el = open_els.shift()
2731                                 if el.name is 'select' and el.namespace is NS_HTML
2732                                         break
2733                         reset_ins_mode()
2734                         process_token t
2735                         return
2736                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2737                         ins_mode_in_head t
2738                         return
2739                 if t.type is TYPE_EOF
2740                         ins_mode_in_body t
2741                         return
2742                 # Anything else
2743                 parse_error()
2744                 return
2745
2746         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2747         ins_mode_in_select_in_table = (t) ->
2748                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2749                         parse_error()
2750                         loop
2751                                 el = open_els.shift()
2752                                 if el.name is 'select' and el.namespace is NS_HTML
2753                                         break
2754                         reset_ins_mode()
2755                         process_token t
2756                         return
2757                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2758                         parse_error()
2759                         unless is_in_table_scope t.name, NS_HTML
2760                                 return
2761                         loop
2762                                 el = open_els.shift()
2763                                 if el.name is 'select' and el.namespace is NS_HTML
2764                                         break
2765                         reset_ins_mode()
2766                         process_token t
2767                         return
2768                 # Anything else
2769                 ins_mode_in_select t
2770                 return
2771
2772         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2773         ins_mode_in_template = (t) ->
2774                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2775                         ins_mode_in_body t
2776                         return
2777                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2778                         ins_mode_in_head t
2779                         return
2780                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2781                         template_ins_modes.shift()
2782                         template_ins_modes.unshift ins_mode_in_table
2783                         ins_mode = ins_mode_in_table
2784                         process_token t
2785                         return
2786                 if t.type is TYPE_START_TAG and t.name is 'col'
2787                         template_ins_modes.shift()
2788                         template_ins_modes.unshift ins_mode_in_column_group
2789                         ins_mode = ins_mode_in_column_group
2790                         process_token t
2791                         return
2792                 if t.type is TYPE_START_TAG and t.name is 'tr'
2793                         template_ins_modes.shift()
2794                         template_ins_modes.unshift ins_mode_in_table_body
2795                         ins_mode = ins_mode_in_table_body
2796                         process_token t
2797                         return
2798                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2799                         template_ins_modes.shift()
2800                         template_ins_modes.unshift ins_mode_in_row
2801                         ins_mode = ins_mode_in_row
2802                         process_token t
2803                         return
2804                 if t.type is TYPE_START_TAG
2805                         template_ins_modes.shift()
2806                         template_ins_modes.unshift ins_mode_in_body
2807                         ins_mode = ins_mode_in_body
2808                         process_token t
2809                         return
2810                 if t.type is TYPE_END_TAG
2811                         parse_error()
2812                         return
2813                 if t.type is TYPE_EOF
2814                         unless template_tag_is_open()
2815                                 stop_parsing()
2816                                 return
2817                         parse_error()
2818                         loop
2819                                 el = open_els.shift()
2820                                 if el.name is 'template' and el.namespace is NS_HTML
2821                                         break
2822                         clear_afe_to_marker()
2823                         template_ins_modes.shift()
2824                         reset_ins_mode()
2825                         process_token t
2826                 return
2827
2828         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2829         ins_mode_after_body = (t) ->
2830                 if is_space_tok t
2831                         ins_mode_in_body t
2832                         return
2833                 if t.type is TYPE_COMMENT
2834                         first = open_els[open_els.length - 1]
2835                         insert_comment t, [first, first.children.length]
2836                         return
2837                 if t.type is TYPE_DOCTYPE
2838                         parse_error()
2839                         return
2840                 if t.type is TYPE_START_TAG and t.name is 'html'
2841                         ins_mode_in_body t
2842                         return
2843                 if t.type is TYPE_END_TAG and t.name is 'html'
2844                         if flag_fragment_parsing
2845                                 parse_error()
2846                                 return
2847                         ins_mode = ins_mode_after_after_body
2848                         return
2849                 if t.type is TYPE_EOF
2850                         stop_parsing()
2851                         return
2852                 # Anything ELse
2853                 parse_error()
2854                 ins_mode = ins_mode_in_body
2855                 process_token t
2856                 return
2857
2858         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2859         ins_mode_in_frameset = (t) ->
2860                 if is_space_tok t
2861                         insert_character t
2862                         return
2863                 if t.type is TYPE_COMMENT
2864                         insert_comment t
2865                         return
2866                 if t.type is TYPE_DOCTYPE
2867                         parse_error()
2868                         return
2869                 if t.type is TYPE_START_TAG and t.name is 'html'
2870                         ins_mode_in_body t
2871                         return
2872                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2873                         insert_html_element t
2874                         return
2875                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2876                         if open_els.length is 1
2877                                 parse_error()
2878                                 return # fragment case
2879                         open_els.shift()
2880                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2881                                 ins_mode = ins_mode_after_frameset
2882                         return
2883                 if t.type is TYPE_START_TAG and t.name is 'frame'
2884                         insert_html_element t
2885                         open_els.shift()
2886                         t.acknowledge_self_closing()
2887                         return
2888                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2889                         ins_mode_in_head t
2890                         return
2891                 if t.type is TYPE_EOF
2892                         if open_els.length isnt 1
2893                                 parse_error()
2894                         stop_parsing()
2895                         return
2896                 # Anything else
2897                 parse_error()
2898                 return
2899
2900         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2901         ins_mode_after_frameset = (t) ->
2902                 if is_space_tok t
2903                         insert_character t
2904                         return
2905                 if t.type is TYPE_COMMENT
2906                         insert_comment t
2907                         return
2908                 if t.type is TYPE_DOCTYPE
2909                         parse_error()
2910                         return
2911                 if t.type is TYPE_START_TAG and t.name is 'html'
2912                         ins_mode_in_body t
2913                         return
2914                 if t.type is TYPE_END_TAG and t.name is 'html'
2915                         ins_mode = ins_mode_after_after_frameset
2916                         return
2917                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2918                         ins_mode_in_head t
2919                         return
2920                 if t.type is TYPE_EOF
2921                         stop_parsing()
2922                         return
2923                 # Anything else
2924                 parse_error()
2925                 return
2926
2927         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2928         ins_mode_after_after_body = (t) ->
2929                 if t.type is TYPE_COMMENT
2930                         insert_comment t, [doc, doc.children.length]
2931                         return
2932                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2933                         ins_mode_in_body t
2934                         return
2935                 if t.type is TYPE_EOF
2936                         stop_parsing()
2937                         return
2938                 # Anything else
2939                 parse_error()
2940                 ins_mode = ins_mode_in_body
2941                 process_token t
2942                 return
2943
2944         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2945         ins_mode_after_after_frameset = (t) ->
2946                 if t.type is TYPE_COMMENT
2947                         insert_comment t, [doc, doc.children.length]
2948                         return
2949                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2950                         ins_mode_in_body t
2951                         return
2952                 if t.type is TYPE_EOF
2953                         stop_parsing()
2954                         return
2955                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2956                         ins_mode_in_head t
2957                         return
2958                 # Anything else
2959                 parse_error()
2960                 return
2961
2962         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2963         has_color_face_or_size = (t) ->
2964                 for a in t.attrs_a
2965                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2966                                 return true
2967                 return false
2968         in_foreign_content_end_script = ->
2969                 open_els.shift()
2970                 # fixfull
2971                 return
2972         in_foreign_content_other_start = (t) ->
2973                 acn = adjusted_current_node()
2974                 if acn.namespace is NS_MATHML
2975                         adjust_mathml_attributes t
2976                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2977                         t.name = svg_name_fixes[t.name]
2978                 if acn.namespace is NS_SVG
2979                         adjust_svg_attributes t
2980                 adjust_foreign_attributes t
2981                 insert_foreign_element t, acn.namespace
2982                 if t.flag 'self-closing'
2983                         if t.name is 'script'
2984                                 t.acknowledge_self_closing()
2985                                 in_foreign_content_end_script()
2986                                 # fixfull
2987                         else
2988                                 open_els.shift()
2989                                 t.acknowledge_self_closing()
2990                 return
2991         in_foreign_content = (t) ->
2992                 if t.type is TYPE_TEXT and t.text is "\u0000"
2993                         parse_error()
2994                         insert_character new_character_token "\ufffd"
2995                         return
2996                 if is_space_tok t
2997                         insert_character t
2998                         return
2999                 if t.type is TYPE_TEXT
3000                         flag_frameset_ok = false
3001                         insert_character t
3002                         return
3003                 if t.type is TYPE_COMMENT
3004                         insert_comment t
3005                         return
3006                 if t.type is TYPE_DOCTYPE
3007                         parse_error()
3008                         return
3009                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
3010                         parse_error()
3011                         if flag_fragment_parsing
3012                                 in_foreign_content_other_start t
3013                                 return
3014                         loop # is this safe?
3015                                 open_els.shift()
3016                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
3017                                         break
3018                         process_token t
3019                         return
3020                 if t.type is TYPE_START_TAG
3021                         in_foreign_content_other_start t
3022                         return
3023                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
3024                         in_foreign_content_end_script()
3025                         return
3026                 if t.type is TYPE_END_TAG
3027                         i = 0
3028                         node = open_els[i]
3029                         if node.name.toLowerCase() isnt t.name
3030                                 parse_error()
3031                         loop
3032                                 if node is open_els[open_els.length - 1]
3033                                         return
3034                                 if node.name.toLowerCase() is t.name
3035                                         loop
3036                                                 el = open_els.shift()
3037                                                 if el is node
3038                                                         return
3039                                 i += 1
3040                                 node = open_els[i]
3041                                 if node.namespace is NS_HTML
3042                                         break
3043                         ins_mode t # explicitly call HTML insertion mode
3044                 return
3045
3046
3047         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
3048         tok_state_data = ->
3049                 switch c = txt.charAt(cur++)
3050                         when '&'
3051                                 return new_text_node parse_character_reference()
3052                         when '<'
3053                                 tok_state = tok_state_tag_open
3054                         when "\u0000"
3055                                 parse_error()
3056                                 return new_text_node c
3057                         when '' # EOF
3058                                 return new_eof_token()
3059                         else
3060                                 return new_text_node c
3061                 return null
3062
3063         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3064         # not needed: tok_state_character_reference_in_data = ->
3065         # just call parse_character_reference()
3066
3067         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
3068         tok_state_rcdata = ->
3069                 switch c = txt.charAt(cur++)
3070                         when '&'
3071                                 return new_text_node parse_character_reference()
3072                         when '<'
3073                                 tok_state = tok_state_rcdata_less_than_sign
3074                         when "\u0000"
3075                                 parse_error()
3076                                 return new_character_token "\ufffd"
3077                         when '' # EOF
3078                                 return new_eof_token()
3079                         else
3080                                 return new_character_token c
3081                 return null
3082
3083         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3084         # not needed: tok_state_character_reference_in_rcdata = ->
3085         # just call parse_character_reference()
3086
3087         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3088         tok_state_rawtext = ->
3089                 switch c = txt.charAt(cur++)
3090                         when '<'
3091                                 tok_state = tok_state_rawtext_less_than_sign
3092                         when "\u0000"
3093                                 parse_error()
3094                                 return new_character_token "\ufffd"
3095                         when '' # EOF
3096                                 return new_eof_token()
3097                         else
3098                                 return new_character_token c
3099                 return null
3100
3101         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3102         tok_state_script_data = ->
3103                 switch c = txt.charAt(cur++)
3104                         when '<'
3105                                 tok_state = tok_state_script_data_less_than_sign
3106                         when "\u0000"
3107                                 parse_error()
3108                                 return new_character_token "\ufffd"
3109                         when '' # EOF
3110                                 return new_eof_token()
3111                         else
3112                                 return new_character_token c
3113                 return null
3114
3115         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3116         tok_state_plaintext = ->
3117                 switch c = txt.charAt(cur++)
3118                         when "\u0000"
3119                                 parse_error()
3120                                 return new_character_token "\ufffd"
3121                         when '' # EOF
3122                                 return new_eof_token()
3123                         else
3124                                 return new_character_token c
3125                 return null
3126
3127
3128         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3129         tok_state_tag_open = ->
3130                 c = txt.charAt(cur++)
3131                 if c is '!'
3132                         tok_state = tok_state_markup_declaration_open
3133                         return
3134                 if c is '/'
3135                         tok_state = tok_state_end_tag_open
3136                         return
3137                 if is_uc_alpha(c)
3138                         tok_cur_tag = new_open_tag c.toLowerCase()
3139                         tok_state = tok_state_tag_name
3140                         return
3141                 if is_lc_alpha(c)
3142                         tok_cur_tag = new_open_tag c
3143                         tok_state = tok_state_tag_name
3144                         return
3145                 if c is '?'
3146                         parse_error()
3147                         tok_cur_tag = new_comment_token '?' # FIXME right?
3148                         tok_state = tok_state_bogus_comment
3149                         return
3150                 # Anything else
3151                 parse_error()
3152                 tok_state = tok_state_data
3153                 cur -= 1 # we didn't parse/handle the char after <
3154                 return new_text_node '<'
3155
3156         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3157         tok_state_end_tag_open = ->
3158                 c = txt.charAt(cur++)
3159                 if is_uc_alpha(c)
3160                         tok_cur_tag = new_end_tag c.toLowerCase()
3161                         tok_state = tok_state_tag_name
3162                         return
3163                 if is_lc_alpha(c)
3164                         tok_cur_tag = new_end_tag c
3165                         tok_state = tok_state_tag_name
3166                         return
3167                 if c is '>'
3168                         parse_error()
3169                         tok_state = tok_state_data
3170                         return
3171                 if c is '' # EOF
3172                         parse_error()
3173                         tok_state = tok_state_data
3174                         return new_text_node '</'
3175                 # Anything else
3176                 parse_error()
3177                 tok_cur_tag = new_comment_token c
3178                 tok_state = tok_state_bogus_comment
3179                 return null
3180
3181         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3182         tok_state_tag_name = ->
3183                 switch c = txt.charAt(cur++)
3184                         when "\t", "\n", "\u000c", ' '
3185                                 tok_state = tok_state_before_attribute_name
3186                         when '/'
3187                                 tok_state = tok_state_self_closing_start_tag
3188                         when '>'
3189                                 tok_state = tok_state_data
3190                                 tmp = tok_cur_tag
3191                                 tok_cur_tag = null
3192                                 return tmp
3193                         when "\u0000"
3194                                 parse_error()
3195                                 tok_cur_tag.name += "\ufffd"
3196                         when '' # EOF
3197                                 parse_error()
3198                                 tok_state = tok_state_data
3199                         else
3200                                 if is_uc_alpha(c)
3201                                         tok_cur_tag.name += c.toLowerCase()
3202                                 else
3203                                         tok_cur_tag.name += c
3204                 return null
3205
3206         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3207         tok_state_rcdata_less_than_sign = ->
3208                 c = txt.charAt(cur++)
3209                 if c is '/'
3210                         temporary_buffer = ''
3211                         tok_state = tok_state_rcdata_end_tag_open
3212                         return null
3213                 # Anything else
3214                 tok_state = tok_state_rcdata
3215                 cur -= 1 # reconsume the input character
3216                 return new_character_token '<'
3217
3218         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3219         tok_state_rcdata_end_tag_open = ->
3220                 c = txt.charAt(cur++)
3221                 if is_uc_alpha(c)
3222                         tok_cur_tag = new_end_tag c.toLowerCase()
3223                         temporary_buffer += c
3224                         tok_state = tok_state_rcdata_end_tag_name
3225                         return null
3226                 if is_lc_alpha(c)
3227                         tok_cur_tag = new_end_tag c
3228                         temporary_buffer += c
3229                         tok_state = tok_state_rcdata_end_tag_name
3230                         return null
3231                 # Anything else
3232                 tok_state = tok_state_rcdata
3233                 cur -= 1 # reconsume the input character
3234                 return new_character_token "</" # fixfull separate these
3235
3236         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3237         is_appropriate_end_tag = (t) ->
3238                 # fixfull: this assumes that open_els[0].name is "the tag name of the last
3239                 # start tag to have been emitted from this tokenizer"
3240                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3241
3242         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3243         tok_state_rcdata_end_tag_name = ->
3244                 c = txt.charAt(cur++)
3245                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3246                         if is_appropriate_end_tag tok_cur_tag
3247                                 tok_state = tok_state_before_attribute_name
3248                                 return
3249                         # else fall through to "Anything else"
3250                 if c is '/'
3251                         if is_appropriate_end_tag tok_cur_tag
3252                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3253                                 return
3254                         # else fall through to "Anything else"
3255                 if c is '>'
3256                         if is_appropriate_end_tag tok_cur_tag
3257                                 tok_state = tok_state_data
3258                                 return tok_cur_tag
3259                         # else fall through to "Anything else"
3260                 if is_uc_alpha(c)
3261                         tok_cur_tag.name += c.toLowerCase()
3262                         temporary_buffer += c
3263                         return null
3264                 if is_lc_alpha(c)
3265                         tok_cur_tag.name += c
3266                         temporary_buffer += c
3267                         return null
3268                 # Anything else
3269                 tok_state = tok_state_rcdata
3270                 cur -= 1 # reconsume the input character
3271                 return new_character_token '</' + temporary_buffer # fixfull separate these
3272
3273         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3274         tok_state_rawtext_less_than_sign = ->
3275                 c = txt.charAt(cur++)
3276                 if c is '/'
3277                         temporary_buffer = ''
3278                         tok_state = tok_state_rawtext_end_tag_open
3279                         return null
3280                 # Anything else
3281                 tok_state = tok_state_rawtext
3282                 cur -= 1 # reconsume the input character
3283                 return new_character_token '<'
3284
3285         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3286         tok_state_rawtext_end_tag_open = ->
3287                 c = txt.charAt(cur++)
3288                 if is_uc_alpha(c)
3289                         tok_cur_tag = new_end_tag c.toLowerCase()
3290                         temporary_buffer += c
3291                         tok_state = tok_state_rawtext_end_tag_name
3292                         return null
3293                 if is_lc_alpha(c)
3294                         tok_cur_tag = new_end_tag c
3295                         temporary_buffer += c
3296                         tok_state = tok_state_rawtext_end_tag_name
3297                         return null
3298                 # Anything else
3299                 tok_state = tok_state_rawtext
3300                 cur -= 1 # reconsume the input character
3301                 return new_character_token "</" # fixfull separate these
3302
3303         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3304         tok_state_rawtext_end_tag_name = ->
3305                 c = txt.charAt(cur++)
3306                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3307                         if is_appropriate_end_tag tok_cur_tag
3308                                 tok_state = tok_state_before_attribute_name
3309                                 return
3310                         # else fall through to "Anything else"
3311                 if c is '/'
3312                         if is_appropriate_end_tag tok_cur_tag
3313                                 tok_state = tok_state_self_closing_start_tag
3314                                 return
3315                         # else fall through to "Anything else"
3316                 if c is '>'
3317                         if is_appropriate_end_tag tok_cur_tag
3318                                 tok_state = tok_state_data
3319                                 return tok_cur_tag
3320                         # else fall through to "Anything else"
3321                 if is_uc_alpha(c)
3322                         tok_cur_tag.name += c.toLowerCase()
3323                         temporary_buffer += c
3324                         return null
3325                 if is_lc_alpha(c)
3326                         tok_cur_tag.name += c
3327                         temporary_buffer += c
3328                         return null
3329                 # Anything else
3330                 tok_state = tok_state_rawtext
3331                 cur -= 1 # reconsume the input character
3332                 return new_character_token '</' + temporary_buffer # fixfull separate these
3333
3334         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3335         tok_state_script_data_less_than_sign = ->
3336                 c = txt.charAt(cur++)
3337                 if c is '/'
3338                         temporary_buffer = ''
3339                         tok_state = tok_state_script_data_end_tag_open
3340                         return
3341                 if c is '!'
3342                         tok_state = tok_state_script_data_escape_start
3343                         return new_character_token '<!' # fixfull split
3344                 # Anything else
3345                 tok_state = tok_state_script_data
3346                 cur -= 1 # Reconsume
3347                 return new_character_token '<'
3348
3349         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3350         tok_state_script_data_end_tag_open = ->
3351                 c = txt.charAt(cur++)
3352                 if is_uc_alpha(c)
3353                         tok_cur_tag = new_end_tag c.toLowerCase()
3354                         temporary_buffer += c
3355                         tok_state = tok_state_script_data_end_tag_name
3356                         return
3357                 if is_lc_alpha(c)
3358                         tok_cur_tag = new_end_tag c
3359                         temporary_buffer += c
3360                         tok_state = tok_state_script_data_end_tag_name
3361                         return
3362                 # Anything else
3363                 tok_state = tok_state_script_data
3364                 cur -= 1 # Reconsume
3365                 return new_character_token '</'
3366
3367         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3368         tok_state_script_data_end_tag_name = ->
3369                 c = txt.charAt(cur++)
3370                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3371                         if is_appropriate_end_tag tok_cur_tag
3372                                 tok_state = tok_state_before_attribute_name
3373                                 return
3374                         # fall through
3375                 if c is '/'
3376                         if is_appropriate_end_tag tok_cur_tag
3377                                 tok_state = tok_state_self_closing_start_tag
3378                                 return
3379                         # fall through
3380                 if c is '>'
3381                         if is_appropriate_end_tag tok_cur_tag
3382                                 tok_state = tok_state_data
3383                                 return tok_cur_tag
3384                         # fall through
3385                 if is_uc_alpha(c)
3386                         tok_cur_tag.name += c.toLowerCase()
3387                         temporary_buffer += c
3388                         return
3389                 if is_lc_alpha(c)
3390                         tok_cur_tag.name += c
3391                         temporary_buffer += c
3392                         return
3393                 # Anything else
3394                 tok_state = tok_state_script_data
3395                 cur -= 1 # Reconsume
3396                 return new_character_token "</#{temporary_buffer}" # fixfull split
3397
3398         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3399         tok_state_script_data_escape_start = ->
3400                 c = txt.charAt(cur++)
3401                 if c is '-'
3402                         tok_state = tok_state_script_data_escape_start_dash
3403                         return new_character_token '-'
3404                 # Anything else
3405                 tok_state = tok_state_script_data
3406                 cur -= 1 # Reconsume
3407                 return
3408
3409         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3410         tok_state_script_data_escape_start_dash = ->
3411                 c = txt.charAt(cur++)
3412                 if c is '-'
3413                         tok_state = tok_state_script_data_escaped_dash_dash
3414                         return new_character_token '-'
3415                 # Anything else
3416                 tok_state = tok_state_script_data
3417                 cur -= 1 # Reconsume
3418                 return
3419
3420         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3421         tok_state_script_data_escaped = ->
3422                 c = txt.charAt(cur++)
3423                 if c is '-'
3424                         tok_state = tok_state_script_data_escaped_dash
3425                         return new_character_token '-'
3426                 if c is '<'
3427                         tok_state = tok_state_script_data_escaped_less_than_sign
3428                         return
3429                 if c is "\u0000"
3430                         parse_error()
3431                         return new_character_token "\ufffd"
3432                 if c is '' # EOF
3433                         tok_state = tok_state_data
3434                         parse_error()
3435                         cur -= 1 # Reconsume
3436                         return
3437                 # Anything else
3438                 return new_character_token c
3439
3440         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3441         tok_state_script_data_escaped_dash = ->
3442                 c = txt.charAt(cur++)
3443                 if c is '-'
3444                         tok_state = tok_state_script_data_escaped_dash_dash
3445                         return new_character_token '-'
3446                 if c is '<'
3447                         tok_state = tok_state_script_data_escaped_less_than_sign
3448                         return
3449                 if c is "\u0000"
3450                         parse_error()
3451                         tok_state = tok_state_script_data_escaped
3452                         return new_character_token "\ufffd"
3453                 if c is '' # EOF
3454                         tok_state = tok_state_data
3455                         parse_error()
3456                         cur -= 1 # Reconsume
3457                         return
3458                 # Anything else
3459                 tok_state = tok_state_script_data_escaped
3460                 return new_character_token c
3461
3462         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3463         tok_state_script_data_escaped_dash_dash = ->
3464                 c = txt.charAt(cur++)
3465                 if c is '-'
3466                         return new_character_token '-'
3467                 if c is '<'
3468                         tok_state = tok_state_script_data_escaped_less_than_sign
3469                         return
3470                 if c is '>'
3471                         tok_state = tok_state_script_data
3472                         return new_character_token '>'
3473                 if c is "\u0000"
3474                         parse_error()
3475                         tok_state = tok_state_script_data_escaped
3476                         return new_character_token "\ufffd"
3477                 if c is '' # EOF
3478                         parse_error()
3479                         tok_state = tok_state_data
3480                         cur -= 1 # Reconsume
3481                         return
3482                 # Anything else
3483                 tok_state = tok_state_script_data_escaped
3484                 return new_character_token c
3485
3486         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3487         tok_state_script_data_escaped_less_than_sign = ->
3488                 c = txt.charAt(cur++)
3489                 if c is '/'
3490                         temporary_buffer = ''
3491                         tok_state = tok_state_script_data_escaped_end_tag_open
3492                         return
3493                 if is_uc_alpha(c)
3494                         temporary_buffer = c.toLowerCase() # yes, really
3495                         tok_state = tok_state_script_data_double_escape_start
3496                         return new_character_token "<#{c}" # fixfull split
3497                 if is_lc_alpha(c)
3498                         temporary_buffer = c
3499                         tok_state = tok_state_script_data_double_escape_start
3500                         return new_character_token "<#{c}" # fixfull split
3501                 # Anything else
3502                 tok_state = tok_state_script_data_escaped
3503                 cur -= 1 # Reconsume
3504                 return new_character_token '<'
3505
3506         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3507         tok_state_script_data_escaped_end_tag_open = ->
3508                 c = txt.charAt(cur++)
3509                 if is_uc_alpha(c)
3510                         tok_cur_tag = new_end_tag c.toLowerCase()
3511                         temporary_buffer += c
3512                         tok_state = tok_state_script_data_escaped_end_tag_name
3513                         return
3514                 if is_lc_alpha(c)
3515                         tok_cur_tag = new_end_tag c
3516                         temporary_buffer += c
3517                         tok_state = tok_state_script_data_escaped_end_tag_name
3518                         return
3519                 # Anything else
3520                 tok_state = tok_state_script_data_escaped
3521                 cur -= 1 # Reconsume
3522                 return new_character_token '</' # fixfull split
3523
3524         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3525         tok_state_script_data_escaped_end_tag_name = ->
3526                 c = txt.charAt(cur++)
3527                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3528                         if is_appropriate_end_tag tok_cur_tag
3529                                 tok_state = tok_state_before_attribute_name
3530                                 return
3531                         # fall through
3532                 if c is '/'
3533                         if is_appropriate_end_tag tok_cur_tag
3534                                 tok_state = tok_state_self_closing_start_tag
3535                                 return
3536                         # fall through
3537                 if c is '>'
3538                         if is_appropriate_end_tag tok_cur_tag
3539                                 tok_state = tok_state_data
3540                                 return tok_cur_tag
3541                         # fall through
3542                 if is_uc_alpha(c)
3543                         tok_cur_tag.name += c.toLowerCase()
3544                         temporary_buffer += c.toLowerCase()
3545                         return
3546                 if is_lc_alpha(c)
3547                         tok_cur_tag.name += c
3548                         temporary_buffer += c.toLowerCase()
3549                         return
3550                 # Anything else
3551                 tok_state = tok_state_script_data_escaped
3552                 cur -= 1 # Reconsume
3553                 return new_character_token "</#{temporary_buffer}" # fixfull split
3554
3555         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3556         tok_state_script_data_double_escape_start = ->
3557                 c = txt.charAt(cur++)
3558                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3559                         if temporary_buffer is 'script'
3560                                 tok_state = tok_state_script_data_double_escaped
3561                         else
3562                                 tok_state = tok_state_script_data_escaped
3563                         return new_character_token c
3564                 if is_uc_alpha(c)
3565                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3566                         return new_character_token c
3567                 if is_lc_alpha(c)
3568                         temporary_buffer += c
3569                         return new_character_token c
3570                 # Anything else
3571                 tok_state = tok_state_script_data_escaped
3572                 cur -= 1 # Reconsume
3573                 return
3574
3575         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3576         tok_state_script_data_double_escaped = ->
3577                 c = txt.charAt(cur++)
3578                 if c is '-'
3579                         tok_state = tok_state_script_data_double_escaped_dash
3580                         return new_character_token '-'
3581                 if c is '<'
3582                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3583                         return new_character_token '<'
3584                 if c is "\u0000"
3585                         parse_error()
3586                         return new_character_token "\ufffd"
3587                 if c is '' # EOF
3588                         parse_error()
3589                         tok_state = tok_state_data
3590                         cur -= 1 # Reconsume
3591                         return
3592                 # Anything else
3593                 return new_character_token c
3594
3595         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3596         tok_state_script_data_double_escaped_dash = ->
3597                 c = txt.charAt(cur++)
3598                 if c is '-'
3599                         tok_state = tok_state_script_data_double_escaped_dash_dash
3600                         return new_character_token '-'
3601                 if c is '<'
3602                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3603                         return new_character_token '<'
3604                 if c is "\u0000"
3605                         parse_error()
3606                         tok_state = tok_state_script_data_double_escaped
3607                         return new_character_token "\ufffd"
3608                 if c is '' # EOF
3609                         parse_error()
3610                         tok_state = tok_state_data
3611                         cur -= 1 # Reconsume
3612                         return
3613                 # Anything else
3614                 tok_state = tok_state_script_data_double_escaped
3615                 return new_character_token c
3616
3617         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3618         tok_state_script_data_double_escaped_dash_dash = ->
3619                 c = txt.charAt(cur++)
3620                 if c is '-'
3621                         return new_character_token '-'
3622                 if c is '<'
3623                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3624                         return new_character_token '<'
3625                 if c is '>'
3626                         tok_state = tok_state_script_data
3627                         return new_character_token '>'
3628                 if c is "\u0000"
3629                         parse_error()
3630                         tok_state = tok_state_script_data_double_escaped
3631                         return new_character_token "\ufffd"
3632                 if c is '' # EOF
3633                         parse_error()
3634                         tok_state = tok_state_data
3635                         cur -= 1 # Reconsume
3636                         return
3637                 # Anything else
3638                 tok_state = tok_state_script_data_double_escaped
3639                 return new_character_token c
3640
3641         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3642         tok_state_script_data_double_escaped_less_than_sign = ->
3643                 c = txt.charAt(cur++)
3644                 if c is '/'
3645                         temporary_buffer = ''
3646                         tok_state = tok_state_script_data_double_escape_end
3647                         return new_character_token '/'
3648                 # Anything else
3649                 tok_state = tok_state_script_data_double_escaped
3650                 cur -= 1 # Reconsume
3651                 return
3652
3653         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3654         tok_state_script_data_double_escape_end = ->
3655                 c = txt.charAt(cur++)
3656                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3657                         if temporary_buffer is 'script'
3658                                 tok_state = tok_state_script_data_escaped
3659                         else
3660                                 tok_state = tok_state_script_data_double_escaped
3661                         return new_character_token c
3662                 if is_uc_alpha(c)
3663                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3664                         return new_character_token c
3665                 if is_lc_alpha(c)
3666                         temporary_buffer += c
3667                         return new_character_token c
3668                 # Anything else
3669                 tok_state = tok_state_script_data_double_escaped
3670                 cur -= 1 # Reconsume
3671                 return
3672
3673         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3674         tok_state_before_attribute_name = ->
3675                 attr_name = null
3676                 switch c = txt.charAt(cur++)
3677                         when "\t", "\n", "\u000c", ' '
3678                                 return null
3679                         when '/'
3680                                 tok_state = tok_state_self_closing_start_tag
3681                                 return null
3682                         when '>'
3683                                 tok_state = tok_state_data
3684                                 tmp = tok_cur_tag
3685                                 tok_cur_tag = null
3686                                 return tmp
3687                         when "\u0000"
3688                                 parse_error()
3689                                 attr_name = "\ufffd"
3690                         when '"', "'", '<', '='
3691                                 parse_error()
3692                                 attr_name = c
3693                         when '' # EOF
3694                                 parse_error()
3695                                 tok_state = tok_state_data
3696                         else
3697                                 if is_uc_alpha(c)
3698                                         attr_name = c.toLowerCase()
3699                                 else
3700                                         attr_name = c
3701                 if attr_name?
3702                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3703                         tok_state = tok_state_attribute_name
3704                 return null
3705
3706         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3707         tok_state_attribute_name = ->
3708                 switch c = txt.charAt(cur++)
3709                         when "\t", "\n", "\u000c", ' '
3710                                 tok_state = tok_state_after_attribute_name
3711                         when '/'
3712                                 tok_state = tok_state_self_closing_start_tag
3713                         when '='
3714                                 tok_state = tok_state_before_attribute_value
3715                         when '>'
3716                                 tok_state = tok_state_data
3717                                 tmp = tok_cur_tag
3718                                 tok_cur_tag = null
3719                                 return tmp
3720                         when "\u0000"
3721                                 parse_error()
3722                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3723                         when '"', "'", '<'
3724                                 parse_error()
3725                                 tok_cur_tag.attrs_a[0][0] += c
3726                         when '' # EOF
3727                                 parse_error()
3728                                 tok_state = tok_state_data
3729                         else
3730                                 if is_uc_alpha(c)
3731                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3732                                 else
3733                                         tok_cur_tag.attrs_a[0][0] += c
3734                 return null
3735
3736         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3737         tok_state_after_attribute_name = ->
3738                 c = txt.charAt(cur++)
3739                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3740                         return
3741                 if c is '/'
3742                         tok_state = tok_state_self_closing_start_tag
3743                         return
3744                 if c is '='
3745                         tok_state = tok_state_before_attribute_value
3746                         return
3747                 if c is '>'
3748                         tok_state = tok_state_data
3749                         return tok_cur_tag
3750                 if is_uc_alpha(c)
3751                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3752                         tok_state = tok_state_attribute_name
3753                         return
3754                 if c is "\u0000"
3755                         parse_error()
3756                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3757                         tok_state = tok_state_attribute_name
3758                         return
3759                 if c is '' # EOF
3760                         parse_error()
3761                         tok_state = tok_state_data
3762                         cur -= 1 # reconsume
3763                         return
3764                 if c is '"' or c is "'" or c is '<'
3765                         parse_error()
3766                         # fall through to Anything else
3767                 # Anything else
3768                 tok_cur_tag.attrs_a.unshift [c, '']
3769                 tok_state = tok_state_attribute_name
3770                 return
3771
3772         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3773         tok_state_before_attribute_value = ->
3774                 switch c = txt.charAt(cur++)
3775                         when "\t", "\n", "\u000c", ' '
3776                                 return null
3777                         when '"'
3778                                 tok_state = tok_state_attribute_value_double_quoted
3779                         when '&'
3780                                 tok_state = tok_state_attribute_value_unquoted
3781                                 cur -= 1
3782                         when "'"
3783                                 tok_state = tok_state_attribute_value_single_quoted
3784                         when "\u0000"
3785                                 # Parse error
3786                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3787                                 tok_state = tok_state_attribute_value_unquoted
3788                         when '>'
3789                                 # Parse error
3790                                 tok_state = tok_state_data
3791                                 tmp = tok_cur_tag
3792                                 tok_cur_tag = null
3793                                 return tmp
3794                         when '' # EOF
3795                                 parse_error()
3796                                 tok_state = tok_state_data
3797                         else
3798                                 tok_cur_tag.attrs_a[0][1] += c
3799                                 tok_state = tok_state_attribute_value_unquoted
3800                 return null
3801
3802         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3803         tok_state_attribute_value_double_quoted = ->
3804                 switch c = txt.charAt(cur++)
3805                         when '"'
3806                                 tok_state = tok_state_after_attribute_value_quoted
3807                         when '&'
3808                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3809                         when "\u0000"
3810                                 # Parse error
3811                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3812                         when '' # EOF
3813                                 parse_error()
3814                                 tok_state = tok_state_data
3815                         else
3816                                 tok_cur_tag.attrs_a[0][1] += c
3817                 return null
3818
3819         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3820         tok_state_attribute_value_single_quoted = ->
3821                 switch c = txt.charAt(cur++)
3822                         when "'"
3823                                 tok_state = tok_state_after_attribute_value_quoted
3824                         when '&'
3825                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3826                         when "\u0000"
3827                                 # Parse error
3828                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3829                         when '' # EOF
3830                                 parse_error()
3831                                 tok_state = tok_state_data
3832                         else
3833                                 tok_cur_tag.attrs_a[0][1] += c
3834                 return null
3835
3836         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3837         tok_state_attribute_value_unquoted = ->
3838                 switch c = txt.charAt(cur++)
3839                         when "\t", "\n", "\u000c", ' '
3840                                 tok_state = tok_state_before_attribute_name
3841                         when '&'
3842                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3843                         when '>'
3844                                 tok_state = tok_state_data
3845                                 tmp = tok_cur_tag
3846                                 tok_cur_tag = null
3847                                 return tmp
3848                         when "\u0000"
3849                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3850                         when '' # EOF
3851                                 parse_error()
3852                                 tok_state = tok_state_data
3853                         else
3854                                 # Parse Error if ', <, = or ` (backtick)
3855                                 tok_cur_tag.attrs_a[0][1] += c
3856                 return null
3857
3858         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3859         tok_state_after_attribute_value_quoted = ->
3860                 switch c = txt.charAt(cur++)
3861                         when "\t", "\n", "\u000c", ' '
3862                                 tok_state = tok_state_before_attribute_name
3863                         when '/'
3864                                 tok_state = tok_state_self_closing_start_tag
3865                         when '>'
3866                                 tok_state = tok_state_data
3867                                 tmp = tok_cur_tag
3868                                 tok_cur_tag = null
3869                                 return tmp
3870                         when '' # EOF
3871                                 parse_error()
3872                                 tok_state = tok_state_data
3873                         else
3874                                 # Parse Error
3875                                 tok_state = tok_state_before_attribute_name
3876                                 cur -= 1 # we didn't handle that char
3877                 return null
3878
3879         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3880         tok_state_self_closing_start_tag = ->
3881                 c = txt.charAt(cur++)
3882                 if c is '>'
3883                         tok_cur_tag.flag 'self-closing', true
3884                         tok_state = tok_state_data
3885                         return tok_cur_tag
3886                 if c is ''
3887                         parse_error()
3888                         tok_state = tok_state_data
3889                         cur -= 1 # Reconsume
3890                         return
3891                 # Anything else
3892                 parse_error()
3893                 tok_state = tok_state_before_attribute_name
3894                 cur -= 1 # Reconsume
3895                 return
3896
3897         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3898         # WARNING: put a comment token in tok_cur_tag before setting this state
3899         tok_state_bogus_comment = ->
3900                 next_gt = txt.indexOf '>', cur
3901                 if next_gt is -1
3902                         val = txt.substr cur
3903                         cur = txt.length
3904                 else
3905                         val = txt.substr cur, (next_gt - cur)
3906                         cur = next_gt + 1
3907                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3908                 tok_cur_tag.text += val
3909                 tok_state = tok_state_data
3910                 return tok_cur_tag
3911
3912         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3913         tok_state_markup_declaration_open = ->
3914                 if txt.substr(cur, 2) is '--'
3915                         cur += 2
3916                         tok_cur_tag = new_comment_token ''
3917                         tok_state = tok_state_comment_start
3918                         return
3919                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3920                         cur += 7
3921                         tok_state = tok_state_doctype
3922                         return
3923                 acn = adjusted_current_node()
3924                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3925                         cur += 7
3926                         tok_state = tok_state_cdata_section
3927                         return
3928                 # Otherwise
3929                 parse_error()
3930                 tok_cur_tag = new_comment_token ''
3931                 tok_state = tok_state_bogus_comment
3932                 return
3933
3934         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3935         tok_state_comment_start = ->
3936                 switch c = txt.charAt(cur++)
3937                         when '-'
3938                                 tok_state = tok_state_comment_start_dash
3939                         when "\u0000"
3940                                 parse_error()
3941                                 tok_state = tok_state_comment
3942                                 return new_character_token "\ufffd"
3943                         when '>'
3944                                 parse_error()
3945                                 tok_state = tok_state_data
3946                                 return tok_cur_tag
3947                         when '' # EOF
3948                                 parse_error()
3949                                 tok_state = tok_state_data
3950                                 cur -= 1 # Reconsume
3951                                 return tok_cur_tag
3952                         else
3953                                 tok_cur_tag.text += c
3954                                 tok_state = tok_state_comment
3955                 return null
3956
3957         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3958         tok_state_comment_start_dash = ->
3959                 switch c = txt.charAt(cur++)
3960                         when '-'
3961                                 tok_state = tok_state_comment_end
3962                         when "\u0000"
3963                                 parse_error()
3964                                 tok_cur_tag.text += "-\ufffd"
3965                                 tok_state = tok_state_comment
3966                         when '>'
3967                                 parse_error()
3968                                 tok_state = tok_state_data
3969                                 return tok_cur_tag
3970                         when '' # EOF
3971                                 parse_error()
3972                                 tok_state = tok_state_data
3973                                 cur -= 1 # Reconsume
3974                                 return tok_cur_tag
3975                         else
3976                                 tok_cur_tag.text += "-#{c}"
3977                                 tok_state = tok_state_comment
3978                 return null
3979
3980         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3981         tok_state_comment = ->
3982                 switch c = txt.charAt(cur++)
3983                         when '-'
3984                                 tok_state = tok_state_comment_end_dash
3985                         when "\u0000"
3986                                 parse_error()
3987                                 tok_cur_tag.text += "\ufffd"
3988                         when '' # EOF
3989                                 parse_error()
3990                                 tok_state = tok_state_data
3991                                 cur -= 1 # Reconsume
3992                                 return tok_cur_tag
3993                         else
3994                                 tok_cur_tag.text += c
3995                 return null
3996
3997         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3998         tok_state_comment_end_dash = ->
3999                 switch c = txt.charAt(cur++)
4000                         when '-'
4001                                 tok_state = tok_state_comment_end
4002                         when "\u0000"
4003                                 parse_error()
4004                                 tok_cur_tag.text += "-\ufffd"
4005                                 tok_state = tok_state_comment
4006                         when '' # EOF
4007                                 parse_error()
4008                                 tok_state = tok_state_data
4009                                 cur -= 1 # Reconsume
4010                                 return tok_cur_tag
4011                         else
4012                                 tok_cur_tag.text += "-#{c}"
4013                                 tok_state = tok_state_comment
4014                 return null
4015
4016         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
4017         tok_state_comment_end = ->
4018                 switch c = txt.charAt(cur++)
4019                         when '>'
4020                                 tok_state = tok_state_data
4021                                 return tok_cur_tag
4022                         when "\u0000"
4023                                 parse_error()
4024                                 tok_cur_tag.text += "--\ufffd"
4025                                 tok_state = tok_state_comment
4026                         when '!'
4027                                 parse_error()
4028                                 tok_state = tok_state_comment_end_bang
4029                         when '-'
4030                                 parse_error()
4031                                 tok_cur_tag.text += '-'
4032                         when '' # EOF
4033                                 parse_error()
4034                                 tok_state = tok_state_data
4035                                 cur -= 1 # Reconsume
4036                                 return tok_cur_tag
4037                         else
4038                                 parse_error()
4039                                 tok_cur_tag.text += "--#{c}"
4040                                 tok_state = tok_state_comment
4041                 return null
4042
4043         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
4044         tok_state_comment_end_bang = ->
4045                 switch c = txt.charAt(cur++)
4046                         when '-'
4047                                 tok_cur_tag.text += "--!#{c}"
4048                                 tok_state = tok_state_comment_end_dash
4049                         when '>'
4050                                 tok_state = tok_state_data
4051                                 return tok_cur_tag
4052                         when "\u0000"
4053                                 parse_error()
4054                                 tok_cur_tag.text += "--!\ufffd"
4055                                 tok_state = tok_state_comment
4056                         when '' # EOF
4057                                 parse_error()
4058                                 tok_state = tok_state_data
4059                                 cur -= 1 # Reconsume
4060                                 return tok_cur_tag
4061                         else
4062                                 tok_cur_tag.text += "--!#{c}"
4063                                 tok_state = tok_state_comment
4064                 return null
4065
4066         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
4067         tok_state_doctype = ->
4068                 switch c = txt.charAt(cur++)
4069                         when "\t", "\u000a", "\u000c", ' '
4070                                 tok_state = tok_state_before_doctype_name
4071                         when '' # EOF
4072                                 parse_error()
4073                                 tok_state = tok_state_data
4074                                 el = new_doctype_token ''
4075                                 el.flag 'force-quirks', true
4076                                 cur -= 1 # Reconsume
4077                                 return el
4078                         else
4079                                 parse_error()
4080                                 tok_state = tok_state_before_doctype_name
4081                                 cur -= 1 # Reconsume
4082                 return null
4083
4084         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
4085         tok_state_before_doctype_name = ->
4086                 c = txt.charAt(cur++)
4087                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4088                         return
4089                 if is_uc_alpha(c)
4090                         tok_cur_tag = new_doctype_token c.toLowerCase()
4091                         tok_state = tok_state_doctype_name
4092                         return
4093                 if c is "\u0000"
4094                         parse_error()
4095                         tok_cur_tag = new_doctype_token "\ufffd"
4096                         tok_state = tok_state_doctype_name
4097                         return
4098                 if c is '>'
4099                         parse_error()
4100                         el = new_doctype_token ''
4101                         el.flag 'force-quirks', true
4102                         tok_state = tok_state_data
4103                         return el
4104                 if c is '' # EOF
4105                         parse_error()
4106                         tok_state = tok_state_data
4107                         el = new_doctype_token ''
4108                         el.flag 'force-quirks', true
4109                         cur -= 1 # Reconsume
4110                         return el
4111                 # Anything else
4112                 tok_cur_tag = new_doctype_token c
4113                 tok_state = tok_state_doctype_name
4114                 return null
4115
4116         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4117         tok_state_doctype_name = ->
4118                 c = txt.charAt(cur++)
4119                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4120                         tok_state = tok_state_after_doctype_name
4121                         return
4122                 if c is '>'
4123                         tok_state = tok_state_data
4124                         return tok_cur_tag
4125                 if is_uc_alpha(c)
4126                         tok_cur_tag.name += c.toLowerCase()
4127                         return
4128                 if c is "\u0000"
4129                         parse_error()
4130                         tok_cur_tag.name += "\ufffd"
4131                         return
4132                 if c is '' # EOF
4133                         parse_error()
4134                         tok_state = tok_state_data
4135                         tok_cur_tag.flag 'force-quirks', true
4136                         cur -= 1 # Reconsume
4137                         return tok_cur_tag
4138                 # Anything else
4139                 tok_cur_tag.name += c
4140                 return null
4141
4142         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4143         tok_state_after_doctype_name = ->
4144                 c = txt.charAt(cur++)
4145                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4146                         return
4147                 if c is '>'
4148                         tok_state = tok_state_data
4149                         return tok_cur_tag
4150                 if c is '' # EOF
4151                         parse_error()
4152                         tok_state = tok_state_data
4153                         tok_cur_tag.flag 'force-quirks', true
4154                         cur -= 1 # Reconsume
4155                         return tok_cur_tag
4156                 # Anything else
4157                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4158                         cur += 5
4159                         tok_state = tok_state_after_doctype_public_keyword
4160                         return
4161                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4162                         cur += 5
4163                         tok_state = tok_state_after_doctype_system_keyword
4164                         return
4165                 parse_error()
4166                 tok_cur_tag.flag 'force-quirks', true
4167                 tok_state = tok_state_bogus_doctype
4168                 return null
4169
4170         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4171         tok_state_after_doctype_public_keyword = ->
4172                 c = txt.charAt(cur++)
4173                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4174                         tok_state = tok_state_before_doctype_public_identifier
4175                         return
4176                 if c is '"'
4177                         parse_error()
4178                         tok_cur_tag.public_identifier = ''
4179                         tok_state = tok_state_doctype_public_identifier_double_quoted
4180                         return
4181                 if c is "'"
4182                         parse_error()
4183                         tok_cur_tag.public_identifier = ''
4184                         tok_state = tok_state_doctype_public_identifier_single_quoted
4185                         return
4186                 if c is '>'
4187                         parse_error()
4188                         tok_cur_tag.flag 'force-quirks', true
4189                         tok_state = tok_state_data
4190                         return tok_cur_tag
4191                 if c is '' # EOF
4192                         parse_error()
4193                         tok_state = tok_state_data
4194                         tok_cur_tag.flag 'force-quirks', true
4195                         cur -= 1 # Reconsume
4196                         return tok_cur_tag
4197                 # Anything else
4198                 parse_error()
4199                 tok_cur_tag.flag 'force-quirks', true
4200                 tok_state = tok_state_bogus_doctype
4201                 return null
4202
4203         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4204         tok_state_before_doctype_public_identifier = ->
4205                 c = txt.charAt(cur++)
4206                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4207                         return
4208                 if c is '"'
4209                         parse_error()
4210                         tok_cur_tag.public_identifier = ''
4211                         tok_state = tok_state_doctype_public_identifier_double_quoted
4212                         return
4213                 if c is "'"
4214                         parse_error()
4215                         tok_cur_tag.public_identifier = ''
4216                         tok_state = tok_state_doctype_public_identifier_single_quoted
4217                         return
4218                 if c is '>'
4219                         parse_error()
4220                         tok_cur_tag.flag 'force-quirks', true
4221                         tok_state = tok_state_data
4222                         return tok_cur_tag
4223                 if c is '' # EOF
4224                         parse_error()
4225                         tok_state = tok_state_data
4226                         tok_cur_tag.flag 'force-quirks', true
4227                         cur -= 1 # Reconsume
4228                         return tok_cur_tag
4229                 # Anything else
4230                 parse_error()
4231                 tok_cur_tag.flag 'force-quirks', true
4232                 tok_state = tok_state_bogus_doctype
4233                 return null
4234
4235
4236         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4237         tok_state_doctype_public_identifier_double_quoted = ->
4238                 c = txt.charAt(cur++)
4239                 if c is '"'
4240                         tok_state = tok_state_after_doctype_public_identifier
4241                         return
4242                 if c is "\u0000"
4243                         parse_error()
4244                         tok_cur_tag.public_identifier += "\ufffd"
4245                         return
4246                 if c is '>'
4247                         parse_error()
4248                         tok_cur_tag.flag 'force-quirks', true
4249                         tok_state = tok_state_data
4250                         return tok_cur_tag
4251                 if c is '' # EOF
4252                         parse_error()
4253                         tok_state = tok_state_data
4254                         tok_cur_tag.flag 'force-quirks', true
4255                         cur -= 1 # Reconsume
4256                         return tok_cur_tag
4257                 # Anything else
4258                 tok_cur_tag.public_identifier += c
4259                 return null
4260
4261         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4262         tok_state_doctype_public_identifier_single_quoted = ->
4263                 c = txt.charAt(cur++)
4264                 if c is "'"
4265                         tok_state = tok_state_after_doctype_public_identifier
4266                         return
4267                 if c is "\u0000"
4268                         parse_error()
4269                         tok_cur_tag.public_identifier += "\ufffd"
4270                         return
4271                 if c is '>'
4272                         parse_error()
4273                         tok_cur_tag.flag 'force-quirks', true
4274                         tok_state = tok_state_data
4275                         return tok_cur_tag
4276                 if c is '' # EOF
4277                         parse_error()
4278                         tok_state = tok_state_data
4279                         tok_cur_tag.flag 'force-quirks', true
4280                         cur -= 1 # Reconsume
4281                         return tok_cur_tag
4282                 # Anything else
4283                 tok_cur_tag.public_identifier += c
4284                 return null
4285
4286         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4287         tok_state_after_doctype_public_identifier = ->
4288                 c = txt.charAt(cur++)
4289                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4290                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4291                         return
4292                 if c is '>'
4293                         tok_state = tok_state_data
4294                         return tok_cur_tag
4295                 if c is '"'
4296                         parse_error()
4297                         tok_cur_tag.system_identifier = ''
4298                         tok_state = tok_state_doctype_system_identifier_double_quoted
4299                         return
4300                 if c is "'"
4301                         parse_error()
4302                         tok_cur_tag.system_identifier = ''
4303                         tok_state = tok_state_doctype_system_identifier_single_quoted
4304                         return
4305                 if c is '' # EOF
4306                         parse_error()
4307                         tok_state = tok_state_data
4308                         tok_cur_tag.flag 'force-quirks', true
4309                         cur -= 1 # Reconsume
4310                         return tok_cur_tag
4311                 # Anything else
4312                 parse_error()
4313                 tok_cur_tag.flag 'force-quirks', true
4314                 tok_state = tok_state_bogus_doctype
4315                 return null
4316
4317         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4318         tok_state_between_doctype_public_and_system_identifiers = ->
4319                 c = txt.charAt(cur++)
4320                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4321                         return
4322                 if c is '>'
4323                         tok_state = tok_state_data
4324                         return tok_cur_tag
4325                 if c is '"'
4326                         parse_error()
4327                         tok_cur_tag.system_identifier = ''
4328                         tok_state = tok_state_doctype_system_identifier_double_quoted
4329                         return
4330                 if c is "'"
4331                         parse_error()
4332                         tok_cur_tag.system_identifier = ''
4333                         tok_state = tok_state_doctype_system_identifier_single_quoted
4334                         return
4335                 if c is '' # EOF
4336                         parse_error()
4337                         tok_state = tok_state_data
4338                         tok_cur_tag.flag 'force-quirks', true
4339                         cur -= 1 # Reconsume
4340                         return tok_cur_tag
4341                 # Anything else
4342                 parse_error()
4343                 tok_cur_tag.flag 'force-quirks', true
4344                 tok_state = tok_state_bogus_doctype
4345                 return null
4346
4347         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4348         tok_state_after_doctype_system_keyword = ->
4349                 c = txt.charAt(cur++)
4350                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4351                         tok_state = tok_state_before_doctype_system_identifier
4352                         return
4353                 if c is '"'
4354                         parse_error()
4355                         tok_cur_tag.system_identifier = ''
4356                         tok_state = tok_state_doctype_system_identifier_double_quoted
4357                         return
4358                 if c is "'"
4359                         parse_error()
4360                         tok_cur_tag.system_identifier = ''
4361                         tok_state = tok_state_doctype_system_identifier_single_quoted
4362                         return
4363                 if c is '>'
4364                         parse_error()
4365                         tok_cur_tag.flag 'force-quirks', true
4366                         tok_state = tok_state_data
4367                         return tok_cur_tag
4368                 if c is '' # EOF
4369                         parse_error()
4370                         tok_state = tok_state_data
4371                         tok_cur_tag.flag 'force-quirks', true
4372                         cur -= 1 # Reconsume
4373                         return tok_cur_tag
4374                 # Anything else
4375                 parse_error()
4376                 tok_cur_tag.flag 'force-quirks', true
4377                 tok_state = tok_state_bogus_doctype
4378                 return null
4379
4380         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4381         tok_state_before_doctype_system_identifier = ->
4382                 c = txt.charAt(cur++)
4383                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4384                         return
4385                 if c is '"'
4386                         tok_cur_tag.system_identifier = ''
4387                         tok_state = tok_state_doctype_system_identifier_double_quoted
4388                         return
4389                 if c is "'"
4390                         tok_cur_tag.system_identifier = ''
4391                         tok_state = tok_state_doctype_system_identifier_single_quoted
4392                         return
4393                 if c is '>'
4394                         parse_error()
4395                         tok_cur_tag.flag 'force-quirks', true
4396                         tok_state = tok_state_data
4397                         return tok_cur_tag
4398                 if c is '' # EOF
4399                         parse_error()
4400                         tok_state = tok_state_data
4401                         tok_cur_tag.flag 'force-quirks', true
4402                         cur -= 1 # Reconsume
4403                         return tok_cur_tag
4404                 # Anything else
4405                 parse_error()
4406                 tok_cur_tag.flag 'force-quirks', true
4407                 tok_state = tok_state_bogus_doctype
4408                 return null
4409
4410         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4411         tok_state_doctype_system_identifier_double_quoted = ->
4412                 c = txt.charAt(cur++)
4413                 if c is '"'
4414                         tok_state = tok_state_after_doctype_system_identifier
4415                         return
4416                 if c is "\u0000"
4417                         parse_error()
4418                         tok_cur_tag.system_identifier += "\ufffd"
4419                         return
4420                 if c is '>'
4421                         parse_error()
4422                         tok_cur_tag.flag 'force-quirks', true
4423                         tok_state = tok_state_data
4424                         return tok_cur_tag
4425                 if c is '' # EOF
4426                         parse_error()
4427                         tok_state = tok_state_data
4428                         tok_cur_tag.flag 'force-quirks', true
4429                         cur -= 1 # Reconsume
4430                         return tok_cur_tag
4431                 # Anything else
4432                 tok_cur_tag.system_identifier += c
4433                 return null
4434
4435         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4436         tok_state_doctype_system_identifier_single_quoted = ->
4437                 c = txt.charAt(cur++)
4438                 if c is "'"
4439                         tok_state = tok_state_after_doctype_system_identifier
4440                         return
4441                 if c is "\u0000"
4442                         parse_error()
4443                         tok_cur_tag.system_identifier += "\ufffd"
4444                         return
4445                 if c is '>'
4446                         parse_error()
4447                         tok_cur_tag.flag 'force-quirks', true
4448                         tok_state = tok_state_data
4449                         return tok_cur_tag
4450                 if c is '' # EOF
4451                         parse_error()
4452                         tok_state = tok_state_data
4453                         tok_cur_tag.flag 'force-quirks', true
4454                         cur -= 1 # Reconsume
4455                         return tok_cur_tag
4456                 # Anything else
4457                 tok_cur_tag.system_identifier += c
4458                 return null
4459
4460         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4461         tok_state_after_doctype_system_identifier = ->
4462                 c = txt.charAt(cur++)
4463                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4464                         return
4465                 if c is '>'
4466                         tok_state = tok_state_data
4467                         return tok_cur_tag
4468                 if c is '' # EOF
4469                         parse_error()
4470                         tok_state = tok_state_data
4471                         tok_cur_tag.flag 'force-quirks', true
4472                         cur -= 1 # Reconsume
4473                         return tok_cur_tag
4474                 # Anything else
4475                 parse_error()
4476                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4477                 tok_state = tok_state_bogus_doctype
4478                 return null
4479
4480         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4481         tok_state_bogus_doctype = ->
4482                 c = txt.charAt(cur++)
4483                 if c is '>'
4484                         tok_state = tok_state_data
4485                         return tok_cur_tag
4486                 if c is '' # EOF
4487                         tok_state = tok_state_data
4488                         cur -= 1 # Reconsume
4489                         return tok_cur_tag
4490                 # Anything else
4491                 return null
4492
4493         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4494         tok_state_cdata_section = ->
4495                 tok_state = tok_state_data
4496                 next_gt = txt.indexOf ']]>', cur
4497                 if next_gt is -1
4498                         val = txt.substr cur
4499                         cur = txt.length
4500                 else
4501                         val = txt.substr cur, (next_gt - cur)
4502                         cur = next_gt + 3
4503                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4504                 if val.length > 0
4505                         return new_character_token val # fixfull split
4506                 return null
4507
4508         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4509         # Don't set this as a state, just call it
4510         # returns a string (NOT a text node)
4511         parse_character_reference = (allowed_char = null, in_attr = false) ->
4512                 if cur >= txt.length
4513                         return '&'
4514                 switch c = txt.charAt(cur)
4515                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4516                                 # explicitly not a parse error
4517                                 return '&'
4518                         when ';'
4519                                 # there has to be "one or more" alnums between & and ; to be a parse error
4520                                 return '&'
4521                         when '#'
4522                                 if cur + 1 >= txt.length
4523                                         return '&'
4524                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4525                                         base = 16
4526                                         charset = hex_chars
4527                                         start = cur + 2
4528                                 else
4529                                         charset = digits
4530                                         start = cur + 1
4531                                         base = 10
4532                                 i = 0
4533                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4534                                         i += 1
4535                                 if i is 0
4536                                         return '&'
4537                                 cur = start + i
4538                                 if txt.charAt(start + i) is ';'
4539                                         cur += 1
4540                                 else
4541                                         parse_error()
4542                                 code_point = txt.substr(start, i)
4543                                 while code_point.charAt(0) is '0' and code_point.length > 1
4544                                         code_point = code_point.substr 1
4545                                 code_point = parseInt(code_point, base)
4546                                 if unicode_fixes[code_point]?
4547                                         parse_error()
4548                                         return unicode_fixes[code_point]
4549                                 else
4550                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4551                                                 parse_error()
4552                                                 return "\ufffd"
4553                                         else
4554                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4555                                                         parse_error()
4556                                                 return from_code_point code_point
4557                                 return
4558                         else
4559                                 for i in [0...31]
4560                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4561                                                 break
4562                                 if i is 0
4563                                         # exit early, because parse_error() below needs at least one alnum
4564                                         return '&'
4565                                 if txt.charAt(cur + i) is ';'
4566                                         decoded = decode_named_char_ref txt.substr(cur, i)
4567                                         i += 1 # scan past the ';' (after, so we dno't pass it to decode)
4568                                         if decoded?
4569                                                 cur += i
4570                                                 return decoded
4571                                         # else FALL THROUGH (check for match without last char(s) or ";")
4572                                 # no ';' terminator (only legacy char refs)
4573                                 max = i
4574                                 for i in [2..max] # no prefix matches, so ok to check shortest first
4575                                         c = legacy_char_refs[txt.substr(cur, i)]
4576                                         if c?
4577                                                 if in_attr
4578                                                         if txt.charAt(cur + i) is '='
4579                                                                 # "because some legacy user agents will
4580                                                                 # misinterpret the markup in those cases"
4581                                                                 parse_error()
4582                                                                 return '&'
4583                                                         if alnum.indexOf(txt.charAt(cur + i)) > -1
4584                                                                 # this makes attributes forgiving about url args
4585                                                                 return '&'
4586                                                 # ok, and besides the weird exceptions for attributes...
4587                                                 # return the matching char
4588                                                 cur += i # consume entity chars
4589                                                 parse_error() # because no terminating ";"
4590                                                 return c
4591                                 parse_error()
4592                                 return '&'
4593                 return # never reached
4594
4595         eat_next_token_if_newline = ->
4596                 old_cur = cur
4597                 t = null
4598                 until t?
4599                         t = tok_state()
4600                 if t.type is TYPE_TEXT
4601                         # definition of a newline depends on whether it was a character ref or not
4602                         if cur - old_cur is 1
4603                                 # not a character reference
4604                                 if t.text is "\u000d" or t.text is "\u000a"
4605                                         return
4606                         else
4607                                 if t.text is "\u000a"
4608                                         return
4609                 # not a "newline"
4610                 cur = old_cur
4611                 return
4612
4613         # tree constructor initialization
4614         # see comments on TYPE_TAG/etc for the structure of this data
4615         txt = args_html
4616         cur = 0
4617         doc = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4618         doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
4619         fragment_root = null # fragment parsing algorithm returns children of this
4620         open_els = []
4621         afe = [] # active formatting elements
4622         template_ins_modes = []
4623         ins_mode = ins_mode_initial
4624         original_ins_mode = ins_mode # TODO check spec
4625         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4626         flag_frameset_ok = true
4627         flag_parsing = true
4628         flag_foster_parenting = false
4629         form_element_pointer = null
4630         temporary_buffer = null
4631         pending_table_character_tokens = []
4632         head_element_pointer = null
4633         flag_fragment_parsing = false
4634         context_element = null
4635         prev_node_id = 0 # just for debugging
4636
4637         # tokenizer initialization
4638         tok_state = tok_state_data
4639
4640         parse_init = ->
4641                 # fragment parsing (text arg)
4642                 if args.fragment?
4643                         # this handles the fragment from the tests in the format described here:
4644                         # https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
4645                         f = args.fragment
4646                         ns = NS_HTML
4647                         if f.substr(0, 5) is 'math '
4648                                 f = f.substr 5
4649                                 ns = NS_MATHML
4650                         else if f.substr(0, 4) is 'svg '
4651                                 f = f.substr 4
4652                                 ns = NS_SVG
4653                         t = new_open_tag f
4654                         context_element = token_to_element t, ns
4655                         context_element.document = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4656                         context_element.document.flag 'quirks mode', QUIRKS_NO
4657                 # fragment parsing (Node arg)
4658                 if args.context?
4659                         context_element = args.context
4660
4661                 # http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4662                 # fragment parsing algorithm
4663                 if context_element?
4664                         flag_fragment_parsing = true
4665                         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4666                         # search up the tree from context, to try to find it's document,
4667                         # because this file only puts a "document" property on the root
4668                         # element.
4669                         old_doc = null
4670                         el = context_element
4671                         loop
4672                                 if el.document?
4673                                         old_doc = el.document
4674                                         break
4675                                 if el.parent
4676                                         el = el.parent
4677                                 else
4678                                         break
4679                         if old_doc
4680                                 doc.flag 'quirks mode', old_doc.flag 'quirks mode'
4681                         # set tok_state
4682                         if context_element.namespace is NS_HTML
4683                                 switch context_element.name
4684                                         when 'title', 'textarea'
4685                                                 tok_state = tok_state_rcdata
4686                                         when 'style', 'xmp', 'iframe', 'noembed', 'noframes'
4687                                                 tok_state = tok_state_rawtext
4688                                         when 'script'
4689                                                 tok_state = tok_state_script_data
4690                                         when 'noscript'
4691                                                 if flag_scripting
4692                                                         tok_state = tok_state_rawtext
4693                                         when 'plaintext'
4694                                                 tok_state = tok_state_plaintext
4695                         fragment_root = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4696                         doc.children.push fragment_root
4697                         fragment_root.document = doc
4698                         open_els = [fragment_root]
4699                         if context_element.name is 'template' and context_element.namespace is NS_HTML
4700                                 template_ins_modes.unshift ins_mode_in_template
4701                         # fixfull create token for context (it should have it's original one already)
4702                         reset_ins_mode()
4703                         # set form_element pointer... in the foreign doc?!
4704                         el = context_element
4705                         loop
4706                                 if el.name is 'form' and el.namespace is NS_HTML
4707                                         form_element_pointer = el
4708                                         break
4709                                 if el.parent
4710                                         el = el.parent
4711                                 else
4712                                         break
4713
4714                 # text pre-processing
4715                 # FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4716                 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4717                 txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4718
4719                 return
4720
4721         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4722         parse_main_loop = ->
4723                 while flag_parsing
4724                         t = tok_state()
4725                         if t?
4726                                 process_token t
4727                                 # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4728                 return
4729         parse_init()
4730         parse_main_loop()
4731
4732         if flag_fragment_parsing
4733                 return fragment_root.children
4734         return doc.children
4735
4736 exports.parse_html = parse_html
4737 exports.debug_log_reset = debug_log_reset
4738 exports.debug_log_each = debug_log_each
4739 exports.TYPE_TAG = TYPE_TAG
4740 exports.TYPE_TEXT = TYPE_TEXT
4741 exports.TYPE_COMMENT = TYPE_COMMENT
4742 exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4743 exports.NS_HTML = NS_HTML
4744 exports.NS_MATHML = NS_MATHML
4745 exports.NS_SVG = NS_SVG
4746 exports.QUIRKS_NO = QUIRKS_NO
4747 exports.QUIRKS_LIMITED = QUIRKS_LIMITED
4748 exports.QUIRKS_YES = QUIRKS_YES