JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
handle nulls properly
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
24 #
25 # Deviations from that spec:
26 #
27 #   Purposeful: search this file for "WHATWG"
28 #
29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
30
31
32 # stacks/lists
33 #
34 # the spec uses many different words to indicate which ends of lists/stacks
35 # it is talking about (and relative movement within the lists/stacks). This
36 # section explains. I'm implementing "lists" (afe and open_els) the same way
37 # (both as stacks)
38 #
39 # stacks grow downward (current element is index=0)
40 #
41 # example: open_els = [a, b, c, d, e, f, g]
42 #
43 # "grows downwards" means it's visualized like this: (index: el, names)
44 #
45 #   6: g "start of the list", "topmost", "first"
46 #   5: f
47 #   4: e "previous" (to d), "above", "before"
48 #   3: d   (previous/next are relative to this element)
49 #   2: c "next", "after", "lower", "below"
50 #   1: b
51 #   0: a "end of the list", "current node", "bottommost", "last"
52
53
54 # browser: when no `module.exports` is found, create window.wheic and point a
55 # stand-in `module` at it. note: to get this to run outside a browser, you'll
56 # also have to write a native implementation of decode_named_char_ref()
57 unless module?.exports?
58         window.wheic = {}
59         module = exports: window.wheic # NOTE(review): assigning `module` here makes it a file-local var in the compiled JS, which may hide a real CommonJS `module` from the check above -- confirm
60
61 from_code_point = (x) -> # string for Unicode code point x
62         # use the native implementation when there is one
63         return String.fromCodePoint x if String.fromCodePoint?
64         # a code point up to 0xffff is a single UTF-16 code unit
65         return String.fromCharCode x if x <= 0xffff
66         # anything above needs a surrogate pair: high 10 bits, then low 10 bits
67         x -= 0x10000
68         return String.fromCharCode 0xd800 + (x >> 10), 0xdc00 + (x % 0x400)
69
70 # Each node is an object of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
73 TYPE_COMMENT = 2
74 TYPE_DOCTYPE = 3
75 # the following types are emitted by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
78 TYPE_EOF = 6
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
81
82 # namespace constants
83 NS_HTML = 1
84 NS_MATHML = 2
85 NS_SVG = 3
86
87 # quirks mode constants
88 QUIRKS_NO = 1
89 QUIRKS_LIMITED = 2
90 QUIRKS_YES = 3
91
92 g_debug_log = [] # strings collected by debug_log(), in call order
93 debug_log_reset = -> # discard everything collected so far
94         g_debug_log = []
95 debug_log = (str) -> # append one string to the log
96         g_debug_log.push str
97 debug_log_each = (cb) ->
98         # call cb once per logged string, in the order they were logged
99         cb str for str in g_debug_log
100
101 prev_node_id = 0 # most recently auto-assigned node id (see constructor)
102 class Node # used for tokenizer tokens and tree nodes alike; @type tells them apart
103         constructor: (type, args = {}) -> # args: optional initial values, keyed like the fields below
104                 @type = type # one of the TYPE_* constants above
105                 @name = args.name ? '' # tag name
106                 @text = args.text ? '' # contents for text/comment nodes
107                 @attrs = args.attrs ? {}
108                 @attrs_a = args.attrs_a ? args.attr_k ? [] # attrs in progress, TYPE_START_TAG only (old "attr_k" key still honoured)
109                 @children = args.children ? []
110                 @namespace = args.namespace ? NS_HTML
111                 @parent = args.parent ? null
112                 @token = args.token ? null
113                 @flags = args.flags ? {}
114                 if args.id?
115                         @id = "#{args.id}+" # caller-supplied id, marked with a trailing "+"
116                 else
117                         @id = "#{++prev_node_id}" # next number in the file-wide sequence
118         acknowledge_self_closing: -> # set did_self_close on @token when there is one, else on this node
119                 if @token?
120                         @token.flag 'did_self_close', true
121                 else
122                         @flag 'did_self_close', true
123         flag: (key, value = null) -> # setter when value is given (non-null), getter otherwise
124                 if value?
125                         @flags[key] = value
126                 else
127                         return @flags[key]
128         serialize: (shallow = false, show_ids = false) -> # for unit tests; returns a string
129                 ret = ''
130                 switch @type
131                         when TYPE_TAG
132                                 ret += 'tag:'
133                                 ret += JSON.stringify @name
134                                 ret += ','
135                                 if show_ids
136                                         ret += "##{@id},"
137                                 if shallow
138                                         break # shallow: stop after the name (and id), skip attrs/children
139                                 attr_keys = []
140                                 for k of @attrs
141                                         attr_keys.push k
142                                 attr_keys.sort() # sorted keys give the same output whatever the insertion order
143                                 ret += '{'
144                                 sep = ''
145                                 for k in attr_keys
146                                         ret += sep
147                                         sep = ','
148                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
149                                 ret += '},['
150                                 sep = ''
151                                 for c in @children
152                                         ret += sep
153                                         sep = ','
154                                         ret += c.serialize shallow, show_ids
155                                 ret += ']'
156                         when TYPE_TEXT
157                                 ret += 'text:'
158                                 ret += JSON.stringify @text
159                         when TYPE_COMMENT
160                                 ret += 'comment:'
161                                 ret += JSON.stringify @text
162                         when TYPE_DOCTYPE
163                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
164                         when TYPE_AFE_MARKER
165                                 ret += 'marker'
166                         when TYPE_AAA_BOOKMARK
167                                 ret += 'aaa_bookmark'
168                         else
169                                 ret += 'unknown:'
170                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
171                 return ret
172
173 # helpers: (only take args that are normally known when parser creates nodes)
174 new_open_tag = (name) -> # start tag token
175         new Node TYPE_START_TAG, {name}
176 new_end_tag = (name) -> # end tag token
177         new Node TYPE_END_TAG, {name}
178 new_element = (name) -> # TYPE_TAG node
179         new Node TYPE_TAG, {name}
180 new_text_node = (txt) -> # TYPE_TEXT node holding txt
181         new Node TYPE_TEXT, text: txt
182 new_character_token = new_text_node # same function under a second name
183 new_comment_token = (txt) -> # TYPE_COMMENT node holding txt
184         new Node TYPE_COMMENT, text: txt
185 new_doctype_token = (name) -> # TYPE_DOCTYPE node
186         new Node TYPE_DOCTYPE, {name}
187 new_eof_token = -> # TYPE_EOF node
188         new Node TYPE_EOF
189 new_afe_marker = -> # TYPE_AFE_MARKER node
190         new Node TYPE_AFE_MARKER
191 new_aaa_bookmark = -> # TYPE_AAA_BOOKMARK node
192         new Node TYPE_AAA_BOOKMARK
193
194 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
195 uc_alpha = lc_alpha.toUpperCase() # "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
196 digits = "0123456789"
197 alnum = [lc_alpha, uc_alpha, digits].join ''
198 hex_chars = "#{digits}abcdefABCDEF"
199
200 is_uc_alpha = (str) -> # exactly one character, and it is in A-Z
201         str.length is 1 and uc_alpha.indexOf(str) isnt -1
202 is_lc_alpha = (str) -> # exactly one character, and it is in a-z
203         str.length is 1 and lc_alpha.indexOf(str) isnt -1
204
205 # some SVG elements have dashes in them
206 tag_name_chars = alnum + "-"
207
208 # http://www.w3.org/TR/html5/infrastructure.html#space-character
209 space_chars = "\u0009\u000a\u000c\u000d\u0020" # tab, LF, FF, CR, space
210 is_space = (txt) -> # exactly one character, and it is one of space_chars
211         txt.length is 1 and space_chars.indexOf(txt) isnt -1
212 is_space_tok = (t) -> # TYPE_TEXT token whose text is exactly one space character
213         t.type is TYPE_TEXT and is_space t.text
214
215 is_input_hidden_tok = (t) -> # is t a start tag token whose "type" attribute is "hidden"?
216         return false if t.type isnt TYPE_START_TAG
217         for a in t.attrs_a
218                 # the first "type" entry encountered settles it either way
219                 continue unless a[0] is 'type'
220                 return a[1].toLowerCase() is 'hidden'
221         # no "type" attribute present
222         return false
223
224 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
225 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
226
227 unicode_fixes = {}
228 unicode_fixes[0x00] = "\uFFFD"
229 unicode_fixes[0x80] = "\u20AC"
230 unicode_fixes[0x82] = "\u201A"
231 unicode_fixes[0x83] = "\u0192"
232 unicode_fixes[0x84] = "\u201E"
233 unicode_fixes[0x85] = "\u2026"
234 unicode_fixes[0x86] = "\u2020"
235 unicode_fixes[0x87] = "\u2021"
236 unicode_fixes[0x88] = "\u02C6"
237 unicode_fixes[0x89] = "\u2030"
238 unicode_fixes[0x8A] = "\u0160"
239 unicode_fixes[0x8B] = "\u2039"
240 unicode_fixes[0x8C] = "\u0152"
241 unicode_fixes[0x8E] = "\u017D"
242 unicode_fixes[0x91] = "\u2018"
243 unicode_fixes[0x92] = "\u2019"
244 unicode_fixes[0x93] = "\u201C"
245 unicode_fixes[0x94] = "\u201D"
246 unicode_fixes[0x95] = "\u2022"
247 unicode_fixes[0x96] = "\u2013"
248 unicode_fixes[0x97] = "\u2014"
249 unicode_fixes[0x98] = "\u02DC"
250 unicode_fixes[0x99] = "\u2122"
251 unicode_fixes[0x9A] = "\u0161"
252 unicode_fixes[0x9B] = "\u203A"
253 unicode_fixes[0x9C] = "\u0153"
254 unicode_fixes[0x9E] = "\u017E"
255 unicode_fixes[0x9F] = "\u0178"
256
257 quirks_yes_pi_prefixes = [
258         "+//silmaril//dtd html pro v0r11 19970101//"
259         "-//as//dtd html 3.0 aswedit + extensions//"
260         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
261         "-//ietf//dtd html 2.0 level 1//"
262         "-//ietf//dtd html 2.0 level 2//"
263         "-//ietf//dtd html 2.0 strict level 1//"
264         "-//ietf//dtd html 2.0 strict level 2//"
265         "-//ietf//dtd html 2.0 strict//"
266         "-//ietf//dtd html 2.0//"
267         "-//ietf//dtd html 2.1e//"
268         "-//ietf//dtd html 3.0//"
269         "-//ietf//dtd html 3.2 final//"
270         "-//ietf//dtd html 3.2//"
271         "-//ietf//dtd html 3//"
272         "-//ietf//dtd html level 0//"
273         "-//ietf//dtd html level 1//"
274         "-//ietf//dtd html level 2//"
275         "-//ietf//dtd html level 3//"
276         "-//ietf//dtd html strict level 0//"
277         "-//ietf//dtd html strict level 1//"
278         "-//ietf//dtd html strict level 2//"
279         "-//ietf//dtd html strict level 3//"
280         "-//ietf//dtd html strict//"
281         "-//ietf//dtd html//"
282         "-//metrius//dtd metrius presentational//"
283         "-//microsoft//dtd internet explorer 2.0 html strict//"
284         "-//microsoft//dtd internet explorer 2.0 html//"
285         "-//microsoft//dtd internet explorer 2.0 tables//"
286         "-//microsoft//dtd internet explorer 3.0 html strict//"
287         "-//microsoft//dtd internet explorer 3.0 html//"
288         "-//microsoft//dtd internet explorer 3.0 tables//"
289         "-//netscape comm. corp.//dtd html//"
290         "-//netscape comm. corp.//dtd strict html//"
291         "-//o'reilly and associates//dtd html 2.0//"
292         "-//o'reilly and associates//dtd html extended 1.0//"
293         "-//o'reilly and associates//dtd html extended relaxed 1.0//"
294         "-//sq//dtd html 2.0 hotmetal + extensions//"
295         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
296         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
297         "-//spyglass//dtd html 2.0 extended//"
298         "-//sun microsystems corp.//dtd hotjava html//"
299         "-//sun microsystems corp.//dtd hotjava strict html//"
300         "-//w3c//dtd html 3 1995-03-24//"
301         "-//w3c//dtd html 3.2 draft//"
302         "-//w3c//dtd html 3.2 final//"
303         "-//w3c//dtd html 3.2//"
304         "-//w3c//dtd html 3.2s draft//"
305         "-//w3c//dtd html 4.0 frameset//"
306         "-//w3c//dtd html 4.0 transitional//"
307         "-//w3c//dtd html experimental 19960712//"
308         "-//w3c//dtd html experimental 970421//"
309         "-//w3c//dtd w3 html//"
310         "-//w3o//dtd w3 html 3.0//"
311         "-//webtechs//dtd mozilla html 2.0//"
312         "-//webtechs//dtd mozilla html//"
313 ]
314
315 # These are the character references that don't need a terminating semicolon
316 # min length: 2, max: 6, none are a prefix of any other.
317 legacy_char_refs = {
318         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
319         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
320         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
321         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
322         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
323         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
324         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
325         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
326         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
327         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
328         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
329         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
330         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
331         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
332         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
333         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
334         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
335         yen: '¥', yuml: 'ÿ'
336 }
337
338 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
339 raw_text_elements = ['script', 'style']
340 escapable_raw_text_elements = ['textarea', 'title']
341 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
342 svg_elements = [
343         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
344         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
345         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
346         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
347         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
348         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
349         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
350         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
351         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
352         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
353         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
354         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
355         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
356         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
357         'view', 'vkern'
358 ]
359
360 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
361 mathml_elements = [
362         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
363         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
364         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
365         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
366         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
367         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
368         'determinant', 'diff', 'divergence', 'divide', 'domain',
369         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
370         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
371         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
372         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
373         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
374         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
375         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
376         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
377         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
378         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
379         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
380         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
381         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
382         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
383         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
384         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
385         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
386         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
387         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
388         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
389         'vectorproduct', 'xor'
390 ]
391 # foreign_elements = [svg_elements..., mathml_elements...]
392 #normal_elements = All other allowed HTML elements are normal elements.
393
394 special_elements = { # element name -> namespace constant; el_is_special() tests special_elements[name] is namespace
395         # HTML:
396         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
397         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
398         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
399         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
400         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
401         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
402         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
403         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
404         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
405         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
406         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
407
408         menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
409
410         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
411         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
412         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
413         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
414         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
415         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
416         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
417
418         # MathML:
419         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
420         'annotation-xml':NS_MATHML,
421
422         # SVG: NOTE(review): `title` is listed under HTML above as well; one key can hold only one value, so presumably only this NS_SVG entry survives (if the duplicate compiles at all) -- confirm and fix together with el_is_special
423         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
424 }
425
426 formatting_elements = {
427          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
428          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
429          u: true
430 }
431
432 mathml_text_integration = {
433         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
434 }
435 is_mathml_text_integration_point = (el) ->
436         return mathml_text_integration[el.name] is el.namespace
437 is_html_integration = (el) -> # DON'T PASS A TOKEN
438         switch el.namespace
439                 when NS_MATHML
440                         # only annotation-xml can qualify, and only with an encoding attribute
441                         return false unless el.name is 'annotation-xml'
442                         return false unless el.attrs.encoding?
443                         # the encoding value is compared case-insensitively
444                         return el.attrs.encoding.toLowerCase() in ['text/html', 'application/xhtml+xml']
445                 when NS_SVG
446                         # these three SVG elements qualify regardless of attributes
447                         return el.name in ['foreignObject', 'desc', 'title']
448                 else
449                         return false
450
451 h_tags = {
452         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
453 }
454
455 foster_parenting_targets = {
456         table: NS_HTML
457         tbody: NS_HTML
458         tfoot: NS_HTML
459         thead: NS_HTML
460         tr: NS_HTML
461 }
462
463 end_tag_implied = {
464         dd: NS_HTML
465         dt: NS_HTML
466         li: NS_HTML
467         option: NS_HTML
468         optgroup: NS_HTML
469         p: NS_HTML
470         rb: NS_HTML
471         rp: NS_HTML
472         rt: NS_HTML
473         rtc: NS_HTML
474 }
475
476 el_is_special = (e) ->
477         return special_elements[e.name] is e.namespace
478
479 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML } # the three names excluded below
480 el_is_special_not_adp = (el) -> # special element that is not an HTML address, div or p
481         el_is_special(el) and adp_els[el.name] isnt el.namespace
482
483 svg_name_fixes = {
484         altglyph: 'altGlyph'
485         altglyphdef: 'altGlyphDef'
486         altglyphitem: 'altGlyphItem'
487         animatecolor: 'animateColor'
488         animatemotion: 'animateMotion'
489         animatetransform: 'animateTransform'
490         clippath: 'clipPath'
491         feblend: 'feBlend'
492         fecolormatrix: 'feColorMatrix'
493         fecomponenttransfer: 'feComponentTransfer'
494         fecomposite: 'feComposite'
495         feconvolvematrix: 'feConvolveMatrix'
496         fediffuselighting: 'feDiffuseLighting'
497         fedisplacementmap: 'feDisplacementMap'
498         fedistantlight: 'feDistantLight'
499         fedropshadow: 'feDropShadow'
500         feflood: 'feFlood'
501         fefunca: 'feFuncA'
502         fefuncb: 'feFuncB'
503         fefuncg: 'feFuncG'
504         fefuncr: 'feFuncR'
505         fegaussianblur: 'feGaussianBlur'
506         feimage: 'feImage'
507         femerge: 'feMerge'
508         femergenode: 'feMergeNode'
509         femorphology: 'feMorphology'
510         feoffset: 'feOffset'
511         fepointlight: 'fePointLight'
512         fespecularlighting: 'feSpecularLighting'
513         fespotlight: 'feSpotLight'
514         fetile: 'feTile'
515         feturbulence: 'feTurbulence'
516         foreignobject: 'foreignObject'
517         glyphref: 'glyphRef'
518         lineargradient: 'linearGradient'
519         radialgradient: 'radialGradient'
520         textpath: 'textPath'
521 }
522 svg_attribute_fixes = {
523         attributename: 'attributeName'
524         attributetype: 'attributeType'
525         basefrequency: 'baseFrequency'
526         baseprofile: 'baseProfile'
527         calcmode: 'calcMode'
528         clippathunits: 'clipPathUnits'
529         contentscripttype: 'contentScriptType'
530         contentstyletype: 'contentStyleType'
531         diffuseconstant: 'diffuseConstant'
532         edgemode: 'edgeMode'
533         externalresourcesrequired: 'externalResourcesRequired'
534         # WHATWG removes this: filterres: 'filterRes'
535         filterunits: 'filterUnits'
536         glyphref: 'glyphRef'
537         gradienttransform: 'gradientTransform'
538         gradientunits: 'gradientUnits'
539         kernelmatrix: 'kernelMatrix'
540         kernelunitlength: 'kernelUnitLength'
541         keypoints: 'keyPoints'
542         keysplines: 'keySplines'
543         keytimes: 'keyTimes'
544         lengthadjust: 'lengthAdjust'
545         limitingconeangle: 'limitingConeAngle'
546         markerheight: 'markerHeight'
547         markerunits: 'markerUnits'
548         markerwidth: 'markerWidth'
549         maskcontentunits: 'maskContentUnits'
550         maskunits: 'maskUnits'
551         numoctaves: 'numOctaves'
552         pathlength: 'pathLength'
553         patterncontentunits: 'patternContentUnits'
554         patterntransform: 'patternTransform'
555         patternunits: 'patternUnits'
556         pointsatx: 'pointsAtX'
557         pointsaty: 'pointsAtY'
558         pointsatz: 'pointsAtZ'
559         preservealpha: 'preserveAlpha'
560         preserveaspectratio: 'preserveAspectRatio'
561         primitiveunits: 'primitiveUnits'
562         refx: 'refX'
563         refy: 'refY'
564         repeatcount: 'repeatCount'
565         repeatdur: 'repeatDur'
566         requiredextensions: 'requiredExtensions'
567         requiredfeatures: 'requiredFeatures'
568         specularconstant: 'specularConstant'
569         specularexponent: 'specularExponent'
570         spreadmethod: 'spreadMethod'
571         startoffset: 'startOffset'
572         stddeviation: 'stdDeviation'
573         stitchtiles: 'stitchTiles'
574         surfacescale: 'surfaceScale'
575         systemlanguage: 'systemLanguage'
576         tablevalues: 'tableValues'
577         targetx: 'targetX'
578         targety: 'targetY'
579         textlength: 'textLength'
580         viewbox: 'viewBox'
581         viewtarget: 'viewTarget'
582         xchannelselector: 'xChannelSelector'
583         ychannelselector: 'yChannelSelector'
584         zoomandpan: 'zoomAndPan'
585 }
586 foreign_attr_fixes = {
587         'xlink:actuate': 'xlink actuate'
588         'xlink:arcrole': 'xlink arcrole'
589         'xlink:href': 'xlink href'
590         'xlink:role': 'xlink role'
591         'xlink:show': 'xlink show'
592         'xlink:title': 'xlink title'
593         'xlink:type': 'xlink type'
594         'xml:base': 'xml base'
595         'xml:lang': 'xml lang'
596         'xml:space': 'xml space'
597         'xmlns': 'xmlns'
598         'xmlns:xlink': 'xmlns xlink'
599 }
600 adjust_mathml_attributes = (t) -> # rename definitionurl to definitionURL in t.attrs_a, in place
601         for a in t.attrs_a when a[0] is 'definitionurl'
602                 a[0] = 'definitionURL'
603         # bare return: nothing is handed back to the caller
604         return
605 adjust_svg_attributes = (t) -> # rewrite names in t.attrs_a per svg_attribute_fixes, in place; returns nothing
606         for a in t.attrs_a
607                 if Object::hasOwnProperty.call svg_attribute_fixes, a[0] # own keys only, so a name like "constructor" is not replaced by an inherited member
608                         a[0] = svg_attribute_fixes[a[0]]
609         return
610 adjust_foreign_attributes = (t) -> # rewrite names in t.attrs_a per foreign_attr_fixes, in place; returns nothing
611         # fixfull
612         for a in t.attrs_a
613                 if Object::hasOwnProperty.call foreign_attr_fixes, a[0] # own keys only, so a name like "constructor" is not replaced by an inherited member
614                         a[0] = foreign_attr_fixes[a[0]]
615         return
616
617 # decode_named_char_ref()
618 #
619 # The list of named character references is _huge_ so ask the browser to decode
620 # for us instead of wasting bandwidth/space on including the table here.
621 #
622 # Pass without the "&" but with the ";" examples:
623 #    for "&amp" pass "amp;"
624 #    for "&#x2032" pass "#x2032;"
625 g_dncr = {
626         cache: {}
627         textarea: document.createElement('textarea')
628 }
629 # TODO test this in IE8
630 decode_named_char_ref = (txt) -> # decoded text, or null when the textarea hands the reference back unchanged
631         txt = "&#{txt}"
632         # earlier answers are remembered under the "&"-prefixed text
633         return g_dncr.cache[txt] if g_dncr.cache[txt]?
634         # write it in as HTML, read it back as the textarea's value
635         g_dncr.textarea.innerHTML = txt
636         decoded = g_dncr.textarea.value
637         return if decoded is txt then null else (g_dncr.cache[txt] = decoded)
638
639 parse_html = (args) ->
640         txt = null
641         cur = null # index of next char in txt to be parsed
642         # declare doc and tokenizer variables so they're in scope below
643         doc = null
644         open_els = null # stack of open elements
645         afe = null # active formatting elements
646         template_ins_modes = null
647         ins_mode = null
648         original_ins_mode = null
649         tok_state = null
650         tok_cur_tag = null # partially parsed tag
651         flag_scripting = null
652         flag_frameset_ok = null
653         flag_parsing = null
654         flag_foster_parenting = null
655         form_element_pointer = null
656         temporary_buffer = null
657         pending_table_character_tokens = null
658         head_element_pointer = null
659         flag_fragment_parsing = null
660         context_element = null
661
662         stop_parsing = ->
663                 flag_parsing = false
664
665         parse_error = ->
666                 if args.error_cb?
667                         args.error_cb cur
668                 else
669                         console.log "Parse error at character #{cur} of #{txt.length}"
670
671         # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
672         # "Noah's Ark clause" but with three
673         afe_push = (new_el) ->
674                 matches = 0
675                 for el, i in afe
676                         # stop counting once a marker is reached
677                         break if el.type is TYPE_AFE_MARKER
678                         continue unless el.name is new_el.name and el.namespace is new_el.namespace
679                         # attribute sets must be identical, so compare in both directions
680                         attrs_match = true
681                         for k, v of el.attrs when new_el.attrs[k] isnt v
682                                 attrs_match = false
683                                 break
684                         continue unless attrs_match
685                         for k, v of new_el.attrs when el.attrs[k] isnt v
686                                 attrs_match = false
687                                 break
688                         continue unless attrs_match
689                         matches += 1
690                         if matches is 3
691                                 # third match reached: remove it and stop looking
692                                 afe.splice i, 1
693                                 break
694                 afe.unshift new_el
695         afe_push_marker = ->
696                 afe.unshift new_afe_marker()
697
698         # the functions below implement the Tree Construction algorithm
699         # http://www.w3.org/TR/html5/syntax.html#tree-construction
700
701         # But first... the helpers
702         template_tag_is_open = -> # is an HTML-namespace "template" element anywhere in open_els?
703                 for el in open_els
704                         continue unless el.namespace is NS_HTML
705                         return true if el.name is 'template'
706                 return false
707         is_in_scope_x = (tag_name, scope, namespace) ->
708                 for el in open_els
709                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
710                                 return true
711                         if scope[el.name] is el.namespace
712                                 return false
713                 return false
714         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
715                 for el in open_els
716                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
717                                 return true
718                         if scope[el.name] is el.namespace
719                                 return false
720                         if scope2[el.name] is el.namespace
721                                 return false
722                 return false
723         standard_scopers = {
724                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
725                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
726                 template: NS_HTML,
727
728                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
729                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
730
731                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
732         }
733         button_scopers = button: NS_HTML
734         li_scopers = ol: NS_HTML, ul: NS_HTML
735         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
736         is_in_scope = (tag_name, namespace = null) ->
737                 return is_in_scope_x tag_name, standard_scopers, namespace
738         is_in_button_scope = (tag_name, namespace = null) ->
739                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
740         is_in_table_scope = (tag_name, namespace = null) ->
741                 return is_in_scope_x tag_name, table_scopers, namespace
742         # aka is_in_list_item_scope
743         is_in_li_scope = (tag_name, namespace = null) ->
744                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
745         is_in_select_scope = (tag_name, namespace = null) ->
746                 for t in open_els
747                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
748                                 return true
749                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
750                                 return false
751                 return false
752         # this checks for a particular element, not by name
753         # this requires a namespace match
754         el_is_in_scope = (needle) ->
755                 for el in open_els
756                         if el is needle
757                                 return true
758                         if standard_scopers[el.name] is el.namespace
759                                 return false
760                 return false
761
762         clear_to_table_stopers = {
763                 'table': true
764                 'template': true
765                 'html': true
766         }
767         clear_stack_to_table_context = ->
768                 loop
769                         if clear_to_table_stopers[open_els[0].name]?
770                                 break
771                         open_els.shift()
772                 return
773         clear_to_table_body_stopers = {
774                 tbody: NS_HTML
775                 tfoot: NS_HTML
776                 thead: NS_HTML
777                 template: NS_HTML
778                 html: NS_HTML
779         }
780         clear_stack_to_table_body_context = ->
781                 loop
782                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
783                                 break
784                         open_els.shift()
785                 return
786         clear_to_table_row_stopers = {
787                 'tr': true
788                 'template': true
789                 'html': true
790         }
791         clear_stack_to_table_row_context = ->
792                 loop
793                         if clear_to_table_row_stopers[open_els[0].name]?
794                                 break
795                         open_els.shift()
796                 return
797         clear_afe_to_marker = ->
798                 loop
799                         return unless afe.length > 0 # this happens in fragment case, ?spec error
800                         el = afe.shift()
801                         if el.type is TYPE_AFE_MARKER
802                                 return
803                 return
804
805         # 8.2.3.1 ...
806         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
807         reset_ins_mode = ->
808                 # 1. Let last be false.
809                 last = false
810                 # 2. Let node be the last node in the stack of open elements.
811                 node_i = 0
812                 node = open_els[node_i]
813                 # 3. Loop: If node is the first node in the stack of open elements,
814                 # then set last to true, and, if the parser was originally created as
815                 # part of the HTML fragment parsing algorithm (fragment case) set node
816                 # to the context element.
817                 loop
818                         if node_i is open_els.length - 1
819                                 last = true
820                                 # fixfull (fragment case)
821
822                         # 4. If node is a select element, run these substeps:
823                         if node.name is 'select' and node.namespace is NS_HTML
824                                 # 1. If last is true, jump to the step below labeled done.
825                                 unless last
826                                         # 2. Let ancestor be node.
827                                         ancestor_i = node_i
828                                         ancestor = node
829                                         # 3. Loop: If ancestor is the first node in the stack of
830                                         # open elements, jump to the step below labeled done.
831                                         loop
832                                                 if ancestor_i is open_els.length - 1
833                                                         break
834                                                 # 4. Let ancestor be the node before ancestor in the stack
835                                                 # of open elements.
836                                                 ancestor_i += 1
837                                                 ancestor = open_els[ancestor_i]
838                                                 # 5. If ancestor is a template node, jump to the step below
839                                                 # labeled done.
840                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
841                                                         break
842                                                 # 6. If ancestor is a table node, switch the insertion mode
843                                                 # to "in select in table" and abort these steps.
844                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
845                                                         ins_mode = ins_mode_in_select_in_table
846                                                         return
847                                                 # 7. Jump back to the step labeled loop.
848                                 # 8. Done: Switch the insertion mode to "in select" and abort
849                                 # these steps.
850                                 ins_mode = ins_mode_in_select
851                                 return
852                         # 5. If node is a td or th element and last is false, then switch
853                         # the insertion mode to "in cell" and abort these steps.
854                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
855                                 ins_mode = ins_mode_in_cell
856                                 return
857                         # 6. If node is a tr element, then switch the insertion mode to "in
858                         # row" and abort these steps.
859                         if node.name is 'tr' and node.namespace is NS_HTML
860                                 ins_mode = ins_mode_in_row
861                                 return
862                         # 7. If node is a tbody, thead, or tfoot element, then switch the
863                         # insertion mode to "in table body" and abort these steps.
864                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
865                                 ins_mode = ins_mode_in_table_body
866                                 return
867                         # 8. If node is a caption element, then switch the insertion mode
868                         # to "in caption" and abort these steps.
869                         if node.name is 'caption' and node.namespace is NS_HTML
870                                 ins_mode = ins_mode_in_caption
871                                 return
872                         # 9. If node is a colgroup element, then switch the insertion mode
873                         # to "in column group" and abort these steps.
874                         if node.name is 'colgroup' and node.namespace is NS_HTML
875                                 ins_mode = ins_mode_in_column_group
876                                 return
877                         # 10. If node is a table element, then switch the insertion mode to
878                         # "in table" and abort these steps.
879                         if node.name is 'table' and node.namespace is NS_HTML
880                                 ins_mode = ins_mode_in_table
881                                 return
882                         # 11. If node is a template element, then switch the insertion mode
883                         # to the current template insertion mode and abort these steps.
884                         if node.name is 'template' and node.namespace is NS_HTML
885                                 ins_mode = template_ins_modes[0]
886                                 return
887                         # 12. If node is a head element and last is true, then switch the
888                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
889                         # these steps. (fragment case)
890                         if node.name is 'head' and node.namespace is NS_HTML and last
891                                 ins_mode = ins_mode_in_body
892                                 return
893                         # 13. If node is a head element and last is false, then switch the
894                         # insertion mode to "in head" and abort these steps.
895                         if node.name is 'head' and node.namespace is NS_HTML and last is false
896                                 ins_mode = ins_mode_in_head
897                                 return
898                         # 14. If node is a body element, then switch the insertion mode to
899                         # "in body" and abort these steps.
900                         if node.name is 'body' and node.namespace is NS_HTML
901                                 ins_mode = ins_mode_in_body
902                                 return
903                         # 15. If node is a frameset element, then switch the insertion mode
904                         # to "in frameset" and abort these steps. (fragment case)
905                         if node.name is 'frameset' and node.namespace is NS_HTML
906                                 ins_mode = ins_mode_in_frameset
907                                 return
908                         # 16. If node is an html element, run these substeps:
909                         if node.name is 'html' and node.namespace is NS_HTML
910                                 # 1. If the head element pointer is null, switch the insertion
911                                 # mode to "before head" and abort these steps. (fragment case)
912                                 if head_element_pointer is null
913                                         ins_mode = ins_mode_before_head
914                                 else
915                                         # 2. Otherwise, the head element pointer is not null,
916                                         # switch the insertion mode to "after head" and abort these
917                                         # steps.
918                                         ins_mode = ins_mode_after_head
919                                 return
920                         # 17. If last is true, then switch the insertion mode to "in body"
921                         # and abort these steps. (fragment case)
922                         if last
923                                 ins_mode = ins_mode_in_body
924                                 return
925                         # 18. Let node now be the node before node in the stack of open
926                         # elements.
927                         node_i += 1
928                         node = open_els[node_i]
929                         # 19. Return to the step labeled loop.
930
931         # 8.2.3.2
932
933         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
934         adjusted_current_node = ->
935                 if open_els.length is 1 and flag_fragment_parsing
936                         return context_element
937                 return open_els[0]
938
939         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
940         # this implementation is structured (mostly) as described at the link above.
941         # capitalized comments are the "labels" described at the link above.
942         reconstruct_afe = ->
943                 return if afe.length is 0
944                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
945                         return
946                 # Rewind
947                 i = 0
948                 loop
949                         if i is afe.length - 1
950                                 break
951                         i += 1
952                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
953                                 i -= 1 # Advance
954                                 break
955                 # Create
956                 loop
957                         el = insert_html_element afe[i].token
958                         afe[i] = el
959                         break if i is 0
960                         i -= 1 # Advance
961
962         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
963         # adoption agency algorithm
964         # overview here:
965         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
966         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
967         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
968         adoption_agency = (subject) ->
969                 debug_log "adoption_agency()"
970                 debug_log "tree: #{serialize_els doc.children, false, true}"
971                 debug_log "open_els: #{serialize_els open_els, true, true}"
972                 debug_log "afe: #{serialize_els afe, true, true}"
973 # this block implements the W3C spec
974 #               # 1. If the current node is an HTML element whose tag name is subject,
975 #               # then run these substeps:
976 #               #
977 #               # 1. Let element be the current node.
978 #               #
979 #               # 2. Pop element off the stack of open elements.
980 #               #
981 #               # 3. If element is also in the list of active formatting elements,
982 #               # remove the element from the list.
983 #               #
984 #               # 4. Abort the adoption agency algorithm.
985 #               if open_els[0].name is subject and open_els[0].namespace is NS_HTML
986 #                       el = open_els.shift()
987 #                       # remove it from the list of active formatting elements (if found)
988 #                       for t, i in afe
989 #                               if t is el
990 #                                       afe.splice i, 1
991 #                                       break
992 #                       debug_log "aaa: starting off with subject on top of stack, exiting"
993 #                       return
994 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
995                 # If the current node is an HTML element whose tag name is subject, and
996                 # the current node is not in the list of active formatting elements,
997                 # then pop the current node off the stack of open elements, and abort
998                 # these steps.
999                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1000                         debug_log "aaa: starting off with subject on top of stack, exiting"
1001                         # remove it from the list of active formatting elements (if found)
1002                         in_afe = false
1003                         for el, i in afe
1004                                 if el is open_els[0]
1005                                         in_afe = true
1006                                         break
1007                         unless in_afe
1008                                 debug_log "aaa: ...and not in afe, aaa done"
1009                                 open_els.shift()
1010                                 return
1011                         # fall through
1012 # END WHATWG
1013                 outer = 0
1014                 loop
1015                         if outer >= 8
1016                                 return
1017                         outer += 1
1018                         # 5. Let formatting element be the last element in the list of
1019                         # active formatting elements that: is between the end of the list
1020                         # and the last scope marker in the list, if any, or the start of
1021                         # the list otherwise, and  has the tag name subject.
1022                         fe = null
1023                         for t, fe_of_afe in afe
1024                                 if t.type is TYPE_AFE_MARKER
1025                                         break
1026                                 if t.name is subject
1027                                         fe = t
1028                                         break
1029                         # If there is no such element, then abort these steps and instead
1030                         # act as described in the "any other end tag" entry above.
1031                         if fe is null
1032                                 debug_log "aaa: fe not found in afe"
1033                                 in_body_any_other_end_tag subject
1034                                 return
1035                         # 6. If formatting element is not in the stack of open elements,
1036                         # then this is a parse error; remove the element from the list, and
1037                         # abort these steps.
1038                         in_open_els = false
1039                         for t, fe_of_open_els in open_els
1040                                 if t is fe
1041                                         in_open_els = true
1042                                         break
1043                         unless in_open_els
1044                                 debug_log "aaa: fe not found in open_els"
1045                                 parse_error()
1046                                 # "remove it from the list" must mean afe, since it's not in open_els
1047                                 afe.splice fe_of_afe, 1
1048                                 return
1049                         # 7. If formatting element is in the stack of open elements, but
1050                         # the element is not in scope, then this is a parse error; abort
1051                         # these steps.
1052                         unless el_is_in_scope fe
1053                                 debug_log "aaa: fe not in scope"
1054                                 parse_error()
1055                                 return
1056                         # 8. If formatting element is not the current node, this is a parse
1057                         # error. (But do not abort these steps.)
1058                         unless open_els[0] is fe
1059                                 parse_error()
1060                                 # continue
1061                         # 9. Let furthest block be the topmost node in the stack of open
1062                         # elements that is lower in the stack than formatting element, and
1063                         # is an element in the special category. There might not be one.
1064                         fb = null
1065                         fb_of_open_els = null
1066                         for t, i in open_els
1067                                 if t is fe
1068                                         break
1069                                 if el_is_special t
1070                                         fb = t
1071                                         fb_of_open_els = i
1072                                         # and continue, to see if there's one that's more "topmost"
1073                         # 10. If there is no furthest block, then the UA must first pop all
1074                         # the nodes from the bottom of the stack of open elements, from the
1075                         # current node up to and including formatting element, then remove
1076                         # formatting element from the list of active formatting elements,
1077                         # and finally abort these steps.
1078                         if fb is null
1079                                 debug_log "aaa: no fb"
1080                                 loop
1081                                         t = open_els.shift()
1082                                         if t is fe
1083                                                 afe.splice fe_of_afe, 1
1084                                                 return
1085                         # 11. Let common ancestor be the element immediately above
1086                         # formatting element in the stack of open elements.
1087                         ca = open_els[fe_of_open_els + 1] # common ancestor
1088
1089                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1090                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1091                         bookmark = new_aaa_bookmark()
1092                         for t, i in afe
1093                                 if t is fe
1094                                         afe.splice i, 0, bookmark
1095                                         break
1096                         node = last_node = fb
1097                         inner = 0
1098                         loop
1099                                 inner += 1
1100                                 # 3. Let node be the element immediately above node in the
1101                                 # stack of open elements, or if node is no longer in the stack
1102                                 # of open elements (e.g. because it got removed by this
1103                                 # algorithm), the element that was immediately above node in
1104                                 # the stack of open elements before node was removed.
1105                                 node_next = null
1106                                 for t, i in open_els
1107                                         if t is node
1108                                                 node_next = open_els[i + 1]
1109                                                 break
1110                                 node = node_next ? node_above
1111                                 debug_log "inner loop #{inner}"
1112                                 debug_log "tree: #{serialize_els doc.children, false, true}"
1113                                 debug_log "open_els: #{serialize_els open_els, true, true}"
1114                                 debug_log "afe: #{serialize_els afe, true, true}"
1115                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1116                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1117                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1118                                 debug_log "node: #{node.serialize true, true}"
1119                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1120
1121                                 # 4. If node is formatting element, then go to the next step in
1122                                 # the overall algorithm.
1123                                 if node is fe
1124                                         break
1125                                 debug_log "the meat"
1126                                 # 5. If inner loop counter is greater than three and node is in
1127                                 # the list of active formatting elements, then remove node from
1128                                 # the list of active formatting elements.
1129                                 node_in_afe = false
1130                                 for t, i in afe
1131                                         if t is node
1132                                                 if inner > 3
1133                                                         afe.splice i, 1
1134                                                         debug_log "max out inner"
1135                                                 else
1136                                                         node_in_afe = true
1137                                                         debug_log "in afe"
1138                                                 break
1139                                 # 6. If node is not in the list of active formatting elements,
1140                                 # then remove node from the stack of open elements and then go
1141                                 # back to the step labeled inner loop.
1142                                 unless node_in_afe
1143                                         debug_log "not in afe"
1144                                         for t, i in open_els
1145                                                 if t is node
1146                                                         node_above = open_els[i + 1]
1147                                                         open_els.splice i, 1
1148                                                         break
1149                                         continue
1150                                 debug_log "the bones"
1151                                 # 7. create an element for the token for which the element node
1152                                 # was created, in the HTML namespace, with common ancestor as
1153                                 # the intended parent; replace the entry for node in the list
1154                                 # of active formatting elements with an entry for the new
1155                                 # element, replace the entry for node in the stack of open
1156                                 # elements with an entry for the new element, and let node be
1157                                 # the new element.
1158                                 new_node = token_to_element node.token, NS_HTML, ca
1159                                 for t, i in afe
1160                                         if t is node
1161                                                 afe[i] = new_node
1162                                                 debug_log "replaced in afe"
1163                                                 break
1164                                 for t, i in open_els
1165                                         if t is node
1166                                                 node_above = open_els[i + 1]
1167                                                 open_els[i] = new_node
1168                                                 debug_log "replaced in open_els"
1169                                                 break
1170                                 node = new_node
1171                                 # 8. If last node is furthest block, then move the
1172                                 # aforementioned bookmark to be immediately after the new node
1173                                 # in the list of active formatting elements.
1174                                 if last_node is fb
1175                                         for t, i in afe
1176                                                 if t is bookmark
1177                                                         afe.splice i, 1
1178                                                         debug_log "removed bookmark"
1179                                                         break
1180                                         for t, i in afe
1181                                                 if t is node
1182                                                         # "after" means lower
1183                                                         afe.splice i, 0, bookmark # "after as <-
1184                                                         debug_log "placed bookmark after node"
1185                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1186                                                         break
1187                                 # 9. Insert last node into node, first removing it from its
1188                                 # previous parent node if any.
1189                                 if last_node.parent?
1190                                         debug_log "last_node has parent"
1191                                         for c, i in last_node.parent.children
1192                                                 if c is last_node
1193                                                         debug_log "removing last_node from parent"
1194                                                         last_node.parent.children.splice i, 1
1195                                                         break
1196                                 node.children.push last_node
1197                                 last_node.parent = node
1198                                 # 10. Let last node be node.
1199                                 last_node = node
1200                                 debug_log "at last"
1201                                 # 11. Return to the step labeled inner loop.
1202                         # 14. Insert whatever last node ended up being in the previous step
1203                         # at the appropriate place for inserting a node, but using common
1204                         # ancestor as the override target.
1205
1206                         # In the case where fe is immediately followed by fb:
1207                         #   * inner loop exits out early (node==fe)
1208                         #   * last_node is fb
1209                         #   * last_node is still in the tree (not a duplicate)
1210                         if last_node.parent?
1211                                 debug_log "FEFIRST? last_node has parent"
1212                                 for c, i in last_node.parent.children
1213                                         if c is last_node
1214                                                 debug_log "removing last_node from parent"
1215                                                 last_node.parent.children.splice i, 1
1216                                                 break
1217
1218                         debug_log "after aaa inner loop"
1219                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1220                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1221                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1222                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1223                         debug_log "tree: #{serialize_els doc.children, false, true}"
1224
1225                         debug_log "insert"
1226
1227
1228                         # can't use standard insert token thing, because it's already in
1229                         # open_els and must stay at it's current position in open_els
1230                         dest = adjusted_insertion_location ca
1231                         dest[0].children.splice dest[1], 0, last_node
1232                         last_node.parent = dest[0]
1233
1234
1235                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1236                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1237                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1238                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1239                         debug_log "tree: #{serialize_els doc.children, false, true}"
1240
1241                         # 15. Create an element for the token for which formatting element
1242                         # was created, in the HTML namespace, with furthest block as the
1243                         # intended parent.
1244                         new_element = token_to_element fe.token, NS_HTML, fb
1245                         # 16. Take all of the child nodes of furthest block and append them
1246                         # to the element created in the last step.
1247                         while fb.children.length
1248                                 t = fb.children.shift()
1249                                 t.parent = new_element
1250                                 new_element.children.push t
1251                         # 17. Append that new element to furthest block.
1252                         new_element.parent = fb
1253                         fb.children.push new_element
1254                         # 18. Remove formatting element from the list of active formatting
1255                         # elements, and insert the new element into the list of active
1256                         # formatting elements at the position of the aforementioned
1257                         # bookmark.
1258                         for t, i in afe
1259                                 if t is fe
1260                                         afe.splice i, 1
1261                                         break
1262                         for t, i in afe
1263                                 if t is bookmark
1264                                         afe[i] = new_element
1265                                         break
1266                         # 19. Remove formatting element from the stack of open elements,
1267                         # and insert the new element into the stack of open elements
1268                         # immediately below the position of furthest block in that stack.
1269                         for t, i in open_els
1270                                 if t is fe
1271                                         open_els.splice i, 1
1272                                         break
1273                         for t, i in open_els
1274                                 if t is fb
1275                                         open_els.splice i, 0, new_element
1276                                         break
1277                         # 20. Jump back to the step labeled outer loop.
1278                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1279                         debug_log "tree: #{serialize_els doc.children, false, true}"
1280                         debug_log "open_els: #{serialize_els open_els, true, true}"
1281                         debug_log "afe: #{serialize_els afe, true, true}"
1282                 debug_log "AAA DONE"
1283
1284         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1285         close_p_element = ->
1286                 generate_implied_end_tags 'p' # arg is exception
1287                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1288                         parse_error()
1289                 while open_els.length > 1 # just in case
1290                         el = open_els.shift()
1291                         if el.name is 'p' and el.namespace is NS_HTML
1292                                 return
1293         close_p_if_in_button_scope = ->
1294                 if is_in_button_scope 'p', NS_HTML
1295                         close_p_element()
1296
1297         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1298         # aka insert_a_character = (t) ->
1299         insert_character = (t) ->
1300                 dest = adjusted_insertion_location()
1301                 # fixfull check for Document node
1302                 if dest[1] > 0
1303                         prev = dest[0].children[dest[1] - 1]
1304                         if prev.type is TYPE_TEXT
1305                                 prev.text += t.text
1306                                 return
1307                 dest[0].children.splice dest[1], 0, t
1308
1309
1310         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1311         process_token = (t) ->
1312                 acn = adjusted_current_node()
1313                 unless acn?
1314                         ins_mode t
1315                         return
1316                 if acn.namespace is NS_HTML
1317                         ins_mode t
1318                         return
1319                 if is_mathml_text_integration_point(acn)
1320                         if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
1321                                 ins_mode t
1322                                 return
1323                         if t.type is TYPE_TEXT
1324                                 ins_mode t
1325                                 return
1326                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1327                         ins_mode t
1328                         return
1329                 if is_html_integration acn
1330                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1331                                 ins_mode t
1332                                 return
1333                 if t.type is TYPE_EOF
1334                         ins_mode t
1335                         return
1336                 in_foreign_content t
1337                 return
1338
1339         # 8.2.5.1
1340         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1341         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1342         adjusted_insertion_location = (override_target = null) ->
1343                 # 1. If there was an override target specified, then let target be the
1344                 # override target.
1345                 if override_target?
1346                         target = override_target
1347                 else # Otherwise, let target be the current node.
1348                         target = open_els[0]
1349                 # 2. Determine the adjusted insertion location using the first matching
1350                 # steps from the following list:
1351                 #
1352                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1353                 # thead, or tr element Foster parenting happens when content is
1354                 # misnested in tables.
1355                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1356                         loop # once. this is here so we can ``break`` to "abort these substeps"
1357                                 # 1. Let last template be the last template element in the
1358                                 # stack of open elements, if any.
1359                                 last_template = null
1360                                 last_template_i = null
1361                                 for el, i in open_els
1362                                         if el.name is 'template' and el.namespace is NS_HTML
1363                                                 last_template = el
1364                                                 last_template_i = i
1365                                                 break
1366                                 # 2. Let last table be the last table element in the stack of
1367                                 # open elements, if any.
1368                                 last_table = null
1369                                 last_table_i
1370                                 for el, i in open_els
1371                                         if el.name is 'table' and el.namespace is NS_HTML
1372                                                 last_table = el
1373                                                 last_table_i = i
1374                                                 break
1375                                 # 3. If there is a last template and either there is no last
1376                                 # table, or there is one, but last template is lower (more
1377                                 # recently added) than last table in the stack of open
1378                                 # elements, then: let adjusted insertion location be inside
1379                                 # last template's template contents, after its last child (if
1380                                 # any), and abort these substeps.
1381                                 if last_template and (last_table is null or last_template_i < last_table_i)
1382                                         target = last_template # fixfull should be it's contents
1383                                         target_i = target.children.length
1384                                         break
1385                                 # 4. If there is no last table, then let adjusted insertion
1386                                 # location be inside the first element in the stack of open
1387                                 # elements (the html element), after its last child (if any),
1388                                 # and abort these substeps. (fragment case)
1389                                 if last_table is null
1390                                         # this is odd
1391                                         target = open_els[open_els.length - 1]
1392                                         target_i = target.children.length
1393                                         break
1394                                 # 5. If last table has a parent element, then let adjusted
1395                                 # insertion location be inside last table's parent element,
1396                                 # immediately before last table, and abort these substeps.
1397                                 if last_table.parent?
1398                                         for c, i in last_table.parent.children
1399                                                 if c is last_table
1400                                                         target = last_table.parent
1401                                                         target_i = i
1402                                                         break
1403                                         break
1404                                 # 6. Let previous element be the element immediately above last
1405                                 # table in the stack of open elements.
1406                                 #
1407                                 # huh? how could it not have a parent?
1408                                 previous_element = open_els[last_table_i + 1]
1409                                 # 7. Let adjusted insertion location be inside previous
1410                                 # element, after its last child (if any).
1411                                 target = previous_element
1412                                 target_i = target.children.length
1413                                 # Note: These steps are involved in part because it's possible
1414                                 # for elements, the table element in this case in particular,
1415                                 # to have been moved by a script around in the DOM, or indeed
1416                                 # removed from the DOM entirely, after the element was inserted
1417                                 # by the parser.
1418                                 break # don't really loop
1419                 else
1420                         # Otherwise Let adjusted insertion location be inside target, after
1421                         # its last child (if any).
1422                         target_i = target.children.length
1423
1424                 # 3. If the adjusted insertion location is inside a template element,
1425                 # let it instead be inside the template element's template contents,
1426                 # after its last child (if any).
1427                 # fixfull (template)
1428
1429                 # 4. Return the adjusted insertion location.
1430                 return [target, target_i]
1431
1432         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1433         # aka create_an_element_for_token
1434         token_to_element = (t, namespace, intended_parent) ->
1435                 # convert attributes into a hash
1436                 attrs = {}
1437                 for a in t.attrs_a
1438                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1439                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1440
1441                 # TODO 2. If the newly created element has an xmlns attribute in the
1442                 # XMLNS namespace whose value is not exactly the same as the element's
1443                 # namespace, that is a parse error. Similarly, if the newly created
1444                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1445                 # value is not the XLink Namespace, that is a parse error.
1446
1447                 # fixfull: the spec says stuff about form pointers and ownerDocument
1448
1449                 return el
1450
1451         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1452         insert_foreign_element = (token, namespace) ->
1453                 ail = adjusted_insertion_location()
1454                 ail_el = ail[0]
1455                 ail_i = ail[1]
1456                 el = token_to_element token, namespace, ail_el
1457                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1458                 el.parent = ail_el
1459                 ail_el.children.splice ail_i, 0, el
1460                 open_els.unshift el
1461                 return el
1462         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1463         insert_html_element = (token) ->
1464                 insert_foreign_element token, NS_HTML
1465
1466         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1467         # position should be [node, index_within_children]
1468         insert_comment = (t, position = null) ->
1469                 position ?= adjusted_insertion_location()
1470                 position[0].children.splice position[1], 0, t
1471
1472         # 8.2.5.2
1473         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1474         parse_generic_raw_text = (t) ->
1475                 insert_html_element t
1476                 tok_state = tok_state_rawtext
1477                 original_ins_mode = ins_mode
1478                 ins_mode = ins_mode_text
1479         parse_generic_rcdata_text = (t) ->
1480                 insert_html_element t
1481                 tok_state = tok_state_rcdata
1482                 original_ins_mode = ins_mode
1483                 ins_mode = ins_mode_text
1484
1485         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1486         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1487         generate_implied_end_tags = (except = null) ->
1488                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1489                         open_els.shift()
1490
1491         # 8.2.5.4 The rules for parsing tokens in HTML content
1492         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1493
1494         # 8.2.5.4.1 The "initial" insertion mode
1495         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1496         is_quirks_yes_doctype = (t) ->
1497                 if t.flag 'force-quirks'
1498                         return true
1499                 if t.name isnt 'html'
1500                         return true
1501                 if t.public_identifier?
1502                         pi = t.public_identifier.toLowerCase()
1503                         for p in quirks_yes_pi_prefixes
1504                                 if pi.substr(0, p.length) is p
1505                                         return true
1506                         if pi is '-//w3o//dtd w3 html strict 3.0//en//' or pi is '-/w3c/dtd html 4.0 transitional/en' or pi is 'html'
1507                                 return true
1508                 if t.system_identifier?
1509                         if t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
1510                                 return true
1511                 else if t.public_identifier?
1512                         # already did this: pi = t.public_identifier.toLowerCase()
1513                         if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1514                                 return true
1515                 return false
1516         is_quirks_limited_doctype = (t) ->
1517                 if t.public_identifier?
1518                         pi = t.public_identifier.toLowerCase()
1519                         if pi.substr(0, 32) is '-//w3c//dtd xhtml 1.0 frameset//' or pi.substr(0, 36) is '-//w3c//dtd xhtml 1.0 transitional//'
1520                                 return true
1521                         if t.system_identifier?
1522                                 if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1523                                         return true
1524                 return false
1525         ins_mode_initial = (t) ->
1526                 if is_space_tok t
1527                         return
1528                 if t.type is TYPE_COMMENT
1529                         # ?fixfull
1530                         doc.children.push t
1531                         return
1532                 if t.type is TYPE_DOCTYPE
1533                         # fixfull syntax error from first paragraph and following bullets
1534                         # fixfull set doc.doctype
1535                         # fixfull is the "not an iframe srcdoc" thing relevant?
1536                         if is_quirks_yes_doctype t
1537                                 doc.flag 'quirks mode', QUIRKS_YES
1538                         else if is_quirks_limited_doctype t
1539                                 doc.flag 'quirks mode', QUIRKS_LIMITED
1540                         doc.children.push t
1541                         ins_mode = ins_mode_before_html
1542                         return
1543                 # Anything else
1544                 # fixfull not iframe srcdoc?
1545                 parse_error()
1546                 doc.flag 'quirks mode', QUIRKS_YES
1547                 ins_mode = ins_mode_before_html
1548                 process_token t
1549                 return
1550
1551         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1552         ins_mode_before_html = (t) ->
1553                 if t.type is TYPE_DOCTYPE
1554                         parse_error()
1555                         return
1556                 if t.type is TYPE_COMMENT
1557                         doc.children.push t
1558                         return
1559                 if is_space_tok t
1560                         return
1561                 if t.type is TYPE_START_TAG and t.name is 'html'
1562                         el = token_to_element t, NS_HTML, doc
1563                         doc.children.push el
1564                         open_els.unshift(el)
1565                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1566                         ins_mode = ins_mode_before_head
1567                         return
1568                 if t.type is TYPE_END_TAG
1569                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1570                                 # fall through to "anything else"
1571                         else
1572                                 parse_error()
1573                                 return
1574                 # Anything else
1575                 el = token_to_element new_open_tag('html'), NS_HTML, doc
1576                 doc.children.push el
1577                 el.parent = doc
1578                 open_els.unshift el
1579                 # ?fixfull browsing context
1580                 ins_mode = ins_mode_before_head
1581                 process_token t
1582                 return
1583
1584         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1585         ins_mode_before_head = (t) ->
1586                 if is_space_tok t
1587                         return
1588                 if t.type is TYPE_COMMENT
1589                         insert_comment t
1590                         return
1591                 if t.type is TYPE_DOCTYPE
1592                         parse_error()
1593                         return
1594                 if t.type is TYPE_START_TAG and t.name is 'html'
1595                         ins_mode_in_body t
1596                         return
1597                 if t.type is TYPE_START_TAG and t.name is 'head'
1598                         el = insert_html_element t
1599                         head_element_pointer = el
1600                         ins_mode = ins_mode_in_head
1601                         return
1602                 if t.type is TYPE_END_TAG
1603                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1604                                 # fall through to Anything else below
1605                         else
1606                                 parse_error()
1607                                 return
1608                 # Anything else
1609                 el = insert_html_element new_open_tag 'head'
1610                 head_element_pointer = el
1611                 ins_mode = ins_mode_in_head
1612                 process_token t
1613
1614         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1615         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1616                 open_els.shift() # spec says this will be a 'head' node
1617                 ins_mode = ins_mode_after_head
1618                 process_token t
1619         ins_mode_in_head = (t) ->
1620                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1621                         insert_character t
1622                         return
1623                 if t.type is TYPE_COMMENT
1624                         insert_comment t
1625                         return
1626                 if t.type is TYPE_DOCTYPE
1627                         parse_error()
1628                         return
1629                 if t.type is TYPE_START_TAG and t.name is 'html'
1630                         ins_mode_in_body t
1631                         return
1632                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1633                         el = insert_html_element t
1634                         open_els.shift()
1635                         t.acknowledge_self_closing()
1636                         return
1637                 if t.type is TYPE_START_TAG and t.name is 'meta'
1638                         el = insert_html_element t
1639                         open_els.shift()
1640                         t.acknowledge_self_closing()
1641                         # fixfull encoding stuff
1642                         return
1643                 if t.type is TYPE_START_TAG and t.name is 'title'
1644                         parse_generic_rcdata_text t
1645                         return
1646                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1647                         parse_generic_raw_text t
1648                         return
1649                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1650                         insert_html_element t
1651                         ins_mode = ins_mode_in_head_noscript
1652                         return
1653                 if t.type is TYPE_START_TAG and t.name is 'script'
1654                         ail = adjusted_insertion_location()
1655                         el = token_to_element t, NS_HTML, ail
1656                         el.flag 'parser-inserted', true
1657                         # fixfull frament case
1658                         ail[0].children.splice ail[1], 0, el
1659                         open_els.unshift el
1660                         tok_state = tok_state_script_data
1661                         original_ins_mode = ins_mode # make sure orig... is defined
1662                         ins_mode = ins_mode_text
1663                         return
1664                 if t.type is TYPE_END_TAG and t.name is 'head'
1665                         open_els.shift() # will be a head element... spec says so
1666                         ins_mode = ins_mode_after_head
1667                         return
1668                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1669                         ins_mode_in_head_else t
1670                         return
1671                 if t.type is TYPE_START_TAG and t.name is 'template'
1672                         insert_html_element t
1673                         afe_push_marker()
1674                         flag_frameset_ok = false
1675                         ins_mode = ins_mode_in_template
1676                         template_ins_modes.unshift ins_mode_in_template
1677                         return
1678                 if t.type is TYPE_END_TAG and t.name is 'template'
1679                         if template_tag_is_open()
1680                                 generate_implied_end_tags
1681                                 if open_els[0].name isnt 'template'
1682                                         parse_error()
1683                                 loop
1684                                         el = open_els.shift()
1685                                         if el.name is 'template' and el.namespace is NS_HTML
1686                                                 break
1687                                 clear_afe_to_marker()
1688                                 template_ins_modes.shift()
1689                                 reset_ins_mode()
1690                         else
1691                                 parse_error()
1692                         return
1693                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1694                         parse_error()
1695                         return
1696                 ins_mode_in_head_else t
1697
1698         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1699         ins_mode_in_head_noscript_else = (t) ->
1700                 parse_error()
1701                 open_els.shift()
1702                 ins_mode = ins_mode_in_head
1703                 process_token t
1704         ins_mode_in_head_noscript = (t) ->
1705                 if t.type is TYPE_DOCTYPE
1706                         parse_error()
1707                         return
1708                 if t.type is TYPE_START_TAG and t.name is 'html'
1709                         ins_mode_in_body t
1710                         return
1711                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1712                         open_els.shift()
1713                         ins_mode = ins_mode_in_head
1714                         return
1715                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1716                         ins_mode_in_head t
1717                         return
1718                 if t.type is TYPE_END_TAG and t.name is 'br'
1719                         ins_mode_in_head_noscript_else t
1720                         return
1721                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1722                         parse_error()
1723                         return
1724                 # Anything else
1725                 ins_mode_in_head_noscript_else t
1726                 return
1727
1728
1729
1730         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1731         ins_mode_after_head_else = (t) -> # "anything else" rule of the "after head" mode; t is the token being handled
1732                 body_tok = new_open_tag 'body' # build a 'body' start tag token, since t did not open a body itself
1733                 insert_html_element body_tok
1734                 ins_mode = ins_mode_in_body
1735                 process_token t # hand the same token back to process_token now that ins_mode has changed
1736                 return
1737         ins_mode_after_head = (t) -> # token handler for the "after head" insertion mode; checks run top to bottom, first match wins
1738                 if is_space_tok t
1739                         insert_character t
1740                         return
1741                 if t.type is TYPE_COMMENT
1742                         insert_comment t
1743                         return
1744                 if t.type is TYPE_DOCTYPE
1745                         parse_error() # doctype: report the error and drop the token
1746                         return
1747                 if t.type is TYPE_START_TAG and t.name is 'html'
1748                         ins_mode_in_body t # delegated to the "in body" handler without changing ins_mode
1749                         return
1750                 if t.type is TYPE_START_TAG and t.name is 'body'
1751                         insert_html_element t
1752                         flag_frameset_ok = false
1753                         ins_mode = ins_mode_in_body
1754                         return
1755                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1756                         insert_html_element t
1757                         ins_mode = ins_mode_in_frameset
1758                         return
1759                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1760                         parse_error()
1761                         open_els.unshift head_element_pointer # temporarily make the head element the current node (index 0)
1762                         ins_mode_in_head t # process the token with the "in head" handler while head is on the stack
1763                         for el, i in open_els # then remove the head element again, wherever it now sits in the stack
1764                                 if el is head_element_pointer
1765                                         open_els.splice i, 1
1766                                         return
1767                         console.log "warning: 23904 couldn't find head element in open_els" # reached only when the loop above found no match
1768                         return
1769                 if t.type is TYPE_END_TAG and t.name is 'template'
1770                         ins_mode_in_head t # delegated to the "in head" handler without changing ins_mode
1771                         return
1772                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1773                         ins_mode_after_head_else t # must be tested before the generic end-tag check below
1774                         return
1775                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1776                         parse_error() # head start tag and every remaining end tag: error, token dropped
1777                         return
1778                 # Anything else
1779                 ins_mode_after_head_else t
1780
1781         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1782         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it; name is the end tag's tag name
1783                 node = open_els[0] # start at the current node (index 0 of open_els is the current element)
1784                 loop
1785                         if node.name is name and node.namespace is NS_HTML
1786                                 generate_implied_end_tags name # arg is exception
1787                                 unless node is open_els[0]
1788                                         parse_error() # matching element is not the current node
1789                                 loop # pop elements up to and including node
1790                                         el = open_els.shift()
1791                                         if el is node
1792                                                 return
1793                         if special_elements[node.name] is node.namespace
1794                                 parse_error() # a special element was reached before a match: report and stop, stack untouched
1795                                 return
1796                         for el, i in open_els # locate node in the stack and step to the entry after it (index + 1)
1797                                 if node is el
1798                                         node = open_els[i + 1]
1799                                         break
1800                 return
1801         ins_mode_in_body = (t) ->
1802                 if t.type is TYPE_TEXT and t.text is "\u0000"
1803                         parse_error()
1804                         return
1805                 if is_space_tok t
1806                         reconstruct_afe()
1807                         insert_character t
1808                         return
1809                 if t.type is TYPE_TEXT
1810                         reconstruct_afe()
1811                         insert_character t
1812                         flag_frameset_ok = false
1813                         return
1814                 if t.type is TYPE_COMMENT
1815                         insert_comment t
1816                         return
1817                 if t.type is TYPE_DOCTYPE
1818                         parse_error()
1819                         return
1820                 if t.type is TYPE_START_TAG and t.name is 'html'
1821                         parse_error()
1822                         return if template_tag_is_open()
1823                         root_attrs = open_els[open_els.length - 1].attrs
1824                         for a in t.attrs_a
1825                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1826                         return
1827
1828                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1829                         ins_mode_in_head t
1830                         return
1831                 if t.type is TYPE_START_TAG and t.name is 'body'
1832                         parse_error()
1833                         return if open_els.length < 2
1834                         second = open_els[open_els.length - 2]
1835                         return unless second.namespace is NS_HTML
1836                         return unless second.name is 'body'
1837                         return if template_tag_is_open()
1838                         flag_frameset_ok = false
1839                         for a in t.attrs_a
1840                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1841                         return
1842                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1843                         parse_error()
1844                         return if open_els.length < 2
1845                         second_i = open_els.length - 2
1846                         second = open_els[second_i]
1847                         return unless second.namespace is NS_HTML
1848                         return unless second.name is 'body'
1849                         if flag_frameset_ok is false
1850                                 return
1851                         if second.parent?
1852                                 for el, i in second.parent.children
1853                                         if el is second
1854                                                 second.parent.children.splice i, 1
1855                                                 break
1856                         open_els.splice second_i, 1
1857                         # pop everything except the "root html element"
1858                         while open_els.length > 1
1859                                 open_els.shift()
1860                         insert_html_element t
1861                         ins_mode = ins_mode_in_frameset
1862                         return
1863                 if t.type is TYPE_EOF # end of input while in body: report unclosed elements, then finish or defer to "in template"
1864                         ok_tags = {
1865                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1866                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1867                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1868                         }
1869                         for el in open_els # one parse error if any open element is not in ok_tags
1870                                 unless ok_tags[el.name] is el.namespace # key on the open element's own name, not on the token t
1871                                         parse_error()
1872                                         break
1873                         if template_ins_modes.length > 0
1874                                 ins_mode_in_template t
1875                         else
1876                                 stop_parsing()
1877                         return
1878                 if t.type is TYPE_END_TAG and t.name is 'body' # </body>: report unclosed elements, then switch to "after body"
1879                         unless is_in_scope 'body', NS_HTML
1880                                 parse_error() # no body element in scope: ignore the token
1881                                 return
1882                         ok_tags = {
1883                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1884                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1885                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1886                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1887                                 html:NS_HTML
1888                         }
1889                         for el in open_els # one parse error if any open element is not in ok_tags
1890                                 unless ok_tags[el.name] is el.namespace # key on the open element's own name; t.name is always 'body' here
1891                                         parse_error()
1892                                         break
1893                         ins_mode = ins_mode_after_body
1894                         return
1895                 if t.type is TYPE_END_TAG and t.name is 'html' # </html>: same checks as </body>, then reprocess t in "after body"
1896                         unless is_in_scope 'body', NS_HTML
1897                                 parse_error() # no body element in scope: ignore the token
1898                                 return
1899                         ok_tags = {
1900                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1901                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1902                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1903                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1904                                 html:NS_HTML
1905                         }
1906                         for el in open_els # one parse error if any open element is not in ok_tags
1907                                 unless ok_tags[el.name] is el.namespace # key on the open element's own name; t.name is always 'html' here
1908                                         parse_error()
1909                                         break
1910                         ins_mode = ins_mode_after_body
1911                         process_token t # hand the same token back to process_token now that ins_mode has changed
1912                         return
1913                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1914                         close_p_if_in_button_scope()
1915                         insert_html_element t
1916                         return
1917                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1918                         close_p_if_in_button_scope()
1919                         if h_tags[open_els[0].name] is open_els[0].namespace
1920                                 parse_error()
1921                                 open_els.shift()
1922                         insert_html_element t
1923                         return
1924                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1925                         close_p_if_in_button_scope()
1926                         insert_html_element t
1927                         eat_next_token_if_newline()
1928                         flag_frameset_ok = false
1929                         return
1930                 if t.type is TYPE_START_TAG and t.name is 'form'
1931                         unless form_element_pointer is null or template_tag_is_open()
1932                                 parse_error()
1933                                 return
1934                         close_p_if_in_button_scope()
1935                         el = insert_html_element t
1936                         unless template_tag_is_open()
1937                                 form_element_pointer = el
1938                         return
1939                 if t.type is TYPE_START_TAG and t.name is 'li'
1940                         flag_frameset_ok = false
1941                         for node in open_els
1942                                 if node.name is 'li' and node.namespace is NS_HTML
1943                                         generate_implied_end_tags 'li' # arg is exception
1944                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1945                                                 parse_error()
1946                                         loop
1947                                                 el = open_els.shift()
1948                                                 if el.name is 'li' and el.namespace is NS_HTML
1949                                                         break
1950                                         break
1951                                 if el_is_special_not_adp node
1952                                                 break
1953                         close_p_if_in_button_scope()
1954                         insert_html_element t
1955                         return
1956                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1957                         flag_frameset_ok = false
1958                         for node in open_els
1959                                 if node.name is 'dd' and node.namespace is NS_HTML
1960                                         generate_implied_end_tags 'dd' # arg is exception
1961                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1962                                                 parse_error()
1963                                         loop
1964                                                 el = open_els.shift()
1965                                                 if el.name is 'dd' and el.namespace is NS_HTML
1966                                                         break
1967                                         break
1968                                 if node.name is 'dt' and node.namespace is NS_HTML
1969                                         generate_implied_end_tags 'dt' # arg is exception
1970                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1971                                                 parse_error()
1972                                         loop
1973                                                 el = open_els.shift()
1974                                                 if el.name is 'dt' and el.namespace is NS_HTML
1975                                                         break
1976                                         break
1977                                 if el_is_special_not_adp node
1978                                         break
1979                         close_p_if_in_button_scope()
1980                         insert_html_element t
1981                         return
1982                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1983                         close_p_if_in_button_scope()
1984                         insert_html_element t
1985                         tok_state = tok_state_plaintext
1986                         return
1987                 if t.type is TYPE_START_TAG and t.name is 'button'
1988                         if is_in_scope 'button', NS_HTML
1989                                 parse_error()
1990                                 generate_implied_end_tags()
1991                                 loop
1992                                         el = open_els.shift()
1993                                         if el.name is 'button' and el.namespace is NS_HTML
1994                                                 break
1995                         reconstruct_afe()
1996                         insert_html_element t
1997                         flag_frameset_ok = false
1998                         return
1999                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
2000                         unless is_in_scope t.name, NS_HTML
2001                                 parse_error()
2002                                 return
2003                         generate_implied_end_tags()
2004                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
2005                                 parse_error()
2006                         loop
2007                                 el = open_els.shift()
2008                                 if el.name is t.name and el.namespace is NS_HTML
2009                                         return
2010                         return
2011                 if t.type is TYPE_END_TAG and t.name is 'form'
2012                         unless template_tag_is_open()
2013                                 node = form_element_pointer
2014                                 form_element_pointer = null
2015                                 if node is null or not el_is_in_scope node
2016                                         parse_error()
2017                                         return
2018                                 generate_implied_end_tags()
2019                                 if open_els[0] isnt node
2020                                         parse_error()
2021                                 for el, i in open_els
2022                                         if el is node
2023                                                 open_els.splice i, 1
2024                                                 break
2025                         else
2026                                 unless is_in_scope 'form', NS_HTML
2027                                         parse_error()
2028                                         return
2029                                 generate_implied_end_tags()
2030                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
2031                                         parse_error()
2032                                 loop
2033                                         el = open_els.shift()
2034                                         if el.name is 'form' and el.namespace is NS_HTML
2035                                                 break
2036                         return
2037                 if t.type is TYPE_END_TAG and t.name is 'p'
2038                         unless is_in_button_scope 'p', NS_HTML
2039                                 parse_error()
2040                                 insert_html_element new_open_tag 'p'
2041                         close_p_element()
2042                         return
2043                 if t.type is TYPE_END_TAG and t.name is 'li'
2044                         unless is_in_li_scope 'li', NS_HTML
2045                                 parse_error()
2046                                 return
2047                         generate_implied_end_tags 'li' # arg is exception
2048                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
2049                                 parse_error()
2050                         loop
2051                                 el = open_els.shift()
2052                                 if el.name is 'li' and el.namespace is NS_HTML
2053                                         break
2054                         return
2055                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
2056                         unless is_in_scope t.name, NS_HTML
2057                                 parse_error()
2058                                 return
2059                         generate_implied_end_tags t.name # arg is exception
2060                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2061                                 parse_error()
2062                         loop
2063                                 el = open_els.shift()
2064                                 if el.name is t.name and el.namespace is NS_HTML
2065                                         break
2066                         return
2067                 if t.type is TYPE_END_TAG and h_tags[t.name]?
2068                         h_in_scope = false
2069                         for el in open_els
2070                                 if h_tags[el.name] is el.namespace
2071                                         h_in_scope = true
2072                                         break
2073                                 if standard_scopers[el.name] is el.namespace
2074                                         break
2075                         unless h_in_scope
2076                                 parse_error()
2077                                 return
2078                         generate_implied_end_tags()
2079                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2080                                 parse_error()
2081                         loop
2082                                 el = open_els.shift()
2083                                 if h_tags[el.name] is el.namespace
2084                                         break
2085                         return
2086                 # deep breath!
2087                 if t.type is TYPE_START_TAG and t.name is 'a'
2088                         # If the list of active formatting elements contains an a element
2089                         # between the end of the list and the last marker on the list (or
2090                         # the start of the list if there is no marker on the list), then
2091                         # this is a parse error; run the adoption agency algorithm for the
2092                         # tag name "a", then remove that element from the list of active
2093                         # formatting elements and the stack of open elements if the
2094                         # adoption agency algorithm didn't already remove it (it might not
2095                         # have if the element is not in table scope).
2096                         found = false
2097                         for el in afe # scan from index 0 and stop at the first marker
2098                                 if el.type is TYPE_AFE_MARKER
2099                                         break
2100                                 if el.name is 'a' and el.namespace is NS_HTML
2101                                         found = el
2102                         if found # truthiness test: found starts as false, and "found?" would be true for false
2103                                 parse_error()
2104                                 adoption_agency 'a'
2105                                 for el, i in afe # drop the old a element if the adoption agency left it behind
2106                                         if el is found
2107                                                 afe.splice i, 1
2108                                 for el, i in open_els
2109                                         if el is found
2110                                                 open_els.splice i, 1
2111                         reconstruct_afe()
2112                         el = insert_html_element t
2113                         afe_push el
2114                         return
2115                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2116                         reconstruct_afe()
2117                         el = insert_html_element t
2118                         afe_push el
2119                         return
2120                 if t.type is TYPE_START_TAG and t.name is 'nobr'
2121                         reconstruct_afe()
2122                         if is_in_scope 'nobr', NS_HTML
2123                                 parse_error()
2124                                 adoption_agency 'nobr'
2125                                 reconstruct_afe()
2126                         el = insert_html_element t
2127                         afe_push el
2128                         return
2129                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2130                         adoption_agency t.name
2131                         return
2132                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2133                         reconstruct_afe()
2134                         insert_html_element t
2135                         afe_push_marker()
2136                         flag_frameset_ok = false
2137                         return
2138                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2139                         unless is_in_scope t.name, NS_HTML
2140                                 parse_error()
2141                                 return
2142                         generate_implied_end_tags()
2143                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2144                                 parse_error()
2145                         loop
2146                                 el = open_els.shift()
2147                                 if el.name is t.name and el.namespace is NS_HTML
2148                                         break
2149                         clear_afe_to_marker()
2150                         return
2151                 if t.type is TYPE_START_TAG and t.name is 'table'
2152                         unless doc.flag('quirks mode') is QUIRKS_YES
2153                                 close_p_if_in_button_scope() # test
2154                         insert_html_element t
2155                         flag_frameset_ok = false
2156                         ins_mode = ins_mode_in_table
2157                         return
2158                 if t.type is TYPE_END_TAG and t.name is 'br'
2159                         parse_error()
2160                         # W3C: t.type = TYPE_START_TAG
2161                         t = new_open_tag 'br' # WHATWG
2162                         # fall through
2163                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2164                         reconstruct_afe()
2165                         insert_html_element t
2166                         open_els.shift()
2167                         t.acknowledge_self_closing()
2168                         flag_frameset_ok = false
2169                         return
2170                 if t.type is TYPE_START_TAG and t.name is 'input'
2171                         reconstruct_afe()
2172                         insert_html_element t
2173                         open_els.shift()
2174                         t.acknowledge_self_closing()
2175                         unless is_input_hidden_tok t
2176                                 flag_frameset_ok = false
2177                         return
2178                 if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
2179                         # WHATWG adds 'menuitem' for this block
2180                         insert_html_element t
2181                         open_els.shift()
2182                         t.acknowledge_self_closing()
2183                         return
2184                 if t.type is TYPE_START_TAG and t.name is 'hr'
2185                         close_p_if_in_button_scope()
2186                         insert_html_element t
2187                         open_els.shift()
2188                         t.acknowledge_self_closing()
2189                         flag_frameset_ok = false
2190                         return
2191                 if t.type is TYPE_START_TAG and t.name is 'image'
2192                         parse_error()
2193                         t.name = 'img'
2194                         process_token t
2195                         return
2196                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2197                         parse_error()
2198                         if template_tag_is_open() is false and form_element_pointer isnt null
2199                                 return
2200                         t.acknowledge_self_closing()
2201                         flag_frameset_ok = false
2202                         close_p_if_in_button_scope()
2203                         el = insert_html_element new_open_tag 'form'
2204                         unless template_tag_is_open()
2205                                 form_element_pointer = el
2206                         for a in t.attrs_a
2207                                 if a[0] is 'action'
2208                                         el.attrs['action'] = a[1]
2209                                         break
2210                         insert_html_element new_open_tag 'hr'
2211                         open_els.shift()
2212                         reconstruct_afe()
2213                         insert_html_element new_open_tag 'label'
2214                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2215                         input_el = new_open_tag 'input'
2216                         prompt = null
2217                         for a in t.attrs_a
2218                                 if a[0] is 'prompt'
2219                                         prompt = a[1]
2220                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2221                                         input_el.attrs_a.push [a[0], a[1]]
2222                         input_el.attrs_a.push ['name', 'isindex']
2223                         # fixfull this next bit is in english... internationalize?
2224                         prompt ?= "This is a searchable index. Enter search keywords: "
2225                         insert_character new_character_token prompt # fixfull split
2226                         # TODO submit typo "balue" in spec
2227                         insert_html_element input_el
2228                         open_els.shift()
2229                         # insert_character '' # you can put chars here if prompt attr missing
2230                         open_els.shift()
2231                         insert_html_element new_open_tag 'hr'
2232                         open_els.shift()
2233                         open_els.shift()
2234                         unless template_tag_is_open()
2235                                 form_element_pointer = null
2236                         return
2237                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2238                         insert_html_element t
2239                         eat_next_token_if_newline()
2240                         tok_state = tok_state_rcdata
2241                         original_ins_mode = ins_mode
2242                         flag_frameset_ok = false
2243                         ins_mode = ins_mode_text
2244                         return
2245                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2246                         close_p_if_in_button_scope()
2247                         reconstruct_afe()
2248                         flag_frameset_ok = false
2249                         parse_generic_raw_text t
2250                         return
2251                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2252                         flag_frameset_ok = false
2253                         parse_generic_raw_text t
2254                         return
2255                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2256                         parse_generic_raw_text t
2257                         return
2258                 if t.type is TYPE_START_TAG and t.name is 'select'
2259                         reconstruct_afe()
2260                         insert_html_element t
2261                         flag_frameset_ok = false
2262                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2263                                 ins_mode = ins_mode_in_select_in_table
2264                         else
2265                                 ins_mode = ins_mode_in_select
2266                         return
2267                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2268                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2269                                 open_els.shift()
2270                         reconstruct_afe()
2271                         insert_html_element t
2272                         return
2273 # this comment block implements the W3C spec
2274 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2275 #                       if is_in_scope 'ruby', NS_HTML
2276 #                               generate_implied_end_tags()
2277 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2278 #                                       parse_error()
2279 #                       insert_html_element t
2280 #                       return
2281 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2282 #                       if is_in_scope 'ruby', NS_HTML
2283 #                               generate_implied_end_tags 'rtc' # arg is exception
2284 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2285 #                                       parse_error()
2286 #                       insert_html_element t
2287 #                       return
2288 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2289                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2290                         if is_in_scope 'ruby', NS_HTML
2291                                 generate_implied_end_tags()
2292                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2293                                         parse_error()
2294                         insert_html_element t
2295                         return
2296                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2297                         if is_in_scope 'ruby', NS_HTML
2298                                 generate_implied_end_tags 'rtc'
2299                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2300                                         parse_error()
2301                         insert_html_element t
2302                         return
2303 # end WHATWG chunk
2304                 if t.type is TYPE_START_TAG and t.name is 'math'
2305                         reconstruct_afe()
2306                         adjust_mathml_attributes t
2307                         adjust_foreign_attributes t
2308                         insert_foreign_element t, NS_MATHML
2309                         if t.flag 'self-closing'
2310                                 open_els.shift()
2311                                 t.acknowledge_self_closing()
2312                         return
2313                 if t.type is TYPE_START_TAG and t.name is 'svg'
2314                         reconstruct_afe()
2315                         adjust_svg_attributes t
2316                         adjust_foreign_attributes t
2317                         insert_foreign_element t, NS_SVG
2318                         if t.flag 'self-closing'
2319                                 open_els.shift()
2320                                 t.acknowledge_self_closing()
2321                         return
2322                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2323                         parse_error()
2324                         return
2325                 if t.type is TYPE_START_TAG # any other start tag
2326                         reconstruct_afe()
2327                         insert_html_element t
2328                         return
2329                 if t.type is TYPE_END_TAG # any other end tag
2330                         in_body_any_other_end_tag t.name
2331                         return
2332                 return
2333
2334         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
             # "text" insertion mode: handles one token `t`. This mode is entered
             # after the caller has saved the previous mode in original_ins_mode
             # (e.g. the 'textarea' start tag handling in ins_mode_in_body).
             # Text tokens are inserted; EOF and end tags pop the current node
             # (open_els[0]) and restore original_ins_mode.
2335         ins_mode_text = (t) ->
2336                 if t.type is TYPE_TEXT
2337                         insert_character t
2338                         return
2339                 if t.type is TYPE_EOF
2340                         parse_error()
2341                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2342                                 open_els[0].flag 'already started', true
2343                         open_els.shift()
2344                         ins_mode = original_ins_mode
                             # hand the same EOF token back to process_token now that the
                             # mode is restored (presumably it dispatches on ins_mode)
2345                         process_token t
2346                         return
2347                 if t.type is TYPE_END_TAG and t.name is 'script'
2348                         open_els.shift()
2349                         ins_mode = original_ins_mode
2350                         # fixfull the spec seems to assume that I'm going to run the script
2351                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2352                         return
2353                 if t.type is TYPE_END_TAG
2354                         open_els.shift()
2355                         ins_mode = original_ins_mode
2356                         return
                     # only reached for token types not handled above (not text, EOF or
                     # end tag); such tokens are dropped after logging
2357                 console.log 'warning: end of ins_mode_text reached'
2358
2359         # the functions below implement the tokenizer states described here:
2360         # http://www.w3.org/TR/html5/syntax.html#tokenization
2361
2362         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
             # "Anything else" rule used by ins_mode_in_table and
             # ins_mode_in_table_text: report a parse error, then process `t` with
             # ins_mode_in_body while flag_foster_parenting is set.
2363         ins_mode_in_table_else = (t) ->
2364                 parse_error()
2365                 flag_foster_parenting = true
2366                 ins_mode_in_body t
                     # NOTE(review): the flag is only cleared on normal return; if
                     # ins_mode_in_body throws it stays true -- confirm this is acceptable
2367                 flag_foster_parenting = false
2368                 return
2369         ins_mode_in_table = (t) ->
                     # "in table" insertion mode: dispatches on t.type, and for start/end
                     # tags on t.name. Every token that has no explicit rule here is
                     # passed to ins_mode_in_table_else.
2370                 switch t.type
2371                         when TYPE_TEXT
                                     # when the current node is a table-structure element, switch
                                     # to ins_mode_in_table_text (which collects text tokens in
                                     # pending_table_character_tokens) and hand it this token
2372                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2373                                         pending_table_character_tokens = []
2374                                         original_ins_mode = ins_mode
2375                                         ins_mode = ins_mode_in_table_text
2376                                         process_token t
2377                                 else
2378                                         ins_mode_in_table_else t
2379                         when TYPE_COMMENT
2380                                 insert_comment t
2381                         when TYPE_DOCTYPE
2382                                 parse_error()
2383                         when TYPE_START_TAG
2384                                 switch t.name
2385                                         when 'caption'
2386                                                 clear_stack_to_table_context()
2387                                                 afe_push_marker()
2388                                                 insert_html_element t
2389                                                 ins_mode = ins_mode_in_caption
2390                                         when 'colgroup'
2391                                                 clear_stack_to_table_context()
2392                                                 insert_html_element t
2393                                                 ins_mode = ins_mode_in_column_group
2394                                         when 'col'
                                                     # insert an implied colgroup, then pass the col token
                                                     # to process_token again under the new mode
2395                                                 clear_stack_to_table_context()
2396                                                 insert_html_element new_open_tag 'colgroup'
2397                                                 ins_mode = ins_mode_in_column_group
2398                                                 process_token t
2399                                         when 'tbody', 'tfoot', 'thead'
2400                                                 clear_stack_to_table_context()
2401                                                 insert_html_element t
2402                                                 ins_mode = ins_mode_in_table_body
2403                                         when 'td', 'th', 'tr'
                                                     # insert an implied tbody, then pass the token to
                                                     # process_token again under the new mode
2404                                                 clear_stack_to_table_context()
2405                                                 insert_html_element new_open_tag 'tbody'
2406                                                 ins_mode = ins_mode_in_table_body
2407                                                 process_token t
2408                                         when 'table'
                                                     # nested <table> start tag: pop through the open table
                                                     # and process the token again; ignored (after the
                                                     # parse error) when no table is in table scope
2409                                                 parse_error()
2410                                                 if is_in_table_scope 'table', NS_HTML
2411                                                         loop
2412                                                                 el = open_els.shift()
2413                                                                 if el.name is 'table' and el.namespace is NS_HTML
2414                                                                         break
2415                                                         reset_ins_mode()
2416                                                         process_token t
2417                                         when 'style', 'script', 'template'
2418                                                 ins_mode_in_head t
2419                                         when 'input'
2420                                                 unless is_input_hidden_tok t
2421                                                         ins_mode_in_table_else t
2422                                                 else
                                                             # input accepted by is_input_hidden_tok: inserted
                                                             # and immediately popped; `el` is not used
                                                             # afterwards in this branch
2423                                                         parse_error()
2424                                                         el = insert_html_element t
2425                                                         open_els.shift()
2426                                                         t.acknowledge_self_closing()
2427                                         when 'form'
                                                     # ignored (after the parse error) when a form is already
                                                     # recorded or a template is open; otherwise the new
                                                     # element is recorded and immediately popped
2428                                                 parse_error()
2429                                                 if form_element_pointer?
2430                                                         return
2431                                                 if template_tag_is_open()
2432                                                         return
2433                                                 form_element_pointer = insert_html_element t
2434                                                 open_els.shift()
2435                                         else
2436                                                 ins_mode_in_table_else t
2437                         when TYPE_END_TAG
2438                                 switch t.name
2439                                         when 'table'
2440                                                 if is_in_table_scope 'table', NS_HTML
2441                                                         loop
2442                                                                 el = open_els.shift()
2443                                                                 if el.name is 'table' and el.namespace is NS_HTML
2444                                                                         break
2445                                                         reset_ins_mode()
2446                                                 else
2447                                                         parse_error()
2448                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2449                                                 parse_error()
2450                                         when 'template'
2451                                                 ins_mode_in_head t
2452                                         else
2453                                                 ins_mode_in_table_else t
2454                         when TYPE_EOF
2455                                 ins_mode_in_body t
2456                         else
2457                                 ins_mode_in_table_else t
2458
2459
2460         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
             # "in table text" insertion mode: buffers text tokens in
             # pending_table_character_tokens. The first non-text token flushes the
             # buffer, restores original_ins_mode and is then passed to
             # process_token again.
2461         ins_mode_in_table_text = (t) ->
2462                 if t.type is TYPE_TEXT and t.text is "\u0000"
2463                         # from javascript?
2464                         parse_error()
2465                         return
2466                 if t.type is TYPE_TEXT
2467                         pending_table_character_tokens.push t
2468                         return
2469                 # Anything else
                     # flush: if every buffered token satisfies is_space_tok they are
                     # inserted normally; otherwise each one goes through
                     # ins_mode_in_table_else (parse error + in-body handling)
2470                 all_space = true
2471                 for old in pending_table_character_tokens
2472                         unless is_space_tok old
2473                                 all_space = false
2474                                 break
2475                 if all_space
2476                         for old in pending_table_character_tokens
2477                                 insert_character old
2478                 else
2479                         for old in pending_table_character_tokens
2480                                 ins_mode_in_table_else old
2481                 pending_table_character_tokens = []
2482                 ins_mode = original_ins_mode
2483                 process_token t
2484
2485         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
             # "in caption" insertion mode: handles the tokens that close the
             # caption (explicitly or implicitly) and forwards everything else to
             # ins_mode_in_body.
2486         ins_mode_in_caption = (t) ->
2487                 if t.type is TYPE_END_TAG and t.name is 'caption'
2488                         if is_in_table_scope 'caption', NS_HTML
2489                                 generate_implied_end_tags()
                                     # NOTE(review): only the name is compared here, not the
                                     # namespace, unlike the pop loop just below
2490                                 if open_els[0].name isnt 'caption'
2491                                         parse_error()
2492                                 loop
2493                                         el = open_els.shift()
2494                                         if el.name is 'caption' and el.namespace is NS_HTML
2495                                                 break
2496                                 clear_afe_to_marker()
2497                                 ins_mode = ins_mode_in_table
2498                         else
2499                                 parse_error()
2500                                 # fragment case
2501                         return
                     # `and` binds tighter than `or`, so this reads:
                     # (start tag in the list) or (end tag 'table')
2502                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2503                         parse_error()
2504                         if is_in_table_scope 'caption', NS_HTML
                                     # close the caption implicitly, then let ins_mode_in_table
                                     # see the token via process_token
2505                                 loop
2506                                         el = open_els.shift()
2507                                         if el.name is 'caption' and el.namespace is NS_HTML
2508                                                 break
2509                                 clear_afe_to_marker()
2510                                 ins_mode = ins_mode_in_table
2511                                 process_token t
2512                         # else fragment case
2513                         return
2514                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2515                         parse_error()
2516                         return
2517                 # Anything else
2518                 ins_mode_in_body t
2519
2520         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
             # "in column group" insertion mode: handles one token `t`.
             # Whitespace text and comments are inserted, <col> is inserted and
             # immediately popped, and </colgroup> pops the current node and
             # returns to ins_mode_in_table. Any token without an explicit rule
             # pops the colgroup (when it is the current node) and is passed to
             # process_token again; otherwise it is dropped with a parse error.
2521         ins_mode_in_column_group = (t) ->
2522                 if is_space_tok t
2523                         insert_character t
2524                         return
2525                 if t.type is TYPE_COMMENT
2526                         insert_comment t
2527                         return
2528                 if t.type is TYPE_DOCTYPE
2529                         parse_error()
2530                         return
2531                 if t.type is TYPE_START_TAG and t.name is 'html'
2532                         ins_mode_in_body t
2533                         return
2534                 if t.type is TYPE_START_TAG and t.name is 'col'
2535                         el = insert_html_element t
2536                         open_els.shift()
2537                         t.acknowledge_self_closing()
2538                         return
2539                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
                             # the namespace has to be read from the current node
                             # (open_els[0]); open_els itself is the stack and has no
                             # namespace property, so testing it could never succeed
2540                         if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
2541                                 open_els.shift()
2542                                 ins_mode = ins_mode_in_table
2543                         else
2544                                 parse_error()
2545                         return
2546                 if t.type is TYPE_END_TAG and t.name is 'col'
2547                         parse_error()
2548                         return
2549                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2550                         ins_mode_in_head t
2551                         return
2552                 if t.type is TYPE_EOF
2553                         ins_mode_in_body t
2554                         return
2555                 # Anything else
2556                 if open_els[0].name isnt 'colgroup'
2557                         parse_error()
2558                         return
2559                 open_els.shift()
2560                 ins_mode = ins_mode_in_table
2561                 process_token t
2562                 return
2563
2564         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
             # "in table body" insertion mode: opens rows (inserting an implied tr
             # for a bare td/th), closes the current tbody/tfoot/thead, and
             # forwards everything else to ins_mode_in_table.
2565         ins_mode_in_table_body = (t) ->
2566                 if t.type is TYPE_START_TAG and t.name is 'tr'
2567                         clear_stack_to_table_body_context()
2568                         insert_html_element t
2569                         ins_mode = ins_mode_in_row
2570                         return
2571                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
                             # cell without a row: insert an implied tr, then pass the
                             # cell token to process_token again
2572                         parse_error()
2573                         clear_stack_to_table_body_context()
2574                         insert_html_element new_open_tag 'tr'
2575                         ins_mode = ins_mode_in_row
2576                         process_token t
2577                         return
2578                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2579                         unless is_in_table_scope t.name, NS_HTML
2580                                 parse_error()
2581                                 return
2582                         clear_stack_to_table_body_context()
2583                         open_els.shift()
2584                         ins_mode = ins_mode_in_table
2585                         return
2586                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
                             # inline scope check: walk open_els from the current node
                             # looking for an HTML tbody/tfoot/thead, giving up at the first
                             # element listed in table_scopers
2587                         has = false
2588                         for el in open_els
2589                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2590                                         has = true
2591                                         break
2592                                 if table_scopers[el.name] is el.namespace
2593                                         break
2594                         if !has
2595                                 parse_error()
2596                                 return
                             # close the table body section and let ins_mode_in_table see
                             # the token via process_token
2597                         clear_stack_to_table_body_context()
2598                         open_els.shift()
2599                         ins_mode = ins_mode_in_table
2600                         process_token t
2601                         return
2602                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2603                         parse_error()
2604                         return
2605                 # Anything else
2606                 ins_mode_in_table t
2607
2608         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
             # "in row" insertion mode: opens cells, closes the current tr
             # (explicitly or implicitly), and forwards everything else to
             # ins_mode_in_table.
2609         ins_mode_in_row = (t) ->
2610                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2611                         clear_stack_to_table_row_context()
2612                         insert_html_element t
2613                         ins_mode = ins_mode_in_cell
2614                         afe_push_marker()
2615                         return
2616                 if t.type is TYPE_END_TAG and t.name is 'tr'
2617                         if is_in_table_scope 'tr', NS_HTML
2618                                 clear_stack_to_table_row_context()
2619                                 open_els.shift()
2620                                 ins_mode = ins_mode_in_table_body
2621                         else
2622                                 parse_error()
2623                         return
                     # `and` binds tighter than `or`, so this reads:
                     # (start tag in the list) or (end tag 'table')
2624                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2625                         if is_in_table_scope 'tr', NS_HTML
                                     # close the row implicitly, then pass the token to
                                     # process_token again
2626                                 clear_stack_to_table_row_context()
2627                                 open_els.shift()
2628                                 ins_mode = ins_mode_in_table_body
2629                                 process_token t
2630                         else
2631                                 parse_error()
2632                         return
2633                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2634                         if is_in_table_scope t.name, NS_HTML
                                     # the section is open; the token is silently ignored
                                     # when no tr is in table scope
2635                                 if is_in_table_scope 'tr', NS_HTML
2636                                         clear_stack_to_table_row_context()
2637                                         open_els.shift()
2638                                         ins_mode = ins_mode_in_table_body
2639                                         process_token t
2640                         else
2641                                 parse_error()
2642                         return
2643                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2644                         parse_error()
2645                         return
2646                 # Anything else
2647                 ins_mode_in_table t
2648
2649         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
             # Closes the currently open table cell: generates implied end tags,
             # reports a parse error if the current node (open_els[0]) is then
             # not an HTML td or th, pops open_els through the first HTML td/th,
             # clears the active formatting elements back to the last marker and
             # switches to ins_mode_in_row.
2650         close_the_cell = ->
2651                 generate_implied_end_tags()
                     # both alternatives must compare the element's name; comparing
                     # the element object itself to 'th' is never true and would
                     # report a parse error for every th cell
2652                 unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
2653                         parse_error()
2654                 loop
2655                         el = open_els.shift()
2656                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2657                                 break
2658                 clear_afe_to_marker()
2659                 ins_mode = ins_mode_in_row
2660
2661         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
             # "in cell" insertion mode: handles the tokens that close the current
             # td/th (explicitly or implicitly) and forwards everything else to
             # ins_mode_in_body.
2662         ins_mode_in_cell = (t) ->
2663                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2664                         if is_in_table_scope t.name, NS_HTML
2665                                 generate_implied_end_tags()
2666                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2667                                         parse_error()
2668                                 loop
2669                                         el = open_els.shift()
2670                                         if el.name is t.name and el.namespace is NS_HTML
2671                                                 break
2672                                 clear_afe_to_marker()
2673                                 ins_mode = ins_mode_in_row
2674                         else
2675                                 parse_error()
2676                         return
2677                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
                             # inline scope check: walk open_els from the current node
                             # looking for an HTML td/th, giving up at the first element
                             # listed in table_scopers
2678                         has = false
2679                         for el in open_els
2680                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2681                                         has = true
2682                                         break
2683                                 if table_scopers[el.name] is el.namespace
2684                                         break
2685                         if !has
2686                                 parse_error()
2687                                 return
                             # close the cell implicitly, then pass the token to
                             # process_token again (close_the_cell switches to ins_mode_in_row)
2688                         close_the_cell()
2689                         process_token t
2690                         return
2691                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2692                         parse_error()
2693                         return
2694                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2695                         if is_in_table_scope t.name, NS_HTML
2696                                 close_the_cell()
2697                                 process_token t
2698                         else
2699                                 parse_error()
2700                         return
2701                 # Anything Else
2702                 ins_mode_in_body t
2703
2704         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
             # "in select" insertion mode: handles one token `t` while a select
             # element is open. option/optgroup start and end tags adjust the top
             # of open_els; </select>, a nested <select> and
             # <input>/<keygen>/<textarea> pop through the select and call
             # reset_ins_mode; script/template tokens go to ins_mode_in_head;
             # every token without a rule is dropped with a parse error.
2705         ins_mode_in_select = (t) ->
2706                 if t.type is TYPE_TEXT and t.text is "\u0000"
2707                         parse_error()
2708                         return
2709                 if t.type is TYPE_TEXT
2710                         insert_character t
2711                         return
2712                 if t.type is TYPE_COMMENT
2713                         insert_comment t
2714                         return
2715                 if t.type is TYPE_DOCTYPE
2716                         parse_error()
2717                         return
2718                 if t.type is TYPE_START_TAG and t.name is 'html'
2719                         ins_mode_in_body t
2720                         return
2721                 if t.type is TYPE_START_TAG and t.name is 'option'
                             # a new option implicitly closes an open option
2722                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2723                                 open_els.shift()
2724                         insert_html_element t
2725                         return
2726                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
                             # a new optgroup implicitly closes an open option and then an
                             # open optgroup
2727                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2728                                 open_els.shift()
2729                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2730                                 open_els.shift()
2731                         insert_html_element t
2732                         return
2733                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2734                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
                                     # pop the option only when the node directly below it
                                     # (open_els[1]) is an HTML optgroup; name and namespace
                                     # must both be read from that same node
2735                                 if open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
2736                                         open_els.shift()
2737                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2738                                 open_els.shift()
2739                         else
2740                                 parse_error()
2741                         return
2742                 if t.type is TYPE_END_TAG and t.name is 'option'
2743                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2744                                 open_els.shift()
2745                         else
2746                                 parse_error()
2747                         return
2748                 if t.type is TYPE_END_TAG and t.name is 'select'
2749                         if is_in_select_scope 'select', NS_HTML
2750                                 loop
2751                                         el = open_els.shift()
2752                                         if el.name is 'select' and el.namespace is NS_HTML
2753                                                 break
2754                                 reset_ins_mode()
2755                         else
2756                                 parse_error()
2757                         return
2758                 if t.type is TYPE_START_TAG and t.name is 'select'
2759                         parse_error()
2760                         loop
2761                                 el = open_els.shift()
2762                                 if el.name is 'select' and el.namespace is NS_HTML
2763                                         break
2764                         reset_ins_mode()
2765                         # spec says that this is the same as </select> but it doesn't say
2766                         # to check scope first
2767                         return
2768                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2769                         parse_error()
2770                         unless is_in_select_scope 'select', NS_HTML
2771                                 return
                             # close the select, then pass the token to process_token again
2772                         loop
2773                                 el = open_els.shift()
2774                                 if el.name is 'select' and el.namespace is NS_HTML
2775                                         break
2776                         reset_ins_mode()
2777                         process_token t
2778                         return
                     # the spec routes the start tags script/template AND the end tag
                     # template to the "in head" rules; without the end-tag case a
                     # </template> here would fall through to the parse error below
2779                 if (t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')) or (t.type is TYPE_END_TAG and t.name is 'template')
2780                         ins_mode_in_head t
2781                         return
2782                 if t.type is TYPE_EOF
2783                         ins_mode_in_body t
2784                         return
2785                 # Anything else
2786                 parse_error()
2787                 return
2788
2789         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2790         ins_mode_in_select_in_table = (t) ->
2791                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2792                         parse_error()
2793                         loop
2794                                 el = open_els.shift()
2795                                 if el.name is 'select' and el.namespace is NS_HTML
2796                                         break
2797                         reset_ins_mode()
2798                         process_token t
2799                         return
2800                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2801                         parse_error()
2802                         unless is_in_table_scope t.name, NS_HTML
2803                                 return
2804                         loop
2805                                 el = open_els.shift()
2806                                 if el.name is 'select' and el.namespace is NS_HTML
2807                                         break
2808                         reset_ins_mode()
2809                         process_token t
2810                         return
2811                 # Anything else
2812                 ins_mode_in_select t
2813                 return
2814
2815         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2816         ins_mode_in_template = (t) ->
2817                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2818                         ins_mode_in_body t
2819                         return
2820                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2821                         ins_mode_in_head t
2822                         return
2823                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2824                         template_ins_modes.shift()
2825                         template_ins_modes.unshift ins_mode_in_table
2826                         ins_mode = ins_mode_in_table
2827                         process_token t
2828                         return
2829                 if t.type is TYPE_START_TAG and t.name is 'col'
2830                         template_ins_modes.shift()
2831                         template_ins_modes.unshift ins_mode_in_column_group
2832                         ins_mode = ins_mode_in_column_group
2833                         process_token t
2834                         return
2835                 if t.type is TYPE_START_TAG and t.name is 'tr'
2836                         template_ins_modes.shift()
2837                         template_ins_modes.unshift ins_mode_in_table_body
2838                         ins_mode = ins_mode_in_table_body
2839                         process_token t
2840                         return
2841                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2842                         template_ins_modes.shift()
2843                         template_ins_modes.unshift ins_mode_in_row
2844                         ins_mode = ins_mode_in_row
2845                         process_token t
2846                         return
2847                 if t.type is TYPE_START_TAG
2848                         template_ins_modes.shift()
2849                         template_ins_modes.unshift ins_mode_in_body
2850                         ins_mode = ins_mode_in_body
2851                         process_token t
2852                         return
2853                 if t.type is TYPE_END_TAG
2854                         parse_error()
2855                         return
2856                 if t.type is TYPE_EOF
2857                         unless template_tag_is_open()
2858                                 stop_parsing()
2859                                 return
2860                         parse_error()
2861                         loop
2862                                 el = open_els.shift()
2863                                 if el.name is 'template' and el.namespace is NS_HTML
2864                                         break
2865                         clear_afe_to_marker()
2866                         template_ins_modes.shift()
2867                         reset_ins_mode()
2868                         process_token t
2869
2870         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2871         ins_mode_after_body = (t) ->
2872                 if is_space_tok t
2873                         ins_mode_in_body t
2874                         return
2875                 if t.type is TYPE_COMMENT
2876                         first = open_els[open_els.length - 1]
2877                         insert_comment t, [first, first.children.length]
2878                         return
2879                 if t.type is TYPE_DOCTYPE
2880                         parse_error()
2881                         return
2882                 if t.type is TYPE_START_TAG and t.name is 'html'
2883                         ins_mode_in_body t
2884                         return
2885                 if t.type is TYPE_END_TAG and t.name is 'html'
2886                         if flag_fragment_parsing
2887                                 parse_error()
2888                                 return
2889                         ins_mode = ins_mode_after_after_body
2890                         return
2891                 if t.type is TYPE_EOF
2892                         stop_parsing()
2893                         return
2894                 # Anything ELse
2895                 parse_error()
2896                 ins_mode = ins_mode_in_body
2897                 process_token t
2898
2899         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2900         ins_mode_in_frameset = (t) ->
2901                 if is_space_tok t
2902                         insert_character t
2903                         return
2904                 if t.type is TYPE_COMMENT
2905                         insert_comment t
2906                         return
2907                 if t.type is TYPE_DOCTYPE
2908                         parse_error()
2909                         return
2910                 if t.type is TYPE_START_TAG and t.name is 'html'
2911                         ins_mode_in_body t
2912                         return
2913                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2914                         insert_html_element t
2915                         return
2916                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2917                         if open_els.length is 1
2918                                 parse_error()
2919                                 return # fragment case
2920                         open_els.shift()
2921                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2922                                 ins_mode = ins_mode_after_frameset
2923                         return
2924                 if t.type is TYPE_START_TAG and t.name is 'frame'
2925                         insert_html_element t
2926                         open_els.shift()
2927                         t.acknowledge_self_closing()
2928                         return
2929                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2930                         ins_mode_in_head t
2931                         return
2932                 if t.type is TYPE_EOF
2933                         if open_els.length isnt 1
2934                                 parse_error()
2935                         stop_parsing()
2936                         return
2937                 # Anything else
2938                 parse_error()
2939                 return
2940
2941         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2942         ins_mode_after_frameset = (t) ->
2943                 if is_space_tok t
2944                         insert_character t
2945                         return
2946                 if t.type is TYPE_COMMENT
2947                         insert_comment t
2948                         return
2949                 if t.type is TYPE_DOCTYPE
2950                         parse_error()
2951                         return
2952                 if t.type is TYPE_START_TAG and t.name is 'html'
2953                         ins_mode_in_body t
2954                         return
2955                 if t.type is TYPE_END_TAG and t.name is 'html'
2956                         ins_mode = ins_mode_after_after_frameset
2957                         return
2958                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2959                         ins_mode_in_head t
2960                         return
2961                 if t.type is TYPE_EOF
2962                         stop_parsing()
2963                         return
2964                 # Anything else
2965                 parse_error()
2966                 return
2967
2968         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2969         ins_mode_after_after_body = (t) ->
2970                 if t.type is TYPE_COMMENT
2971                         insert_comment t, [doc, doc.children.length]
2972                         return
2973                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2974                         ins_mode_in_body t
2975                         return
2976                 if t.type is TYPE_EOF
2977                         stop_parsing()
2978                         return
2979                 # Anything else
2980                 parse_error()
2981                 ins_mode = ins_mode_in_body
2982                 process_token t
2983                 return
2984
2985         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2986         ins_mode_after_after_frameset = (t) ->
2987                 if t.type is TYPE_COMMENT
2988                         insert_comment t, [doc, doc.children.length]
2989                         return
2990                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2991                         ins_mode_in_body t
2992                         return
2993                 if t.type is TYPE_EOF
2994                         stop_parsing()
2995                         return
2996                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2997                         ins_mode_in_head t
2998                         return
2999                 # Anything else
3000                 parse_error()
3001                 return
3002
3003         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
3004         has_color_face_or_size = (t) ->
3005                 for a in t.attrs_a
3006                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
3007                                 return true
3008                 return false
        in_foreign_content_end_script = ->
                # Steps for the end of a <script> in foreign content: pops the current
                # node (index 0) off the stack of open elements.
                open_els.shift()
                # fixfull (this file's marker for not-yet-finished spec steps)
                return
3013         in_foreign_content_other_start = (t) ->
3014                 acn = adjusted_current_node()
3015                 if acn.namespace is NS_MATHML
3016                         adjust_mathml_attributes t
3017                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
3018                         t.name = svg_name_fixes[t.name]
3019                 if acn.namespace is NS_SVG
3020                         adjust_svg_attributes t
3021                 adjust_foreign_attributes t
3022                 insert_foreign_element t, acn.namespace
3023                 if t.flag 'self-closing'
3024                         if t.name is 'script'
3025                                 t.acknowledge_self_closing()
3026                                 in_foreign_content_end_script()
3027                                 # fixfull
3028                         else
3029                                 open_els.shift()
3030                                 t.acknowledge_self_closing()
3031                 return
3032         in_foreign_content = (t) ->
3033                 if t.type is TYPE_TEXT and t.text is "\u0000"
3034                         parse_error()
3035                         insert_character new_character_token "\ufffd"
3036                         return
3037                 if is_space_tok t
3038                         insert_character t
3039                         return
3040                 if t.type is TYPE_TEXT
3041                         flag_frameset_ok = false
3042                         insert_character t
3043                         return
3044                 if t.type is TYPE_COMMENT
3045                         insert_comment t
3046                         return
3047                 if t.type is TYPE_DOCTYPE
3048                         parse_error()
3049                         return
3050                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
3051                         parse_error()
3052                         if flag_fragment_parsing
3053                                 in_foreign_content_other_start t
3054                                 return
3055                         loop # is this safe?
3056                                 open_els.shift()
3057                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
3058                                         break
3059                         process_token t
3060                         return
3061                 if t.type is TYPE_START_TAG
3062                         in_foreign_content_other_start t
3063                         return
3064                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
3065                         in_foreign_content_end_script()
3066                         return
3067                 if t.type is TYPE_END_TAG
3068                         i = 0
3069                         node = open_els[i]
3070                         if node.name.toLowerCase() isnt t.name
3071                                 parse_error()
3072                         loop
3073                                 if node is open_els[open_els.length - 1]
3074                                         return
3075                                 if node.name.toLowerCase() is t.name
3076                                         loop
3077                                                 el = open_els.shift()
3078                                                 if el is node
3079                                                         return
3080                                 i += 1
3081                                 node = open_els[i]
3082                                 if node.namespace is NS_HTML
3083                                         break
3084                         ins_mode t # explicitly call HTML insertion mode
3085
3086
3087         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
3088         tok_state_data = ->
3089                 switch c = txt.charAt(cur++)
3090                         when '&'
3091                                 return new_text_node parse_character_reference()
3092                         when '<'
3093                                 tok_state = tok_state_tag_open
3094                         when "\u0000"
3095                                 parse_error()
3096                                 return new_text_node c
3097                         when '' # EOF
3098                                 return new_eof_token()
3099                         else
3100                                 return new_text_node c
3101                 return null
3102
3103         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3104         # not needed: tok_state_character_reference_in_data = ->
3105         # just call parse_character_reference()
3106
3107         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
3108         tok_state_rcdata = ->
3109                 switch c = txt.charAt(cur++)
3110                         when '&'
3111                                 return new_text_node parse_character_reference()
3112                         when '<'
3113                                 tok_state = tok_state_rcdata_less_than_sign
3114                         when "\u0000"
3115                                 parse_error()
3116                                 return new_character_token "\ufffd"
3117                         when '' # EOF
3118                                 return new_eof_token()
3119                         else
3120                                 return new_character_token c
3121                 return null
3122
3123         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3124         # not needed: tok_state_character_reference_in_rcdata = ->
3125         # just call parse_character_reference()
3126
3127         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3128         tok_state_rawtext = ->
3129                 switch c = txt.charAt(cur++)
3130                         when '<'
3131                                 tok_state = tok_state_rawtext_less_than_sign
3132                         when "\u0000"
3133                                 parse_error()
3134                                 return new_character_token "\ufffd"
3135                         when '' # EOF
3136                                 return new_eof_token()
3137                         else
3138                                 return new_character_token c
3139                 return null
3140
3141         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3142         tok_state_script_data = ->
3143                 switch c = txt.charAt(cur++)
3144                         when '<'
3145                                 tok_state = tok_state_script_data_less_than_sign
3146                         when "\u0000"
3147                                 parse_error()
3148                                 return new_character_token "\ufffd"
3149                         when '' # EOF
3150                                 return new_eof_token()
3151                         else
3152                                 return new_character_token c
3153                 return null
3154
3155         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3156         tok_state_plaintext = ->
3157                 switch c = txt.charAt(cur++)
3158                         when "\u0000"
3159                                 parse_error()
3160                                 return new_character_token "\ufffd"
3161                         when '' # EOF
3162                                 return new_eof_token()
3163                         else
3164                                 return new_character_token c
3165                 return null
3166
3167
3168         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3169         tok_state_tag_open = ->
3170                 c = txt.charAt(cur++)
3171                 if c is '!'
3172                         tok_state = tok_state_markup_declaration_open
3173                         return
3174                 if c is '/'
3175                         tok_state = tok_state_end_tag_open
3176                         return
3177                 if is_uc_alpha(c)
3178                         tok_cur_tag = new_open_tag c.toLowerCase()
3179                         tok_state = tok_state_tag_name
3180                         return
3181                 if is_lc_alpha(c)
3182                         tok_cur_tag = new_open_tag c
3183                         tok_state = tok_state_tag_name
3184                         return
3185                 if c is '?'
3186                         parse_error()
3187                         tok_cur_tag = new_comment_token '?' # FIXME right?
3188                         tok_state = tok_state_bogus_comment
3189                         return
3190                 # Anything else
3191                 parse_error()
3192                 tok_state = tok_state_data
3193                 cur -= 1 # we didn't parse/handle the char after <
3194                 return new_text_node '<'
3195
3196         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3197         tok_state_end_tag_open = ->
3198                 c = txt.charAt(cur++)
3199                 if is_uc_alpha(c)
3200                         tok_cur_tag = new_end_tag c.toLowerCase()
3201                         tok_state = tok_state_tag_name
3202                         return
3203                 if is_lc_alpha(c)
3204                         tok_cur_tag = new_end_tag c
3205                         tok_state = tok_state_tag_name
3206                         return
3207                 if c is '>'
3208                         parse_error()
3209                         tok_state = tok_state_data
3210                         return
3211                 if c is '' # EOF
3212                         parse_error()
3213                         tok_state = tok_state_data
3214                         return new_text_node '</'
3215                 # Anything else
3216                 parse_error()
3217                 tok_cur_tag = new_comment_token c
3218                 tok_state = tok_state_bogus_comment
3219                 return null
3220
3221         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3222         tok_state_tag_name = ->
3223                 switch c = txt.charAt(cur++)
3224                         when "\t", "\n", "\u000c", ' '
3225                                 tok_state = tok_state_before_attribute_name
3226                         when '/'
3227                                 tok_state = tok_state_self_closing_start_tag
3228                         when '>'
3229                                 tok_state = tok_state_data
3230                                 tmp = tok_cur_tag
3231                                 tok_cur_tag = null
3232                                 return tmp
3233                         when "\u0000"
3234                                 parse_error()
3235                                 tok_cur_tag.name += "\ufffd"
3236                         when '' # EOF
3237                                 parse_error()
3238                                 tok_state = tok_state_data
3239                         else
3240                                 if is_uc_alpha(c)
3241                                         tok_cur_tag.name += c.toLowerCase()
3242                                 else
3243                                         tok_cur_tag.name += c
3244                 return null
3245
3246         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3247         tok_state_rcdata_less_than_sign = ->
3248                 c = txt.charAt(cur++)
3249                 if c is '/'
3250                         temporary_buffer = ''
3251                         tok_state = tok_state_rcdata_end_tag_open
3252                         return null
3253                 # Anything else
3254                 tok_state = tok_state_rcdata
3255                 cur -= 1 # reconsume the input character
3256                 return new_character_token '<'
3257
3258         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3259         tok_state_rcdata_end_tag_open = ->
3260                 c = txt.charAt(cur++)
3261                 if is_uc_alpha(c)
3262                         tok_cur_tag = new_end_tag c.toLowerCase()
3263                         temporary_buffer += c
3264                         tok_state = tok_state_rcdata_end_tag_name
3265                         return null
3266                 if is_lc_alpha(c)
3267                         tok_cur_tag = new_end_tag c
3268                         temporary_buffer += c
3269                         tok_state = tok_state_rcdata_end_tag_name
3270                         return null
3271                 # Anything else
3272                 tok_state = tok_state_rcdata
3273                 cur -= 1 # reconsume the input character
3274                 return new_character_token "</" # fixfull separate these
3275
3276         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3277         is_appropriate_end_tag = (t) ->
3278                 # spec says to check against "the tag name of the last start tag to
3279                 # have been emitted from this tokenizer", but this is only called from
3280                 # the various "raw" states, so it's hopefully ok to assume that
3281                 # open_els[0].name will work instead TODO: verify this after the script
3282                 # data states are implemented
3283                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3284                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3285
3286         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3287         tok_state_rcdata_end_tag_name = ->
3288                 c = txt.charAt(cur++)
3289                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3290                         if is_appropriate_end_tag tok_cur_tag
3291                                 tok_state = tok_state_before_attribute_name
3292                                 return
3293                         # else fall through to "Anything else"
3294                 if c is '/'
3295                         if is_appropriate_end_tag tok_cur_tag
3296                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3297                                 return
3298                         # else fall through to "Anything else"
3299                 if c is '>'
3300                         if is_appropriate_end_tag tok_cur_tag
3301                                 tok_state = tok_state_data
3302                                 return tok_cur_tag
3303                         # else fall through to "Anything else"
3304                 if is_uc_alpha(c)
3305                         tok_cur_tag.name += c.toLowerCase()
3306                         temporary_buffer += c
3307                         return null
3308                 if is_lc_alpha(c)
3309                         tok_cur_tag.name += c
3310                         temporary_buffer += c
3311                         return null
3312                 # Anything else
3313                 tok_state = tok_state_rcdata
3314                 cur -= 1 # reconsume the input character
3315                 return new_character_token '</' + temporary_buffer # fixfull separate these
3316
3317         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3318         tok_state_rawtext_less_than_sign = ->
3319                 c = txt.charAt(cur++)
3320                 if c is '/'
3321                         temporary_buffer = ''
3322                         tok_state = tok_state_rawtext_end_tag_open
3323                         return null
3324                 # Anything else
3325                 tok_state = tok_state_rawtext
3326                 cur -= 1 # reconsume the input character
3327                 return new_character_token '<'
3328
3329         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3330         tok_state_rawtext_end_tag_open = ->
3331                 c = txt.charAt(cur++)
3332                 if is_uc_alpha(c)
3333                         tok_cur_tag = new_end_tag c.toLowerCase()
3334                         temporary_buffer += c
3335                         tok_state = tok_state_rawtext_end_tag_name
3336                         return null
3337                 if is_lc_alpha(c)
3338                         tok_cur_tag = new_end_tag c
3339                         temporary_buffer += c
3340                         tok_state = tok_state_rawtext_end_tag_name
3341                         return null
3342                 # Anything else
3343                 tok_state = tok_state_rawtext
3344                 cur -= 1 # reconsume the input character
3345                 return new_character_token "</" # fixfull separate these
3346
3347         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3348         tok_state_rawtext_end_tag_name = ->
3349                 c = txt.charAt(cur++)
3350                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3351                         if is_appropriate_end_tag tok_cur_tag
3352                                 tok_state = tok_state_before_attribute_name
3353                                 return
3354                         # else fall through to "Anything else"
3355                 if c is '/'
3356                         if is_appropriate_end_tag tok_cur_tag
3357                                 tok_state = tok_state_self_closing_start_tag
3358                                 return
3359                         # else fall through to "Anything else"
3360                 if c is '>'
3361                         if is_appropriate_end_tag tok_cur_tag
3362                                 tok_state = tok_state_data
3363                                 return tok_cur_tag
3364                         # else fall through to "Anything else"
3365                 if is_uc_alpha(c)
3366                         tok_cur_tag.name += c.toLowerCase()
3367                         temporary_buffer += c
3368                         return null
3369                 if is_lc_alpha(c)
3370                         tok_cur_tag.name += c
3371                         temporary_buffer += c
3372                         return null
3373                 # Anything else
3374                 tok_state = tok_state_rawtext
3375                 cur -= 1 # reconsume the input character
3376                 return new_character_token '</' + temporary_buffer # fixfull separate these
3377
3378         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3379         tok_state_script_data_less_than_sign = ->
3380                 c = txt.charAt(cur++)
3381                 if c is '/'
3382                         temporary_buffer = ''
3383                         tok_state = tok_state_script_data_end_tag_open
3384                         return
3385                 if c is '!'
3386                         tok_state = tok_state_script_data_escape_start
3387                         return new_character_token '<!' # fixfull split
3388                 # Anything else
3389                 tok_state = tok_state_script_data
3390                 cur -= 1 # Reconsume
3391                 return new_character_token '<'
3392
3393         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3394         tok_state_script_data_end_tag_open = ->
3395                 c = txt.charAt(cur++)
3396                 if is_uc_alpha(c)
3397                         tok_cur_tag = new_end_tag c.toLowerCase()
3398                         temporary_buffer += c
3399                         tok_state = tok_state_script_data_end_tag_name
3400                         return
3401                 if is_lc_alpha(c)
3402                         tok_cur_tag = new_end_tag c
3403                         temporary_buffer += c
3404                         tok_state = tok_state_script_data_end_tag_name
3405                         return
3406                 # Anything else
3407                 tok_state = tok_state_script_data
3408                 cur -= 1 # Reconsume
3409                 return new_character_token '</'
3410
3411         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
3412         tok_state_script_data_end_tag_name = ->
3413                 c = txt.charAt(cur++)
3414                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3415                         if is_appropriate_end_tag tok_cur_tag
3416                                 tok_state = tok_state_before_attribute_name
3417                                 return
3418                         # fall through
3419                 if c is '/'
3420                         if is_appropriate_end_tag tok_cur_tag
3421                                 tok_state = tok_state_self_closing_start_tag
3422                                 return
3423                         # fall through
3424                 if c is '>'
3425                         if is_appropriate_end_tag tok_cur_tag
3426                                 tok_state = tok_state_data
3427                                 return tok_cur_tag
3428                         # fall through
3429                 if is_uc_alpha(c)
3430                         tok_cur_tag.name += c.toLowerCase()
3431                         temporary_buffer += c
3432                         return
3433                 if is_lc_alpha(c)
3434                         tok_cur_tag.name += c
3435                         temporary_buffer += c
3436                         return
3437                 # Anything else
3438                 tok_state = tok_state_script_data
3439                 cur -= 1 # Reconsume
3440                 return new_character_token "</#{temporary_buffer}" # fixfull split
3441
3442         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3443         tok_state_script_data_escape_start = ->
3444                 c = txt.charAt(cur++)
3445                 if c is '-'
3446                         tok_state = tok_state_script_data_escape_start_dash
3447                         return new_character_token '-'
3448                 # Anything else
3449                 tok_state = tok_state_script_data
3450                 cur -= 1 # Reconsume
3451                 return
3452
3453         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3454         tok_state_script_data_escape_start_dash = ->
3455                 c = txt.charAt(cur++)
3456                 if c is '-'
3457                         tok_state = tok_state_script_data_escaped_dash_dash
3458                         return new_character_token '-'
3459                 # Anything else
3460                 tok_state = tok_state_script_data
3461                 cur -= 1 # Reconsume
3462                 return
3463
3464         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3465         tok_state_script_data_escaped = ->
3466                 c = txt.charAt(cur++)
3467                 if c is '-'
3468                         tok_state = tok_state_script_data_escaped_dash
3469                         return new_character_token '-'
3470                 if c is '<'
3471                         tok_state = tok_state_script_data_escaped_less_than_sign
3472                         return
3473                 if c is "\u0000"
3474                         parse_error()
3475                         return new_character_token "\ufffd"
3476                 if c is '' # EOF
3477                         tok_state = tok_state_data
3478                         parse_error()
3479                         cur -= 1 # Reconsume
3480                         return
3481                 # Anything else
3482                 return new_character_token c
3483
3484         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3485         tok_state_script_data_escaped_dash = ->
3486                 c = txt.charAt(cur++)
3487                 if c is '-'
3488                         tok_state = tok_state_script_data_escaped_dash_dash
3489                         return new_character_token '-'
3490                 if c is '<'
3491                         tok_state = tok_state_script_data_escaped_less_than_sign
3492                         return
3493                 if c is "\u0000"
3494                         parse_error()
3495                         tok_state = tok_state_script_data_escaped
3496                         return new_character_token "\ufffd"
3497                 if c is '' # EOF
3498                         tok_state = tok_state_data
3499                         parse_error()
3500                         cur -= 1 # Reconsume
3501                         return
3502                 # Anything else
3503                 tok_state = tok_state_script_data_escaped
3504                 return new_character_token c
3505
3506         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3507         tok_state_script_data_escaped_dash_dash = ->
3508                 c = txt.charAt(cur++)
3509                 if c is '-'
3510                         return new_character_token '-'
3511                 if c is '<'
3512                         tok_state = tok_state_script_data_escaped_less_than_sign
3513                         return
3514                 if c is '>'
3515                         tok_state = tok_state_script_data
3516                         return new_character_token '>'
3517                 if c is "\u0000"
3518                         parse_error()
3519                         tok_state = tok_state_script_data_escaped
3520                         return new_character_token "\ufffd"
3521                 if c is '' # EOF
3522                         parse_error()
3523                         tok_state = tok_state_data
3524                         cur -= 1 # Reconsume
3525                         return
3526                 # Anything else
3527                 tok_state = tok_state_script_data_escaped
3528                 return new_character_token c
3529
3530         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3531         tok_state_script_data_escaped_less_than_sign = ->
3532                 c = txt.charAt(cur++)
3533                 if c is '/'
3534                         temporary_buffer = ''
3535                         tok_state = tok_state_script_data_escaped_end_tag_open
3536                         return
3537                 if is_uc_alpha(c)
3538                         temporary_buffer = c.toLowerCase() # yes, really
3539                         tok_state = tok_state_script_data_double_escape_start
3540                         return new_character_token "<#{c}" # fixfull split
3541                 if is_lc_alpha(c)
3542                         temporary_buffer = c
3543                         tok_state = tok_state_script_data_double_escape_start
3544                         return new_character_token "<#{c}" # fixfull split
3545                 # Anything else
3546                 tok_state = tok_state_script_data_escaped
3547                 cur -= 1 # Reconsume
3548                 return new_character_token '<'
3549
3550         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3551         tok_state_script_data_escaped_end_tag_open = ->
3552                 c = txt.charAt(cur++)
3553                 if is_uc_alpha(c)
3554                         tok_cur_tag = new_end_tag c.toLowerCase()
3555                         temporary_buffer += c
3556                         tok_state = tok_state_script_data_escaped_end_tag_name
3557                         return
3558                 if is_lc_alpha(c)
3559                         tok_cur_tag = new_end_tag c
3560                         temporary_buffer += c
3561                         tok_state = tok_state_script_data_escaped_end_tag_name
3562                         return
3563                 # Anything else
3564                 tok_state = tok_state_script_data_escaped
3565                 cur -= 1 # Reconsume
3566                 return new_character_token '</' # fixfull split
3567
3568         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3569         tok_state_script_data_escaped_end_tag_name = ->
3570                 c = txt.charAt(cur++)
3571                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3572                         if is_appropriate_end_tag tok_cur_tag
3573                                 tok_state = tok_state_before_attribute_name
3574                                 return
3575                         # fall through
3576                 if c is '/'
3577                         if is_appropriate_end_tag tok_cur_tag
3578                                 tok_state = tok_state_self_closing_start_tag
3579                                 return
3580                         # fall through
3581                 if c is '>'
3582                         if is_appropriate_end_tag tok_cur_tag
3583                                 tok_state = tok_state_data
3584                                 return tok_cur_tag
3585                         # fall through
3586                 if is_uc_alpha(c)
3587                         tok_cur_tag.name += c.toLowerCase()
3588                         temporary_buffer += c.toLowerCase()
3589                         return
3590                 if is_lc_alpha(c)
3591                         tok_cur_tag.name += c
3592                         temporary_buffer += c.toLowerCase()
3593                         return
3594                 # Anything else
3595                 tok_state = tok_state_script_data_escaped
3596                 cur -= 1 # Reconsume
3597                 return new_character_token "</#{temporary_buffer}" # fixfull split
3598
3599         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3600         tok_state_script_data_double_escape_start = ->
3601                 c = txt.charAt(cur++)
3602                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3603                         if temporary_buffer is 'script'
3604                                 tok_state = tok_state_script_data_double_escaped
3605                         else
3606                                 tok_state = tok_state_script_data_escaped
3607                         return new_character_token c
3608                 if is_uc_alpha(c)
3609                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3610                         return new_character_token c
3611                 if is_lc_alpha(c)
3612                         temporary_buffer += c
3613                         return new_character_token c
3614                 # Anything else
3615                 tok_state = tok_state_script_data_escaped
3616                 cur -= 1 # Reconsume
3617                 return
3618
3619         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3620         tok_state_script_data_double_escaped = ->
3621                 c = txt.charAt(cur++)
3622                 if c is '-'
3623                         tok_state = tok_state_script_data_double_escaped_dash
3624                         return new_character_token '-'
3625                 if c is '<'
3626                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3627                         return new_character_token '<'
3628                 if c is "\u0000"
3629                         parse_error()
3630                         return new_character_token "\ufffd"
3631                 if c is '' # EOF
3632                         parse_error()
3633                         tok_state = tok_state_data
3634                         cur -= 1 # Reconsume
3635                         return
3636                 # Anything else
3637                 return new_character_token c
3638
3639         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3640         tok_state_script_data_double_escaped_dash = ->
3641                 c = txt.charAt(cur++)
3642                 if c is '-'
3643                         tok_state = tok_state_script_data_double_escaped_dash_dash
3644                         return new_character_token '-'
3645                 if c is '<'
3646                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3647                         return new_character_token '<'
3648                 if c is "\u0000"
3649                         parse_error()
3650                         tok_state = tok_state_script_data_double_escaped
3651                         return new_character_token "\ufffd"
3652                 if c is '' # EOF
3653                         parse_error()
3654                         tok_state = tok_state_data
3655                         cur -= 1 # Reconsume
3656                         return
3657                 # Anything else
3658                 tok_state = tok_state_script_data_double_escaped
3659                 return new_character_token c
3660
3661         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3662         tok_state_script_data_double_escaped_dash_dash = ->
3663                 c = txt.charAt(cur++)
3664                 if c is '-'
3665                         return new_character_token '-'
3666                 if c is '<'
3667                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3668                         return new_character_token '<'
3669                 if c is '>'
3670                         tok_state = tok_state_script_data
3671                         return new_character_token '>'
3672                 if c is "\u0000"
3673                         parse_error()
3674                         tok_state = tok_state_script_data_double_escaped
3675                         return new_character_token "\ufffd"
3676                 if c is '' # EOF
3677                         parse_error()
3678                         tok_state = tok_state_data
3679                         cur -= 1 # Reconsume
3680                         return
3681                 # Anything else
3682                 tok_state = tok_state_script_data_double_escaped
3683                 return new_character_token c
3684
3685         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3686         tok_state_script_data_double_escaped_less_than_sign = ->
3687                 c = txt.charAt(cur++)
3688                 if c is '/'
3689                         temporary_buffer = ''
3690                         tok_state = tok_state_script_data_double_escape_end
3691                         return new_character_token '/'
3692                 # Anything else
3693                 tok_state = tok_state_script_data_double_escaped
3694                 cur -= 1 # Reconsume
3695                 return
3696
3697         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3698         tok_state_script_data_double_escape_end = ->
3699                 c = txt.charAt(cur++)
3700                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3701                         if temporary_buffer is 'script'
3702                                 tok_state = tok_state_script_data_escaped
3703                         else
3704                                 tok_state = tok_state_script_data_double_escaped
3705                         return new_character_token c
3706                 if is_uc_alpha(c)
3707                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3708                         return new_character_token c
3709                 if is_lc_alpha(c)
3710                         temporary_buffer += c
3711                         return new_character_token c
3712                 # Anything else
3713                 tok_state = tok_state_script_data_double_escaped
3714                 cur -= 1 # Reconsume
3715                 return
3716
3717         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3718         tok_state_before_attribute_name = ->
3719                 attr_name = null
3720                 switch c = txt.charAt(cur++)
3721                         when "\t", "\n", "\u000c", ' '
3722                                 return null
3723                         when '/'
3724                                 tok_state = tok_state_self_closing_start_tag
3725                                 return null
3726                         when '>'
3727                                 tok_state = tok_state_data
3728                                 tmp = tok_cur_tag
3729                                 tok_cur_tag = null
3730                                 return tmp
3731                         when "\u0000"
3732                                 parse_error()
3733                                 attr_name = "\ufffd"
3734                         when '"', "'", '<', '='
3735                                 parse_error()
3736                                 attr_name = c
3737                         when '' # EOF
3738                                 parse_error()
3739                                 tok_state = tok_state_data
3740                         else
3741                                 if is_uc_alpha(c)
3742                                         attr_name = c.toLowerCase()
3743                                 else
3744                                         attr_name = c
3745                 if attr_name?
3746                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3747                         tok_state = tok_state_attribute_name
3748                 return null
3749
3750         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3751         tok_state_attribute_name = ->
3752                 switch c = txt.charAt(cur++)
3753                         when "\t", "\n", "\u000c", ' '
3754                                 tok_state = tok_state_after_attribute_name
3755                         when '/'
3756                                 tok_state = tok_state_self_closing_start_tag
3757                         when '='
3758                                 tok_state = tok_state_before_attribute_value
3759                         when '>'
3760                                 tok_state = tok_state_data
3761                                 tmp = tok_cur_tag
3762                                 tok_cur_tag = null
3763                                 return tmp
3764                         when "\u0000"
3765                                 parse_error()
3766                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3767                         when '"', "'", '<'
3768                                 parse_error()
3769                                 tok_cur_tag.attrs_a[0][0] += c
3770                         when '' # EOF
3771                                 parse_error()
3772                                 tok_state = tok_state_data
3773                         else
3774                                 if is_uc_alpha(c)
3775                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3776                                 else
3777                                         tok_cur_tag.attrs_a[0][0] += c
3778                 return null
3779
3780         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3781         tok_state_after_attribute_name = ->
3782                 c = txt.charAt(cur++)
3783                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3784                         return
3785                 if c is '/'
3786                         tok_state = tok_state_self_closing_start_tag
3787                         return
3788                 if c is '='
3789                         tok_state = tok_state_before_attribute_value
3790                         return
3791                 if c is '>'
3792                         tok_state = tok_state_data
3793                         return tok_cur_tag
3794                 if is_uc_alpha(c)
3795                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3796                         tok_state = tok_state_attribute_name
3797                         return
3798                 if c is "\u0000"
3799                         parse_error()
3800                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3801                         tok_state = tok_state_attribute_name
3802                         return
3803                 if c is '' # EOF
3804                         parse_error()
3805                         tok_state = tok_state_data
3806                         cur -= 1 # reconsume
3807                         return
3808                 if c is '"' or c is "'" or c is '<'
3809                         parse_error()
3810                         # fall through to Anything else
3811                 # Anything else
3812                 tok_cur_tag.attrs_a.unshift [c, '']
3813                 tok_state = tok_state_attribute_name
3814
3815         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3816         tok_state_before_attribute_value = ->
3817                 switch c = txt.charAt(cur++)
3818                         when "\t", "\n", "\u000c", ' '
3819                                 return null
3820                         when '"'
3821                                 tok_state = tok_state_attribute_value_double_quoted
3822                         when '&'
3823                                 tok_state = tok_state_attribute_value_unquoted
3824                                 cur -= 1
3825                         when "'"
3826                                 tok_state = tok_state_attribute_value_single_quoted
3827                         when "\u0000"
3828                                 # Parse error
3829                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3830                                 tok_state = tok_state_attribute_value_unquoted
3831                         when '>'
3832                                 # Parse error
3833                                 tok_state = tok_state_data
3834                                 tmp = tok_cur_tag
3835                                 tok_cur_tag = null
3836                                 return tmp
3837                         when '' # EOF
3838                                 parse_error()
3839                                 tok_state = tok_state_data
3840                         else
3841                                 tok_cur_tag.attrs_a[0][1] += c
3842                                 tok_state = tok_state_attribute_value_unquoted
3843                 return null
3844
3845         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3846         tok_state_attribute_value_double_quoted = ->
3847                 switch c = txt.charAt(cur++)
3848                         when '"'
3849                                 tok_state = tok_state_after_attribute_value_quoted
3850                         when '&'
3851                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3852                         when "\u0000"
3853                                 # Parse error
3854                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3855                         when '' # EOF
3856                                 parse_error()
3857                                 tok_state = tok_state_data
3858                         else
3859                                 tok_cur_tag.attrs_a[0][1] += c
3860                 return null
3861
3862         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3863         tok_state_attribute_value_single_quoted = ->
3864                 switch c = txt.charAt(cur++)
3865                         when "'"
3866                                 tok_state = tok_state_after_attribute_value_quoted
3867                         when '&'
3868                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3869                         when "\u0000"
3870                                 # Parse error
3871                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3872                         when '' # EOF
3873                                 parse_error()
3874                                 tok_state = tok_state_data
3875                         else
3876                                 tok_cur_tag.attrs_a[0][1] += c
3877                 return null
3878
3879         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3880         tok_state_attribute_value_unquoted = ->
3881                 switch c = txt.charAt(cur++)
3882                         when "\t", "\n", "\u000c", ' '
3883                                 tok_state = tok_state_before_attribute_name
3884                         when '&'
3885                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3886                         when '>'
3887                                 tok_state = tok_state_data
3888                                 tmp = tok_cur_tag
3889                                 tok_cur_tag = null
3890                                 return tmp
3891                         when "\u0000"
3892                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3893                         when '' # EOF
3894                                 parse_error()
3895                                 tok_state = tok_state_data
3896                         else
3897                                 # Parse Error if ', <, = or ` (backtick)
3898                                 tok_cur_tag.attrs_a[0][1] += c
3899                 return null
3900
3901         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3902         tok_state_after_attribute_value_quoted = ->
3903                 switch c = txt.charAt(cur++)
3904                         when "\t", "\n", "\u000c", ' '
3905                                 tok_state = tok_state_before_attribute_name
3906                         when '/'
3907                                 tok_state = tok_state_self_closing_start_tag
3908                         when '>'
3909                                 tok_state = tok_state_data
3910                                 tmp = tok_cur_tag
3911                                 tok_cur_tag = null
3912                                 return tmp
3913                         when '' # EOF
3914                                 parse_error()
3915                                 tok_state = tok_state_data
3916                         else
3917                                 # Parse Error
3918                                 tok_state = tok_state_before_attribute_name
3919                                 cur -= 1 # we didn't handle that char
3920                 return null
3921
3922         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3923         tok_state_self_closing_start_tag = ->
3924                 c = txt.charAt(cur++)
3925                 if c is '>'
3926                         tok_cur_tag.flag 'self-closing', true
3927                         tok_state = tok_state_data
3928                         return tok_cur_tag
3929                 if c is ''
3930                         parse_error()
3931                         tok_state = tok_state_data
3932                         cur -= 1 # Reconsume
3933                         return
3934                 # Anything else
3935                 parse_error()
3936                 tok_state = tok_state_before_attribute_name
3937                 cur -= 1 # Reconsume
3938                 return
3939
3940         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3941         # WARNING: put a comment token in tok_cur_tag before setting this state
3942         tok_state_bogus_comment = ->
3943                 next_gt = txt.indexOf '>', cur
3944                 if next_gt is -1
3945                         val = txt.substr cur
3946                         cur = txt.length
3947                 else
3948                         val = txt.substr cur, (next_gt - cur)
3949                         cur = next_gt + 1
3950                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3951                 tok_cur_tag.text += val
3952                 tok_state = tok_state_data
3953                 return tok_cur_tag
3954
3955         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3956         tok_state_markup_declaration_open = ->
3957                 if txt.substr(cur, 2) is '--'
3958                         cur += 2
3959                         tok_cur_tag = new_comment_token ''
3960                         tok_state = tok_state_comment_start
3961                         return
3962                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3963                         cur += 7
3964                         tok_state = tok_state_doctype
3965                         return
3966                 acn = adjusted_current_node()
3967                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3968                         cur += 7
3969                         tok_state = tok_state_cdata_section
3970                         return
3971                 # Otherwise
3972                 parse_error()
3973                 tok_cur_tag = new_comment_token ''
3974                 tok_state = tok_state_bogus_comment
3975                 return
3976
3977         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3978         tok_state_comment_start = ->
3979                 switch c = txt.charAt(cur++)
3980                         when '-'
3981                                 tok_state = tok_state_comment_start_dash
3982                         when "\u0000"
3983                                 parse_error()
3984                                 tok_state = tok_state_comment
3985                                 return new_character_token "\ufffd"
3986                         when '>'
3987                                 parse_error()
3988                                 tok_state = tok_state_data
3989                                 return tok_cur_tag
3990                         when '' # EOF
3991                                 parse_error()
3992                                 tok_state = tok_state_data
3993                                 cur -= 1 # Reconsume
3994                                 return tok_cur_tag
3995                         else
3996                                 tok_cur_tag.text += c
3997                                 tok_state = tok_state_comment
3998                 return null
3999
4000         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
4001         tok_state_comment_start_dash = ->
4002                 switch c = txt.charAt(cur++)
4003                         when '-'
4004                                 tok_state = tok_state_comment_end
4005                         when "\u0000"
4006                                 parse_error()
4007                                 tok_cur_tag.text += "-\ufffd"
4008                                 tok_state = tok_state_comment
4009                         when '>'
4010                                 parse_error()
4011                                 tok_state = tok_state_data
4012                                 return tok_cur_tag
4013                         when '' # EOF
4014                                 parse_error()
4015                                 tok_state = tok_state_data
4016                                 cur -= 1 # Reconsume
4017                                 return tok_cur_tag
4018                         else
4019                                 tok_cur_tag.text += "-#{c}"
4020                                 tok_state = tok_state_comment
4021                 return null
4022
4023         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
4024         tok_state_comment = ->
4025                 switch c = txt.charAt(cur++)
4026                         when '-'
4027                                 tok_state = tok_state_comment_end_dash
4028                         when "\u0000"
4029                                 parse_error()
4030                                 tok_cur_tag.text += "\ufffd"
4031                         when '' # EOF
4032                                 parse_error()
4033                                 tok_state = tok_state_data
4034                                 cur -= 1 # Reconsume
4035                                 return tok_cur_tag
4036                         else
4037                                 tok_cur_tag.text += c
4038                 return null
4039
4040         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
4041         tok_state_comment_end_dash = ->
4042                 switch c = txt.charAt(cur++)
4043                         when '-'
4044                                 tok_state = tok_state_comment_end
4045                         when "\u0000"
4046                                 parse_error()
4047                                 tok_cur_tag.text += "-\ufffd"
4048                                 tok_state = tok_state_comment
4049                         when '' # EOF
4050                                 parse_error()
4051                                 tok_state = tok_state_data
4052                                 cur -= 1 # Reconsume
4053                                 return tok_cur_tag
4054                         else
4055                                 tok_cur_tag.text += "-#{c}"
4056                                 tok_state = tok_state_comment
4057                 return null
4058
4059         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
4060         tok_state_comment_end = ->
4061                 switch c = txt.charAt(cur++)
4062                         when '>'
4063                                 tok_state = tok_state_data
4064                                 return tok_cur_tag
4065                         when "\u0000"
4066                                 parse_error()
4067                                 tok_cur_tag.text += "--\ufffd"
4068                                 tok_state = tok_state_comment
4069                         when '!'
4070                                 parse_error()
4071                                 tok_state = tok_state_comment_end_bang
4072                         when '-'
4073                                 parse_error()
4074                                 tok_cur_tag.text += '-'
4075                         when '' # EOF
4076                                 parse_error()
4077                                 tok_state = tok_state_data
4078                                 cur -= 1 # Reconsume
4079                                 return tok_cur_tag
4080                         else
4081                                 parse_error()
4082                                 tok_cur_tag.text += "--#{c}"
4083                                 tok_state = tok_state_comment
4084                 return null
4085
4086         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
4087         tok_state_comment_end_bang = ->
4088                 switch c = txt.charAt(cur++)
4089                         when '-'
4090                                 tok_cur_tag.text += "--!#{c}"
4091                                 tok_state = tok_state_comment_end_dash
4092                         when '>'
4093                                 tok_state = tok_state_data
4094                                 return tok_cur_tag
4095                         when "\u0000"
4096                                 parse_error()
4097                                 tok_cur_tag.text += "--!\ufffd"
4098                                 tok_state = tok_state_comment
4099                         when '' # EOF
4100                                 parse_error()
4101                                 tok_state = tok_state_data
4102                                 cur -= 1 # Reconsume
4103                                 return tok_cur_tag
4104                         else
4105                                 tok_cur_tag.text += "--!#{c}"
4106                                 tok_state = tok_state_comment
4107                 return null
4108
4109         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
4110         tok_state_doctype = ->
4111                 switch c = txt.charAt(cur++)
4112                         when "\t", "\u000a", "\u000c", ' '
4113                                 tok_state = tok_state_before_doctype_name
4114                         when '' # EOF
4115                                 parse_error()
4116                                 tok_state = tok_state_data
4117                                 el = new_doctype_token ''
4118                                 el.flag 'force-quirks', true
4119                                 cur -= 1 # Reconsume
4120                                 return el
4121                         else
4122                                 parse_error()
4123                                 tok_state = tok_state_before_doctype_name
4124                                 cur -= 1 # Reconsume
4125                 return null
4126
        # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
4128         tok_state_before_doctype_name = ->
4129                 c = txt.charAt(cur++)
4130                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4131                         return
4132                 if is_uc_alpha(c)
4133                         tok_cur_tag = new_doctype_token c.toLowerCase()
4134                         tok_state = tok_state_doctype_name
4135                         return
4136                 if c is "\u0000"
4137                         parse_error()
4138                         tok_cur_tag = new_doctype_token "\ufffd"
4139                         tok_state = tok_state_doctype_name
4140                         return
4141                 if c is '>'
4142                         parse_error()
4143                         el = new_doctype_token ''
4144                         el.flag 'force-quirks', true
4145                         tok_state = tok_state_data
4146                         return el
4147                 if c is '' # EOF
4148                         parse_error()
4149                         tok_state = tok_state_data
4150                         el = new_doctype_token ''
4151                         el.flag 'force-quirks', true
4152                         cur -= 1 # Reconsume
4153                         return el
4154                 # Anything else
4155                 tok_cur_tag = new_doctype_token c
4156                 tok_state = tok_state_doctype_name
4157                 return null
4158
4159         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4160         tok_state_doctype_name = ->
4161                 c = txt.charAt(cur++)
4162                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4163                         tok_state = tok_state_after_doctype_name
4164                         return
4165                 if c is '>'
4166                         tok_state = tok_state_data
4167                         return tok_cur_tag
4168                 if is_uc_alpha(c)
4169                         tok_cur_tag.name += c.toLowerCase()
4170                         return
4171                 if c is "\u0000"
4172                         parse_error()
4173                         tok_cur_tag.name += "\ufffd"
4174                         return
4175                 if c is '' # EOF
4176                         parse_error()
4177                         tok_state = tok_state_data
4178                         tok_cur_tag.flag 'force-quirks', true
4179                         cur -= 1 # Reconsume
4180                         return tok_cur_tag
4181                 # Anything else
4182                 tok_cur_tag.name += c
4183                 return null
4184
4185         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4186         tok_state_after_doctype_name = ->
4187                 c = txt.charAt(cur++)
4188                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4189                         return
4190                 if c is '>'
4191                         tok_state = tok_state_data
4192                         return tok_cur_tag
4193                 if c is '' # EOF
4194                         parse_error()
4195                         tok_state = tok_state_data
4196                         tok_cur_tag.flag 'force-quirks', true
4197                         cur -= 1 # Reconsume
4198                         return tok_cur_tag
4199                 # Anything else
4200                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4201                         cur += 5
4202                         tok_state = tok_state_after_doctype_public_keyword
4203                         return
4204                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4205                         cur += 5
4206                         tok_state = tok_state_after_doctype_system_keyword
4207                         return
4208                 parse_error()
4209                 tok_cur_tag.flag 'force-quirks', true
4210                 tok_state = tok_state_bogus_doctype
4211                 return null
4212
4213         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4214         tok_state_after_doctype_public_keyword = ->
4215                 c = txt.charAt(cur++)
4216                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4217                         tok_state = tok_state_before_doctype_public_identifier
4218                         return
4219                 if c is '"'
4220                         parse_error()
4221                         tok_cur_tag.public_identifier = ''
4222                         tok_state = tok_state_doctype_public_identifier_double_quoted
4223                         return
4224                 if c is "'"
4225                         parse_error()
4226                         tok_cur_tag.public_identifier = ''
4227                         tok_state = tok_state_doctype_public_identifier_single_quoted
4228                         return
4229                 if c is '>'
4230                         parse_error()
4231                         tok_cur_tag.flag 'force-quirks', true
4232                         tok_state = tok_state_data
4233                         return tok_cur_tag
4234                 if c is '' # EOF
4235                         parse_error()
4236                         tok_state = tok_state_data
4237                         tok_cur_tag.flag 'force-quirks', true
4238                         cur -= 1 # Reconsume
4239                         return tok_cur_tag
4240                 # Anything else
4241                 parse_error()
4242                 tok_cur_tag.flag 'force-quirks', true
4243                 tok_state = tok_state_bogus_doctype
4244                 return null
4245
4246         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4247         tok_state_before_doctype_public_identifier = ->
4248                 c = txt.charAt(cur++)
4249                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4250                         return
4251                 if c is '"'
4252                         parse_error()
4253                         tok_cur_tag.public_identifier = ''
4254                         tok_state = tok_state_doctype_public_identifier_double_quoted
4255                         return
4256                 if c is "'"
4257                         parse_error()
4258                         tok_cur_tag.public_identifier = ''
4259                         tok_state = tok_state_doctype_public_identifier_single_quoted
4260                         return
4261                 if c is '>'
4262                         parse_error()
4263                         tok_cur_tag.flag 'force-quirks', true
4264                         tok_state = tok_state_data
4265                         return tok_cur_tag
4266                 if c is '' # EOF
4267                         parse_error()
4268                         tok_state = tok_state_data
4269                         tok_cur_tag.flag 'force-quirks', true
4270                         cur -= 1 # Reconsume
4271                         return tok_cur_tag
4272                 # Anything else
4273                 parse_error()
4274                 tok_cur_tag.flag 'force-quirks', true
4275                 tok_state = tok_state_bogus_doctype
4276                 return null
4277
4278
4279         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4280         tok_state_doctype_public_identifier_double_quoted = ->
4281                 c = txt.charAt(cur++)
4282                 if c is '"'
4283                         tok_state = tok_state_after_doctype_public_identifier
4284                         return
4285                 if c is "\u0000"
4286                         parse_error()
4287                         tok_cur_tag.public_identifier += "\ufffd"
4288                         return
4289                 if c is '>'
4290                         parse_error()
4291                         tok_cur_tag.flag 'force-quirks', true
4292                         tok_state = tok_state_data
4293                         return tok_cur_tag
4294                 if c is '' # EOF
4295                         parse_error()
4296                         tok_state = tok_state_data
4297                         tok_cur_tag.flag 'force-quirks', true
4298                         cur -= 1 # Reconsume
4299                         return tok_cur_tag
4300                 # Anything else
4301                 tok_cur_tag.public_identifier += c
4302                 return null
4303
4304         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4305         tok_state_doctype_public_identifier_single_quoted = ->
4306                 c = txt.charAt(cur++)
4307                 if c is "'"
4308                         tok_state = tok_state_after_doctype_public_identifier
4309                         return
4310                 if c is "\u0000"
4311                         parse_error()
4312                         tok_cur_tag.public_identifier += "\ufffd"
4313                         return
4314                 if c is '>'
4315                         parse_error()
4316                         tok_cur_tag.flag 'force-quirks', true
4317                         tok_state = tok_state_data
4318                         return tok_cur_tag
4319                 if c is '' # EOF
4320                         parse_error()
4321                         tok_state = tok_state_data
4322                         tok_cur_tag.flag 'force-quirks', true
4323                         cur -= 1 # Reconsume
4324                         return tok_cur_tag
4325                 # Anything else
4326                 tok_cur_tag.public_identifier += c
4327                 return null
4328
4329         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4330         tok_state_after_doctype_public_identifier = ->
4331                 c = txt.charAt(cur++)
4332                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4333                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4334                         return
4335                 if c is '>'
4336                         tok_state = tok_state_data
4337                         return tok_cur_tag
4338                 if c is '"'
4339                         parse_error()
4340                         tok_cur_tag.system_identifier = ''
4341                         tok_state = tok_state_doctype_system_identifier_double_quoted
4342                         return
4343                 if c is "'"
4344                         parse_error()
4345                         tok_cur_tag.system_identifier = ''
4346                         tok_state = tok_state_doctype_system_identifier_single_quoted
4347                         return
4348                 if c is '' # EOF
4349                         parse_error()
4350                         tok_state = tok_state_data
4351                         tok_cur_tag.flag 'force-quirks', true
4352                         cur -= 1 # Reconsume
4353                         return tok_cur_tag
4354                 # Anything else
4355                 parse_error()
4356                 tok_cur_tag.flag 'force-quirks', true
4357                 tok_state = tok_state_bogus_doctype
4358                 return null
4359
4360         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4361         tok_state_between_doctype_public_and_system_identifiers = ->
4362                 c = txt.charAt(cur++)
4363                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4364                         return
4365                 if c is '>'
4366                         tok_state = tok_state_data
4367                         return tok_cur_tag
4368                 if c is '"'
4369                         parse_error()
4370                         tok_cur_tag.system_identifier = ''
4371                         tok_state = tok_state_doctype_system_identifier_double_quoted
4372                         return
4373                 if c is "'"
4374                         parse_error()
4375                         tok_cur_tag.system_identifier = ''
4376                         tok_state = tok_state_doctype_system_identifier_single_quoted
4377                         return
4378                 if c is '' # EOF
4379                         parse_error()
4380                         tok_state = tok_state_data
4381                         tok_cur_tag.flag 'force-quirks', true
4382                         cur -= 1 # Reconsume
4383                         return tok_cur_tag
4384                 # Anything else
4385                 parse_error()
4386                 tok_cur_tag.flag 'force-quirks', true
4387                 tok_state = tok_state_bogus_doctype
4388                 return null
4389
4390         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4391         tok_state_after_doctype_system_keyword = ->
4392                 c = txt.charAt(cur++)
4393                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4394                         tok_state = tok_state_before_doctype_system_identifier
4395                         return
4396                 if c is '"'
4397                         parse_error()
4398                         tok_cur_tag.system_identifier = ''
4399                         tok_state = tok_state_doctype_system_identifier_double_quoted
4400                         return
4401                 if c is "'"
4402                         parse_error()
4403                         tok_cur_tag.system_identifier = ''
4404                         tok_state = tok_state_doctype_system_identifier_single_quoted
4405                         return
4406                 if c is '>'
4407                         parse_error()
4408                         tok_cur_tag.flag 'force-quirks', true
4409                         tok_state = tok_state_data
4410                         return tok_cur_tag
4411                 if c is '' # EOF
4412                         parse_error()
4413                         tok_state = tok_state_data
4414                         tok_cur_tag.flag 'force-quirks', true
4415                         cur -= 1 # Reconsume
4416                         return tok_cur_tag
4417                 # Anything else
4418                 parse_error()
4419                 tok_cur_tag.flag 'force-quirks', true
4420                 tok_state = tok_state_bogus_doctype
4421                 return null
4422
4423         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4424         tok_state_before_doctype_system_identifier = ->
4425                 c = txt.charAt(cur++)
4426                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4427                         return
4428                 if c is '"'
4429                         tok_cur_tag.system_identifier = ''
4430                         tok_state = tok_state_doctype_system_identifier_double_quoted
4431                         return
4432                 if c is "'"
4433                         tok_cur_tag.system_identifier = ''
4434                         tok_state = tok_state_doctype_system_identifier_single_quoted
4435                         return
4436                 if c is '>'
4437                         parse_error()
4438                         tok_cur_tag.flag 'force-quirks', true
4439                         tok_state = tok_state_data
4440                         return tok_cur_tag
4441                 if c is '' # EOF
4442                         parse_error()
4443                         tok_state = tok_state_data
4444                         tok_cur_tag.flag 'force-quirks', true
4445                         cur -= 1 # Reconsume
4446                         return tok_cur_tag
4447                 # Anything else
4448                 parse_error()
4449                 tok_cur_tag.flag 'force-quirks', true
4450                 tok_state = tok_state_bogus_doctype
4451                 return null
4452
4453         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4454         tok_state_doctype_system_identifier_double_quoted = ->
4455                 c = txt.charAt(cur++)
4456                 if c is '"'
4457                         tok_state = tok_state_after_doctype_system_identifier
4458                         return
4459                 if c is "\u0000"
4460                         parse_error()
4461                         tok_cur_tag.system_identifier += "\ufffd"
4462                         return
4463                 if c is '>'
4464                         parse_error()
4465                         tok_cur_tag.flag 'force-quirks', true
4466                         tok_state = tok_state_data
4467                         return tok_cur_tag
4468                 if c is '' # EOF
4469                         parse_error()
4470                         tok_state = tok_state_data
4471                         tok_cur_tag.flag 'force-quirks', true
4472                         cur -= 1 # Reconsume
4473                         return tok_cur_tag
4474                 # Anything else
4475                 tok_cur_tag.system_identifier += c
4476                 return null
4477
4478         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4479         tok_state_doctype_system_identifier_single_quoted = ->
4480                 c = txt.charAt(cur++)
4481                 if c is "'"
4482                         tok_state = tok_state_after_doctype_system_identifier
4483                         return
4484                 if c is "\u0000"
4485                         parse_error()
4486                         tok_cur_tag.system_identifier += "\ufffd"
4487                         return
4488                 if c is '>'
4489                         parse_error()
4490                         tok_cur_tag.flag 'force-quirks', true
4491                         tok_state = tok_state_data
4492                         return tok_cur_tag
4493                 if c is '' # EOF
4494                         parse_error()
4495                         tok_state = tok_state_data
4496                         tok_cur_tag.flag 'force-quirks', true
4497                         cur -= 1 # Reconsume
4498                         return tok_cur_tag
4499                 # Anything else
4500                 tok_cur_tag.system_identifier += c
4501                 return null
4502
4503         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4504         tok_state_after_doctype_system_identifier = ->
4505                 c = txt.charAt(cur++)
4506                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4507                         return
4508                 if c is '>'
4509                         tok_state = tok_state_data
4510                         return tok_cur_tag
4511                 if c is '' # EOF
4512                         parse_error()
4513                         tok_state = tok_state_data
4514                         tok_cur_tag.flag 'force-quirks', true
4515                         cur -= 1 # Reconsume
4516                         return tok_cur_tag
4517                 # Anything else
4518                 parse_error()
4519                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4520                 tok_state = tok_state_bogus_doctype
4521                 return null
4522
4523         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4524         tok_state_bogus_doctype = ->
4525                 c = txt.charAt(cur++)
4526                 if c is '>'
4527                         tok_state = tok_state_data
4528                         return tok_cur_tag
4529                 if c is '' # EOF
4530                         tok_state = tok_state_data
4531                         cur -= 1 # Reconsume
4532                         return tok_cur_tag
4533                 # Anything else
4534                 return null
4535
4536         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4537         tok_state_cdata_section = ->
4538                 tok_state = tok_state_data
4539                 next_gt = txt.indexOf ']]>', cur
4540                 if next_gt is -1
4541                         val = txt.substr cur
4542                         cur = txt.length
4543                 else
4544                         val = txt.substr cur, (next_gt - cur)
4545                         cur = next_gt + 3
4546                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4547                 if val.length > 0
4548                         return new_character_token val # fixfull split
4549                 return null
4550
4551         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4552         # Don't set this as a state, just call it
4553         # returns a string (NOT a text node)
4554         parse_character_reference = (allowed_char = null, in_attr = false) ->
4555                 if cur >= txt.length
4556                         return '&'
4557                 switch c = txt.charAt(cur)
4558                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4559                                 # explicitly not a parse error
4560                                 return '&'
4561                         when ';'
4562                                 # there has to be "one or more" alnums between & and ; to be a parse error
4563                                 return '&'
4564                         when '#'
4565                                 if cur + 1 >= txt.length
4566                                         return '&'
4567                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4568                                         base = 16
4569                                         charset = hex_chars
4570                                         start = cur + 2
4571                                 else
4572                                         charset = digits
4573                                         start = cur + 1
4574                                         base = 10
4575                                 i = 0
4576                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4577                                         i += 1
4578                                 if i is 0
4579                                         return '&'
4580                                 cur = start + i
4581                                 if txt.charAt(start + i) is ';'
4582                                         cur += 1
4583                                 else
4584                                         parse_error()
4585                                 code_point = txt.substr(start, i)
4586                                 while code_point.charAt(0) is '0' and code_point.length > 1
4587                                         code_point = code_point.substr 1
4588                                 code_point = parseInt(code_point, base)
4589                                 if unicode_fixes[code_point]?
4590                                         parse_error()
4591                                         return unicode_fixes[code_point]
4592                                 else
4593                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4594                                                 parse_error()
4595                                                 return "\ufffd"
4596                                         else
4597                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4598                                                         parse_error()
4599                                                 return from_code_point code_point
4600                                 return
4601                         else
4602                                 for i in [0...31]
4603                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4604                                                 break
4605                                 if i is 0
4606                                         # exit early, because parse_error() below needs at least one alnum
4607                                         return '&'
4608                                 if txt.charAt(cur + i) is ';'
4609                                         i += 1 # include ';' terminator in value
4610                                         decoded = decode_named_char_ref txt.substr(cur, i)
4611                                         if decoded?
4612                                                 cur += i
4613                                                 return decoded
4614                                         parse_error()
4615                                         return '&'
4616                                 else
4617                                         # no ';' terminator (only legacy char refs)
4618                                         max = i
4619                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4620                                                 c = legacy_char_refs[txt.substr(cur, i)]
4621                                                 if c?
4622                                                         if in_attr
4623                                                                 if txt.charAt(cur + i) is '='
4624                                                                         # "because some legacy user agents will
4625                                                                         # misinterpret the markup in those cases"
4626                                                                         parse_error()
4627                                                                         return '&'
4628                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4629                                                                         # this makes attributes forgiving about url args
4630                                                                         return '&'
4631                                                         # ok, and besides the weird exceptions for attributes...
4632                                                         # return the matching char
4633                                                         cur += i # consume entity chars
4634                                                         parse_error() # because no terminating ";"
4635                                                         return c
4636                                         parse_error()
4637                                         return '&'
4638                 return # never reached
4639
4640         eat_next_token_if_newline = ->
4641                 # Tokenize ahead until one token is produced. If it is a "newline" text
4642                 # token it stays consumed; otherwise cur is rewound so the token is not
4643                 # skipped. Returns nothing either way.
4644                 old_cur = cur
4645                 loop
4646                         t = tok_state()
4647                         break if t?
4648                 if t.type is TYPE_TEXT
4649                         # a line feed counts as a newline however it was written
4650                         return if t.text is "\u000a"
4651                         # a carriage return counts only if exactly one character was
4652                         # consumed, i.e. it did not come from a character reference
4653                         return if t.text is "\u000d" and cur - old_cur is 1
4654                 # not a "newline"
4655                 cur = old_cur
4656                 return
4657
4658         # tree constructor initialization
4659         # see comments on TYPE_TAG/etc for the structure of this data
4660         txt = args.html # the input text to parse
4661         cur = 0 # index into txt of the next character to be read
4662         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML # root node; the enclosing function returns doc.children
4663         doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
4664         open_els = [] # stack of open elements (index 0 is the current element)
4665         afe = [] # active formatting elements
4666         template_ins_modes = [] # presumably the spec's stack of template insertion modes -- TODO confirm
4667         ins_mode = ins_mode_initial # current insertion mode; starts out as ins_mode_initial
4668         original_ins_mode = ins_mode # TODO check spec
4669         flag_scripting = args.scripting ? true # defaults to true when args.scripting is null/undefined. TODO might need an extra flag to get <noscript> to parse correctly
4670         flag_frameset_ok = true
4671         flag_parsing = true # the main loop keeps running while this is true
4672         flag_foster_parenting = false
4673         form_element_pointer = null
4674         temporary_buffer = null
4675         pending_table_character_tokens = []
4676         head_element_pointer = null
4677         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4678         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4679         prev_node_id = 0 # just for debugging
4680
4681         # tokenizer initialization
4682         tok_state = tok_state_data # current tokenizer state; it is called as a function and yields a token or null
4683
4684         # text pre-processing
4685         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4686         # normalize newlines in one pass: each CR LF pair and each lone CR becomes a single LF
4687         txt = txt.replace(/\r\n?/g, "\n")
4688
4691         # process input: run the tokenizer and feed each token to the tree constructor
4692         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4693         parse_main_loop = ->
4694                 while flag_parsing
4695                         t = tok_state() # null when this tokenizer step produced no token
4696                         # fixfull parse error if has self-closing flag, but it wasn't acknowledged
4697                         process_token t if t?
4698                 return # explicit, so the loop is not compiled into an array of per-iteration results
4699         parse_main_loop()
4700         return doc.children
4701
4702 serialize_els = (els, shallow, show_ids) ->
4703         # Serialize every node in els (each must provide .serialize) and join the
4704         # results with commas; an empty list gives ''. shallow and show_ids are
4705         # passed through unchanged to each node's serialize.
4706         # '' + ... coerces each result to a string the same way += concatenation would
4707         serialized = ('' + t.serialize(shallow, show_ids) for t in els)
4708         return serialized.join ','
4710
4711 # public interface: parser entry point, debug-log helpers, and the node-type,
4712 # namespace and quirks-mode constants
4713 exported = {
4714         parse_html, debug_log_reset, debug_log_each,
4715         TYPE_TAG, TYPE_TEXT, TYPE_COMMENT, TYPE_DOCTYPE,
4716         NS_HTML, NS_MATHML, NS_SVG,
4717         QUIRKS_NO, QUIRKS_LIMITED, QUIRKS_YES
4718 }
4719 module.exports[export_name] = export_value for own export_name, export_value of exported