JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
implement fragment parsing algorithm
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
24 #
25 # Deviations from that spec:
26 #
27 #   Purposeful: search this file for "WHATWG"
28 #
29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
30
31
32 # stacks/lists
33 #
34 # the spec uses a many different words do indicate which ends of lists/stacks
35 # they are talking about (and relative movement within the lists/stacks). This
36 # section splains. I'm implementing "lists" (afe and open_els) the same way
37 # (both as stacks)
38 #
39 # stacks grow downward (current element is index=0)
40 #
41 # example: open_els = [a, b, c, d, e, f, g]
42 #
43 # "grows downwards" means it's visualized like this: (index: el, names)
44 #
45 #   6: g "start of the list", "topmost", "first"
46 #   5: f
47 #   4: e "previous" (to d), "above", "before"
48 #   3: d   (previous/next are relative to this element)
49 #   2: c "next", "after", "lower", "below"
50 #   1: b
51 #   0: a "end of the list", "current node", "bottommost", "last"
52
53
54 # browser
55 # note: to get this to run outside a browser, you'll have to write a native
56 # implementation of decode_named_char_ref()
57 unless module?.exports?
58         window.wheic = {}
59         module = exports: window.wheic
60
61 from_code_point = (x) ->
62         if String.fromCodePoint?
63                 return String.fromCodePoint x
64         else
65                 if x <= 0xffff
66                         return String.fromCharCode x
67                 x -= 0x10000
68                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
69
70 # Each node is an obect of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
73 TYPE_COMMENT = 2
74 TYPE_DOCTYPE = 3
75 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
78 TYPE_EOF = 6
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
81
82 # namespace constants
83 NS_HTML = 1
84 NS_MATHML = 2
85 NS_SVG = 3
86
87 # quirks mode constants
88 QUIRKS_NO = 1
89 QUIRKS_LIMITED = 2
90 QUIRKS_YES = 3
91
92 g_debug_log = []
93 debug_log_reset = ->
94         g_debug_log = []
95 debug_log = (str) ->
96         g_debug_log.push str
97 debug_log_each = (cb) ->
98         for str in g_debug_log
99                 cb str
100
101 prev_node_id = 0
102 class Node
103         constructor: (type, args = {}) ->
104                 @type = type # one of the TYPE_* constants above
105                 @name = args.name ? '' # tag name
106                 @text = args.text ? '' # contents for text/comment nodes
107                 @attrs = args.attrs ? {}
108                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
109                 @children = args.children ? []
110                 @namespace = args.namespace ? NS_HTML
111                 @parent = args.parent ? null
112                 @token = args.token ? null
113                 @flags = args.flags ? {}
114                 if args.id?
115                         @id = "#{args.id}+"
116                 else
117                         @id = "#{++prev_node_id}"
118         acknowledge_self_closing: ->
119                 if @token?
120                         @token.flag 'did_self_close', true
121                 else
122                         @flag 'did_self_close', true
123         flag: (key, value = null) ->
124                 if value?
125                         @flags[key] = value
126                 else
127                         return @flags[key]
128         serialize: (shallow = false, show_ids = false) -> # for unit tests
129                 ret = ''
130                 switch @type
131                         when TYPE_TAG
132                                 ret += 'tag:'
133                                 ret += JSON.stringify @name
134                                 ret += ','
135                                 if show_ids
136                                         ret += "##{@id},"
137                                 if shallow
138                                         break
139                                 attr_keys = []
140                                 for k of @attrs
141                                         attr_keys.push k
142                                 attr_keys.sort()
143                                 ret += '{'
144                                 sep = ''
145                                 for k in attr_keys
146                                         ret += sep
147                                         sep = ','
148                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
149                                 ret += '},['
150                                 sep = ''
151                                 for c in @children
152                                         ret += sep
153                                         sep = ','
154                                         ret += c.serialize shallow, show_ids
155                                 ret += ']'
156                         when TYPE_TEXT
157                                 ret += 'text:'
158                                 ret += JSON.stringify @text
159                         when TYPE_COMMENT
160                                 ret += 'comment:'
161                                 ret += JSON.stringify @text
162                         when TYPE_DOCTYPE
163                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
164                         when TYPE_AFE_MARKER
165                                 ret += 'marker'
166                         when TYPE_AAA_BOOKMARK
167                                 ret += 'aaa_bookmark'
168                         else
169                                 ret += 'unknown:'
170                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
171                 return ret
172
173 # helpers: (only take args that are normally known when parser creates nodes)
174 new_open_tag = (name) ->
175         return new Node TYPE_START_TAG, name: name
176 new_end_tag = (name) ->
177         return new Node TYPE_END_TAG, name: name
178 new_element = (name) ->
179         return new Node TYPE_TAG, name: name
180 new_text_node = (txt) ->
181         return new Node TYPE_TEXT, text: txt
182 new_character_token = new_text_node
183 new_comment_token = (txt) ->
184         return new Node TYPE_COMMENT, text: txt
185 new_doctype_token = (name) ->
186         return new Node TYPE_DOCTYPE, name: name
187 new_eof_token = ->
188         return new Node TYPE_EOF
189 new_afe_marker = ->
190         return new Node TYPE_AFE_MARKER
191 new_aaa_bookmark = ->
192         return new Node TYPE_AAA_BOOKMARK
193
194 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
195 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
196 digits = "0123456789"
197 alnum = lc_alpha + uc_alpha + digits
198 hex_chars = digits + "abcdefABCDEF"
199
200 is_uc_alpha = (str) ->
201         return str.length is 1 and uc_alpha.indexOf(str) > -1
202 is_lc_alpha = (str) ->
203         return str.length is 1 and lc_alpha.indexOf(str) > -1
204
205 # some SVG elements have dashes in them
206 tag_name_chars = alnum + "-"
207
208 # http://www.w3.org/TR/html5/infrastructure.html#space-character
209 space_chars = "\u0009\u000a\u000c\u000d\u0020"
210 is_space = (txt) ->
211         return txt.length is 1 and space_chars.indexOf(txt) > -1
212 is_space_tok = (t) ->
213         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
214
215 is_input_hidden_tok = (t) ->
216         return false unless t.type is TYPE_START_TAG
217         for a in t.attrs_a
218                 if a[0] is 'type'
219                         if a[1].toLowerCase() is 'hidden'
220                                 return true
221                         return false
222         return false
223
224 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
225 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
226
227 unicode_fixes = {}
228 unicode_fixes[0x00] = "\uFFFD"
229 unicode_fixes[0x80] = "\u20AC"
230 unicode_fixes[0x82] = "\u201A"
231 unicode_fixes[0x83] = "\u0192"
232 unicode_fixes[0x84] = "\u201E"
233 unicode_fixes[0x85] = "\u2026"
234 unicode_fixes[0x86] = "\u2020"
235 unicode_fixes[0x87] = "\u2021"
236 unicode_fixes[0x88] = "\u02C6"
237 unicode_fixes[0x89] = "\u2030"
238 unicode_fixes[0x8A] = "\u0160"
239 unicode_fixes[0x8B] = "\u2039"
240 unicode_fixes[0x8C] = "\u0152"
241 unicode_fixes[0x8E] = "\u017D"
242 unicode_fixes[0x91] = "\u2018"
243 unicode_fixes[0x92] = "\u2019"
244 unicode_fixes[0x93] = "\u201C"
245 unicode_fixes[0x94] = "\u201D"
246 unicode_fixes[0x95] = "\u2022"
247 unicode_fixes[0x96] = "\u2013"
248 unicode_fixes[0x97] = "\u2014"
249 unicode_fixes[0x98] = "\u02DC"
250 unicode_fixes[0x99] = "\u2122"
251 unicode_fixes[0x9A] = "\u0161"
252 unicode_fixes[0x9B] = "\u203A"
253 unicode_fixes[0x9C] = "\u0153"
254 unicode_fixes[0x9E] = "\u017E"
255 unicode_fixes[0x9F] = "\u0178"
256
257 quirks_yes_pi_prefixes = [
258         "+//silmaril//dtd html pro v0r11 19970101//"
259         "-//as//dtd html 3.0 aswedit + extensions//"
260         "-//advasoft ltd//dtd html 3.0 aswedit + extensions//"
261         "-//ietf//dtd html 2.0 level 1//"
262         "-//ietf//dtd html 2.0 level 2//"
263         "-//ietf//dtd html 2.0 strict level 1//"
264         "-//ietf//dtd html 2.0 strict level 2//"
265         "-//ietf//dtd html 2.0 strict//"
266         "-//ietf//dtd html 2.0//"
267         "-//ietf//dtd html 2.1e//"
268         "-//ietf//dtd html 3.0//"
269         "-//ietf//dtd html 3.2 final//"
270         "-//ietf//dtd html 3.2//"
271         "-//ietf//dtd html 3//"
272         "-//ietf//dtd html level 0//"
273         "-//ietf//dtd html level 1//"
274         "-//ietf//dtd html level 2//"
275         "-//ietf//dtd html level 3//"
276         "-//ietf//dtd html strict level 0//"
277         "-//ietf//dtd html strict level 1//"
278         "-//ietf//dtd html strict level 2//"
279         "-//ietf//dtd html strict level 3//"
280         "-//ietf//dtd html strict//"
281         "-//ietf//dtd html//"
282         "-//metrius//dtd metrius presentational//"
283         "-//microsoft//dtd internet explorer 2.0 html strict//"
284         "-//microsoft//dtd internet explorer 2.0 html//"
285         "-//microsoft//dtd internet explorer 2.0 tables//"
286         "-//microsoft//dtd internet explorer 3.0 html strict//"
287         "-//microsoft//dtd internet explorer 3.0 html//"
288         "-//microsoft//dtd internet explorer 3.0 tables//"
289         "-//netscape comm. corp.//dtd html//"
290         "-//netscape comm. corp.//dtd strict html//"
291         "-//o'reilly and associates//dtd html 2.0//"
292         "-//o'reilly and associates//dtd html extended 1.0//"
293         "-//o'reilly and associates//dtd html extended relaxed 1.0//"
294         "-//sq//dtd html 2.0 hotmetal + extensions//"
295         "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//"
296         "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//"
297         "-//spyglass//dtd html 2.0 extended//"
298         "-//sun microsystems corp.//dtd hotjava html//"
299         "-//sun microsystems corp.//dtd hotjava strict html//"
300         "-//w3c//dtd html 3 1995-03-24//"
301         "-//w3c//dtd html 3.2 draft//"
302         "-//w3c//dtd html 3.2 final//"
303         "-//w3c//dtd html 3.2//"
304         "-//w3c//dtd html 3.2s draft//"
305         "-//w3c//dtd html 4.0 frameset//"
306         "-//w3c//dtd html 4.0 transitional//"
307         "-//w3c//dtd html experimental 19960712//"
308         "-//w3c//dtd html experimental 970421//"
309         "-//w3c//dtd w3 html//"
310         "-//w3o//dtd w3 html 3.0//"
311         "-//webtechs//dtd mozilla html 2.0//"
312         "-//webtechs//dtd mozilla html//"
313 ]
314
315 # These are the character references that don't need a terminating semicolon
316 # min length: 2, max: 6, none are a prefix of any other.
317 legacy_char_refs = {
318         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
319         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
320         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
321         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
322         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
323         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
324         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
325         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
326         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
327         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
328         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
329         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
330         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
331         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
332         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
333         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
334         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
335         yen: '¥', yuml: 'ÿ'
336 }
337
338 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
339 raw_text_elements = ['script', 'style']
340 escapable_raw_text_elements = ['textarea', 'title']
341 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
342 svg_elements = [
343         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
344         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
345         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
346         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
347         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
348         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
349         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
350         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
351         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
352         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
353         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
354         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
355         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
356         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
357         'view', 'vkern'
358 ]
359
360 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
361 mathml_elements = [
362         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
363         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
364         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
365         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
366         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
367         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
368         'determinant', 'diff', 'divergence', 'divide', 'domain',
369         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
370         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
371         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
372         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
373         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
374         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
375         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
376         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
377         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
378         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
379         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
380         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
381         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
382         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
383         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
384         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
385         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
386         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
387         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
388         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
389         'vectorproduct', 'xor'
390 ]
391 # foreign_elements = [svg_elements..., mathml_elements...]
392 #normal_elements = All other allowed HTML elements are normal elements.
393
394 special_elements = {
395         # HTML:
396         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
397         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
398         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
399         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
400         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
401         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
402         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
403         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
404         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
405         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
406         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
407
408         menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
409
410         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
411         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
412         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
413         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
414         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
415         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
416         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
417
418         # MathML:
419         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
420         'annotation-xml':NS_MATHML,
421
422         # SVG:
423         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
424 }
425
426 formatting_elements = {
427          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
428          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
429          u: true
430 }
431
432 mathml_text_integration = {
433         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
434 }
435 is_mathml_text_integration_point = (el) ->
436         return mathml_text_integration[el.name] is el.namespace
437 is_html_integration = (el) -> # DON'T PASS A TOKEN
438         if el.namespace is NS_MATHML
439                 if el.name is 'annotation-xml'
440                         if el.attrs.encoding?
441                                 if el.attrs.encoding.toLowerCase() is 'text/html'
442                                         return true
443                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
444                                         return true
445                 return false
446         if el.namespace is NS_SVG
447                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
448                         return true
449         return false
450
451 h_tags = {
452         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
453 }
454
455 foster_parenting_targets = {
456         table: NS_HTML
457         tbody: NS_HTML
458         tfoot: NS_HTML
459         thead: NS_HTML
460         tr: NS_HTML
461 }
462
463 end_tag_implied = {
464         dd: NS_HTML
465         dt: NS_HTML
466         li: NS_HTML
467         option: NS_HTML
468         optgroup: NS_HTML
469         p: NS_HTML
470         rb: NS_HTML
471         rp: NS_HTML
472         rt: NS_HTML
473         rtc: NS_HTML
474 }
475
476 el_is_special = (e) ->
477         return special_elements[e.name] is e.namespace
478
479 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
480 el_is_special_not_adp = (el) ->
481         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
482
483 svg_name_fixes = {
484         altglyph: 'altGlyph'
485         altglyphdef: 'altGlyphDef'
486         altglyphitem: 'altGlyphItem'
487         animatecolor: 'animateColor'
488         animatemotion: 'animateMotion'
489         animatetransform: 'animateTransform'
490         clippath: 'clipPath'
491         feblend: 'feBlend'
492         fecolormatrix: 'feColorMatrix'
493         fecomponenttransfer: 'feComponentTransfer'
494         fecomposite: 'feComposite'
495         feconvolvematrix: 'feConvolveMatrix'
496         fediffuselighting: 'feDiffuseLighting'
497         fedisplacementmap: 'feDisplacementMap'
498         fedistantlight: 'feDistantLight'
499         fedropshadow: 'feDropShadow'
500         feflood: 'feFlood'
501         fefunca: 'feFuncA'
502         fefuncb: 'feFuncB'
503         fefuncg: 'feFuncG'
504         fefuncr: 'feFuncR'
505         fegaussianblur: 'feGaussianBlur'
506         feimage: 'feImage'
507         femerge: 'feMerge'
508         femergenode: 'feMergeNode'
509         femorphology: 'feMorphology'
510         feoffset: 'feOffset'
511         fepointlight: 'fePointLight'
512         fespecularlighting: 'feSpecularLighting'
513         fespotlight: 'feSpotLight'
514         fetile: 'feTile'
515         feturbulence: 'feTurbulence'
516         foreignobject: 'foreignObject'
517         glyphref: 'glyphRef'
518         lineargradient: 'linearGradient'
519         radialgradient: 'radialGradient'
520         textpath: 'textPath'
521 }
522 svg_attribute_fixes = {
523         attributename: 'attributeName'
524         attributetype: 'attributeType'
525         basefrequency: 'baseFrequency'
526         baseprofile: 'baseProfile'
527         calcmode: 'calcMode'
528         clippathunits: 'clipPathUnits'
529         contentscripttype: 'contentScriptType'
530         contentstyletype: 'contentStyleType'
531         diffuseconstant: 'diffuseConstant'
532         edgemode: 'edgeMode'
533         externalresourcesrequired: 'externalResourcesRequired'
534         # WHATWG removes this: filterres: 'filterRes'
535         filterunits: 'filterUnits'
536         glyphref: 'glyphRef'
537         gradienttransform: 'gradientTransform'
538         gradientunits: 'gradientUnits'
539         kernelmatrix: 'kernelMatrix'
540         kernelunitlength: 'kernelUnitLength'
541         keypoints: 'keyPoints'
542         keysplines: 'keySplines'
543         keytimes: 'keyTimes'
544         lengthadjust: 'lengthAdjust'
545         limitingconeangle: 'limitingConeAngle'
546         markerheight: 'markerHeight'
547         markerunits: 'markerUnits'
548         markerwidth: 'markerWidth'
549         maskcontentunits: 'maskContentUnits'
550         maskunits: 'maskUnits'
551         numoctaves: 'numOctaves'
552         pathlength: 'pathLength'
553         patterncontentunits: 'patternContentUnits'
554         patterntransform: 'patternTransform'
555         patternunits: 'patternUnits'
556         pointsatx: 'pointsAtX'
557         pointsaty: 'pointsAtY'
558         pointsatz: 'pointsAtZ'
559         preservealpha: 'preserveAlpha'
560         preserveaspectratio: 'preserveAspectRatio'
561         primitiveunits: 'primitiveUnits'
562         refx: 'refX'
563         refy: 'refY'
564         repeatcount: 'repeatCount'
565         repeatdur: 'repeatDur'
566         requiredextensions: 'requiredExtensions'
567         requiredfeatures: 'requiredFeatures'
568         specularconstant: 'specularConstant'
569         specularexponent: 'specularExponent'
570         spreadmethod: 'spreadMethod'
571         startoffset: 'startOffset'
572         stddeviation: 'stdDeviation'
573         stitchtiles: 'stitchTiles'
574         surfacescale: 'surfaceScale'
575         systemlanguage: 'systemLanguage'
576         tablevalues: 'tableValues'
577         targetx: 'targetX'
578         targety: 'targetY'
579         textlength: 'textLength'
580         viewbox: 'viewBox'
581         viewtarget: 'viewTarget'
582         xchannelselector: 'xChannelSelector'
583         ychannelselector: 'yChannelSelector'
584         zoomandpan: 'zoomAndPan'
585 }
586 foreign_attr_fixes = {
587         'xlink:actuate': 'xlink actuate'
588         'xlink:arcrole': 'xlink arcrole'
589         'xlink:href': 'xlink href'
590         'xlink:role': 'xlink role'
591         'xlink:show': 'xlink show'
592         'xlink:title': 'xlink title'
593         'xlink:type': 'xlink type'
594         'xml:base': 'xml base'
595         'xml:lang': 'xml lang'
596         'xml:space': 'xml space'
597         'xmlns': 'xmlns'
598         'xmlns:xlink': 'xmlns xlink'
599 }
600 adjust_mathml_attributes = (t) ->
601         for a in t.attrs_a
602                 if a[0] is 'definitionurl'
603                         a[0] = 'definitionURL'
604         return
605 adjust_svg_attributes = (t) ->
606         for a in t.attrs_a
607                 if svg_attribute_fixes[a[0]]?
608                         a[0] = svg_attribute_fixes[a[0]]
609         return
610 adjust_foreign_attributes = (t) ->
611         # fixfull
612         for a in t.attrs_a
613                 if foreign_attr_fixes[a[0]]?
614                         a[0] = foreign_attr_fixes[a[0]]
615         return
616
617 # decode_named_char_ref()
618 #
619 # The list of named character references is _huge_ so ask the browser to decode
620 # for us instead of wasting bandwidth/space on including the table here.
621 #
622 # Pass without the "&" but with the ";" examples:
623 #    for "&amp" pass "amp;"
624 #    for "&#x2032" pass "x2032;"
625 g_dncr = {
626         cache: {}
627         textarea: document.createElement('textarea')
628 }
629 # TODO test this in IE8
630 decode_named_char_ref = (txt) ->
631         txt = "&#{txt}"
632         decoded = g_dncr.cache[txt]
633         return decoded if decoded?
634         g_dncr.textarea.innerHTML = txt
635         decoded = g_dncr.textarea.value
636         return null if decoded is txt
637         return g_dncr.cache[txt] = decoded
638
639 parse_html = (args) ->
640         txt = null
641         cur = null # index of next char in txt to be parsed
642         # declare doc and tokenizer variables so they're in scope below
643         doc = null
644         open_els = null # stack of open elements
645         afe = null # active formatting elements
646         template_ins_modes = null
647         ins_mode = null
648         original_ins_mode = null
649         tok_state = null
650         tok_cur_tag = null # partially parsed tag
651         flag_scripting = null
652         flag_frameset_ok = null
653         flag_parsing = null
654         flag_foster_parenting = null
655         form_element_pointer = null
656         temporary_buffer = null
657         pending_table_character_tokens = null
658         head_element_pointer = null
659         flag_fragment_parsing = null
660         context_element = null
661
662         stop_parsing = ->
663                 flag_parsing = false
664
665         parse_error = ->
666                 if args.error_cb?
667                         args.error_cb cur
668                 else
669                         console.log "Parse error at character #{cur} of #{txt.length}"
670
671         # http://www.w3.org/TR/html5/syntax.html#push-onto-the-list-of-active-formatting-elements
672         # "Noah's Ark clause" but with three
673         afe_push = (new_el) ->
674                 matches = 0
675                 for el, i in afe
676                         if el.type is TYPE_AFE_MARKER
677                                 break
678                         if el.name is new_el.name and el.namespace is new_el.namespace
679                                 attrs_match = true
680                                 for k, v of el.attrs
681                                         unless new_el.attrs[k] is v
682                                                 attrs_match = false
683                                                 break
684                                 if attrs_match
685                                         for k, v of new_el.attrs
686                                                 unless el.attrs[k] is v
687                                                         attrs_match = false
688                                                         break
689                                 if attrs_match
690                                         matches += 1
691                                         if matches is 3
692                                                 afe.splice i, 1
693                                                 break
694                 afe.unshift new_el
695         afe_push_marker = ->
696                 afe.unshift new_afe_marker()
697
698         # the functions below impliment the Tree Contstruction algorithm
699         # http://www.w3.org/TR/html5/syntax.html#tree-construction
700
701         # But first... the helpers
702         template_tag_is_open = ->
703                 for el in open_els
704                         if el.name is 'template' and el.namespace is NS_HTML
705                                 return true
706                 return false
707         is_in_scope_x = (tag_name, scope, namespace) ->
708                 for el in open_els
709                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
710                                 return true
711                         if scope[el.name] is el.namespace
712                                 return false
713                 return false
714         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
715                 for el in open_els
716                         if el.name is tag_name and (namespace is null or namespace is el.namespace)
717                                 return true
718                         if scope[el.name] is el.namespace
719                                 return false
720                         if scope2[el.name] is el.namespace
721                                 return false
722                 return false
723         standard_scopers = {
724                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
725                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
726                 template: NS_HTML,
727
728                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
729                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
730
731                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
732         }
733         button_scopers = button: NS_HTML
734         li_scopers = ol: NS_HTML, ul: NS_HTML
735         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
736         is_in_scope = (tag_name, namespace = null) ->
737                 return is_in_scope_x tag_name, standard_scopers, namespace
738         is_in_button_scope = (tag_name, namespace = null) ->
739                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
740         is_in_table_scope = (tag_name, namespace = null) ->
741                 return is_in_scope_x tag_name, table_scopers, namespace
742         # aka is_in_list_item_scope
743         is_in_li_scope = (tag_name, namespace = null) ->
744                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
745         is_in_select_scope = (tag_name, namespace = null) ->
746                 for t in open_els
747                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
748                                 return true
749                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
750                                 return false
751                 return false
752         # this checks for a particular element, not by name
753         # this requires a namespace match
754         el_is_in_scope = (needle) ->
755                 for el in open_els
756                         if el is needle
757                                 return true
758                         if standard_scopers[el.name] is el.namespace
759                                 return false
760                 return false
761
762         clear_to_table_stopers = {
763                 'table': true
764                 'template': true
765                 'html': true
766         }
767         clear_stack_to_table_context = ->
768                 loop
769                         if clear_to_table_stopers[open_els[0].name]?
770                                 break
771                         open_els.shift()
772                 return
773         clear_to_table_body_stopers = {
774                 tbody: NS_HTML
775                 tfoot: NS_HTML
776                 thead: NS_HTML
777                 template: NS_HTML
778                 html: NS_HTML
779         }
780         clear_stack_to_table_body_context = ->
781                 loop
782                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
783                                 break
784                         open_els.shift()
785                 return
786         clear_to_table_row_stopers = {
787                 'tr': true
788                 'template': true
789                 'html': true
790         }
791         clear_stack_to_table_row_context = ->
792                 loop
793                         if clear_to_table_row_stopers[open_els[0].name]?
794                                 break
795                         open_els.shift()
796                 return
797         clear_afe_to_marker = ->
798                 loop
799                         return unless afe.length > 0 # this happens in fragment case, ?spec error
800                         el = afe.shift()
801                         if el.type is TYPE_AFE_MARKER
802                                 return
803                 return
804
805         # 8.2.3.1 ...
806         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
807         reset_ins_mode = ->
808                 # 1. Let last be false.
809                 last = false
810                 # 2. Let node be the last node in the stack of open elements.
811                 node_i = 0
812                 node = open_els[node_i]
813                 # 3. Loop: If node is the first node in the stack of open elements,
814                 # then set last to true, and, if the parser was originally created as
815                 # part of the HTML fragment parsing algorithm (fragment case) set node
816                 # to the context element.
817                 loop
818                         if node_i is open_els.length - 1
819                                 last = true
820                                 if flag_fragment_parsing
821                                         node = context_element
822                         # 4. If node is a select element, run these substeps:
823                         if node.name is 'select' and node.namespace is NS_HTML
824                                 # 1. If last is true, jump to the step below labeled done.
825                                 unless last
826                                         # 2. Let ancestor be node.
827                                         ancestor_i = node_i
828                                         ancestor = node
829                                         # 3. Loop: If ancestor is the first node in the stack of
830                                         # open elements, jump to the step below labeled done.
831                                         loop
832                                                 if ancestor_i is open_els.length - 1
833                                                         break
834                                                 # 4. Let ancestor be the node before ancestor in the stack
835                                                 # of open elements.
836                                                 ancestor_i += 1
837                                                 ancestor = open_els[ancestor_i]
838                                                 # 5. If ancestor is a template node, jump to the step below
839                                                 # labeled done.
840                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
841                                                         break
842                                                 # 6. If ancestor is a table node, switch the insertion mode
843                                                 # to "in select in table" and abort these steps.
844                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
845                                                         ins_mode = ins_mode_in_select_in_table
846                                                         return
847                                                 # 7. Jump back to the step labeled loop.
848                                 # 8. Done: Switch the insertion mode to "in select" and abort
849                                 # these steps.
850                                 ins_mode = ins_mode_in_select
851                                 return
852                         # 5. If node is a td or th element and last is false, then switch
853                         # the insertion mode to "in cell" and abort these steps.
854                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
855                                 ins_mode = ins_mode_in_cell
856                                 return
857                         # 6. If node is a tr element, then switch the insertion mode to "in
858                         # row" and abort these steps.
859                         if node.name is 'tr' and node.namespace is NS_HTML
860                                 ins_mode = ins_mode_in_row
861                                 return
862                         # 7. If node is a tbody, thead, or tfoot element, then switch the
863                         # insertion mode to "in table body" and abort these steps.
864                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
865                                 ins_mode = ins_mode_in_table_body
866                                 return
867                         # 8. If node is a caption element, then switch the insertion mode
868                         # to "in caption" and abort these steps.
869                         if node.name is 'caption' and node.namespace is NS_HTML
870                                 ins_mode = ins_mode_in_caption
871                                 return
872                         # 9. If node is a colgroup element, then switch the insertion mode
873                         # to "in column group" and abort these steps.
874                         if node.name is 'colgroup' and node.namespace is NS_HTML
875                                 ins_mode = ins_mode_in_column_group
876                                 return
877                         # 10. If node is a table element, then switch the insertion mode to
878                         # "in table" and abort these steps.
879                         if node.name is 'table' and node.namespace is NS_HTML
880                                 ins_mode = ins_mode_in_table
881                                 return
882                         # 11. If node is a template element, then switch the insertion mode
883                         # to the current template insertion mode and abort these steps.
884                         if node.name is 'template' and node.namespace is NS_HTML
885                                 ins_mode = template_ins_modes[0]
886                                 return
887                         # 12. If node is a head element and last is true, then switch the
888                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
889                         # these steps. (fragment case)
890                         if node.name is 'head' and node.namespace is NS_HTML and last
891                                 ins_mode = ins_mode_in_body
892                                 return
893                         # 13. If node is a head element and last is false, then switch the
894                         # insertion mode to "in head" and abort these steps.
895                         if node.name is 'head' and node.namespace is NS_HTML and last is false
896                                 ins_mode = ins_mode_in_head
897                                 return
898                         # 14. If node is a body element, then switch the insertion mode to
899                         # "in body" and abort these steps.
900                         if node.name is 'body' and node.namespace is NS_HTML
901                                 ins_mode = ins_mode_in_body
902                                 return
903                         # 15. If node is a frameset element, then switch the insertion mode
904                         # to "in frameset" and abort these steps. (fragment case)
905                         if node.name is 'frameset' and node.namespace is NS_HTML
906                                 ins_mode = ins_mode_in_frameset
907                                 return
908                         # 16. If node is an html element, run these substeps:
909                         if node.name is 'html' and node.namespace is NS_HTML
910                                 # 1. If the head element pointer is null, switch the insertion
911                                 # mode to "before head" and abort these steps. (fragment case)
912                                 if head_element_pointer is null
913                                         ins_mode = ins_mode_before_head
914                                 else
915                                         # 2. Otherwise, the head element pointer is not null,
916                                         # switch the insertion mode to "after head" and abort these
917                                         # steps.
918                                         ins_mode = ins_mode_after_head
919                                 return
920                         # 17. If last is true, then switch the insertion mode to "in body"
921                         # and abort these steps. (fragment case)
922                         if last
923                                 ins_mode = ins_mode_in_body
924                                 return
925                         # 18. Let node now be the node before node in the stack of open
926                         # elements.
927                         node_i += 1
928                         node = open_els[node_i]
929                         # 19. Return to the step labeled loop.
930
931         # 8.2.3.2
932
933         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
934         adjusted_current_node = ->
935                 if open_els.length is 1 and flag_fragment_parsing
936                         return context_element
937                 return open_els[0]
938
939         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
940         # this implementation is structured (mostly) as described at the link above.
941         # capitalized comments are the "labels" described at the link above.
942         reconstruct_afe = ->
943                 return if afe.length is 0
944                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
945                         return
946                 # Rewind
947                 i = 0
948                 loop
949                         if i is afe.length - 1
950                                 break
951                         i += 1
952                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
953                                 i -= 1 # Advance
954                                 break
955                 # Create
956                 loop
957                         el = insert_html_element afe[i].token
958                         afe[i] = el
959                         break if i is 0
960                         i -= 1 # Advance
961
962         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
963         # adoption agency algorithm
964         # overview here:
965         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
966         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
967         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
968         adoption_agency = (subject) ->
969                 debug_log "adoption_agency()"
970                 debug_log "tree: #{serialize_els doc.children, false, true}"
971                 debug_log "open_els: #{serialize_els open_els, true, true}"
972                 debug_log "afe: #{serialize_els afe, true, true}"
973 # this block implements tha W3C spec
974 #               # 1. If the current node is an HTML element whose tag name is subject,
975 #               # then run these substeps:
976 #               #
977 #               # 1. Let element be the current node.
978 #               #
979 #               # 2. Pop element off the stack of open elements.
980 #               #
981 #               # 3. If element is also in the list of active formatting elements,
982 #               # remove the element from the list.
983 #               #
984 #               # 4. Abort the adoption agency algorithm.
985 #               if open_els[0].name is subject and open_els[0].namespace is NS_HTML
986 #                       el = open_els.shift()
987 #                       # remove it from the list of active formatting elements (if found)
988 #                       for t, i in afe
989 #                               if t is el
990 #                                       afe.splice i, 1
991 #                                       break
992 #                       debug_log "aaa: starting off with subject on top of stack, exiting"
993 #                       return
994 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
995                 # If the current node is an HTML element whose tag name is subject, and
996                 # the current node is not in the list of active formatting elements,
997                 # then pop the current node off the stack of open elements, and abort
998                 # these steps.
999                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
1000                         debug_log "aaa: starting off with subject on top of stack, exiting"
1001                         # remove it from the list of active formatting elements (if found)
1002                         in_afe = false
1003                         for el, i in afe
1004                                 if el is open_els[0]
1005                                         in_afe = true
1006                                         break
1007                         unless in_afe
1008                                 debug_log "aaa: ...and not in afe, aaa done"
1009                                 open_els.shift()
1010                                 return
1011                         # fall through
1012 # END WHATWG
1013                 outer = 0
1014                 loop
1015                         if outer >= 8
1016                                 return
1017                         outer += 1
1018                         # 5. Let formatting element be the last element in the list of
1019                         # active formatting elements that: is between the end of the list
1020                         # and the last scope marker in the list, if any, or the start of
1021                         # the list otherwise, and  has the tag name subject.
1022                         fe = null
1023                         for t, fe_of_afe in afe
1024                                 if t.type is TYPE_AFE_MARKER
1025                                         break
1026                                 if t.name is subject
1027                                         fe = t
1028                                         break
1029                         # If there is no such element, then abort these steps and instead
1030                         # act as described in the "any other end tag" entry above.
1031                         if fe is null
1032                                 debug_log "aaa: fe not found in afe"
1033                                 in_body_any_other_end_tag subject
1034                                 return
1035                         # 6. If formatting element is not in the stack of open elements,
1036                         # then this is a parse error; remove the element from the list, and
1037                         # abort these steps.
1038                         in_open_els = false
1039                         for t, fe_of_open_els in open_els
1040                                 if t is fe
1041                                         in_open_els = true
1042                                         break
1043                         unless in_open_els
1044                                 debug_log "aaa: fe not found in open_els"
1045                                 parse_error()
1046                                 # "remove it from the list" must mean afe, since it's not in open_els
1047                                 afe.splice fe_of_afe, 1
1048                                 return
1049                         # 7. If formatting element is in the stack of open elements, but
1050                         # the element is not in scope, then this is a parse error; abort
1051                         # these steps.
1052                         unless el_is_in_scope fe
1053                                 debug_log "aaa: fe not in scope"
1054                                 parse_error()
1055                                 return
1056                         # 8. If formatting element is not the current node, this is a parse
1057                         # error. (But do not abort these steps.)
1058                         unless open_els[0] is fe
1059                                 parse_error()
1060                                 # continue
1061                         # 9. Let furthest block be the topmost node in the stack of open
1062                         # elements that is lower in the stack than formatting element, and
1063                         # is an element in the special category. There might not be one.
1064                         fb = null
1065                         fb_of_open_els = null
1066                         for t, i in open_els
1067                                 if t is fe
1068                                         break
1069                                 if el_is_special t
1070                                         fb = t
1071                                         fb_of_open_els = i
1072                                         # and continue, to see if there's one that's more "topmost"
1073                         # 10. If there is no furthest block, then the UA must first pop all
1074                         # the nodes from the bottom of the stack of open elements, from the
1075                         # current node up to and including formatting element, then remove
1076                         # formatting element from the list of active formatting elements,
1077                         # and finally abort these steps.
1078                         if fb is null
1079                                 debug_log "aaa: no fb"
1080                                 loop
1081                                         t = open_els.shift()
1082                                         if t is fe
1083                                                 afe.splice fe_of_afe, 1
1084                                                 return
1085                         # 11. Let common ancestor be the element immediately above
1086                         # formatting element in the stack of open elements.
1087                         ca = open_els[fe_of_open_els + 1] # common ancestor
1088
1089                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1090                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1091                         bookmark = new_aaa_bookmark()
1092                         for t, i in afe
1093                                 if t is fe
1094                                         afe.splice i, 0, bookmark
1095                                         break
1096                         node = last_node = fb
1097                         inner = 0
1098                         loop
1099                                 inner += 1
1100                                 # 3. Let node be the element immediately above node in the
1101                                 # stack of open elements, or if node is no longer in the stack
1102                                 # of open elements (e.g. because it got removed by this
1103                                 # algorithm), the element that was immediately above node in
1104                                 # the stack of open elements before node was removed.
1105                                 node_next = null
1106                                 for t, i in open_els
1107                                         if t is node
1108                                                 node_next = open_els[i + 1]
1109                                                 break
1110                                 node = node_next ? node_above
1111                                 debug_log "inner loop #{inner}"
1112                                 debug_log "tree: #{serialize_els doc.children, false, true}"
1113                                 debug_log "open_els: #{serialize_els open_els, true, true}"
1114                                 debug_log "afe: #{serialize_els afe, true, true}"
1115                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1116                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1117                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1118                                 debug_log "node: #{node.serialize true, true}"
1119                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1120
1121                                 # 4. If node is formatting element, then go to the next step in
1122                                 # the overall algorithm.
1123                                 if node is fe
1124                                         break
1125                                 debug_log "the meat"
1126                                 # 5. If inner loop counter is greater than three and node is in
1127                                 # the list of active formatting elements, then remove node from
1128                                 # the list of active formatting elements.
1129                                 node_in_afe = false
1130                                 for t, i in afe
1131                                         if t is node
1132                                                 if inner > 3
1133                                                         afe.splice i, 1
1134                                                         debug_log "max out inner"
1135                                                 else
1136                                                         node_in_afe = true
1137                                                         debug_log "in afe"
1138                                                 break
1139                                 # 6. If node is not in the list of active formatting elements,
1140                                 # then remove node from the stack of open elements and then go
1141                                 # back to the step labeled inner loop.
1142                                 unless node_in_afe
1143                                         debug_log "not in afe"
1144                                         for t, i in open_els
1145                                                 if t is node
1146                                                         node_above = open_els[i + 1]
1147                                                         open_els.splice i, 1
1148                                                         break
1149                                         continue
1150                                 debug_log "the bones"
1151                                 # 7. create an element for the token for which the element node
1152                                 # was created, in the HTML namespace, with common ancestor as
1153                                 # the intended parent; replace the entry for node in the list
1154                                 # of active formatting elements with an entry for the new
1155                                 # element, replace the entry for node in the stack of open
1156                                 # elements with an entry for the new element, and let node be
1157                                 # the new element.
1158                                 new_node = token_to_element node.token, NS_HTML, ca
1159                                 for t, i in afe
1160                                         if t is node
1161                                                 afe[i] = new_node
1162                                                 debug_log "replaced in afe"
1163                                                 break
1164                                 for t, i in open_els
1165                                         if t is node
1166                                                 node_above = open_els[i + 1]
1167                                                 open_els[i] = new_node
1168                                                 debug_log "replaced in open_els"
1169                                                 break
1170                                 node = new_node
1171                                 # 8. If last node is furthest block, then move the
1172                                 # aforementioned bookmark to be immediately after the new node
1173                                 # in the list of active formatting elements.
1174                                 if last_node is fb
1175                                         for t, i in afe
1176                                                 if t is bookmark
1177                                                         afe.splice i, 1
1178                                                         debug_log "removed bookmark"
1179                                                         break
1180                                         for t, i in afe
1181                                                 if t is node
1182                                                         # "after" means lower
1183                                                         afe.splice i, 0, bookmark # "after as <-
1184                                                         debug_log "placed bookmark after node"
1185                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1186                                                         break
1187                                 # 9. Insert last node into node, first removing it from its
1188                                 # previous parent node if any.
1189                                 if last_node.parent?
1190                                         debug_log "last_node has parent"
1191                                         for c, i in last_node.parent.children
1192                                                 if c is last_node
1193                                                         debug_log "removing last_node from parent"
1194                                                         last_node.parent.children.splice i, 1
1195                                                         break
1196                                 node.children.push last_node
1197                                 last_node.parent = node
1198                                 # 10. Let last node be node.
1199                                 last_node = node
1200                                 debug_log "at last"
1201                                 # 11. Return to the step labeled inner loop.
1202                         # 14. Insert whatever last node ended up being in the previous step
1203                         # at the appropriate place for inserting a node, but using common
1204                         # ancestor as the override target.
1205
1206                         # In the case where fe is immediately followed by fb:
1207                         #   * inner loop exits out early (node==fe)
1208                         #   * last_node is fb
1209                         #   * last_node is still in the tree (not a duplicate)
1210                         if last_node.parent?
1211                                 debug_log "FEFIRST? last_node has parent"
1212                                 for c, i in last_node.parent.children
1213                                         if c is last_node
1214                                                 debug_log "removing last_node from parent"
1215                                                 last_node.parent.children.splice i, 1
1216                                                 break
1217
1218                         debug_log "after aaa inner loop"
1219                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1220                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1221                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1222                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1223                         debug_log "tree: #{serialize_els doc.children, false, true}"
1224
1225                         debug_log "insert"
1226
1227
1228                         # can't use standard insert token thing, because it's already in
1229                         # open_els and must stay at it's current position in open_els
1230                         dest = adjusted_insertion_location ca
1231                         dest[0].children.splice dest[1], 0, last_node
1232                         last_node.parent = dest[0]
1233
1234
1235                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1236                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1237                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1238                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1239                         debug_log "tree: #{serialize_els doc.children, false, true}"
1240
1241                         # 15. Create an element for the token for which formatting element
1242                         # was created, in the HTML namespace, with furthest block as the
1243                         # intended parent.
1244                         new_element = token_to_element fe.token, NS_HTML, fb
1245                         # 16. Take all of the child nodes of furthest block and append them
1246                         # to the element created in the last step.
1247                         while fb.children.length
1248                                 t = fb.children.shift()
1249                                 t.parent = new_element
1250                                 new_element.children.push t
1251                         # 17. Append that new element to furthest block.
1252                         new_element.parent = fb
1253                         fb.children.push new_element
1254                         # 18. Remove formatting element from the list of active formatting
1255                         # elements, and insert the new element into the list of active
1256                         # formatting elements at the position of the aforementioned
1257                         # bookmark.
1258                         for t, i in afe
1259                                 if t is fe
1260                                         afe.splice i, 1
1261                                         break
1262                         for t, i in afe
1263                                 if t is bookmark
1264                                         afe[i] = new_element
1265                                         break
1266                         # 19. Remove formatting element from the stack of open elements,
1267                         # and insert the new element into the stack of open elements
1268                         # immediately below the position of furthest block in that stack.
1269                         for t, i in open_els
1270                                 if t is fe
1271                                         open_els.splice i, 1
1272                                         break
1273                         for t, i in open_els
1274                                 if t is fb
1275                                         open_els.splice i, 0, new_element
1276                                         break
1277                         # 20. Jump back to the step labeled outer loop.
1278                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1279                         debug_log "tree: #{serialize_els doc.children, false, true}"
1280                         debug_log "open_els: #{serialize_els open_els, true, true}"
1281                         debug_log "afe: #{serialize_els afe, true, true}"
1282                 debug_log "AAA DONE"
1283
1284         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1285         close_p_element = ->
1286                 generate_implied_end_tags 'p' # arg is exception
1287                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1288                         parse_error()
1289                 while open_els.length > 1 # just in case
1290                         el = open_els.shift()
1291                         if el.name is 'p' and el.namespace is NS_HTML
1292                                 return
1293         close_p_if_in_button_scope = ->
1294                 if is_in_button_scope 'p', NS_HTML
1295                         close_p_element()
1296
1297         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1298         # aka insert_a_character = (t) ->
1299         insert_character = (t) ->
1300                 dest = adjusted_insertion_location()
1301                 # fixfull check for Document node
1302                 if dest[1] > 0
1303                         prev = dest[0].children[dest[1] - 1]
1304                         if prev.type is TYPE_TEXT
1305                                 prev.text += t.text
1306                                 return
1307                 dest[0].children.splice dest[1], 0, t
1308
1309
1310         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1311         process_token = (t) ->
1312                 acn = adjusted_current_node()
1313                 unless acn?
1314                         ins_mode t
1315                         return
1316                 if acn.namespace is NS_HTML
1317                         ins_mode t
1318                         return
1319                 if is_mathml_text_integration_point(acn)
1320                         if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
1321                                 ins_mode t
1322                                 return
1323                         if t.type is TYPE_TEXT
1324                                 ins_mode t
1325                                 return
1326                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1327                         ins_mode t
1328                         return
1329                 if is_html_integration acn
1330                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1331                                 ins_mode t
1332                                 return
1333                 if t.type is TYPE_EOF
1334                         ins_mode t
1335                         return
1336                 in_foreign_content t
1337                 return
1338
1339         # 8.2.5.1
1340         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1341         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1342         adjusted_insertion_location = (override_target = null) ->
1343                 # 1. If there was an override target specified, then let target be the
1344                 # override target.
1345                 if override_target?
1346                         target = override_target
1347                 else # Otherwise, let target be the current node.
1348                         target = open_els[0]
1349                 # 2. Determine the adjusted insertion location using the first matching
1350                 # steps from the following list:
1351                 #
1352                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1353                 # thead, or tr element Foster parenting happens when content is
1354                 # misnested in tables.
1355                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1356                         loop # once. this is here so we can ``break`` to "abort these substeps"
1357                                 # 1. Let last template be the last template element in the
1358                                 # stack of open elements, if any.
1359                                 last_template = null
1360                                 last_template_i = null
1361                                 for el, i in open_els
1362                                         if el.name is 'template' and el.namespace is NS_HTML
1363                                                 last_template = el
1364                                                 last_template_i = i
1365                                                 break
1366                                 # 2. Let last table be the last table element in the stack of
1367                                 # open elements, if any.
1368                                 last_table = null
1369                                 last_table_i
1370                                 for el, i in open_els
1371                                         if el.name is 'table' and el.namespace is NS_HTML
1372                                                 last_table = el
1373                                                 last_table_i = i
1374                                                 break
1375                                 # 3. If there is a last template and either there is no last
1376                                 # table, or there is one, but last template is lower (more
1377                                 # recently added) than last table in the stack of open
1378                                 # elements, then: let adjusted insertion location be inside
1379                                 # last template's template contents, after its last child (if
1380                                 # any), and abort these substeps.
1381                                 if last_template and (last_table is null or last_template_i < last_table_i)
1382                                         target = last_template # fixfull should be it's contents
1383                                         target_i = target.children.length
1384                                         break
1385                                 # 4. If there is no last table, then let adjusted insertion
1386                                 # location be inside the first element in the stack of open
1387                                 # elements (the html element), after its last child (if any),
1388                                 # and abort these substeps. (fragment case)
1389                                 if last_table is null
1390                                         # this is odd
1391                                         target = open_els[open_els.length - 1]
1392                                         target_i = target.children.length
1393                                         break
1394                                 # 5. If last table has a parent element, then let adjusted
1395                                 # insertion location be inside last table's parent element,
1396                                 # immediately before last table, and abort these substeps.
1397                                 if last_table.parent?
1398                                         for c, i in last_table.parent.children
1399                                                 if c is last_table
1400                                                         target = last_table.parent
1401                                                         target_i = i
1402                                                         break
1403                                         break
1404                                 # 6. Let previous element be the element immediately above last
1405                                 # table in the stack of open elements.
1406                                 #
1407                                 # huh? how could it not have a parent?
1408                                 previous_element = open_els[last_table_i + 1]
1409                                 # 7. Let adjusted insertion location be inside previous
1410                                 # element, after its last child (if any).
1411                                 target = previous_element
1412                                 target_i = target.children.length
1413                                 # Note: These steps are involved in part because it's possible
1414                                 # for elements, the table element in this case in particular,
1415                                 # to have been moved by a script around in the DOM, or indeed
1416                                 # removed from the DOM entirely, after the element was inserted
1417                                 # by the parser.
1418                                 break # don't really loop
1419                 else
1420                         # Otherwise Let adjusted insertion location be inside target, after
1421                         # its last child (if any).
1422                         target_i = target.children.length
1423
1424                 # 3. If the adjusted insertion location is inside a template element,
1425                 # let it instead be inside the template element's template contents,
1426                 # after its last child (if any).
1427                 # fixfull (template)
1428
1429                 # 4. Return the adjusted insertion location.
1430                 return [target, target_i]
1431
1432         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1433         # aka create_an_element_for_token
1434         token_to_element = (t, namespace, intended_parent) ->
1435                 # convert attributes into a hash
1436                 attrs = {}
1437                 for a in t.attrs_a
1438                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1439                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1440
1441                 # TODO 2. If the newly created element has an xmlns attribute in the
1442                 # XMLNS namespace whose value is not exactly the same as the element's
1443                 # namespace, that is a parse error. Similarly, if the newly created
1444                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1445                 # value is not the XLink Namespace, that is a parse error.
1446
1447                 # fixfull: the spec says stuff about form pointers and ownerDocument
1448
1449                 return el
1450
1451         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1452         insert_foreign_element = (token, namespace) ->
1453                 ail = adjusted_insertion_location()
1454                 ail_el = ail[0]
1455                 ail_i = ail[1]
1456                 el = token_to_element token, namespace, ail_el
1457                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1458                 el.parent = ail_el
1459                 ail_el.children.splice ail_i, 0, el
1460                 open_els.unshift el
1461                 return el
1462         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1463         insert_html_element = (token) ->
1464                 insert_foreign_element token, NS_HTML
1465
1466         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1467         # position should be [node, index_within_children]
1468         insert_comment = (t, position = null) ->
1469                 position ?= adjusted_insertion_location()
1470                 position[0].children.splice position[1], 0, t
1471
1472         # 8.2.5.2
1473         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1474         parse_generic_raw_text = (t) ->
1475                 insert_html_element t
1476                 tok_state = tok_state_rawtext
1477                 original_ins_mode = ins_mode
1478                 ins_mode = ins_mode_text
1479         parse_generic_rcdata_text = (t) ->
1480                 insert_html_element t
1481                 tok_state = tok_state_rcdata
1482                 original_ins_mode = ins_mode
1483                 ins_mode = ins_mode_text
1484
1485         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1486         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1487         generate_implied_end_tags = (except = null) ->
1488                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1489                         open_els.shift()
1490
1491         # 8.2.5.4 The rules for parsing tokens in HTML content
1492         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1493
1494         # 8.2.5.4.1 The "initial" insertion mode
1495         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1496         is_quirks_yes_doctype = (t) ->
1497                 if t.flag 'force-quirks'
1498                         return true
1499                 if t.name isnt 'html'
1500                         return true
1501                 if t.public_identifier?
1502                         pi = t.public_identifier.toLowerCase()
1503                         for p in quirks_yes_pi_prefixes
1504                                 if pi.substr(0, p.length) is p
1505                                         return true
1506                         if pi is '-//w3o//dtd w3 html strict 3.0//en//' or pi is '-/w3c/dtd html 4.0 transitional/en' or pi is 'html'
1507                                 return true
1508                 if t.system_identifier?
1509                         if t.system_identifier.toLowerCase() is 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd'
1510                                 return true
1511                 else if t.public_identifier?
1512                         # already did this: pi = t.public_identifier.toLowerCase()
1513                         if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1514                                 return true
1515                 return false
1516         is_quirks_limited_doctype = (t) ->
1517                 if t.public_identifier?
1518                         pi = t.public_identifier.toLowerCase()
1519                         if pi.substr(0, 32) is '-//w3c//dtd xhtml 1.0 frameset//' or pi.substr(0, 36) is '-//w3c//dtd xhtml 1.0 transitional//'
1520                                 return true
1521                         if t.system_identifier?
1522                                 if pi.substr(0, 32) is '-//w3c//dtd html 4.01 frameset//' or pi.substr(0, 36) is '-//w3c//dtd html 4.01 transitional//'
1523                                         return true
1524                 return false
1525         ins_mode_initial = (t) ->
1526                 if is_space_tok t
1527                         return
1528                 if t.type is TYPE_COMMENT
1529                         # ?fixfull
1530                         doc.children.push t
1531                         return
1532                 if t.type is TYPE_DOCTYPE
1533                         # fixfull syntax error from first paragraph and following bullets
1534                         # fixfull set doc.doctype
1535                         # fixfull is the "not an iframe srcdoc" thing relevant?
1536                         if is_quirks_yes_doctype t
1537                                 doc.flag 'quirks mode', QUIRKS_YES
1538                         else if is_quirks_limited_doctype t
1539                                 doc.flag 'quirks mode', QUIRKS_LIMITED
1540                         doc.children.push t
1541                         ins_mode = ins_mode_before_html
1542                         return
1543                 # Anything else
1544                 # fixfull not iframe srcdoc?
1545                 parse_error()
1546                 doc.flag 'quirks mode', QUIRKS_YES
1547                 ins_mode = ins_mode_before_html
1548                 process_token t
1549                 return
1550
1551         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1552         ins_mode_before_html = (t) ->
1553                 if t.type is TYPE_DOCTYPE
1554                         parse_error()
1555                         return
1556                 if t.type is TYPE_COMMENT
1557                         doc.children.push t
1558                         return
1559                 if is_space_tok t
1560                         return
1561                 if t.type is TYPE_START_TAG and t.name is 'html'
1562                         el = token_to_element t, NS_HTML, doc
1563                         doc.children.push el
1564                         el.document = doc
1565                         open_els.unshift(el)
1566                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1567                         ins_mode = ins_mode_before_head
1568                         return
1569                 if t.type is TYPE_END_TAG
1570                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1571                                 # fall through to "anything else"
1572                         else
1573                                 parse_error()
1574                                 return
1575                 # Anything else
1576                 el = token_to_element new_open_tag('html'), NS_HTML, doc
1577                 doc.children.push el
1578                 el.document = doc
1579                 open_els.unshift el
1580                 # ?fixfull browsing context
1581                 ins_mode = ins_mode_before_head
1582                 process_token t
1583                 return
1584
1585         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1586         ins_mode_before_head = (t) ->
1587                 if is_space_tok t
1588                         return
1589                 if t.type is TYPE_COMMENT
1590                         insert_comment t
1591                         return
1592                 if t.type is TYPE_DOCTYPE
1593                         parse_error()
1594                         return
1595                 if t.type is TYPE_START_TAG and t.name is 'html'
1596                         ins_mode_in_body t
1597                         return
1598                 if t.type is TYPE_START_TAG and t.name is 'head'
1599                         el = insert_html_element t
1600                         head_element_pointer = el
1601                         ins_mode = ins_mode_in_head
1602                         return
1603                 if t.type is TYPE_END_TAG
1604                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1605                                 # fall through to Anything else below
1606                         else
1607                                 parse_error()
1608                                 return
1609                 # Anything else
1610                 el = insert_html_element new_open_tag 'head'
1611                 head_element_pointer = el
1612                 ins_mode = ins_mode_in_head
1613                 process_token t
1614
1615         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1616         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1617                 open_els.shift() # spec says this will be a 'head' node
1618                 ins_mode = ins_mode_after_head
1619                 process_token t
1620         ins_mode_in_head = (t) ->
1621                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1622                         insert_character t
1623                         return
1624                 if t.type is TYPE_COMMENT
1625                         insert_comment t
1626                         return
1627                 if t.type is TYPE_DOCTYPE
1628                         parse_error()
1629                         return
1630                 if t.type is TYPE_START_TAG and t.name is 'html'
1631                         ins_mode_in_body t
1632                         return
1633                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1634                         el = insert_html_element t
1635                         open_els.shift()
1636                         t.acknowledge_self_closing()
1637                         return
1638                 if t.type is TYPE_START_TAG and t.name is 'meta'
1639                         el = insert_html_element t
1640                         open_els.shift()
1641                         t.acknowledge_self_closing()
1642                         # fixfull encoding stuff
1643                         return
1644                 if t.type is TYPE_START_TAG and t.name is 'title'
1645                         parse_generic_rcdata_text t
1646                         return
1647                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1648                         parse_generic_raw_text t
1649                         return
1650                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1651                         insert_html_element t
1652                         ins_mode = ins_mode_in_head_noscript
1653                         return
1654                 if t.type is TYPE_START_TAG and t.name is 'script'
1655                         ail = adjusted_insertion_location()
1656                         el = token_to_element t, NS_HTML, ail
1657                         el.flag 'parser-inserted', true
1658                         # fixfull frament case
1659                         ail[0].children.splice ail[1], 0, el
1660                         open_els.unshift el
1661                         tok_state = tok_state_script_data
1662                         original_ins_mode = ins_mode # make sure orig... is defined
1663                         ins_mode = ins_mode_text
1664                         return
1665                 if t.type is TYPE_END_TAG and t.name is 'head'
1666                         open_els.shift() # will be a head element... spec says so
1667                         ins_mode = ins_mode_after_head
1668                         return
1669                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1670                         ins_mode_in_head_else t
1671                         return
1672                 if t.type is TYPE_START_TAG and t.name is 'template'
1673                         insert_html_element t
1674                         afe_push_marker()
1675                         flag_frameset_ok = false
1676                         ins_mode = ins_mode_in_template
1677                         template_ins_modes.unshift ins_mode_in_template
1678                         return
1679                 if t.type is TYPE_END_TAG and t.name is 'template'
1680                         if template_tag_is_open()
1681                                 generate_implied_end_tags
1682                                 if open_els[0].name isnt 'template'
1683                                         parse_error()
1684                                 loop
1685                                         el = open_els.shift()
1686                                         if el.name is 'template' and el.namespace is NS_HTML
1687                                                 break
1688                                 clear_afe_to_marker()
1689                                 template_ins_modes.shift()
1690                                 reset_ins_mode()
1691                         else
1692                                 parse_error()
1693                         return
1694                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1695                         parse_error()
1696                         return
1697                 ins_mode_in_head_else t
1698
1699         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1700         ins_mode_in_head_noscript_else = (t) ->
1701                 parse_error()
1702                 open_els.shift()
1703                 ins_mode = ins_mode_in_head
1704                 process_token t
1705         ins_mode_in_head_noscript = (t) ->
1706                 if t.type is TYPE_DOCTYPE
1707                         parse_error()
1708                         return
1709                 if t.type is TYPE_START_TAG and t.name is 'html'
1710                         ins_mode_in_body t
1711                         return
1712                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1713                         open_els.shift()
1714                         ins_mode = ins_mode_in_head
1715                         return
1716                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1717                         ins_mode_in_head t
1718                         return
1719                 if t.type is TYPE_END_TAG and t.name is 'br'
1720                         ins_mode_in_head_noscript_else t
1721                         return
1722                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1723                         parse_error()
1724                         return
1725                 # Anything else
1726                 ins_mode_in_head_noscript_else t
1727                 return
1728
1729
1730
1731         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1732         ins_mode_after_head_else = (t) ->
1733                 body_tok = new_open_tag 'body'
1734                 insert_html_element body_tok
1735                 ins_mode = ins_mode_in_body
1736                 process_token t
1737                 return
1738         ins_mode_after_head = (t) ->
1739                 if is_space_tok t
1740                         insert_character t
1741                         return
1742                 if t.type is TYPE_COMMENT
1743                         insert_comment t
1744                         return
1745                 if t.type is TYPE_DOCTYPE
1746                         parse_error()
1747                         return
1748                 if t.type is TYPE_START_TAG and t.name is 'html'
1749                         ins_mode_in_body t
1750                         return
1751                 if t.type is TYPE_START_TAG and t.name is 'body'
1752                         insert_html_element t
1753                         flag_frameset_ok = false
1754                         ins_mode = ins_mode_in_body
1755                         return
1756                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1757                         insert_html_element t
1758                         ins_mode = ins_mode_in_frameset
1759                         return
1760                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1761                         parse_error()
1762                         open_els.unshift head_element_pointer
1763                         ins_mode_in_head t
1764                         for el, i in open_els
1765                                 if el is head_element_pointer
1766                                         open_els.splice i, 1
1767                                         return
1768                         console.log "warning: 23904 couldn't find head element in open_els"
1769                         return
1770                 if t.type is TYPE_END_TAG and t.name is 'template'
1771                         ins_mode_in_head t
1772                         return
1773                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1774                         ins_mode_after_head_else t
1775                         return
1776                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1777                         parse_error()
1778                         return
1779                 # Anything else
1780                 ins_mode_after_head_else t
1781
1782         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1783         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1784                 node = open_els[0]
1785                 loop
1786                         if node.name is name and node.namespace is NS_HTML
1787                                 generate_implied_end_tags name # arg is exception
1788                                 unless node is open_els[0]
1789                                         parse_error()
1790                                 loop
1791                                         el = open_els.shift()
1792                                         if el is node
1793                                                 return
1794                         if special_elements[node.name] is node.namespace
1795                                 parse_error()
1796                                 return
1797                         for el, i in open_els
1798                                 if node is el
1799                                         node = open_els[i + 1]
1800                                         break
1801                 return
1802         ins_mode_in_body = (t) ->
1803                 if t.type is TYPE_TEXT and t.text is "\u0000"
1804                         parse_error()
1805                         return
1806                 if is_space_tok t
1807                         reconstruct_afe()
1808                         insert_character t
1809                         return
1810                 if t.type is TYPE_TEXT
1811                         reconstruct_afe()
1812                         insert_character t
1813                         flag_frameset_ok = false
1814                         return
1815                 if t.type is TYPE_COMMENT
1816                         insert_comment t
1817                         return
1818                 if t.type is TYPE_DOCTYPE
1819                         parse_error()
1820                         return
1821                 if t.type is TYPE_START_TAG and t.name is 'html'
1822                         parse_error()
1823                         return if template_tag_is_open()
1824                         root_attrs = open_els[open_els.length - 1].attrs
1825                         for a in t.attrs_a
1826                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1827                         return
1828
1829                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1830                         ins_mode_in_head t
1831                         return
1832                 if t.type is TYPE_START_TAG and t.name is 'body'
1833                         parse_error()
1834                         return if open_els.length < 2
1835                         second = open_els[open_els.length - 2]
1836                         return unless second.namespace is NS_HTML
1837                         return unless second.name is 'body'
1838                         return if template_tag_is_open()
1839                         flag_frameset_ok = false
1840                         for a in t.attrs_a
1841                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1842                         return
1843                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1844                         parse_error()
1845                         return if open_els.length < 2
1846                         second_i = open_els.length - 2
1847                         second = open_els[second_i]
1848                         return unless second.namespace is NS_HTML
1849                         return unless second.name is 'body'
1850                         if flag_frameset_ok is false
1851                                 return
1852                         if second.parent?
1853                                 for el, i in second.parent.children
1854                                         if el is second
1855                                                 second.parent.children.splice i, 1
1856                                                 break
1857                         open_els.splice second_i, 1
1858                         # pop everything except the "root html element"
1859                         while open_els.length > 1
1860                                 open_els.shift()
1861                         insert_html_element t
1862                         ins_mode = ins_mode_in_frameset
1863                         return
1864                 if t.type is TYPE_EOF
1865                         ok_tags = {
1866                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1867                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1868                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1869                         }
1870                         for el in open_els
1871                                 unless ok_tags[t.name] is el.namespace
1872                                         parse_error()
1873                                         break
1874                         if template_ins_modes.length > 0
1875                                 ins_mode_in_template t
1876                         else
1877                                 stop_parsing()
1878                         return
1879                 if t.type is TYPE_END_TAG and t.name is 'body'
1880                         unless is_in_scope 'body', NS_HTML
1881                                 parse_error()
1882                                 return
1883                         ok_tags = {
1884                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1885                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1886                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1887                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1888                                 html:NS_HTML
1889                         }
1890                         for el in open_els
1891                                 unless ok_tags[t.name] is el.namespace
1892                                         parse_error()
1893                                         break
1894                         ins_mode = ins_mode_after_body
1895                         return
1896                 if t.type is TYPE_END_TAG and t.name is 'html'
1897                         unless is_in_scope 'body', NS_HTML
1898                                 parse_error()
1899                                 return
1900                         ok_tags = {
1901                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1902                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1903                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1904                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1905                                 html:NS_HTML
1906                         }
1907                         for el in open_els
1908                                 unless ok_tags[t.name] is el.namespace
1909                                         parse_error()
1910                                         break
1911                         ins_mode = ins_mode_after_body
1912                         process_token t
1913                         return
1914                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1915                         close_p_if_in_button_scope()
1916                         insert_html_element t
1917                         return
1918                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1919                         close_p_if_in_button_scope()
1920                         if h_tags[open_els[0].name] is open_els[0].namespace
1921                                 parse_error()
1922                                 open_els.shift()
1923                         insert_html_element t
1924                         return
1925                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1926                         close_p_if_in_button_scope()
1927                         insert_html_element t
1928                         eat_next_token_if_newline()
1929                         flag_frameset_ok = false
1930                         return
1931                 if t.type is TYPE_START_TAG and t.name is 'form'
1932                         unless form_element_pointer is null or template_tag_is_open()
1933                                 parse_error()
1934                                 return
1935                         close_p_if_in_button_scope()
1936                         el = insert_html_element t
1937                         unless template_tag_is_open()
1938                                 form_element_pointer = el
1939                         return
1940                 if t.type is TYPE_START_TAG and t.name is 'li'
1941                         flag_frameset_ok = false
1942                         for node in open_els
1943                                 if node.name is 'li' and node.namespace is NS_HTML
1944                                         generate_implied_end_tags 'li' # arg is exception
1945                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1946                                                 parse_error()
1947                                         loop
1948                                                 el = open_els.shift()
1949                                                 if el.name is 'li' and el.namespace is NS_HTML
1950                                                         break
1951                                         break
1952                                 if el_is_special_not_adp node
1953                                                 break
1954                         close_p_if_in_button_scope()
1955                         insert_html_element t
1956                         return
1957                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1958                         flag_frameset_ok = false
1959                         for node in open_els
1960                                 if node.name is 'dd' and node.namespace is NS_HTML
1961                                         generate_implied_end_tags 'dd' # arg is exception
1962                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1963                                                 parse_error()
1964                                         loop
1965                                                 el = open_els.shift()
1966                                                 if el.name is 'dd' and el.namespace is NS_HTML
1967                                                         break
1968                                         break
1969                                 if node.name is 'dt' and node.namespace is NS_HTML
1970                                         generate_implied_end_tags 'dt' # arg is exception
1971                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1972                                                 parse_error()
1973                                         loop
1974                                                 el = open_els.shift()
1975                                                 if el.name is 'dt' and el.namespace is NS_HTML
1976                                                         break
1977                                         break
1978                                 if el_is_special_not_adp node
1979                                         break
1980                         close_p_if_in_button_scope()
1981                         insert_html_element t
1982                         return
1983                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1984                         close_p_if_in_button_scope()
1985                         insert_html_element t
1986                         tok_state = tok_state_plaintext
1987                         return
1988                 if t.type is TYPE_START_TAG and t.name is 'button'
1989                         if is_in_scope 'button', NS_HTML
1990                                 parse_error()
1991                                 generate_implied_end_tags()
1992                                 loop
1993                                         el = open_els.shift()
1994                                         if el.name is 'button' and el.namespace is NS_HTML
1995                                                 break
1996                         reconstruct_afe()
1997                         insert_html_element t
1998                         flag_frameset_ok = false
1999                         return
2000                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
2001                         unless is_in_scope t.name, NS_HTML
2002                                 parse_error()
2003                                 return
2004                         generate_implied_end_tags()
2005                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
2006                                 parse_error()
2007                         loop
2008                                 el = open_els.shift()
2009                                 if el.name is t.name and el.namespace is NS_HTML
2010                                         return
2011                         return
2012                 if t.type is TYPE_END_TAG and t.name is 'form'
2013                         unless template_tag_is_open()
2014                                 node = form_element_pointer
2015                                 form_element_pointer = null
2016                                 if node is null or not el_is_in_scope node
2017                                         parse_error()
2018                                         return
2019                                 generate_implied_end_tags()
2020                                 if open_els[0] isnt node
2021                                         parse_error()
2022                                 for el, i in open_els
2023                                         if el is node
2024                                                 open_els.splice i, 1
2025                                                 break
2026                         else
2027                                 unless is_in_scope 'form', NS_HTML
2028                                         parse_error()
2029                                         return
2030                                 generate_implied_end_tags()
2031                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
2032                                         parse_error()
2033                                 loop
2034                                         el = open_els.shift()
2035                                         if el.name is 'form' and el.namespace is NS_HTML
2036                                                 break
2037                         return
2038                 if t.type is TYPE_END_TAG and t.name is 'p'
2039                         unless is_in_button_scope 'p', NS_HTML
2040                                 parse_error()
2041                                 insert_html_element new_open_tag 'p'
2042                         close_p_element()
2043                         return
2044                 if t.type is TYPE_END_TAG and t.name is 'li'
2045                         unless is_in_li_scope 'li', NS_HTML
2046                                 parse_error()
2047                                 return
2048                         generate_implied_end_tags 'li' # arg is exception
2049                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
2050                                 parse_error()
2051                         loop
2052                                 el = open_els.shift()
2053                                 if el.name is 'li' and el.namespace is NS_HTML
2054                                         break
2055                         return
2056                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
2057                         unless is_in_scope t.name, NS_HTML
2058                                 parse_error()
2059                                 return
2060                         generate_implied_end_tags t.name # arg is exception
2061                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2062                                 parse_error()
2063                         loop
2064                                 el = open_els.shift()
2065                                 if el.name is t.name and el.namespace is NS_HTML
2066                                         break
2067                         return
2068                 if t.type is TYPE_END_TAG and h_tags[t.name]?
2069                         h_in_scope = false
2070                         for el in open_els
2071                                 if h_tags[el.name] is el.namespace
2072                                         h_in_scope = true
2073                                         break
2074                                 if standard_scopers[el.name] is el.namespace
2075                                         break
2076                         unless h_in_scope
2077                                 parse_error()
2078                                 return
2079                         generate_implied_end_tags()
2080                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2081                                 parse_error()
2082                         loop
2083                                 el = open_els.shift()
2084                                 if h_tags[el.name] is el.namespace
2085                                         break
2086                         return
2087                 # deep breath!
2088                 if t.type is TYPE_START_TAG and t.name is 'a'
2089                         # If the list of active formatting elements contains an a element
2090                         # between the end of the list and the last marker on the list (or
2091                         # the start of the list if there is no marker on the list), then
2092                         # this is a parse error; run the adoption agency algorithm for the
2093                         # tag name "a", then remove that element from the list of active
2094                         # formatting elements and the stack of open elements if the
2095                         # adoption agency algorithm didn't already remove it (it might not
2096                         # have if the element is not in table scope).
2097                         found = false
2098                         for el in afe
2099                                 if el.type is TYPE_AFE_MARKER
2100                                         break
2101                                 if el.name is 'a' and el.namespace is NS_HTML
2102                                         found = el
2103                         if found?
2104                                 parse_error()
2105                                 adoption_agency 'a'
2106                                 for el, i in afe
2107                                         if el is found
2108                                                 afe.splice i, 1
2109                                 for el, i in open_els
2110                                         if el is found
2111                                                 open_els.splice i, 1
2112                         reconstruct_afe()
2113                         el = insert_html_element t
2114                         afe_push el
2115                         return
2116                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2117                         reconstruct_afe()
2118                         el = insert_html_element t
2119                         afe_push el
2120                         return
2121                 if t.type is TYPE_START_TAG and t.name is 'nobr'
2122                         reconstruct_afe()
2123                         if is_in_scope 'nobr', NS_HTML
2124                                 parse_error()
2125                                 adoption_agency 'nobr'
2126                                 reconstruct_afe()
2127                         el = insert_html_element t
2128                         afe_push el
2129                         return
2130                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2131                         adoption_agency t.name
2132                         return
2133                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2134                         reconstruct_afe()
2135                         insert_html_element t
2136                         afe_push_marker()
2137                         flag_frameset_ok = false
2138                         return
2139                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2140                         unless is_in_scope t.name, NS_HTML
2141                                 parse_error()
2142                                 return
2143                         generate_implied_end_tags()
2144                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2145                                 parse_error()
2146                         loop
2147                                 el = open_els.shift()
2148                                 if el.name is t.name and el.namespace is NS_HTML
2149                                         break
2150                         clear_afe_to_marker()
2151                         return
2152                 if t.type is TYPE_START_TAG and t.name is 'table'
2153                         unless doc.flag('quirks mode') is QUIRKS_YES
2154                                 close_p_if_in_button_scope() # test
2155                         insert_html_element t
2156                         flag_frameset_ok = false
2157                         ins_mode = ins_mode_in_table
2158                         return
2159                 if t.type is TYPE_END_TAG and t.name is 'br'
2160                         parse_error()
2161                         # W3C: t.type = TYPE_START_TAG
2162                         t = new_open_tag 'br' # WHATWG
2163                         # fall through
2164                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2165                         reconstruct_afe()
2166                         insert_html_element t
2167                         open_els.shift()
2168                         t.acknowledge_self_closing()
2169                         flag_frameset_ok = false
2170                         return
2171                 if t.type is TYPE_START_TAG and t.name is 'input'
2172                         reconstruct_afe()
2173                         insert_html_element t
2174                         open_els.shift()
2175                         t.acknowledge_self_closing()
2176                         unless is_input_hidden_tok t
2177                                 flag_frameset_ok = false
2178                         return
2179                 if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
2180                         # WHATWG adds 'menuitem' for this block
2181                         insert_html_element t
2182                         open_els.shift()
2183                         t.acknowledge_self_closing()
2184                         return
2185                 if t.type is TYPE_START_TAG and t.name is 'hr'
2186                         close_p_if_in_button_scope()
2187                         insert_html_element t
2188                         open_els.shift()
2189                         t.acknowledge_self_closing()
2190                         flag_frameset_ok = false
2191                         return
2192                 if t.type is TYPE_START_TAG and t.name is 'image'
2193                         parse_error()
2194                         t.name = 'img'
2195                         process_token t
2196                         return
2197                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2198                         parse_error()
2199                         if template_tag_is_open() is false and form_element_pointer isnt null
2200                                 return
2201                         t.acknowledge_self_closing()
2202                         flag_frameset_ok = false
2203                         close_p_if_in_button_scope()
2204                         el = insert_html_element new_open_tag 'form'
2205                         unless template_tag_is_open()
2206                                 form_element_pointer = el
2207                         for a in t.attrs_a
2208                                 if a[0] is 'action'
2209                                         el.attrs['action'] = a[1]
2210                                         break
2211                         insert_html_element new_open_tag 'hr'
2212                         open_els.shift()
2213                         reconstruct_afe()
2214                         insert_html_element new_open_tag 'label'
2215                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2216                         input_el = new_open_tag 'input'
2217                         prompt = null
2218                         for a in t.attrs_a
2219                                 if a[0] is 'prompt'
2220                                         prompt = a[1]
2221                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2222                                         input_el.attrs_a.push [a[0], a[1]]
2223                         input_el.attrs_a.push ['name', 'isindex']
2224                         # fixfull this next bit is in english... internationalize?
2225                         prompt ?= "This is a searchable index. Enter search keywords: "
2226                         insert_character new_character_token prompt # fixfull split
2227                         # TODO submit typo "balue" in spec
2228                         insert_html_element input_el
2229                         open_els.shift()
2230                         # insert_character '' # you can put chars here if promt attr missing
2231                         open_els.shift()
2232                         insert_html_element new_open_tag 'hr'
2233                         open_els.shift()
2234                         open_els.shift()
2235                         unless template_tag_is_open()
2236                                 form_element_pointer = null
2237                         return
2238                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2239                         insert_html_element t
2240                         eat_next_token_if_newline()
2241                         tok_state = tok_state_rcdata
2242                         original_ins_mode = ins_mode
2243                         flag_frameset_ok = false
2244                         ins_mode = ins_mode_text
2245                         return
2246                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2247                         close_p_if_in_button_scope()
2248                         reconstruct_afe()
2249                         flag_frameset_ok = false
2250                         parse_generic_raw_text t
2251                         return
2252                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2253                         flag_frameset_ok = false
2254                         parse_generic_raw_text t
2255                         return
2256                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2257                         parse_generic_raw_text t
2258                         return
2259                 if t.type is TYPE_START_TAG and t.name is 'select'
2260                         reconstruct_afe()
2261                         insert_html_element t
2262                         flag_frameset_ok = false
2263                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2264                                 ins_mode = ins_mode_in_select_in_table
2265                         else
2266                                 ins_mode = ins_mode_in_select
2267                         return
2268                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2269                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2270                                 open_els.shift()
2271                         reconstruct_afe()
2272                         insert_html_element t
2273                         return
2274 # this comment block implements the W3C spec
2275 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2276 #                       if is_in_scope 'ruby', NS_HTML
2277 #                               generate_implied_end_tags()
2278 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2279 #                                       parse_error()
2280 #                       insert_html_element t
2281 #                       return
2282 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2283 #                       if is_in_scope 'ruby', NS_HTML
2284 #                               generate_implied_end_tags 'rtc' # arg is exception
2285 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2286 #                                       parse_error()
2287 #                       insert_html_element t
2288 #                       return
2289 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2290                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2291                         if is_in_scope 'ruby', NS_HTML
2292                                 generate_implied_end_tags()
2293                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2294                                         parse_error()
2295                         insert_html_element t
2296                         return
2297                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2298                         if is_in_scope 'ruby', NS_HTML
2299                                 generate_implied_end_tags 'rtc'
2300                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2301                                         parse_error()
2302                         insert_html_element t
2303                         return
2304 # end WHATWG chunk
2305                 if t.type is TYPE_START_TAG and t.name is 'math'
2306                         reconstruct_afe()
2307                         adjust_mathml_attributes t
2308                         adjust_foreign_attributes t
2309                         insert_foreign_element t, NS_MATHML
2310                         if t.flag 'self-closing'
2311                                 open_els.shift()
2312                                 t.acknowledge_self_closing()
2313                         return
2314                 if t.type is TYPE_START_TAG and t.name is 'svg'
2315                         reconstruct_afe()
2316                         adjust_svg_attributes t
2317                         adjust_foreign_attributes t
2318                         insert_foreign_element t, NS_SVG
2319                         if t.flag 'self-closing'
2320                                 open_els.shift()
2321                                 t.acknowledge_self_closing()
2322                         return
2323                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2324                         parse_error()
2325                         return
2326                 if t.type is TYPE_START_TAG # any other start tag
2327                         reconstruct_afe()
2328                         insert_html_element t
2329                         return
2330                 if t.type is TYPE_END_TAG # any other end tag
2331                         in_body_any_other_end_tag t.name
2332                         return
2333                 return
2334
2335         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2336         ins_mode_text = (t) ->
2337                 if t.type is TYPE_TEXT
2338                         insert_character t
2339                         return
2340                 if t.type is TYPE_EOF
2341                         parse_error()
2342                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2343                                 open_els[0].flag 'already started', true
2344                         open_els.shift()
2345                         ins_mode = original_ins_mode
2346                         process_token t
2347                         return
2348                 if t.type is TYPE_END_TAG and t.name is 'script'
2349                         open_els.shift()
2350                         ins_mode = original_ins_mode
2351                         # fixfull the spec seems to assume that I'm going to run the script
2352                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2353                         return
2354                 if t.type is TYPE_END_TAG
2355                         open_els.shift()
2356                         ins_mode = original_ins_mode
2357                         return
2358                 console.log 'warning: end of ins_mode_text reached'
2359
2360         # the functions below implement the tokenizer stats described here:
2361         # http://www.w3.org/TR/html5/syntax.html#tokenization
2362
2363         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2364         ins_mode_in_table_else = (t) ->
2365                 parse_error()
2366                 flag_foster_parenting = true
2367                 ins_mode_in_body t
2368                 flag_foster_parenting = false
2369                 return
2370         ins_mode_in_table = (t) ->
2371                 switch t.type
2372                         when TYPE_TEXT
2373                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2374                                         pending_table_character_tokens = []
2375                                         original_ins_mode = ins_mode
2376                                         ins_mode = ins_mode_in_table_text
2377                                         process_token t
2378                                 else
2379                                         ins_mode_in_table_else t
2380                         when TYPE_COMMENT
2381                                 insert_comment t
2382                         when TYPE_DOCTYPE
2383                                 parse_error()
2384                         when TYPE_START_TAG
2385                                 switch t.name
2386                                         when 'caption'
2387                                                 clear_stack_to_table_context()
2388                                                 afe_push_marker()
2389                                                 insert_html_element t
2390                                                 ins_mode = ins_mode_in_caption
2391                                         when 'colgroup'
2392                                                 clear_stack_to_table_context()
2393                                                 insert_html_element t
2394                                                 ins_mode = ins_mode_in_column_group
2395                                         when 'col'
2396                                                 clear_stack_to_table_context()
2397                                                 insert_html_element new_open_tag 'colgroup'
2398                                                 ins_mode = ins_mode_in_column_group
2399                                                 process_token t
2400                                         when 'tbody', 'tfoot', 'thead'
2401                                                 clear_stack_to_table_context()
2402                                                 insert_html_element t
2403                                                 ins_mode = ins_mode_in_table_body
2404                                         when 'td', 'th', 'tr'
2405                                                 clear_stack_to_table_context()
2406                                                 insert_html_element new_open_tag 'tbody'
2407                                                 ins_mode = ins_mode_in_table_body
2408                                                 process_token t
2409                                         when 'table'
2410                                                 parse_error()
2411                                                 if is_in_table_scope 'table', NS_HTML
2412                                                         loop
2413                                                                 el = open_els.shift()
2414                                                                 if el.name is 'table' and el.namespace is NS_HTML
2415                                                                         break
2416                                                         reset_ins_mode()
2417                                                         process_token t
2418                                         when 'style', 'script', 'template'
2419                                                 ins_mode_in_head t
2420                                         when 'input'
2421                                                 unless is_input_hidden_tok t
2422                                                         ins_mode_in_table_else t
2423                                                 else
2424                                                         parse_error()
2425                                                         el = insert_html_element t
2426                                                         open_els.shift()
2427                                                         t.acknowledge_self_closing()
2428                                         when 'form'
2429                                                 parse_error()
2430                                                 if form_element_pointer?
2431                                                         return
2432                                                 if template_tag_is_open()
2433                                                         return
2434                                                 form_element_pointer = insert_html_element t
2435                                                 open_els.shift()
2436                                         else
2437                                                 ins_mode_in_table_else t
2438                         when TYPE_END_TAG
2439                                 switch t.name
2440                                         when 'table'
2441                                                 if is_in_table_scope 'table', NS_HTML
2442                                                         loop
2443                                                                 el = open_els.shift()
2444                                                                 if el.name is 'table' and el.namespace is NS_HTML
2445                                                                         break
2446                                                         reset_ins_mode()
2447                                                 else
2448                                                         parse_error()
2449                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2450                                                 parse_error()
2451                                         when 'template'
2452                                                 ins_mode_in_head t
2453                                         else
2454                                                 ins_mode_in_table_else t
2455                         when TYPE_EOF
2456                                 ins_mode_in_body t
2457                         else
2458                                 ins_mode_in_table_else t
2459
2460
2461         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2462         ins_mode_in_table_text = (t) ->
2463                 if t.type is TYPE_TEXT and t.text is "\u0000"
2464                         # from javascript?
2465                         parse_error()
2466                         return
2467                 if t.type is TYPE_TEXT
2468                         pending_table_character_tokens.push t
2469                         return
2470                 # Anything else
2471                 all_space = true
2472                 for old in pending_table_character_tokens
2473                         unless is_space_tok old
2474                                 all_space = false
2475                                 break
2476                 if all_space
2477                         for old in pending_table_character_tokens
2478                                 insert_character old
2479                 else
2480                         for old in pending_table_character_tokens
2481                                 ins_mode_in_table_else old
2482                 pending_table_character_tokens = []
2483                 ins_mode = original_ins_mode
2484                 process_token t
2485
2486         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2487         ins_mode_in_caption = (t) ->
2488                 if t.type is TYPE_END_TAG and t.name is 'caption'
2489                         if is_in_table_scope 'caption', NS_HTML
2490                                 generate_implied_end_tags()
2491                                 if open_els[0].name isnt 'caption'
2492                                         parse_error()
2493                                 loop
2494                                         el = open_els.shift()
2495                                         if el.name is 'caption' and el.namespace is NS_HTML
2496                                                 break
2497                                 clear_afe_to_marker()
2498                                 ins_mode = ins_mode_in_table
2499                         else
2500                                 parse_error()
2501                                 # fragment case
2502                         return
2503                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2504                         parse_error()
2505                         if is_in_table_scope 'caption', NS_HTML
2506                                 loop
2507                                         el = open_els.shift()
2508                                         if el.name is 'caption' and el.namespace is NS_HTML
2509                                                 break
2510                                 clear_afe_to_marker()
2511                                 ins_mode = ins_mode_in_table
2512                                 process_token t
2513                         # else fragment case
2514                         return
2515                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2516                         parse_error()
2517                         return
2518                 # Anything else
2519                 ins_mode_in_body t
2520
2521         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2522         ins_mode_in_column_group = (t) ->
2523                 if is_space_tok t
2524                         insert_character t
2525                         return
2526                 if t.type is TYPE_COMMENT
2527                         insert_comment t
2528                         return
2529                 if t.type is TYPE_DOCTYPE
2530                         parse_error()
2531                         return
2532                 if t.type is TYPE_START_TAG and t.name is 'html'
2533                         ins_mode_in_body t
2534                         return
2535                 if t.type is TYPE_START_TAG and t.name is 'col'
2536                         el = insert_html_element t
2537                         open_els.shift()
2538                         t.acknowledge_self_closing()
2539                         return
2540                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2541                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2542                                 open_els.shift()
2543                                 ins_mode = ins_mode_in_table
2544                         else
2545                                 parse_error()
2546                         return
2547                 if t.type is TYPE_END_TAG and t.name is 'col'
2548                         parse_error()
2549                         return
2550                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2551                         ins_mode_in_head t
2552                         return
2553                 if t.type is TYPE_EOF
2554                         ins_mode_in_body t
2555                         return
2556                 # Anything else
2557                 if open_els[0].name isnt 'colgroup'
2558                         parse_error()
2559                         return
2560                 open_els.shift()
2561                 ins_mode = ins_mode_in_table
2562                 process_token t
2563                 return
2564
2565         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2566         ins_mode_in_table_body = (t) ->
2567                 if t.type is TYPE_START_TAG and t.name is 'tr'
2568                         clear_stack_to_table_body_context()
2569                         insert_html_element t
2570                         ins_mode = ins_mode_in_row
2571                         return
2572                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2573                         parse_error()
2574                         clear_stack_to_table_body_context()
2575                         insert_html_element new_open_tag 'tr'
2576                         ins_mode = ins_mode_in_row
2577                         process_token t
2578                         return
2579                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2580                         unless is_in_table_scope t.name, NS_HTML
2581                                 parse_error()
2582                                 return
2583                         clear_stack_to_table_body_context()
2584                         open_els.shift()
2585                         ins_mode = ins_mode_in_table
2586                         return
2587                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2588                         has = false
2589                         for el in open_els
2590                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2591                                         has = true
2592                                         break
2593                                 if table_scopers[el.name] is el.namespace
2594                                         break
2595                         if !has
2596                                 parse_error()
2597                                 return
2598                         clear_stack_to_table_body_context()
2599                         open_els.shift()
2600                         ins_mode = ins_mode_in_table
2601                         process_token t
2602                         return
2603                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2604                         parse_error()
2605                         return
2606                 # Anything else
2607                 ins_mode_in_table t
2608
2609         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2610         ins_mode_in_row = (t) ->
2611                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2612                         clear_stack_to_table_row_context()
2613                         insert_html_element t
2614                         ins_mode = ins_mode_in_cell
2615                         afe_push_marker()
2616                         return
2617                 if t.type is TYPE_END_TAG and t.name is 'tr'
2618                         if is_in_table_scope 'tr', NS_HTML
2619                                 clear_stack_to_table_row_context()
2620                                 open_els.shift()
2621                                 ins_mode = ins_mode_in_table_body
2622                         else
2623                                 parse_error()
2624                         return
2625                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2626                         if is_in_table_scope 'tr', NS_HTML
2627                                 clear_stack_to_table_row_context()
2628                                 open_els.shift()
2629                                 ins_mode = ins_mode_in_table_body
2630                                 process_token t
2631                         else
2632                                 parse_error()
2633                         return
2634                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2635                         if is_in_table_scope t.name, NS_HTML
2636                                 if is_in_table_scope 'tr', NS_HTML
2637                                         clear_stack_to_table_row_context()
2638                                         open_els.shift()
2639                                         ins_mode = ins_mode_in_table_body
2640                                         process_token t
2641                         else
2642                                 parse_error()
2643                         return
2644                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2645                         parse_error()
2646                         return
2647                 # Anything else
2648                 ins_mode_in_table t
2649
2650         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2651         close_the_cell = ->
2652                 generate_implied_end_tags()
2653                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2654                         parse_error()
2655                 loop
2656                         el = open_els.shift()
2657                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2658                                 break
2659                 clear_afe_to_marker()
2660                 ins_mode = ins_mode_in_row
2661
2662         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2663         ins_mode_in_cell = (t) ->
2664                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2665                         if is_in_table_scope t.name, NS_HTML
2666                                 generate_implied_end_tags()
2667                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2668                                         parse_error()
2669                                 loop
2670                                         el = open_els.shift()
2671                                         if el.name is t.name and el.namespace is NS_HTML
2672                                                 break
2673                                 clear_afe_to_marker()
2674                                 ins_mode = ins_mode_in_row
2675                         else
2676                                 parse_error()
2677                         return
2678                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2679                         has = false
2680                         for el in open_els
2681                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2682                                         has = true
2683                                         break
2684                                 if table_scopers[el.name] is el.namespace
2685                                         break
2686                         if !has
2687                                 parse_error()
2688                                 return
2689                         close_the_cell()
2690                         process_token t
2691                         return
2692                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2693                         parse_error()
2694                         return
2695                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2696                         if is_in_table_scope t.name, NS_HTML
2697                                 close_the_cell()
2698                                 process_token t
2699                         else
2700                                 parse_error()
2701                         return
2702                 # Anything Else
2703                 ins_mode_in_body t
2704
2705         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2706         ins_mode_in_select = (t) ->
2707                 if t.type is TYPE_TEXT and t.text is "\u0000"
2708                         parse_error()
2709                         return
2710                 if t.type is TYPE_TEXT
2711                         insert_character t
2712                         return
2713                 if t.type is TYPE_COMMENT
2714                         insert_comment t
2715                         return
2716                 if t.type is TYPE_DOCTYPE
2717                         parse_error()
2718                         return
2719                 if t.type is TYPE_START_TAG and t.name is 'html'
2720                         ins_mode_in_body t
2721                         return
2722                 if t.type is TYPE_START_TAG and t.name is 'option'
2723                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2724                                 open_els.shift()
2725                         insert_html_element t
2726                         return
2727                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2728                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2729                                 open_els.shift()
2730                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2731                                 open_els.shift()
2732                         insert_html_element t
2733                         return
2734                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2735                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2736                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2737                                         open_els.shift()
2738                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2739                                 open_els.shift()
2740                         else
2741                                 parse_error()
2742                         return
2743                 if t.type is TYPE_END_TAG and t.name is 'option'
2744                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2745                                 open_els.shift()
2746                         else
2747                                 parse_error()
2748                         return
2749                 if t.type is TYPE_END_TAG and t.name is 'select'
2750                         if is_in_select_scope 'select', NS_HTML
2751                                 loop
2752                                         el = open_els.shift()
2753                                         if el.name is 'select' and el.namespace is NS_HTML
2754                                                 break
2755                                 reset_ins_mode()
2756                         else
2757                                 parse_error()
2758                         return
2759                 if t.type is TYPE_START_TAG and t.name is 'select'
2760                         parse_error()
2761                         loop
2762                                 el = open_els.shift()
2763                                 if el.name is 'select' and el.namespace is NS_HTML
2764                                         break
2765                         reset_ins_mode()
2766                         # spec says that this is the same as </select> but it doesn't say
2767                         # to check scope first
2768                         return
2769                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2770                         parse_error()
2771                         unless is_in_select_scope 'select', NS_HTML
2772                                 return
2773                         loop
2774                                 el = open_els.shift()
2775                                 if el.name is 'select' and el.namespace is NS_HTML
2776                                         break
2777                         reset_ins_mode()
2778                         process_token t
2779                         return
2780                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2781                         ins_mode_in_head t
2782                         return
2783                 if t.type is TYPE_EOF
2784                         ins_mode_in_body t
2785                         return
2786                 # Anything else
2787                 parse_error()
2788                 return
2789
2790         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2791         ins_mode_in_select_in_table = (t) ->
2792                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2793                         parse_error()
2794                         loop
2795                                 el = open_els.shift()
2796                                 if el.name is 'select' and el.namespace is NS_HTML
2797                                         break
2798                         reset_ins_mode()
2799                         process_token t
2800                         return
2801                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2802                         parse_error()
2803                         unless is_in_table_scope t.name, NS_HTML
2804                                 return
2805                         loop
2806                                 el = open_els.shift()
2807                                 if el.name is 'select' and el.namespace is NS_HTML
2808                                         break
2809                         reset_ins_mode()
2810                         process_token t
2811                         return
2812                 # Anything else
2813                 ins_mode_in_select t
2814                 return
2815
2816         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2817         ins_mode_in_template = (t) ->
2818                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2819                         ins_mode_in_body t
2820                         return
2821                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2822                         ins_mode_in_head t
2823                         return
2824                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2825                         template_ins_modes.shift()
2826                         template_ins_modes.unshift ins_mode_in_table
2827                         ins_mode = ins_mode_in_table
2828                         process_token t
2829                         return
2830                 if t.type is TYPE_START_TAG and t.name is 'col'
2831                         template_ins_modes.shift()
2832                         template_ins_modes.unshift ins_mode_in_column_group
2833                         ins_mode = ins_mode_in_column_group
2834                         process_token t
2835                         return
2836                 if t.type is TYPE_START_TAG and t.name is 'tr'
2837                         template_ins_modes.shift()
2838                         template_ins_modes.unshift ins_mode_in_table_body
2839                         ins_mode = ins_mode_in_table_body
2840                         process_token t
2841                         return
2842                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2843                         template_ins_modes.shift()
2844                         template_ins_modes.unshift ins_mode_in_row
2845                         ins_mode = ins_mode_in_row
2846                         process_token t
2847                         return
2848                 if t.type is TYPE_START_TAG
2849                         template_ins_modes.shift()
2850                         template_ins_modes.unshift ins_mode_in_body
2851                         ins_mode = ins_mode_in_body
2852                         process_token t
2853                         return
2854                 if t.type is TYPE_END_TAG
2855                         parse_error()
2856                         return
2857                 if t.type is TYPE_EOF
2858                         unless template_tag_is_open()
2859                                 stop_parsing()
2860                                 return
2861                         parse_error()
2862                         loop
2863                                 el = open_els.shift()
2864                                 if el.name is 'template' and el.namespace is NS_HTML
2865                                         break
2866                         clear_afe_to_marker()
2867                         template_ins_modes.shift()
2868                         reset_ins_mode()
2869                         process_token t
2870
2871         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2872         ins_mode_after_body = (t) ->
2873                 if is_space_tok t
2874                         ins_mode_in_body t
2875                         return
2876                 if t.type is TYPE_COMMENT
2877                         first = open_els[open_els.length - 1]
2878                         insert_comment t, [first, first.children.length]
2879                         return
2880                 if t.type is TYPE_DOCTYPE
2881                         parse_error()
2882                         return
2883                 if t.type is TYPE_START_TAG and t.name is 'html'
2884                         ins_mode_in_body t
2885                         return
2886                 if t.type is TYPE_END_TAG and t.name is 'html'
2887                         if flag_fragment_parsing
2888                                 parse_error()
2889                                 return
2890                         ins_mode = ins_mode_after_after_body
2891                         return
2892                 if t.type is TYPE_EOF
2893                         stop_parsing()
2894                         return
2895                 # Anything ELse
2896                 parse_error()
2897                 ins_mode = ins_mode_in_body
2898                 process_token t
2899
2900         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2901         ins_mode_in_frameset = (t) ->
2902                 if is_space_tok t
2903                         insert_character t
2904                         return
2905                 if t.type is TYPE_COMMENT
2906                         insert_comment t
2907                         return
2908                 if t.type is TYPE_DOCTYPE
2909                         parse_error()
2910                         return
2911                 if t.type is TYPE_START_TAG and t.name is 'html'
2912                         ins_mode_in_body t
2913                         return
2914                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2915                         insert_html_element t
2916                         return
2917                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2918                         if open_els.length is 1
2919                                 parse_error()
2920                                 return # fragment case
2921                         open_els.shift()
2922                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2923                                 ins_mode = ins_mode_after_frameset
2924                         return
2925                 if t.type is TYPE_START_TAG and t.name is 'frame'
2926                         insert_html_element t
2927                         open_els.shift()
2928                         t.acknowledge_self_closing()
2929                         return
2930                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2931                         ins_mode_in_head t
2932                         return
2933                 if t.type is TYPE_EOF
2934                         if open_els.length isnt 1
2935                                 parse_error()
2936                         stop_parsing()
2937                         return
2938                 # Anything else
2939                 parse_error()
2940                 return
2941
2942         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2943         ins_mode_after_frameset = (t) ->
2944                 if is_space_tok t
2945                         insert_character t
2946                         return
2947                 if t.type is TYPE_COMMENT
2948                         insert_comment t
2949                         return
2950                 if t.type is TYPE_DOCTYPE
2951                         parse_error()
2952                         return
2953                 if t.type is TYPE_START_TAG and t.name is 'html'
2954                         ins_mode_in_body t
2955                         return
2956                 if t.type is TYPE_END_TAG and t.name is 'html'
2957                         ins_mode = ins_mode_after_after_frameset
2958                         return
2959                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2960                         ins_mode_in_head t
2961                         return
2962                 if t.type is TYPE_EOF
2963                         stop_parsing()
2964                         return
2965                 # Anything else
2966                 parse_error()
2967                 return
2968
2969         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2970         ins_mode_after_after_body = (t) ->
2971                 if t.type is TYPE_COMMENT
2972                         insert_comment t, [doc, doc.children.length]
2973                         return
2974                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2975                         ins_mode_in_body t
2976                         return
2977                 if t.type is TYPE_EOF
2978                         stop_parsing()
2979                         return
2980                 # Anything else
2981                 parse_error()
2982                 ins_mode = ins_mode_in_body
2983                 process_token t
2984                 return
2985
2986         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2987         ins_mode_after_after_frameset = (t) ->
2988                 if t.type is TYPE_COMMENT
2989                         insert_comment t, [doc, doc.children.length]
2990                         return
2991                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2992                         ins_mode_in_body t
2993                         return
2994                 if t.type is TYPE_EOF
2995                         stop_parsing()
2996                         return
2997                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2998                         ins_mode_in_head t
2999                         return
3000                 # Anything else
3001                 parse_error()
3002                 return
3003
3004         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
3005         has_color_face_or_size = (t) ->
3006                 for a in t.attrs_a
3007                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
3008                                 return true
3009                 return false
3010         in_foreign_content_end_script = ->
3011                 open_els.shift()
3012                 # fixfull
3013                 return
3014         in_foreign_content_other_start = (t) ->
3015                 acn = adjusted_current_node()
3016                 if acn.namespace is NS_MATHML
3017                         adjust_mathml_attributes t
3018                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
3019                         t.name = svg_name_fixes[t.name]
3020                 if acn.namespace is NS_SVG
3021                         adjust_svg_attributes t
3022                 adjust_foreign_attributes t
3023                 insert_foreign_element t, acn.namespace
3024                 if t.flag 'self-closing'
3025                         if t.name is 'script'
3026                                 t.acknowledge_self_closing()
3027                                 in_foreign_content_end_script()
3028                                 # fixfull
3029                         else
3030                                 open_els.shift()
3031                                 t.acknowledge_self_closing()
3032                 return
3033         in_foreign_content = (t) ->
3034                 if t.type is TYPE_TEXT and t.text is "\u0000"
3035                         parse_error()
3036                         insert_character new_character_token "\ufffd"
3037                         return
3038                 if is_space_tok t
3039                         insert_character t
3040                         return
3041                 if t.type is TYPE_TEXT
3042                         flag_frameset_ok = false
3043                         insert_character t
3044                         return
3045                 if t.type is TYPE_COMMENT
3046                         insert_comment t
3047                         return
3048                 if t.type is TYPE_DOCTYPE
3049                         parse_error()
3050                         return
3051                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
3052                         parse_error()
3053                         if flag_fragment_parsing
3054                                 in_foreign_content_other_start t
3055                                 return
3056                         loop # is this safe?
3057                                 open_els.shift()
3058                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
3059                                         break
3060                         process_token t
3061                         return
3062                 if t.type is TYPE_START_TAG
3063                         in_foreign_content_other_start t
3064                         return
3065                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
3066                         in_foreign_content_end_script()
3067                         return
3068                 if t.type is TYPE_END_TAG
3069                         i = 0
3070                         node = open_els[i]
3071                         if node.name.toLowerCase() isnt t.name
3072                                 parse_error()
3073                         loop
3074                                 if node is open_els[open_els.length - 1]
3075                                         return
3076                                 if node.name.toLowerCase() is t.name
3077                                         loop
3078                                                 el = open_els.shift()
3079                                                 if el is node
3080                                                         return
3081                                 i += 1
3082                                 node = open_els[i]
3083                                 if node.namespace is NS_HTML
3084                                         break
3085                         ins_mode t # explicitly call HTML insertion mode
3086
3087
3088         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
3089         tok_state_data = ->
3090                 switch c = txt.charAt(cur++)
3091                         when '&'
3092                                 return new_text_node parse_character_reference()
3093                         when '<'
3094                                 tok_state = tok_state_tag_open
3095                         when "\u0000"
3096                                 parse_error()
3097                                 return new_text_node c
3098                         when '' # EOF
3099                                 return new_eof_token()
3100                         else
3101                                 return new_text_node c
3102                 return null
3103
3104         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
3105         # not needed: tok_state_character_reference_in_data = ->
3106         # just call parse_character_reference()
3107
3108         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
3109         tok_state_rcdata = ->
3110                 switch c = txt.charAt(cur++)
3111                         when '&'
3112                                 return new_text_node parse_character_reference()
3113                         when '<'
3114                                 tok_state = tok_state_rcdata_less_than_sign
3115                         when "\u0000"
3116                                 parse_error()
3117                                 return new_character_token "\ufffd"
3118                         when '' # EOF
3119                                 return new_eof_token()
3120                         else
3121                                 return new_character_token c
3122                 return null
3123
3124         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3125         # not needed: tok_state_character_reference_in_rcdata = ->
3126         # just call parse_character_reference()
3127
3128         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3129         tok_state_rawtext = ->
3130                 switch c = txt.charAt(cur++)
3131                         when '<'
3132                                 tok_state = tok_state_rawtext_less_than_sign
3133                         when "\u0000"
3134                                 parse_error()
3135                                 return new_character_token "\ufffd"
3136                         when '' # EOF
3137                                 return new_eof_token()
3138                         else
3139                                 return new_character_token c
3140                 return null
3141
3142         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3143         tok_state_script_data = ->
3144                 switch c = txt.charAt(cur++)
3145                         when '<'
3146                                 tok_state = tok_state_script_data_less_than_sign
3147                         when "\u0000"
3148                                 parse_error()
3149                                 return new_character_token "\ufffd"
3150                         when '' # EOF
3151                                 return new_eof_token()
3152                         else
3153                                 return new_character_token c
3154                 return null
3155
3156         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3157         tok_state_plaintext = ->
3158                 switch c = txt.charAt(cur++)
3159                         when "\u0000"
3160                                 parse_error()
3161                                 return new_character_token "\ufffd"
3162                         when '' # EOF
3163                                 return new_eof_token()
3164                         else
3165                                 return new_character_token c
3166                 return null
3167
3168
3169         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3170         tok_state_tag_open = ->
3171                 c = txt.charAt(cur++)
3172                 if c is '!'
3173                         tok_state = tok_state_markup_declaration_open
3174                         return
3175                 if c is '/'
3176                         tok_state = tok_state_end_tag_open
3177                         return
3178                 if is_uc_alpha(c)
3179                         tok_cur_tag = new_open_tag c.toLowerCase()
3180                         tok_state = tok_state_tag_name
3181                         return
3182                 if is_lc_alpha(c)
3183                         tok_cur_tag = new_open_tag c
3184                         tok_state = tok_state_tag_name
3185                         return
3186                 if c is '?'
3187                         parse_error()
3188                         tok_cur_tag = new_comment_token '?' # FIXME right?
3189                         tok_state = tok_state_bogus_comment
3190                         return
3191                 # Anything else
3192                 parse_error()
3193                 tok_state = tok_state_data
3194                 cur -= 1 # we didn't parse/handle the char after <
3195                 return new_text_node '<'
3196
3197         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3198         tok_state_end_tag_open = ->
3199                 c = txt.charAt(cur++)
3200                 if is_uc_alpha(c)
3201                         tok_cur_tag = new_end_tag c.toLowerCase()
3202                         tok_state = tok_state_tag_name
3203                         return
3204                 if is_lc_alpha(c)
3205                         tok_cur_tag = new_end_tag c
3206                         tok_state = tok_state_tag_name
3207                         return
3208                 if c is '>'
3209                         parse_error()
3210                         tok_state = tok_state_data
3211                         return
3212                 if c is '' # EOF
3213                         parse_error()
3214                         tok_state = tok_state_data
3215                         return new_text_node '</'
3216                 # Anything else
3217                 parse_error()
3218                 tok_cur_tag = new_comment_token c
3219                 tok_state = tok_state_bogus_comment
3220                 return null
3221
3222         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3223         tok_state_tag_name = ->
3224                 switch c = txt.charAt(cur++)
3225                         when "\t", "\n", "\u000c", ' '
3226                                 tok_state = tok_state_before_attribute_name
3227                         when '/'
3228                                 tok_state = tok_state_self_closing_start_tag
3229                         when '>'
3230                                 tok_state = tok_state_data
3231                                 tmp = tok_cur_tag
3232                                 tok_cur_tag = null
3233                                 return tmp
3234                         when "\u0000"
3235                                 parse_error()
3236                                 tok_cur_tag.name += "\ufffd"
3237                         when '' # EOF
3238                                 parse_error()
3239                                 tok_state = tok_state_data
3240                         else
3241                                 if is_uc_alpha(c)
3242                                         tok_cur_tag.name += c.toLowerCase()
3243                                 else
3244                                         tok_cur_tag.name += c
3245                 return null
3246
3247         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3248         tok_state_rcdata_less_than_sign = ->
3249                 c = txt.charAt(cur++)
3250                 if c is '/'
3251                         temporary_buffer = ''
3252                         tok_state = tok_state_rcdata_end_tag_open
3253                         return null
3254                 # Anything else
3255                 tok_state = tok_state_rcdata
3256                 cur -= 1 # reconsume the input character
3257                 return new_character_token '<'
3258
3259         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3260         tok_state_rcdata_end_tag_open = ->
3261                 c = txt.charAt(cur++)
3262                 if is_uc_alpha(c)
3263                         tok_cur_tag = new_end_tag c.toLowerCase()
3264                         temporary_buffer += c
3265                         tok_state = tok_state_rcdata_end_tag_name
3266                         return null
3267                 if is_lc_alpha(c)
3268                         tok_cur_tag = new_end_tag c
3269                         temporary_buffer += c
3270                         tok_state = tok_state_rcdata_end_tag_name
3271                         return null
3272                 # Anything else
3273                 tok_state = tok_state_rcdata
3274                 cur -= 1 # reconsume the input character
3275                 return new_character_token "</" # fixfull separate these
3276
3277         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3278         is_appropriate_end_tag = (t) ->
3279                 # spec says to check against "the tag name of the last start tag to
3280                 # have been emitted from this tokenizer", but this is only called from
3281                 # the various "raw" states, so it's hopefully ok to assume that
3282                 # open_els[0].name will work instead TODO: verify this after the script
3283                 # data states are implemented
3284                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3285                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3286
3287         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3288         tok_state_rcdata_end_tag_name = ->
3289                 c = txt.charAt(cur++)
3290                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3291                         if is_appropriate_end_tag tok_cur_tag
3292                                 tok_state = tok_state_before_attribute_name
3293                                 return
3294                         # else fall through to "Anything else"
3295                 if c is '/'
3296                         if is_appropriate_end_tag tok_cur_tag
3297                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3298                                 return
3299                         # else fall through to "Anything else"
3300                 if c is '>'
3301                         if is_appropriate_end_tag tok_cur_tag
3302                                 tok_state = tok_state_data
3303                                 return tok_cur_tag
3304                         # else fall through to "Anything else"
3305                 if is_uc_alpha(c)
3306                         tok_cur_tag.name += c.toLowerCase()
3307                         temporary_buffer += c
3308                         return null
3309                 if is_lc_alpha(c)
3310                         tok_cur_tag.name += c
3311                         temporary_buffer += c
3312                         return null
3313                 # Anything else
3314                 tok_state = tok_state_rcdata
3315                 cur -= 1 # reconsume the input character
3316                 return new_character_token '</' + temporary_buffer # fixfull separate these
3317
3318         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3319         tok_state_rawtext_less_than_sign = ->
3320                 c = txt.charAt(cur++)
3321                 if c is '/'
3322                         temporary_buffer = ''
3323                         tok_state = tok_state_rawtext_end_tag_open
3324                         return null
3325                 # Anything else
3326                 tok_state = tok_state_rawtext
3327                 cur -= 1 # reconsume the input character
3328                 return new_character_token '<'
3329
3330         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3331         tok_state_rawtext_end_tag_open = ->
3332                 c = txt.charAt(cur++)
3333                 if is_uc_alpha(c)
3334                         tok_cur_tag = new_end_tag c.toLowerCase()
3335                         temporary_buffer += c
3336                         tok_state = tok_state_rawtext_end_tag_name
3337                         return null
3338                 if is_lc_alpha(c)
3339                         tok_cur_tag = new_end_tag c
3340                         temporary_buffer += c
3341                         tok_state = tok_state_rawtext_end_tag_name
3342                         return null
3343                 # Anything else
3344                 tok_state = tok_state_rawtext
3345                 cur -= 1 # reconsume the input character
3346                 return new_character_token "</" # fixfull separate these
3347
3348         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3349         tok_state_rawtext_end_tag_name = ->
3350                 c = txt.charAt(cur++)
3351                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3352                         if is_appropriate_end_tag tok_cur_tag
3353                                 tok_state = tok_state_before_attribute_name
3354                                 return
3355                         # else fall through to "Anything else"
3356                 if c is '/'
3357                         if is_appropriate_end_tag tok_cur_tag
3358                                 tok_state = tok_state_self_closing_start_tag
3359                                 return
3360                         # else fall through to "Anything else"
3361                 if c is '>'
3362                         if is_appropriate_end_tag tok_cur_tag
3363                                 tok_state = tok_state_data
3364                                 return tok_cur_tag
3365                         # else fall through to "Anything else"
3366                 if is_uc_alpha(c)
3367                         tok_cur_tag.name += c.toLowerCase()
3368                         temporary_buffer += c
3369                         return null
3370                 if is_lc_alpha(c)
3371                         tok_cur_tag.name += c
3372                         temporary_buffer += c
3373                         return null
3374                 # Anything else
3375                 tok_state = tok_state_rawtext
3376                 cur -= 1 # reconsume the input character
3377                 return new_character_token '</' + temporary_buffer # fixfull separate these
3378
3379         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3380         tok_state_script_data_less_than_sign = ->
3381                 c = txt.charAt(cur++)
3382                 if c is '/'
3383                         temporary_buffer = ''
3384                         tok_state = tok_state_script_data_end_tag_open
3385                         return
3386                 if c is '!'
3387                         tok_state = tok_state_script_data_escape_start
3388                         return new_character_token '<!' # fixfull split
3389                 # Anything else
3390                 tok_state = tok_state_script_data
3391                 cur -= 1 # Reconsume
3392                 return new_character_token '<'
3393
3394         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3395         tok_state_script_data_end_tag_open = ->
3396                 c = txt.charAt(cur++)
3397                 if is_uc_alpha(c)
3398                         tok_cur_tag = new_end_tag c.toLowerCase()
3399                         temporary_buffer += c
3400                         tok_state = tok_state_script_data_end_tag_name
3401                         return
3402                 if is_lc_alpha(c)
3403                         tok_cur_tag = new_end_tag c
3404                         temporary_buffer += c
3405                         tok_state = tok_state_script_data_end_tag_name
3406                         return
3407                 # Anything else
3408                 tok_state = tok_state_script_data
3409                 cur -= 1 # Reconsume
3410                 return new_character_token '</'
3411
3412         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3413         tok_state_script_data_end_tag_name = ->
3414                 c = txt.charAt(cur++)
3415                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3416                         if is_appropriate_end_tag tok_cur_tag
3417                                 tok_state = tok_state_before_attribute_name
3418                                 return
3419                         # fall through
3420                 if c is '/'
3421                         if is_appropriate_end_tag tok_cur_tag
3422                                 tok_state = tok_state_self_closing_start_tag
3423                                 return
3424                         # fall through
3425                 if c is '>'
3426                         if is_appropriate_end_tag tok_cur_tag
3427                                 tok_state = tok_state_data
3428                                 return tok_cur_tag
3429                         # fall through
3430                 if is_uc_alpha(c)
3431                         tok_cur_tag.name += c.toLowerCase()
3432                         temporary_buffer += c
3433                         return
3434                 if is_lc_alpha(c)
3435                         tok_cur_tag.name += c
3436                         temporary_buffer += c
3437                         return
3438                 # Anything else
3439                 tok_state = tok_state_script_data
3440                 cur -= 1 # Reconsume
3441                 return new_character_token "</#{temporary_buffer}" # fixfull split
3442
3443         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3444         tok_state_script_data_escape_start = ->
3445                 c = txt.charAt(cur++)
3446                 if c is '-'
3447                         tok_state = tok_state_script_data_escape_start_dash
3448                         return new_character_token '-'
3449                 # Anything else
3450                 tok_state = tok_state_script_data
3451                 cur -= 1 # Reconsume
3452                 return
3453
3454         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3455         tok_state_script_data_escape_start_dash = ->
3456                 c = txt.charAt(cur++)
3457                 if c is '-'
3458                         tok_state = tok_state_script_data_escaped_dash_dash
3459                         return new_character_token '-'
3460                 # Anything else
3461                 tok_state = tok_state_script_data
3462                 cur -= 1 # Reconsume
3463                 return
3464
3465         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3466         tok_state_script_data_escaped = ->
3467                 c = txt.charAt(cur++)
3468                 if c is '-'
3469                         tok_state = tok_state_script_data_escaped_dash
3470                         return new_character_token '-'
3471                 if c is '<'
3472                         tok_state = tok_state_script_data_escaped_less_than_sign
3473                         return
3474                 if c is "\u0000"
3475                         parse_error()
3476                         return new_character_token "\ufffd"
3477                 if c is '' # EOF
3478                         tok_state = tok_state_data
3479                         parse_error()
3480                         cur -= 1 # Reconsume
3481                         return
3482                 # Anything else
3483                 return new_character_token c
3484
3485         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3486         tok_state_script_data_escaped_dash = ->
3487                 c = txt.charAt(cur++)
3488                 if c is '-'
3489                         tok_state = tok_state_script_data_escaped_dash_dash
3490                         return new_character_token '-'
3491                 if c is '<'
3492                         tok_state = tok_state_script_data_escaped_less_than_sign
3493                         return
3494                 if c is "\u0000"
3495                         parse_error()
3496                         tok_state = tok_state_script_data_escaped
3497                         return new_character_token "\ufffd"
3498                 if c is '' # EOF
3499                         tok_state = tok_state_data
3500                         parse_error()
3501                         cur -= 1 # Reconsume
3502                         return
3503                 # Anything else
3504                 tok_state = tok_state_script_data_escaped
3505                 return new_character_token c
3506
3507         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3508         tok_state_script_data_escaped_dash_dash = ->
3509                 c = txt.charAt(cur++)
3510                 if c is '-'
3511                         return new_character_token '-'
3512                 if c is '<'
3513                         tok_state = tok_state_script_data_escaped_less_than_sign
3514                         return
3515                 if c is '>'
3516                         tok_state = tok_state_script_data
3517                         return new_character_token '>'
3518                 if c is "\u0000"
3519                         parse_error()
3520                         tok_state = tok_state_script_data_escaped
3521                         return new_character_token "\ufffd"
3522                 if c is '' # EOF
3523                         parse_error()
3524                         tok_state = tok_state_data
3525                         cur -= 1 # Reconsume
3526                         return
3527                 # Anything else
3528                 tok_state = tok_state_script_data_escaped
3529                 return new_character_token c
3530
3531         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3532         tok_state_script_data_escaped_less_than_sign = ->
3533                 c = txt.charAt(cur++)
3534                 if c is '/'
3535                         temporary_buffer = ''
3536                         tok_state = tok_state_script_data_escaped_end_tag_open
3537                         return
3538                 if is_uc_alpha(c)
3539                         temporary_buffer = c.toLowerCase() # yes, really
3540                         tok_state = tok_state_script_data_double_escape_start
3541                         return new_character_token "<#{c}" # fixfull split
3542                 if is_lc_alpha(c)
3543                         temporary_buffer = c
3544                         tok_state = tok_state_script_data_double_escape_start
3545                         return new_character_token "<#{c}" # fixfull split
3546                 # Anything else
3547                 tok_state = tok_state_script_data_escaped
3548                 cur -= 1 # Reconsume
3549                 return new_character_token '<'
3550
3551         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3552         tok_state_script_data_escaped_end_tag_open = ->
3553                 c = txt.charAt(cur++)
3554                 if is_uc_alpha(c)
3555                         tok_cur_tag = new_end_tag c.toLowerCase()
3556                         temporary_buffer += c
3557                         tok_state = tok_state_script_data_escaped_end_tag_name
3558                         return
3559                 if is_lc_alpha(c)
3560                         tok_cur_tag = new_end_tag c
3561                         temporary_buffer += c
3562                         tok_state = tok_state_script_data_escaped_end_tag_name
3563                         return
3564                 # Anything else
3565                 tok_state = tok_state_script_data_escaped
3566                 cur -= 1 # Reconsume
3567                 return new_character_token '</' # fixfull split
3568
3569         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3570         tok_state_script_data_escaped_end_tag_name = ->
3571                 c = txt.charAt(cur++)
3572                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3573                         if is_appropriate_end_tag tok_cur_tag
3574                                 tok_state = tok_state_before_attribute_name
3575                                 return
3576                         # fall through
3577                 if c is '/'
3578                         if is_appropriate_end_tag tok_cur_tag
3579                                 tok_state = tok_state_self_closing_start_tag
3580                                 return
3581                         # fall through
3582                 if c is '>'
3583                         if is_appropriate_end_tag tok_cur_tag
3584                                 tok_state = tok_state_data
3585                                 return tok_cur_tag
3586                         # fall through
3587                 if is_uc_alpha(c)
3588                         tok_cur_tag.name += c.toLowerCase()
3589                         temporary_buffer += c.toLowerCase()
3590                         return
3591                 if is_lc_alpha(c)
3592                         tok_cur_tag.name += c
3593                         temporary_buffer += c.toLowerCase()
3594                         return
3595                 # Anything else
3596                 tok_state = tok_state_script_data_escaped
3597                 cur -= 1 # Reconsume
3598                 return new_character_token "</#{temporary_buffer}" # fixfull split
3599
3600         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3601         tok_state_script_data_double_escape_start = ->
3602                 c = txt.charAt(cur++)
3603                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3604                         if temporary_buffer is 'script'
3605                                 tok_state = tok_state_script_data_double_escaped
3606                         else
3607                                 tok_state = tok_state_script_data_escaped
3608                         return new_character_token c
3609                 if is_uc_alpha(c)
3610                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3611                         return new_character_token c
3612                 if is_lc_alpha(c)
3613                         temporary_buffer += c
3614                         return new_character_token c
3615                 # Anything else
3616                 tok_state = tok_state_script_data_escaped
3617                 cur -= 1 # Reconsume
3618                 return
3619
3620         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3621         tok_state_script_data_double_escaped = ->
3622                 c = txt.charAt(cur++)
3623                 if c is '-'
3624                         tok_state = tok_state_script_data_double_escaped_dash
3625                         return new_character_token '-'
3626                 if c is '<'
3627                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3628                         return new_character_token '<'
3629                 if c is "\u0000"
3630                         parse_error()
3631                         return new_character_token "\ufffd"
3632                 if c is '' # EOF
3633                         parse_error()
3634                         tok_state = tok_state_data
3635                         cur -= 1 # Reconsume
3636                         return
3637                 # Anything else
3638                 return new_character_token c
3639
3640         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3641         tok_state_script_data_double_escaped_dash = ->
3642                 c = txt.charAt(cur++)
3643                 if c is '-'
3644                         tok_state = tok_state_script_data_double_escaped_dash_dash
3645                         return new_character_token '-'
3646                 if c is '<'
3647                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3648                         return new_character_token '<'
3649                 if c is "\u0000"
3650                         parse_error()
3651                         tok_state = tok_state_script_data_double_escaped
3652                         return new_character_token "\ufffd"
3653                 if c is '' # EOF
3654                         parse_error()
3655                         tok_state = tok_state_data
3656                         cur -= 1 # Reconsume
3657                         return
3658                 # Anything else
3659                 tok_state = tok_state_script_data_double_escaped
3660                 return new_character_token c
3661
3662         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3663         tok_state_script_data_double_escaped_dash_dash = ->
3664                 c = txt.charAt(cur++)
3665                 if c is '-'
3666                         return new_character_token '-'
3667                 if c is '<'
3668                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3669                         return new_character_token '<'
3670                 if c is '>'
3671                         tok_state = tok_state_script_data
3672                         return new_character_token '>'
3673                 if c is "\u0000"
3674                         parse_error()
3675                         tok_state = tok_state_script_data_double_escaped
3676                         return new_character_token "\ufffd"
3677                 if c is '' # EOF
3678                         parse_error()
3679                         tok_state = tok_state_data
3680                         cur -= 1 # Reconsume
3681                         return
3682                 # Anything else
3683                 tok_state = tok_state_script_data_double_escaped
3684                 return new_character_token c
3685
3686         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3687         tok_state_script_data_double_escaped_less_than_sign = ->
3688                 c = txt.charAt(cur++)
3689                 if c is '/'
3690                         temporary_buffer = ''
3691                         tok_state = tok_state_script_data_double_escape_end
3692                         return new_character_token '/'
3693                 # Anything else
3694                 tok_state = tok_state_script_data_double_escaped
3695                 cur -= 1 # Reconsume
3696                 return
3697
3698         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3699         tok_state_script_data_double_escape_end = ->
3700                 c = txt.charAt(cur++)
3701                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3702                         if temporary_buffer is 'script'
3703                                 tok_state = tok_state_script_data_escaped
3704                         else
3705                                 tok_state = tok_state_script_data_double_escaped
3706                         return new_character_token c
3707                 if is_uc_alpha(c)
3708                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3709                         return new_character_token c
3710                 if is_lc_alpha(c)
3711                         temporary_buffer += c
3712                         return new_character_token c
3713                 # Anything else
3714                 tok_state = tok_state_script_data_double_escaped
3715                 cur -= 1 # Reconsume
3716                 return
3717
3718         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3719         tok_state_before_attribute_name = ->
3720                 attr_name = null
3721                 switch c = txt.charAt(cur++)
3722                         when "\t", "\n", "\u000c", ' '
3723                                 return null
3724                         when '/'
3725                                 tok_state = tok_state_self_closing_start_tag
3726                                 return null
3727                         when '>'
3728                                 tok_state = tok_state_data
3729                                 tmp = tok_cur_tag
3730                                 tok_cur_tag = null
3731                                 return tmp
3732                         when "\u0000"
3733                                 parse_error()
3734                                 attr_name = "\ufffd"
3735                         when '"', "'", '<', '='
3736                                 parse_error()
3737                                 attr_name = c
3738                         when '' # EOF
3739                                 parse_error()
3740                                 tok_state = tok_state_data
3741                         else
3742                                 if is_uc_alpha(c)
3743                                         attr_name = c.toLowerCase()
3744                                 else
3745                                         attr_name = c
3746                 if attr_name?
3747                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3748                         tok_state = tok_state_attribute_name
3749                 return null
3750
3751         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3752         tok_state_attribute_name = ->
3753                 switch c = txt.charAt(cur++)
3754                         when "\t", "\n", "\u000c", ' '
3755                                 tok_state = tok_state_after_attribute_name
3756                         when '/'
3757                                 tok_state = tok_state_self_closing_start_tag
3758                         when '='
3759                                 tok_state = tok_state_before_attribute_value
3760                         when '>'
3761                                 tok_state = tok_state_data
3762                                 tmp = tok_cur_tag
3763                                 tok_cur_tag = null
3764                                 return tmp
3765                         when "\u0000"
3766                                 parse_error()
3767                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3768                         when '"', "'", '<'
3769                                 parse_error()
3770                                 tok_cur_tag.attrs_a[0][0] += c
3771                         when '' # EOF
3772                                 parse_error()
3773                                 tok_state = tok_state_data
3774                         else
3775                                 if is_uc_alpha(c)
3776                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3777                                 else
3778                                         tok_cur_tag.attrs_a[0][0] += c
3779                 return null
3780
3781         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3782         tok_state_after_attribute_name = ->
3783                 c = txt.charAt(cur++)
3784                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3785                         return
3786                 if c is '/'
3787                         tok_state = tok_state_self_closing_start_tag
3788                         return
3789                 if c is '='
3790                         tok_state = tok_state_before_attribute_value
3791                         return
3792                 if c is '>'
3793                         tok_state = tok_state_data
3794                         return tok_cur_tag
3795                 if is_uc_alpha(c)
3796                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3797                         tok_state = tok_state_attribute_name
3798                         return
3799                 if c is "\u0000"
3800                         parse_error()
3801                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3802                         tok_state = tok_state_attribute_name
3803                         return
3804                 if c is '' # EOF
3805                         parse_error()
3806                         tok_state = tok_state_data
3807                         cur -= 1 # reconsume
3808                         return
3809                 if c is '"' or c is "'" or c is '<'
3810                         parse_error()
3811                         # fall through to Anything else
3812                 # Anything else
3813                 tok_cur_tag.attrs_a.unshift [c, '']
3814                 tok_state = tok_state_attribute_name
3815
3816         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3817         tok_state_before_attribute_value = ->
3818                 switch c = txt.charAt(cur++)
3819                         when "\t", "\n", "\u000c", ' '
3820                                 return null
3821                         when '"'
3822                                 tok_state = tok_state_attribute_value_double_quoted
3823                         when '&'
3824                                 tok_state = tok_state_attribute_value_unquoted
3825                                 cur -= 1
3826                         when "'"
3827                                 tok_state = tok_state_attribute_value_single_quoted
3828                         when "\u0000"
3829                                 # Parse error
3830                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3831                                 tok_state = tok_state_attribute_value_unquoted
3832                         when '>'
3833                                 # Parse error
3834                                 tok_state = tok_state_data
3835                                 tmp = tok_cur_tag
3836                                 tok_cur_tag = null
3837                                 return tmp
3838                         when '' # EOF
3839                                 parse_error()
3840                                 tok_state = tok_state_data
3841                         else
3842                                 tok_cur_tag.attrs_a[0][1] += c
3843                                 tok_state = tok_state_attribute_value_unquoted
3844                 return null
3845
3846         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3847         tok_state_attribute_value_double_quoted = ->
3848                 switch c = txt.charAt(cur++)
3849                         when '"'
3850                                 tok_state = tok_state_after_attribute_value_quoted
3851                         when '&'
3852                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3853                         when "\u0000"
3854                                 # Parse error
3855                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3856                         when '' # EOF
3857                                 parse_error()
3858                                 tok_state = tok_state_data
3859                         else
3860                                 tok_cur_tag.attrs_a[0][1] += c
3861                 return null
3862
3863         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3864         tok_state_attribute_value_single_quoted = ->
3865                 switch c = txt.charAt(cur++)
3866                         when "'"
3867                                 tok_state = tok_state_after_attribute_value_quoted
3868                         when '&'
3869                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3870                         when "\u0000"
3871                                 # Parse error
3872                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3873                         when '' # EOF
3874                                 parse_error()
3875                                 tok_state = tok_state_data
3876                         else
3877                                 tok_cur_tag.attrs_a[0][1] += c
3878                 return null
3879
3880         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3881         tok_state_attribute_value_unquoted = ->
3882                 switch c = txt.charAt(cur++)
3883                         when "\t", "\n", "\u000c", ' '
3884                                 tok_state = tok_state_before_attribute_name
3885                         when '&'
3886                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3887                         when '>'
3888                                 tok_state = tok_state_data
3889                                 tmp = tok_cur_tag
3890                                 tok_cur_tag = null
3891                                 return tmp
3892                         when "\u0000"
3893                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3894                         when '' # EOF
3895                                 parse_error()
3896                                 tok_state = tok_state_data
3897                         else
3898                                 # Parse Error if ', <, = or ` (backtick)
3899                                 tok_cur_tag.attrs_a[0][1] += c
3900                 return null
3901
3902         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3903         tok_state_after_attribute_value_quoted = ->
3904                 switch c = txt.charAt(cur++)
3905                         when "\t", "\n", "\u000c", ' '
3906                                 tok_state = tok_state_before_attribute_name
3907                         when '/'
3908                                 tok_state = tok_state_self_closing_start_tag
3909                         when '>'
3910                                 tok_state = tok_state_data
3911                                 tmp = tok_cur_tag
3912                                 tok_cur_tag = null
3913                                 return tmp
3914                         when '' # EOF
3915                                 parse_error()
3916                                 tok_state = tok_state_data
3917                         else
3918                                 # Parse Error
3919                                 tok_state = tok_state_before_attribute_name
3920                                 cur -= 1 # we didn't handle that char
3921                 return null
3922
3923         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3924         tok_state_self_closing_start_tag = ->
3925                 c = txt.charAt(cur++)
3926                 if c is '>'
3927                         tok_cur_tag.flag 'self-closing', true
3928                         tok_state = tok_state_data
3929                         return tok_cur_tag
3930                 if c is ''
3931                         parse_error()
3932                         tok_state = tok_state_data
3933                         cur -= 1 # Reconsume
3934                         return
3935                 # Anything else
3936                 parse_error()
3937                 tok_state = tok_state_before_attribute_name
3938                 cur -= 1 # Reconsume
3939                 return
3940
3941         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3942         # WARNING: put a comment token in tok_cur_tag before setting this state
3943         tok_state_bogus_comment = ->
3944                 next_gt = txt.indexOf '>', cur
3945                 if next_gt is -1
3946                         val = txt.substr cur
3947                         cur = txt.length
3948                 else
3949                         val = txt.substr cur, (next_gt - cur)
3950                         cur = next_gt + 1
3951                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3952                 tok_cur_tag.text += val
3953                 tok_state = tok_state_data
3954                 return tok_cur_tag
3955
3956         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3957         tok_state_markup_declaration_open = ->
3958                 if txt.substr(cur, 2) is '--'
3959                         cur += 2
3960                         tok_cur_tag = new_comment_token ''
3961                         tok_state = tok_state_comment_start
3962                         return
3963                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3964                         cur += 7
3965                         tok_state = tok_state_doctype
3966                         return
3967                 acn = adjusted_current_node()
3968                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3969                         cur += 7
3970                         tok_state = tok_state_cdata_section
3971                         return
3972                 # Otherwise
3973                 parse_error()
3974                 tok_cur_tag = new_comment_token ''
3975                 tok_state = tok_state_bogus_comment
3976                 return
3977
3978         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3979         tok_state_comment_start = ->
3980                 switch c = txt.charAt(cur++)
3981                         when '-'
3982                                 tok_state = tok_state_comment_start_dash
3983                         when "\u0000"
3984                                 parse_error()
3985                                 tok_state = tok_state_comment
3986                                 return new_character_token "\ufffd"
3987                         when '>'
3988                                 parse_error()
3989                                 tok_state = tok_state_data
3990                                 return tok_cur_tag
3991                         when '' # EOF
3992                                 parse_error()
3993                                 tok_state = tok_state_data
3994                                 cur -= 1 # Reconsume
3995                                 return tok_cur_tag
3996                         else
3997                                 tok_cur_tag.text += c
3998                                 tok_state = tok_state_comment
3999                 return null
4000
4001         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
4002         tok_state_comment_start_dash = ->
4003                 switch c = txt.charAt(cur++)
4004                         when '-'
4005                                 tok_state = tok_state_comment_end
4006                         when "\u0000"
4007                                 parse_error()
4008                                 tok_cur_tag.text += "-\ufffd"
4009                                 tok_state = tok_state_comment
4010                         when '>'
4011                                 parse_error()
4012                                 tok_state = tok_state_data
4013                                 return tok_cur_tag
4014                         when '' # EOF
4015                                 parse_error()
4016                                 tok_state = tok_state_data
4017                                 cur -= 1 # Reconsume
4018                                 return tok_cur_tag
4019                         else
4020                                 tok_cur_tag.text += "-#{c}"
4021                                 tok_state = tok_state_comment
4022                 return null
4023
4024         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
4025         tok_state_comment = ->
4026                 switch c = txt.charAt(cur++)
4027                         when '-'
4028                                 tok_state = tok_state_comment_end_dash
4029                         when "\u0000"
4030                                 parse_error()
4031                                 tok_cur_tag.text += "\ufffd"
4032                         when '' # EOF
4033                                 parse_error()
4034                                 tok_state = tok_state_data
4035                                 cur -= 1 # Reconsume
4036                                 return tok_cur_tag
4037                         else
4038                                 tok_cur_tag.text += c
4039                 return null
4040
4041         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
4042         tok_state_comment_end_dash = ->
4043                 switch c = txt.charAt(cur++)
4044                         when '-'
4045                                 tok_state = tok_state_comment_end
4046                         when "\u0000"
4047                                 parse_error()
4048                                 tok_cur_tag.text += "-\ufffd"
4049                                 tok_state = tok_state_comment
4050                         when '' # EOF
4051                                 parse_error()
4052                                 tok_state = tok_state_data
4053                                 cur -= 1 # Reconsume
4054                                 return tok_cur_tag
4055                         else
4056                                 tok_cur_tag.text += "-#{c}"
4057                                 tok_state = tok_state_comment
4058                 return null
4059
4060         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
4061         tok_state_comment_end = ->
4062                 switch c = txt.charAt(cur++)
4063                         when '>'
4064                                 tok_state = tok_state_data
4065                                 return tok_cur_tag
4066                         when "\u0000"
4067                                 parse_error()
4068                                 tok_cur_tag.text += "--\ufffd"
4069                                 tok_state = tok_state_comment
4070                         when '!'
4071                                 parse_error()
4072                                 tok_state = tok_state_comment_end_bang
4073                         when '-'
4074                                 parse_error()
4075                                 tok_cur_tag.text += '-'
4076                         when '' # EOF
4077                                 parse_error()
4078                                 tok_state = tok_state_data
4079                                 cur -= 1 # Reconsume
4080                                 return tok_cur_tag
4081                         else
4082                                 parse_error()
4083                                 tok_cur_tag.text += "--#{c}"
4084                                 tok_state = tok_state_comment
4085                 return null
4086
4087         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
4088         tok_state_comment_end_bang = ->
4089                 switch c = txt.charAt(cur++)
4090                         when '-'
4091                                 tok_cur_tag.text += "--!#{c}"
4092                                 tok_state = tok_state_comment_end_dash
4093                         when '>'
4094                                 tok_state = tok_state_data
4095                                 return tok_cur_tag
4096                         when "\u0000"
4097                                 parse_error()
4098                                 tok_cur_tag.text += "--!\ufffd"
4099                                 tok_state = tok_state_comment
4100                         when '' # EOF
4101                                 parse_error()
4102                                 tok_state = tok_state_data
4103                                 cur -= 1 # Reconsume
4104                                 return tok_cur_tag
4105                         else
4106                                 tok_cur_tag.text += "--!#{c}"
4107                                 tok_state = tok_state_comment
4108                 return null
4109
4110         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
4111         tok_state_doctype = ->
4112                 switch c = txt.charAt(cur++)
4113                         when "\t", "\u000a", "\u000c", ' '
4114                                 tok_state = tok_state_before_doctype_name
4115                         when '' # EOF
4116                                 parse_error()
4117                                 tok_state = tok_state_data
4118                                 el = new_doctype_token ''
4119                                 el.flag 'force-quirks', true
4120                                 cur -= 1 # Reconsume
4121                                 return el
4122                         else
4123                                 parse_error()
4124                                 tok_state = tok_state_before_doctype_name
4125                                 cur -= 1 # Reconsume
4126                 return null
4127
4128         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
4129         tok_state_before_doctype_name = ->
4130                 c = txt.charAt(cur++)
4131                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4132                         return
4133                 if is_uc_alpha(c)
4134                         tok_cur_tag = new_doctype_token c.toLowerCase()
4135                         tok_state = tok_state_doctype_name
4136                         return
4137                 if c is "\u0000"
4138                         parse_error()
4139                         tok_cur_tag = new_doctype_token "\ufffd"
4140                         tok_state = tok_state_doctype_name
4141                         return
4142                 if c is '>'
4143                         parse_error()
4144                         el = new_doctype_token ''
4145                         el.flag 'force-quirks', true
4146                         tok_state = tok_state_data
4147                         return el
4148                 if c is '' # EOF
4149                         parse_error()
4150                         tok_state = tok_state_data
4151                         el = new_doctype_token ''
4152                         el.flag 'force-quirks', true
4153                         cur -= 1 # Reconsume
4154                         return el
4155                 # Anything else
4156                 tok_cur_tag = new_doctype_token c
4157                 tok_state = tok_state_doctype_name
4158                 return null
4159
4160         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4161         tok_state_doctype_name = ->
4162                 c = txt.charAt(cur++)
4163                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4164                         tok_state = tok_state_after_doctype_name
4165                         return
4166                 if c is '>'
4167                         tok_state = tok_state_data
4168                         return tok_cur_tag
4169                 if is_uc_alpha(c)
4170                         tok_cur_tag.name += c.toLowerCase()
4171                         return
4172                 if c is "\u0000"
4173                         parse_error()
4174                         tok_cur_tag.name += "\ufffd"
4175                         return
4176                 if c is '' # EOF
4177                         parse_error()
4178                         tok_state = tok_state_data
4179                         tok_cur_tag.flag 'force-quirks', true
4180                         cur -= 1 # Reconsume
4181                         return tok_cur_tag
4182                 # Anything else
4183                 tok_cur_tag.name += c
4184                 return null
4185
4186         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4187         tok_state_after_doctype_name = ->
4188                 c = txt.charAt(cur++)
4189                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4190                         return
4191                 if c is '>'
4192                         tok_state = tok_state_data
4193                         return tok_cur_tag
4194                 if c is '' # EOF
4195                         parse_error()
4196                         tok_state = tok_state_data
4197                         tok_cur_tag.flag 'force-quirks', true
4198                         cur -= 1 # Reconsume
4199                         return tok_cur_tag
4200                 # Anything else
4201                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4202                         cur += 5
4203                         tok_state = tok_state_after_doctype_public_keyword
4204                         return
4205                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4206                         cur += 5
4207                         tok_state = tok_state_after_doctype_system_keyword
4208                         return
4209                 parse_error()
4210                 tok_cur_tag.flag 'force-quirks', true
4211                 tok_state = tok_state_bogus_doctype
4212                 return null
4213
4214         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4215         tok_state_after_doctype_public_keyword = ->
4216                 c = txt.charAt(cur++)
4217                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4218                         tok_state = tok_state_before_doctype_public_identifier
4219                         return
4220                 if c is '"'
4221                         parse_error()
4222                         tok_cur_tag.public_identifier = ''
4223                         tok_state = tok_state_doctype_public_identifier_double_quoted
4224                         return
4225                 if c is "'"
4226                         parse_error()
4227                         tok_cur_tag.public_identifier = ''
4228                         tok_state = tok_state_doctype_public_identifier_single_quoted
4229                         return
4230                 if c is '>'
4231                         parse_error()
4232                         tok_cur_tag.flag 'force-quirks', true
4233                         tok_state = tok_state_data
4234                         return tok_cur_tag
4235                 if c is '' # EOF
4236                         parse_error()
4237                         tok_state = tok_state_data
4238                         tok_cur_tag.flag 'force-quirks', true
4239                         cur -= 1 # Reconsume
4240                         return tok_cur_tag
4241                 # Anything else
4242                 parse_error()
4243                 tok_cur_tag.flag 'force-quirks', true
4244                 tok_state = tok_state_bogus_doctype
4245                 return null
4246
4247         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4248         tok_state_before_doctype_public_identifier = ->
4249                 c = txt.charAt(cur++)
4250                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4251                         return
4252                 if c is '"'
4253                         parse_error()
4254                         tok_cur_tag.public_identifier = ''
4255                         tok_state = tok_state_doctype_public_identifier_double_quoted
4256                         return
4257                 if c is "'"
4258                         parse_error()
4259                         tok_cur_tag.public_identifier = ''
4260                         tok_state = tok_state_doctype_public_identifier_single_quoted
4261                         return
4262                 if c is '>'
4263                         parse_error()
4264                         tok_cur_tag.flag 'force-quirks', true
4265                         tok_state = tok_state_data
4266                         return tok_cur_tag
4267                 if c is '' # EOF
4268                         parse_error()
4269                         tok_state = tok_state_data
4270                         tok_cur_tag.flag 'force-quirks', true
4271                         cur -= 1 # Reconsume
4272                         return tok_cur_tag
4273                 # Anything else
4274                 parse_error()
4275                 tok_cur_tag.flag 'force-quirks', true
4276                 tok_state = tok_state_bogus_doctype
4277                 return null
4278
4279
4280         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4281         tok_state_doctype_public_identifier_double_quoted = ->
4282                 c = txt.charAt(cur++)
4283                 if c is '"'
4284                         tok_state = tok_state_after_doctype_public_identifier
4285                         return
4286                 if c is "\u0000"
4287                         parse_error()
4288                         tok_cur_tag.public_identifier += "\ufffd"
4289                         return
4290                 if c is '>'
4291                         parse_error()
4292                         tok_cur_tag.flag 'force-quirks', true
4293                         tok_state = tok_state_data
4294                         return tok_cur_tag
4295                 if c is '' # EOF
4296                         parse_error()
4297                         tok_state = tok_state_data
4298                         tok_cur_tag.flag 'force-quirks', true
4299                         cur -= 1 # Reconsume
4300                         return tok_cur_tag
4301                 # Anything else
4302                 tok_cur_tag.public_identifier += c
4303                 return null
4304
4305         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4306         tok_state_doctype_public_identifier_single_quoted = ->
4307                 c = txt.charAt(cur++)
4308                 if c is "'"
4309                         tok_state = tok_state_after_doctype_public_identifier
4310                         return
4311                 if c is "\u0000"
4312                         parse_error()
4313                         tok_cur_tag.public_identifier += "\ufffd"
4314                         return
4315                 if c is '>'
4316                         parse_error()
4317                         tok_cur_tag.flag 'force-quirks', true
4318                         tok_state = tok_state_data
4319                         return tok_cur_tag
4320                 if c is '' # EOF
4321                         parse_error()
4322                         tok_state = tok_state_data
4323                         tok_cur_tag.flag 'force-quirks', true
4324                         cur -= 1 # Reconsume
4325                         return tok_cur_tag
4326                 # Anything else
4327                 tok_cur_tag.public_identifier += c
4328                 return null
4329
4330         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4331         tok_state_after_doctype_public_identifier = ->
4332                 c = txt.charAt(cur++)
4333                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4334                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4335                         return
4336                 if c is '>'
4337                         tok_state = tok_state_data
4338                         return tok_cur_tag
4339                 if c is '"'
4340                         parse_error()
4341                         tok_cur_tag.system_identifier = ''
4342                         tok_state = tok_state_doctype_system_identifier_double_quoted
4343                         return
4344                 if c is "'"
4345                         parse_error()
4346                         tok_cur_tag.system_identifier = ''
4347                         tok_state = tok_state_doctype_system_identifier_single_quoted
4348                         return
4349                 if c is '' # EOF
4350                         parse_error()
4351                         tok_state = tok_state_data
4352                         tok_cur_tag.flag 'force-quirks', true
4353                         cur -= 1 # Reconsume
4354                         return tok_cur_tag
4355                 # Anything else
4356                 parse_error()
4357                 tok_cur_tag.flag 'force-quirks', true
4358                 tok_state = tok_state_bogus_doctype
4359                 return null
4360
4361         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4362         tok_state_between_doctype_public_and_system_identifiers = ->
4363                 c = txt.charAt(cur++)
4364                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4365                         return
4366                 if c is '>'
4367                         tok_state = tok_state_data
4368                         return tok_cur_tag
4369                 if c is '"'
4370                         parse_error()
4371                         tok_cur_tag.system_identifier = ''
4372                         tok_state = tok_state_doctype_system_identifier_double_quoted
4373                         return
4374                 if c is "'"
4375                         parse_error()
4376                         tok_cur_tag.system_identifier = ''
4377                         tok_state = tok_state_doctype_system_identifier_single_quoted
4378                         return
4379                 if c is '' # EOF
4380                         parse_error()
4381                         tok_state = tok_state_data
4382                         tok_cur_tag.flag 'force-quirks', true
4383                         cur -= 1 # Reconsume
4384                         return tok_cur_tag
4385                 # Anything else
4386                 parse_error()
4387                 tok_cur_tag.flag 'force-quirks', true
4388                 tok_state = tok_state_bogus_doctype
4389                 return null
4390
4391         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4392         tok_state_after_doctype_system_keyword = ->
4393                 c = txt.charAt(cur++)
4394                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4395                         tok_state = tok_state_before_doctype_system_identifier
4396                         return
4397                 if c is '"'
4398                         parse_error()
4399                         tok_cur_tag.system_identifier = ''
4400                         tok_state = tok_state_doctype_system_identifier_double_quoted
4401                         return
4402                 if c is "'"
4403                         parse_error()
4404                         tok_cur_tag.system_identifier = ''
4405                         tok_state = tok_state_doctype_system_identifier_single_quoted
4406                         return
4407                 if c is '>'
4408                         parse_error()
4409                         tok_cur_tag.flag 'force-quirks', true
4410                         tok_state = tok_state_data
4411                         return tok_cur_tag
4412                 if c is '' # EOF
4413                         parse_error()
4414                         tok_state = tok_state_data
4415                         tok_cur_tag.flag 'force-quirks', true
4416                         cur -= 1 # Reconsume
4417                         return tok_cur_tag
4418                 # Anything else
4419                 parse_error()
4420                 tok_cur_tag.flag 'force-quirks', true
4421                 tok_state = tok_state_bogus_doctype
4422                 return null
4423
4424         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4425         tok_state_before_doctype_system_identifier = ->
4426                 c = txt.charAt(cur++)
4427                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4428                         return
4429                 if c is '"'
4430                         tok_cur_tag.system_identifier = ''
4431                         tok_state = tok_state_doctype_system_identifier_double_quoted
4432                         return
4433                 if c is "'"
4434                         tok_cur_tag.system_identifier = ''
4435                         tok_state = tok_state_doctype_system_identifier_single_quoted
4436                         return
4437                 if c is '>'
4438                         parse_error()
4439                         tok_cur_tag.flag 'force-quirks', true
4440                         tok_state = tok_state_data
4441                         return tok_cur_tag
4442                 if c is '' # EOF
4443                         parse_error()
4444                         tok_state = tok_state_data
4445                         tok_cur_tag.flag 'force-quirks', true
4446                         cur -= 1 # Reconsume
4447                         return tok_cur_tag
4448                 # Anything else
4449                 parse_error()
4450                 tok_cur_tag.flag 'force-quirks', true
4451                 tok_state = tok_state_bogus_doctype
4452                 return null
4453
4454         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4455         tok_state_doctype_system_identifier_double_quoted = ->
4456                 c = txt.charAt(cur++)
4457                 if c is '"'
4458                         tok_state = tok_state_after_doctype_system_identifier
4459                         return
4460                 if c is "\u0000"
4461                         parse_error()
4462                         tok_cur_tag.system_identifier += "\ufffd"
4463                         return
4464                 if c is '>'
4465                         parse_error()
4466                         tok_cur_tag.flag 'force-quirks', true
4467                         tok_state = tok_state_data
4468                         return tok_cur_tag
4469                 if c is '' # EOF
4470                         parse_error()
4471                         tok_state = tok_state_data
4472                         tok_cur_tag.flag 'force-quirks', true
4473                         cur -= 1 # Reconsume
4474                         return tok_cur_tag
4475                 # Anything else
4476                 tok_cur_tag.system_identifier += c
4477                 return null
4478
4479         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4480         tok_state_doctype_system_identifier_single_quoted = ->
4481                 c = txt.charAt(cur++)
4482                 if c is "'"
4483                         tok_state = tok_state_after_doctype_system_identifier
4484                         return
4485                 if c is "\u0000"
4486                         parse_error()
4487                         tok_cur_tag.system_identifier += "\ufffd"
4488                         return
4489                 if c is '>'
4490                         parse_error()
4491                         tok_cur_tag.flag 'force-quirks', true
4492                         tok_state = tok_state_data
4493                         return tok_cur_tag
4494                 if c is '' # EOF
4495                         parse_error()
4496                         tok_state = tok_state_data
4497                         tok_cur_tag.flag 'force-quirks', true
4498                         cur -= 1 # Reconsume
4499                         return tok_cur_tag
4500                 # Anything else
4501                 tok_cur_tag.system_identifier += c
4502                 return null
4503
4504         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4505         tok_state_after_doctype_system_identifier = ->
4506                 c = txt.charAt(cur++)
4507                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4508                         return
4509                 if c is '>'
4510                         tok_state = tok_state_data
4511                         return tok_cur_tag
4512                 if c is '' # EOF
4513                         parse_error()
4514                         tok_state = tok_state_data
4515                         tok_cur_tag.flag 'force-quirks', true
4516                         cur -= 1 # Reconsume
4517                         return tok_cur_tag
4518                 # Anything else
4519                 parse_error()
4520                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4521                 tok_state = tok_state_bogus_doctype
4522                 return null
4523
4524         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4525         tok_state_bogus_doctype = ->
4526                 c = txt.charAt(cur++)
4527                 if c is '>'
4528                         tok_state = tok_state_data
4529                         return tok_cur_tag
4530                 if c is '' # EOF
4531                         tok_state = tok_state_data
4532                         cur -= 1 # Reconsume
4533                         return tok_cur_tag
4534                 # Anything else
4535                 return null
4536
4537         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4538         tok_state_cdata_section = ->
4539                 tok_state = tok_state_data
4540                 next_gt = txt.indexOf ']]>', cur
4541                 if next_gt is -1
4542                         val = txt.substr cur
4543                         cur = txt.length
4544                 else
4545                         val = txt.substr cur, (next_gt - cur)
4546                         cur = next_gt + 3
4547                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
4548                 if val.length > 0
4549                         return new_character_token val # fixfull split
4550                 return null
4551
4552         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4553         # Don't set this as a state, just call it
4554         # returns a string (NOT a text node)
4555         parse_character_reference = (allowed_char = null, in_attr = false) ->
4556                 if cur >= txt.length
4557                         return '&'
4558                 switch c = txt.charAt(cur)
4559                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4560                                 # explicitly not a parse error
4561                                 return '&'
4562                         when ';'
4563                                 # there has to be "one or more" alnums between & and ; to be a parse error
4564                                 return '&'
4565                         when '#'
4566                                 if cur + 1 >= txt.length
4567                                         return '&'
4568                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4569                                         base = 16
4570                                         charset = hex_chars
4571                                         start = cur + 2
4572                                 else
4573                                         charset = digits
4574                                         start = cur + 1
4575                                         base = 10
4576                                 i = 0
4577                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4578                                         i += 1
4579                                 if i is 0
4580                                         return '&'
4581                                 cur = start + i
4582                                 if txt.charAt(start + i) is ';'
4583                                         cur += 1
4584                                 else
4585                                         parse_error()
4586                                 code_point = txt.substr(start, i)
4587                                 while code_point.charAt(0) is '0' and code_point.length > 1
4588                                         code_point = code_point.substr 1
4589                                 code_point = parseInt(code_point, base)
4590                                 if unicode_fixes[code_point]?
4591                                         parse_error()
4592                                         return unicode_fixes[code_point]
4593                                 else
4594                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4595                                                 parse_error()
4596                                                 return "\ufffd"
4597                                         else
4598                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4599                                                         parse_error()
4600                                                 return from_code_point code_point
4601                                 return
4602                         else
4603                                 for i in [0...31]
4604                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4605                                                 break
4606                                 if i is 0
4607                                         # exit early, because parse_error() below needs at least one alnum
4608                                         return '&'
4609                                 if txt.charAt(cur + i) is ';'
4610                                         i += 1 # include ';' terminator in value
4611                                         decoded = decode_named_char_ref txt.substr(cur, i)
4612                                         if decoded?
4613                                                 cur += i
4614                                                 return decoded
4615                                         parse_error()
4616                                         return '&'
4617                                 else
4618                                         # no ';' terminator (only legacy char refs)
4619                                         max = i
4620                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4621                                                 c = legacy_char_refs[txt.substr(cur, i)]
4622                                                 if c?
4623                                                         if in_attr
4624                                                                 if txt.charAt(cur + i) is '='
4625                                                                         # "because some legacy user agents will
4626                                                                         # misinterpret the markup in those cases"
4627                                                                         parse_error()
4628                                                                         return '&'
4629                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4630                                                                         # this makes attributes forgiving about url args
4631                                                                         return '&'
4632                                                         # ok, and besides the weird exceptions for attributes...
4633                                                         # return the matching char
4634                                                         cur += i # consume entity chars
4635                                                         parse_error() # because no terminating ";"
4636                                                         return c
4637                                         parse_error()
4638                                         return '&'
4639                 return # never reached
4640
4641         eat_next_token_if_newline = ->
4642                 old_cur = cur
4643                 t = null
4644                 until t?
4645                         t = tok_state()
4646                 if t.type is TYPE_TEXT
4647                         # definition of a newline depends on whether it was a character ref or not
4648                         if cur - old_cur is 1
4649                                 # not a character reference
4650                                 if t.text is "\u000d" or t.text is "\u000a"
4651                                         return
4652                         else
4653                                 if t.text is "\u000a"
4654                                         return
4655                 # not a "newline"
4656                 cur = old_cur
4657                 return
4658
4659         # tree constructor initialization
4660         # see comments on TYPE_TAG/etc for the structure of this data
4661         txt = args.html
4662         cur = 0
4663         doc = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4664         doc.flag 'quirks mode', QUIRKS_NO # TODO bugreport spec for not specifying this
4665         fragment_root = null # fragment parsing algorithm returns children of this
4666         open_els = []
4667         afe = [] # active formatting elements
4668         template_ins_modes = []
4669         ins_mode = ins_mode_initial
4670         original_ins_mode = ins_mode # TODO check spec
4671         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4672         flag_frameset_ok = true
4673         flag_parsing = true
4674         flag_foster_parenting = false
4675         form_element_pointer = null
4676         temporary_buffer = null
4677         pending_table_character_tokens = []
4678         head_element_pointer = null
4679         flag_fragment_parsing = false
4680         context_element = null
4681         prev_node_id = 0 # just for debugging
4682
4683         # tokenizer initialization
4684         tok_state = tok_state_data
4685
4686         parse_init = ->
4687                 # fragment parsing (text arg)
4688                 if args.fragment?
4689                         # this handles the fragment from the tests in the format described here:
4690                         # https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/README.md
4691                         f = args.fragment
4692                         ns = NS_HTML
4693                         if f.substr(0, 5) is 'math '
4694                                 f = f.substr 5
4695                                 ns = NS_MATHML
4696                         else if f.substr(0, 4) is 'svg '
4697                                 f = f.substr 4
4698                                 ns = NS_SVG
4699                         t = new_open_tag f
4700                         context_element = token_to_element t, ns
4701                         context_element.document = new Node TYPE_TAG, name: 'document', namespace: NS_HTML
4702                         context_element.document.flag 'quirks mode', QUIRKS_NO
4703                 # fragment parsing (Node arg)
4704                 if args.context?
4705                         context_element = args.context
4706
4707                 # http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4708                 # fragment parsing algorithm
4709                 if context_element?
4710                         flag_fragment_parsing = true
4711                         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4712                         # search up the tree from context, to try to find it's document,
4713                         # because this file only puts a "document" property on the root
4714                         # element.
4715                         old_doc = null
4716                         el = context_element
4717                         loop
4718                                 if el.document?
4719                                         old_doc = el.document
4720                                         break
4721                                 if el.parent
4722                                         el = el.parent
4723                                 else
4724                                         break
4725                         if old_doc
4726                                 doc.flag 'quirks mode', old_doc.flag 'quirks mode'
4727                         # set tok_state
4728                         if context_element.namespace is NS_HTML
4729                                 switch context_element.name
4730                                         when 'title', 'textarea'
4731                                                 tok_state = tok_state_rcdata
4732                                         when 'style', 'xmp', 'iframe', 'noembed', 'noframes'
4733                                                 tok_state = tok_state_rawtext
4734                                         when 'script'
4735                                                 tok_state = tok_state_script_data
4736                                         when 'noscript'
4737                                                 if flag_scripting
4738                                                         tok_state = tok_state_rawtext
4739                                         when 'plaintext'
4740                                                 tok_state = tok_state_plaintext
4741                         fragment_root = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4742                         doc.children.push fragment_root
4743                         fragment_root.document = doc
4744                         open_els = [fragment_root]
4745                         if context_element.name is 'template' and context_element.namespace is NS_HTML
4746                                 template_ins_modes.unshift ins_mode_in_template
4747                         # fixfull create token for context (it should have it's original one already)
4748                         reset_ins_mode()
4749                         # set form_element pointer... in the foreign doc?!
4750                         el = context_element
4751                         loop
4752                                 if el.name is 'form' and el.namespace is NS_HTML
4753                                         form_element_pointer = el
4754                                         break
4755                                 if el.parent
4756                                         el = el.parent
4757                                 else
4758                                         break
4759
4760                 # text pre-processing
4761                 # FIXME check http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4762                 txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4763                 txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4764
4765         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4766         parse_main_loop = ->
4767                 while flag_parsing
4768                         t = tok_state()
4769                         if t?
4770                                 process_token t
4771                                 # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4772                 return
4773         parse_init()
4774         parse_main_loop()
4775
4776         if flag_fragment_parsing
4777                 return fragment_root.children
4778         return doc.children
4779
4780 serialize_els = (els, shallow, show_ids) ->
4781         serialized = ''
4782         sep = ''
4783         for t in els
4784                 serialized += sep
4785                 sep = ','
4786                 serialized += t.serialize shallow, show_ids
4787         return serialized
4788
4789 module.exports.parse_html = parse_html
4790 module.exports.debug_log_reset = debug_log_reset
4791 module.exports.debug_log_each = debug_log_each
4792 module.exports.TYPE_TAG = TYPE_TAG
4793 module.exports.TYPE_TEXT = TYPE_TEXT
4794 module.exports.TYPE_COMMENT = TYPE_COMMENT
4795 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4796 module.exports.NS_HTML = NS_HTML
4797 module.exports.NS_MATHML = NS_MATHML
4798 module.exports.NS_SVG = NS_SVG
4799 module.exports.QUIRKS_NO = QUIRKS_NO
4800 module.exports.QUIRKS_LIMITED = QUIRKS_LIMITED
4801 module.exports.QUIRKS_YES = QUIRKS_YES