JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
06c2a1b04e67c55ad0fbf7e5e1b2b78396efa99f
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor. Hence it does not attempt to parse doctypes, <html>, <head>
20 # or <body> tags, nor does it produce the top level "document" node in the dom
21 # tree, nor nodes for html, head or body. Comments containing "fixfull"
22 # indicate places where additional code is needed for full HTML document
23 # parsing.
24 #
25 # Instead, the data structure produced by this parser is an array of Nodes.
26
27
28 # stacks/lists
29 #
30 # the spec uses a many different words do indicate which ends of lists/stacks
31 # they are talking about (and relative movement within the lists/stacks). This
32 # section splains. I'm implementing "lists" (afe and open_els) the same way
33 # (both as stacks)
34 #
35 # stacks grow downward (current element is index=0)
36 #
37 # example: open_els = [a, b, c, d, e, f, g]
38 #
39 # "grows downwards" means it's visualized like this: (index: el, names)
40 #
41 #   6: g "start of the list", "topmost", "first"
42 #   5: f
43 #   4: e "previous" (to d), "above", "before"
44 #   3: d   (previous/next are relative to this element)
45 #   2: c "next", "after", "lower", "below"
46 #   1: b
47 #   0: a "end of the list", "current node", "bottommost", "last"
48
49
50 # browser
51 # note: to get this to run outside a browser, you'll have to write a native
52 # implementation of decode_named_char_ref()
53 unless module?.exports?
54         window.wheic = {}
55         module = exports: window.wheic
56
57 from_code_point = (x) ->
58         if String.fromCodePoint?
59                 return String.fromCodePoint x
60         else
61                 if x <= 0xffff
62                         return String.fromCharCode x
63                 x -= 0x10000
64                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
65
66 # Each node is an obect of the Node class. Here are the Node types:
67 TYPE_TAG = 0 # name, {attributes}, [children]
68 TYPE_TEXT = 1 # "text"
69 TYPE_COMMENT = 2
70 TYPE_DOCTYPE = 3
71 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
72 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
73 TYPE_END_TAG = 5 # name
74 TYPE_EOF = 6
75 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
76 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
77
78 # namespace constants
79 NS_HTML = 1
80 NS_MATHML = 2
81 NS_SVG = 3
82
83 g_debug_log = []
84 debug_log_reset = ->
85         g_debug_log = []
86 debug_log = (str) ->
87         g_debug_log.push str
88 debug_log_each = (cb) ->
89         for str in g_debug_log
90                 cb str
91
92 prev_node_id = 0
93 class Node
94         constructor: (type, args = {}) ->
95                 @type = type # one of the TYPE_* constants above
96                 @name = args.name ? '' # tag name
97                 @text = args.text ? '' # contents for text/comment nodes
98                 @attrs = args.attrs ? {}
99                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
100                 @children = args.children ? []
101                 @namespace = args.namespace ? NS_HTML
102                 @parent = args.parent ? null
103                 @token = args.token ? null
104                 @flags = args.flags ? {}
105                 if args.id?
106                         @id = "#{args.id}+"
107                 else
108                         @id = "#{++prev_node_id}"
109         acknowledge_self_closing: ->
110                 if @token?
111                         @token.flag 'did_self_close'
112                 else
113                         @flag 'did_self_close', true
114         flag: (key, value = null) ->
115                 if value?
116                         @flags[key] = value
117                 else
118                         return @flags[key]
119         serialize: (shallow = false, show_ids = false) -> # for unit tests
120                 ret = ''
121                 switch @type
122                         when TYPE_TAG
123                                 ret += 'tag:'
124                                 ret += JSON.stringify @name
125                                 ret += ','
126                                 if show_ids
127                                         ret += "##{@id},"
128                                 if shallow
129                                         break
130                                 attr_keys = []
131                                 for k of @attrs
132                                         attr_keys.push k
133                                 attr_keys.sort()
134                                 ret += '{'
135                                 sep = ''
136                                 for k in attr_keys
137                                         ret += sep
138                                         sep = ','
139                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
140                                 ret += '},['
141                                 sep = ''
142                                 for c in @children
143                                         ret += sep
144                                         sep = ','
145                                         ret += c.serialize shallow, show_ids
146                                 ret += ']'
147                         when TYPE_TEXT
148                                 ret += 'text:'
149                                 ret += JSON.stringify @text
150                         when TYPE_COMMENT
151                                 ret += 'comment:'
152                                 ret += JSON.stringify @text
153                         when TYPE_DOCTYPE
154                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
155                         when TYPE_AFE_MARKER
156                                 ret += 'marker'
157                         when TYPE_AAA_BOOKMARK
158                                 ret += 'aaa_bookmark'
159                         else
160                                 ret += 'unknown:'
161                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
162                 return ret
163
164 # helpers: (only take args that are normally known when parser creates nodes)
165 new_open_tag = (name) ->
166         return new Node TYPE_START_TAG, name: name
167 new_end_tag = (name) ->
168         return new Node TYPE_END_TAG, name: name
169 new_element = (name) ->
170         return new Node TYPE_TAG, name: name
171 new_text_node = (txt) ->
172         return new Node TYPE_TEXT, text: txt
173 new_character_token = new_text_node
174 new_comment_token = (txt) ->
175         return new Node TYPE_COMMENT, text: txt
176 new_doctype_token = (name) ->
177         return new Node TYPE_DOCTYPE, name: name
178 new_eof_token = ->
179         return new Node TYPE_EOF
180 new_afe_marker = ->
181         return new Node TYPE_AFE_MARKER
182 new_aaa_bookmark = ->
183         return new Node TYPE_AAA_BOOKMARK
184
185 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
186 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
187 digits = "0123456789"
188 alnum = lc_alpha + uc_alpha + digits
189 hex_chars = digits + "abcdefABCDEF"
190
191 is_uc_alpha = (str) ->
192         return str.length is 1 and uc_alpha.indexOf(str) > -1
193 is_lc_alpha = (str) ->
194         return str.length is 1 and lc_alpha.indexOf(str) > -1
195
196 # some SVG elements have dashes in them
197 tag_name_chars = alnum + "-"
198
199 # http://www.w3.org/TR/html5/infrastructure.html#space-character
200 space_chars = "\u0009\u000a\u000c\u000d\u0020"
201 is_space = (txt) ->
202         return txt.length is 1 and space_chars.indexOf(txt) > -1
203 is_space_tok = (t) ->
204         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
205
206 is_input_hidden_tok = (t) ->
207         return false unless t.type is TYPE_START_TAG
208         for a in t.attrs_a
209                 if a[0] is 'type'
210                         if a[1].toLowerCase() is 'hidden'
211                                 return true
212                         return false
213         return false
214
215 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
216 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
217
218 unicode_fixes = {}
219 unicode_fixes[0x00] = "\uFFFD"
220 unicode_fixes[0x80] = "\u20AC"
221 unicode_fixes[0x82] = "\u201A"
222 unicode_fixes[0x83] = "\u0192"
223 unicode_fixes[0x84] = "\u201E"
224 unicode_fixes[0x85] = "\u2026"
225 unicode_fixes[0x86] = "\u2020"
226 unicode_fixes[0x87] = "\u2021"
227 unicode_fixes[0x88] = "\u02C6"
228 unicode_fixes[0x89] = "\u2030"
229 unicode_fixes[0x8A] = "\u0160"
230 unicode_fixes[0x8B] = "\u2039"
231 unicode_fixes[0x8C] = "\u0152"
232 unicode_fixes[0x8E] = "\u017D"
233 unicode_fixes[0x91] = "\u2018"
234 unicode_fixes[0x92] = "\u2019"
235 unicode_fixes[0x93] = "\u201C"
236 unicode_fixes[0x94] = "\u201D"
237 unicode_fixes[0x95] = "\u2022"
238 unicode_fixes[0x96] = "\u2013"
239 unicode_fixes[0x97] = "\u2014"
240 unicode_fixes[0x98] = "\u02DC"
241 unicode_fixes[0x99] = "\u2122"
242 unicode_fixes[0x9A] = "\u0161"
243 unicode_fixes[0x9B] = "\u203A"
244 unicode_fixes[0x9C] = "\u0153"
245 unicode_fixes[0x9E] = "\u017E"
246 unicode_fixes[0x9F] = "\u0178"
247
248 # These are the character references that don't need a terminating semicolon
249 # min length: 2, max: 6, none are a prefix of any other.
250 legacy_char_refs = {
251         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
252         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
253         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
254         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
255         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
256         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
257         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
258         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
259         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
260         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
261         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
262         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
263         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
264         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
265         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
266         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
267         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
268         yen: '¥', yuml: 'ÿ'
269 }
270
271 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
272 raw_text_elements = ['script', 'style']
273 escapable_raw_text_elements = ['textarea', 'title']
274 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
275 svg_elements = [
276         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
277         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
278         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
279         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
280         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
281         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
282         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
283         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
284         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
285         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
286         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
287         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
288         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
289         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
290         'view', 'vkern'
291 ]
292
293 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
294 mathml_elements = [
295         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
296         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
297         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
298         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
299         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
300         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
301         'determinant', 'diff', 'divergence', 'divide', 'domain',
302         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
303         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
304         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
305         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
306         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
307         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
308         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
309         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
310         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
311         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
312         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
313         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
314         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
315         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
316         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
317         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
318         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
319         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
320         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
321         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
322         'vectorproduct', 'xor'
323 ]
324 # foreign_elements = [svg_elements..., mathml_elements...]
325 #normal_elements = All other allowed HTML elements are normal elements.
326
327 special_elements = {
328         # HTML:
329         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
330         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
331         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
332         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
333         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
334         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
335         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
336         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
337         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
338         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
339         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML, meta:NS_HTML, nav:NS_HTML,
340         noembed:NS_HTML, noframes:NS_HTML, noscript:NS_HTML, object:NS_HTML,
341         ol:NS_HTML, p:NS_HTML, param:NS_HTML, plaintext:NS_HTML, pre:NS_HTML,
342         script:NS_HTML, section:NS_HTML, select:NS_HTML, source:NS_HTML,
343         style:NS_HTML, summary:NS_HTML, table:NS_HTML, tbody:NS_HTML, td:NS_HTML,
344         template:NS_HTML, textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML,
345         thead:NS_HTML, title:NS_HTML, tr:NS_HTML, track:NS_HTML, ul:NS_HTML,
346         wbr:NS_HTML, xmp:NS_HTML,
347
348         # MathML:
349         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
350         'annotation-xml':NS_MATHML,
351
352         # SVG:
353         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
354 }
355
356 formatting_elements = {
357          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
358          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
359          u: true
360 }
361
362 mathml_text_integration = {
363         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
364 }
365 is_mathml_text_integration_point = (el) ->
366         return mathml_text_integration[el.name] is el.namespace
367 is_html_integration = (el) -> # DON'T PASS A TOKEN
368         if el.namespace is NS_MATHML
369                 if el.name is 'annotation-xml'
370                         if el.attrs.encoding?
371                                 if el.attrs.encoding.toLowerCase() is 'text/html'
372                                         return true
373                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
374                                         return true
375                 return false
376         if el.namespace is NS_SVG
377                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
378                         return true
379         return false
380
381 h_tags = {
382         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
383 }
384
385 foster_parenting_targets = {
386         table: NS_HTML
387         tbody: NS_HTML
388         tfoot: NS_HTML
389         thead: NS_HTML
390         tr: NS_HTML
391 }
392
393 end_tag_implied = {
394         dd: NS_HTML
395         dt: NS_HTML
396         li: NS_HTML
397         option: NS_HTML
398         optgroup: NS_HTML
399         p: NS_HTML
400         rb: NS_HTML
401         rp: NS_HTML
402         rt: NS_HTML
403         rtc: NS_HTML
404 }
405
406 el_is_special = (e) ->
407         return special_elements[e.name] is e.namespace
408
409 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
410 el_is_special_not_adp = (el) ->
411         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
412
413 svg_name_fixes = {
414         altglyph: 'altGlyph'
415         altglyphdef: 'altGlyphDef'
416         altglyphitem: 'altGlyphItem'
417         animatecolor: 'animateColor'
418         animatemotion: 'animateMotion'
419         animatetransform: 'animateTransform'
420         clippath: 'clipPath'
421         feblend: 'feBlend'
422         fecolormatrix: 'feColorMatrix'
423         fecomponenttransfer: 'feComponentTransfer'
424         fecomposite: 'feComposite'
425         feconvolvematrix: 'feConvolveMatrix'
426         fediffuselighting: 'feDiffuseLighting'
427         fedisplacementmap: 'feDisplacementMap'
428         fedistantlight: 'feDistantLight'
429         fedropshadow: 'feDropShadow'
430         feflood: 'feFlood'
431         fefunca: 'feFuncA'
432         fefuncb: 'feFuncB'
433         fefuncg: 'feFuncG'
434         fefuncr: 'feFuncR'
435         fegaussianblur: 'feGaussianBlur'
436         feimage: 'feImage'
437         femerge: 'feMerge'
438         femergenode: 'feMergeNode'
439         femorphology: 'feMorphology'
440         feoffset: 'feOffset'
441         fepointlight: 'fePointLight'
442         fespecularlighting: 'feSpecularLighting'
443         fespotlight: 'feSpotLight'
444         fetile: 'feTile'
445         feturbulence: 'feTurbulence'
446         foreignobject: 'foreignObject'
447         glyphref: 'glyphRef'
448         lineargradient: 'linearGradient'
449         radialgradient: 'radialGradient'
450         textpath: 'textPath'
451 }
452 svg_attribute_fixes = {
453         attributename: 'attributeName'
454         attributetype: 'attributeType'
455         basefrequency: 'baseFrequency'
456         baseprofile: 'baseProfile'
457         calcmode: 'calcMode'
458         clippathunits: 'clipPathUnits'
459         contentscripttype: 'contentScriptType'
460         contentstyletype: 'contentStyleType'
461         diffuseconstant: 'diffuseConstant'
462         edgemode: 'edgeMode'
463         externalresourcesrequired: 'externalResourcesRequired'
464         filterres: 'filterRes'
465         filterunits: 'filterUnits'
466         glyphref: 'glyphRef'
467         gradienttransform: 'gradientTransform'
468         gradientunits: 'gradientUnits'
469         kernelmatrix: 'kernelMatrix'
470         kernelunitlength: 'kernelUnitLength'
471         keypoints: 'keyPoints'
472         keysplines: 'keySplines'
473         keytimes: 'keyTimes'
474         lengthadjust: 'lengthAdjust'
475         limitingconeangle: 'limitingConeAngle'
476         markerheight: 'markerHeight'
477         markerunits: 'markerUnits'
478         markerwidth: 'markerWidth'
479         maskcontentunits: 'maskContentUnits'
480         maskunits: 'maskUnits'
481         numoctaves: 'numOctaves'
482         pathlength: 'pathLength'
483         patterncontentunits: 'patternContentUnits'
484         patterntransform: 'patternTransform'
485         patternunits: 'patternUnits'
486         pointsatx: 'pointsAtX'
487         pointsaty: 'pointsAtY'
488         pointsatz: 'pointsAtZ'
489         preservealpha: 'preserveAlpha'
490         preserveaspectratio: 'preserveAspectRatio'
491         primitiveunits: 'primitiveUnits'
492         refx: 'refX'
493         refy: 'refY'
494         repeatcount: 'repeatCount'
495         repeatdur: 'repeatDur'
496         requiredextensions: 'requiredExtensions'
497         requiredfeatures: 'requiredFeatures'
498         specularconstant: 'specularConstant'
499         specularexponent: 'specularExponent'
500         spreadmethod: 'spreadMethod'
501         startoffset: 'startOffset'
502         stddeviation: 'stdDeviation'
503         stitchtiles: 'stitchTiles'
504         surfacescale: 'surfaceScale'
505         systemlanguage: 'systemLanguage'
506         tablevalues: 'tableValues'
507         targetx: 'targetX'
508         targety: 'targetY'
509         textlength: 'textLength'
510         viewbox: 'viewBox'
511         viewtarget: 'viewTarget'
512         xchannelselector: 'xChannelSelector'
513         ychannelselector: 'yChannelSelector'
514         zoomandpan: 'zoomAndPan'
515 }
516 adjust_mathml_attributes = (t) ->
517         for a in t.attrs_a
518                 if a[0] is 'definitionurl'
519                         a[0] = 'definitionURL'
520         return
521 adjust_svg_attributes = (t) ->
522         for a in t.attrs_a
523                 if svg_attribute_fixes[a[0]]?
524                         a[0] = svg_attribute_fixes[a[0]]
525         return
526 adjust_foreign_attributes = (t) ->
527         # fixfull
528         return
529
530 # decode_named_char_ref()
531 #
532 # The list of named character references is _huge_ so ask the browser to decode
533 # for us instead of wasting bandwidth/space on including the table here.
534 #
535 # Pass without the "&" but with the ";" examples:
536 #    for "&amp" pass "amp;"
537 #    for "&#x2032" pass "x2032;"
538 g_dncr = {
539         cache: {}
540         textarea: document.createElement('textarea')
541 }
542 # TODO test this in IE8
543 decode_named_char_ref = (txt) ->
544         txt = "&#{txt}"
545         decoded = g_dncr.cache[txt]
546         return decoded if decoded?
547         g_dncr.textarea.innerHTML = txt
548         decoded = g_dncr.textarea.value
549         return null if decoded is txt
550         return g_dncr.cache[txt] = decoded
551
552 parse_html = (args) ->
553         txt = null
554         cur = null # index of next char in txt to be parsed
555         # declare doc and tokenizer variables so they're in scope below
556         doc = null
557         open_els = null # stack of open elements
558         afe = null # active formatting elements
559         template_ins_modes = null
560         ins_mode = null
561         original_ins_mode = null
562         tok_state = null
563         tok_cur_tag = null # partially parsed tag
564         flag_scripting = null
565         flag_frameset_ok = null
566         flag_parsing = null
567         flag_foster_parenting = null
568         form_element_pointer = null
569         temporary_buffer = null
570         pending_table_character_tokens = null
571         head_element_pointer = null
572         flag_fragment_parsing = null
573         context_element = null
574
575         stop_parsing = ->
576                 flag_parsing = false
577
578         parse_error = ->
579                 if args.error_cb?
580                         args.error_cb cur
581                 else
582                         console.log "Parse error at character #{cur} of #{txt.length}"
583
584         afe_push = (new_el) ->
585                 matches = 0
586                 for el, i in afe
587                         if el.name is new_el.name and el.namespace is new_el.namespace
588                                 for k, v of el.attrs
589                                         continue unless new_el.attrs[k] is v
590                                 for k, v of new_el.attrs
591                                         continue unless el.attrs[k] is v
592                                 matches += 1
593                                 if matches is 3
594                                         afe.splice i, 1
595                                         break
596                 afe.unshift new_el
597         afe_push_marker = ->
598                 afe.unshift new_afe_marker()
599
600         # the functions below impliment the Tree Contstruction algorithm
601         # http://www.w3.org/TR/html5/syntax.html#tree-construction
602
603         # But first... the helpers
604         template_tag_is_open = ->
605                 for t in open_els
606                         if t.name is 'template' and t.namespace is NS_HTML
607                                 return true
608                 return false
609         is_in_scope_x = (tag_name, scope, namespace) ->
610                 for t in open_els
611                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
612                                 return true
613                         if scope[t.name] is t.namespace
614                                 return false
615                 return false
616         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
617                 for t in open_els
618                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
619                                 return true
620                         if scope[t.name] is t.namespace
621                                 return false
622                         if scope2[t.name] is t.namespace
623                                 return false
624                 return false
625         standard_scopers = {
626                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
627                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
628                 template: NS_HTML, mi: NS_MATHML,
629
630                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
631                 'annotation-xml': NS_MATHML,
632
633                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
634         }
635         button_scopers = button: NS_HTML
636         li_scopers = ol: NS_HTML, ul: NS_HTML
637         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
638         is_in_scope = (tag_name, namespace = null) ->
639                 return is_in_scope_x tag_name, standard_scopers, namespace
640         is_in_button_scope = (tag_name, namespace = null) ->
641                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
642         is_in_table_scope = (tag_name, namespace = null) ->
643                 return is_in_scope_x tag_name, table_scopers, namespace
644         # aka is_in_list_item_scope
645         is_in_li_scope = (tag_name, namespace = null) ->
646                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
647         is_in_select_scope = (tag_name, namespace = null) ->
648                 for t in open_els
649                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
650                                 return true
651                         if t.ns isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
652                                 return false
653                 return false
654         # this checks for a particular element, not by name
655         # this requires a namespace match
656         el_is_in_scope = (needle) ->
657                 for el in open_els
658                         if el is needle
659                                 return true
660                         if standard_scopers[el.name] is el.namespace
661                                 return false
662                 return false
663
664         clear_to_table_stopers = {
665                 'table': true
666                 'template': true
667                 'html': true
668         }
669         clear_stack_to_table_context = ->
670                 loop
671                         if clear_to_table_stopers[open_els[0].name]?
672                                 break
673                         open_els.shift()
674                 return
675         clear_to_table_body_stopers = {
676                 tbody: NS_HTML
677                 tfoot: NS_HTML
678                 thead: NS_HTML
679                 template: NS_HTML
680                 html: NS_HTML
681         }
682         clear_stack_to_table_body_context = ->
683                 loop
684                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
685                                 break
686                         open_els.shift()
687                 return
688         clear_to_table_row_stopers = {
689                 'tr': true
690                 'template': true
691                 'html': true
692         }
693         clear_stack_to_table_row_context = ->
694                 loop
695                         if clear_to_table_row_stopers[open_els[0].name]?
696                                 break
697                         open_els.shift()
698                 return
699         clear_afe_to_marker = ->
700                 loop
701                         return unless afe.length > 0 # this happens in fragment case, ?spec error
702                         el = afe.shift()
703                         if el.type is TYPE_AFE_MARKER
704                                 return
705                 return
706
707         # 8.2.3.1 ...
708         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
709         reset_ins_mode = ->
710                 # 1. Let last be false.
711                 last = false
712                 # 2. Let node be the last node in the stack of open elements.
713                 node_i = 0
714                 node = open_els[node_i]
715                 # 3. Loop: If node is the first node in the stack of open elements,
716                 # then set last to true, and, if the parser was originally created as
717                 # part of the HTML fragment parsing algorithm (fragment case) set node
718                 # to the context element.
719                 loop
720                         if node_i is open_els.length - 1
721                                 last = true
722                                 # fixfull (fragment case)
723
724                         # 4. If node is a select element, run these substeps:
725                         if node.name is 'select' and node.namespace is NS_HTML
726                                 # 1. If last is true, jump to the step below labeled done.
727                                 unless last
728                                         # 2. Let ancestor be node.
729                                         ancestor_i = node_i
730                                         ancestor = node
731                                         # 3. Loop: If ancestor is the first node in the stack of
732                                         # open elements, jump to the step below labeled done.
733                                         loop
734                                                 if ancestor_i is open_els.length - 1
735                                                         break
736                                                 # 4. Let ancestor be the node before ancestor in the stack
737                                                 # of open elements.
738                                                 ancestor_i += 1
739                                                 ancestor = open_els[ancestor_i]
740                                                 # 5. If ancestor is a template node, jump to the step below
741                                                 # labeled done.
742                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
743                                                         break
744                                                 # 6. If ancestor is a table node, switch the insertion mode
745                                                 # to "in select in table" and abort these steps.
746                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
747                                                         ins_mode = ins_mode_in_select_in_table
748                                                         return
749                                                 # 7. Jump back to the step labeled loop.
750                                 # 8. Done: Switch the insertion mode to "in select" and abort
751                                 # these steps.
752                                 ins_mode = ins_mode_in_select
753                                 return
754                         # 5. If node is a td or th element and last is false, then switch
755                         # the insertion mode to "in cell" and abort these steps.
756                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
757                                 ins_mode = ins_mode_in_cell
758                                 return
759                         # 6. If node is a tr element, then switch the insertion mode to "in
760                         # row" and abort these steps.
761                         if node.name is 'tr' and node.namespace is NS_HTML
762                                 ins_mode = ins_mode_in_row
763                                 return
764                         # 7. If node is a tbody, thead, or tfoot element, then switch the
765                         # insertion mode to "in table body" and abort these steps.
766                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
767                                 ins_mode = ins_mode_in_table_body
768                                 return
769                         # 8. If node is a caption element, then switch the insertion mode
770                         # to "in caption" and abort these steps.
771                         if node.name is 'caption' and node.namespace is NS_HTML
772                                 ins_mode = ins_mode_in_caption
773                                 return
774                         # 9. If node is a colgroup element, then switch the insertion mode
775                         # to "in column group" and abort these steps.
776                         if node.name is 'colgroup' and node.namespace is NS_HTML
777                                 ins_mode = ins_mode_in_column_group
778                                 return
779                         # 10. If node is a table element, then switch the insertion mode to
780                         # "in table" and abort these steps.
781                         if node.name is 'table' and node.namespace is NS_HTML
782                                 ins_mode = ins_mode_in_table
783                                 return
784                         # 11. If node is a template element, then switch the insertion mode
785                         # to the current template insertion mode and abort these steps.
786                         if node.name is 'template' and node.namespace is NS_HTML
787                                 ins_mode = template_ins_modes[0]
788                                 return
789                         # 12. If node is a head element and last is true, then switch the
790                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
791                         # these steps. (fragment case)
792                         if node.name is 'head' and node.namespace is NS_HTML and last
793                                 ins_mode = ins_mode_in_body
794                                 return
795                         # 13. If node is a head element and last is false, then switch the
796                         # insertion mode to "in head" and abort these steps.
797                         if node.name is 'head' and node.namespace is NS_HTML and last is false
798                                 ins_mode = ins_mode_in_head
799                                 return
800                         # 14. If node is a body element, then switch the insertion mode to
801                         # "in body" and abort these steps.
802                         if node.name is 'body' and node.namespace is NS_HTML
803                                 ins_mode = ins_mode_in_body
804                                 return
805                         # 15. If node is a frameset element, then switch the insertion mode
806                         # to "in frameset" and abort these steps. (fragment case)
807                         if node.name is 'frameset' and node.namespace is NS_HTML
808                                 ins_mode = ins_mode_in_frameset
809                                 return
810                         # 16. If node is an html element, run these substeps:
811                         if node.name is 'html' and node.namespace is NS_HTML
812                                 # 1. If the head element pointer is null, switch the insertion
813                                 # mode to "before head" and abort these steps. (fragment case)
814                                 if head_element_pointer is null
815                                         ins_mode = ins_mode_before_head
816                                 else
817                                         # 2. Otherwise, the head element pointer is not null,
818                                         # switch the insertion mode to "after head" and abort these
819                                         # steps.
820                                         ins_mode = ins_mode_after_head
821                                 return
822                         # 17. If last is true, then switch the insertion mode to "in body"
823                         # and abort these steps. (fragment case)
824                         if last
825                                 ins_mode = ins_mode_in_body
826                                 return
827                         # 18. Let node now be the node before node in the stack of open
828                         # elements.
829                         node_i += 1
830                         node = open_els[node_i]
831                         # 19. Return to the step labeled loop.
832
833         # 8.2.3.2
834
835         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
836         adjusted_current_node = ->
837                 if open_els.length is 1 and flag_fragment_parsing
838                         return context_element
839                 return open_els[0]
840
841         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
842         # this implementation is structured (mostly) as described at the link above.
843         # capitalized comments are the "labels" described at the link above.
844         reconstruct_afe = ->
845                 return if afe.length is 0
846                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
847                         return
848                 # Rewind
849                 i = 0
850                 loop
851                         if i is afe.length - 1
852                                 break
853                         i += 1
854                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
855                                 i -= 1 # Advance
856                                 break
857                 # Create
858                 loop
859                         el = insert_html_element afe[i].token
860                         afe[i] = el
861                         break if i is 0
862                         i -= 1 # Advance
863
864         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
865         # adoption agency algorithm
866         # overview here:
867         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
868         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
869         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
870         adoption_agency = (subject) ->
871                 debug_log "adoption_agency()"
872                 debug_log "tree: #{serialize_els doc.children, false, true}"
873                 debug_log "open_els: #{serialize_els open_els, true, true}"
874                 debug_log "afe: #{serialize_els afe, true, true}"
875                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
876                         el = open_els[0]
877                         open_els.shift()
878                         # remove it from the list of active formatting elements (if found)
879                         for t, i in afe
880                                 if t is el
881                                         afe.splice i, 1
882                                         break
883                         debug_log "aaa: starting off with subject on top of stack, exiting"
884                         return
885                 outer = 0
886                 loop
887                         if outer >= 8
888                                 return
889                         outer += 1
890                         # 5. Let formatting element be the last element in the list of
891                         # active formatting elements that: is between the end of the list
892                         # and the last scope marker in the list, if any, or the start of
893                         # the list otherwise, and  has the tag name subject.
894                         fe = null
895                         for t, fe_of_afe in afe
896                                 if t.type is TYPE_AFE_MARKER
897                                         break
898                                 if t.name is subject
899                                         fe = t
900                                         break
901                         # If there is no such element, then abort these steps and instead
902                         # act as described in the "any other end tag" entry above.
903                         if fe is null
904                                 debug_log "aaa: fe not found in afe"
905                                 in_body_any_other_end_tag subject
906                                 return
907                         # 6. If formatting element is not in the stack of open elements,
908                         # then this is a parse error; remove the element from the list, and
909                         # abort these steps.
910                         in_open_els = false
911                         for t, fe_of_open_els in open_els
912                                 if t is fe
913                                         in_open_els = true
914                                         break
915                         unless in_open_els
916                                 debug_log "aaa: fe not found in open_els"
917                                 parse_error()
918                                 # "remove it from the list" must mean afe, since it's not in open_els
919                                 afe.splice fe_of_afe, 1
920                                 return
921                         # 7. If formatting element is in the stack of open elements, but
922                         # the element is not in scope, then this is a parse error; abort
923                         # these steps.
924                         unless el_is_in_scope fe
925                                 debug_log "aaa: fe not in scope"
926                                 parse_error()
927                                 return
928                         # 8. If formatting element is not the current node, this is a parse
929                         # error. (But do not abort these steps.)
930                         unless open_els[0] is fe
931                                 parse_error()
932                                 # continue
933                         # 9. Let furthest block be the topmost node in the stack of open
934                         # elements that is lower in the stack than formatting element, and
935                         # is an element in the special category. There might not be one.
936                         fb = null
937                         fb_of_open_els = null
938                         for t, i in open_els
939                                 if t is fe
940                                         break
941                                 if el_is_special t
942                                         fb = t
943                                         fb_of_open_els = i
944                                         # and continue, to see if there's one that's more "topmost"
945                         # 10. If there is no furthest block, then the UA must first pop all
946                         # the nodes from the bottom of the stack of open elements, from the
947                         # current node up to and including formatting element, then remove
948                         # formatting element from the list of active formatting elements,
949                         # and finally abort these steps.
950                         if fb is null
951                                 debug_log "aaa: no fb"
952                                 loop
953                                         t = open_els.shift()
954                                         if t is fe
955                                                 afe.splice fe_of_afe, 1
956                                                 return
957                         # 11. Let common ancestor be the element immediately above
958                         # formatting element in the stack of open elements.
959                         ca = open_els[fe_of_open_els + 1] # common ancestor
960
961                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
962                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
963                         bookmark = new_aaa_bookmark()
964                         for t, i in afe
965                                 if t is fe
966                                         afe.splice i, 0, bookmark
967                                         break
968                         node = last_node = fb
969                         inner = 0
970                         loop
971                                 inner += 1
972                                 # 3. Let node be the element immediately above node in the
973                                 # stack of open elements, or if node is no longer in the stack
974                                 # of open elements (e.g. because it got removed by this
975                                 # algorithm), the element that was immediately above node in
976                                 # the stack of open elements before node was removed.
977                                 node_next = null
978                                 for t, i in open_els
979                                         if t is node
980                                                 node_next = open_els[i + 1]
981                                                 break
982                                 node = node_next ? node_above
983                                 debug_log "inner loop #{inner}"
984                                 debug_log "tree: #{serialize_els doc.children, false, true}"
985                                 debug_log "open_els: #{serialize_els open_els, true, true}"
986                                 debug_log "afe: #{serialize_els afe, true, true}"
987                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
988                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
989                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
990                                 debug_log "node: #{node.serialize true, true}"
991                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
992
993                                 # 4. If node is formatting element, then go to the next step in
994                                 # the overall algorithm.
995                                 if node is fe
996                                         break
997                                 debug_log "the meat"
998                                 # 5. If inner loop counter is greater than three and node is in
999                                 # the list of active formatting elements, then remove node from
1000                                 # the list of active formatting elements.
1001                                 node_in_afe = false
1002                                 for t, i in afe
1003                                         if t is node
1004                                                 if inner > 3
1005                                                         afe.splice i, 1
1006                                                         debug_log "max out inner"
1007                                                 else
1008                                                         node_in_afe = true
1009                                                         debug_log "in afe"
1010                                                 break
1011                                 # 6. If node is not in the list of active formatting elements,
1012                                 # then remove node from the stack of open elements and then go
1013                                 # back to the step labeled inner loop.
1014                                 unless node_in_afe
1015                                         debug_log "not in afe"
1016                                         for t, i in open_els
1017                                                 if t is node
1018                                                         node_above = open_els[i + 1]
1019                                                         open_els.splice i, 1
1020                                                         break
1021                                         continue
1022                                 debug_log "the bones"
1023                                 # 7. create an element for the token for which the element node
1024                                 # was created, in the HTML namespace, with common ancestor as
1025                                 # the intended parent; replace the entry for node in the list
1026                                 # of active formatting elements with an entry for the new
1027                                 # element, replace the entry for node in the stack of open
1028                                 # elements with an entry for the new element, and let node be
1029                                 # the new element.
1030                                 new_node = token_to_element node.token, NS_HTML, ca
1031                                 for t, i in afe
1032                                         if t is node
1033                                                 afe[i] = new_node
1034                                                 debug_log "replaced in afe"
1035                                                 break
1036                                 for t, i in open_els
1037                                         if t is node
1038                                                 node_above = open_els[i + 1]
1039                                                 open_els[i] = new_node
1040                                                 debug_log "replaced in open_els"
1041                                                 break
1042                                 node = new_node
1043                                 # 8. If last node is furthest block, then move the
1044                                 # aforementioned bookmark to be immediately after the new node
1045                                 # in the list of active formatting elements.
1046                                 if last_node is fb
1047                                         for t, i in afe
1048                                                 if t is bookmark
1049                                                         afe.splice i, 1
1050                                                         debug_log "removed bookmark"
1051                                                         break
1052                                         for t, i in afe
1053                                                 if t is node
1054                                                         # "after" means lower
1055                                                         afe.splice i, 0, bookmark # "after as <-
1056                                                         debug_log "placed bookmark after node"
1057                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1058                                                         break
1059                                 # 9. Insert last node into node, first removing it from its
1060                                 # previous parent node if any.
1061                                 if last_node.parent?
1062                                         debug_log "last_node has parent"
1063                                         for c, i in last_node.parent.children
1064                                                 if c is last_node
1065                                                         debug_log "removing last_node from parent"
1066                                                         last_node.parent.children.splice i, 1
1067                                                         break
1068                                 node.children.push last_node
1069                                 last_node.parent = node
1070                                 # 10. Let last node be node.
1071                                 last_node = node
1072                                 debug_log "at last"
1073                                 # 11. Return to the step labeled inner loop.
1074                         # 14. Insert whatever last node ended up being in the previous step
1075                         # at the appropriate place for inserting a node, but using common
1076                         # ancestor as the override target.
1077
1078                         # In the case where fe is immediately followed by fb:
1079                         #   * inner loop exits out early (node==fe)
1080                         #   * last_node is fb
1081                         #   * last_node is still in the tree (not a duplicate)
1082                         if last_node.parent?
1083                                 debug_log "FEFIRST? last_node has parent"
1084                                 for c, i in last_node.parent.children
1085                                         if c is last_node
1086                                                 debug_log "removing last_node from parent"
1087                                                 last_node.parent.children.splice i, 1
1088                                                 break
1089
1090                         debug_log "after aaa inner loop"
1091                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1092                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1093                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1094                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1095                         debug_log "tree: #{serialize_els doc.children, false, true}"
1096
1097                         debug_log "insert"
1098
1099
1100                         # can't use standard insert token thing, because it's already in
1101                         # open_els and must stay at it's current position in open_els
1102                         dest = adjusted_insertion_location ca
1103                         dest[0].children.splice dest[1], 0, last_node
1104                         last_node.parent = dest[0]
1105
1106
1107                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1108                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1109                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1110                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1111                         debug_log "tree: #{serialize_els doc.children, false, true}"
1112
1113                         # 15. Create an element for the token for which formatting element
1114                         # was created, in the HTML namespace, with furthest block as the
1115                         # intended parent.
1116                         new_element = token_to_element fe.token, NS_HTML, fb
1117                         # 16. Take all of the child nodes of furthest block and append them
1118                         # to the element created in the last step.
1119                         while fb.children.length
1120                                 t = fb.children.shift()
1121                                 t.parent = new_element
1122                                 new_element.children.push t
1123                         # 17. Append that new element to furthest block.
1124                         new_element.parent = fb
1125                         fb.children.push new_element
1126                         # 18. Remove formatting element from the list of active formatting
1127                         # elements, and insert the new element into the list of active
1128                         # formatting elements at the position of the aforementioned
1129                         # bookmark.
1130                         for t, i in afe
1131                                 if t is fe
1132                                         afe.splice i, 1
1133                                         break
1134                         for t, i in afe
1135                                 if t is bookmark
1136                                         afe[i] = new_element
1137                                         break
1138                         # 19. Remove formatting element from the stack of open elements,
1139                         # and insert the new element into the stack of open elements
1140                         # immediately below the position of furthest block in that stack.
1141                         for t, i in open_els
1142                                 if t is fe
1143                                         open_els.splice i, 1
1144                                         break
1145                         for t, i in open_els
1146                                 if t is fb
1147                                         open_els.splice i, 0, new_element
1148                                         break
1149                         # 20. Jump back to the step labeled outer loop.
1150                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1151                         debug_log "tree: #{serialize_els doc.children, false, true}"
1152                         debug_log "open_els: #{serialize_els open_els, true, true}"
1153                         debug_log "afe: #{serialize_els afe, true, true}"
1154                 debug_log "AAA DONE"
1155
1156         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1157         close_p_element = ->
1158                 generate_implied_end_tags 'p' # arg is exception
1159                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1160                         parse_error()
1161                 while open_els.length > 1 # just in case
1162                         el = open_els.shift()
1163                         if el.name is 'p' and el.namespace is NS_HTML
1164                                 return
1165         close_p_if_in_button_scope = ->
1166                 if is_in_button_scope 'p', NS_HTML
1167                         close_p_element()
1168
1169         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1170         # aka insert_a_character = (t) ->
1171         insert_character = (t) ->
1172                 dest = adjusted_insertion_location()
1173                 # fixfull check for Document node
1174                 if dest[1] > 0
1175                         prev = dest[0].children[dest[1] - 1]
1176                         if prev.type is TYPE_TEXT
1177                                 prev.text += t.text
1178                                 return
1179                 dest[0].children.splice dest[1], 0, t
1180
1181
1182         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1183         process_token = (t) ->
1184                 acn = adjusted_current_node()
1185                 unless acn?
1186                         ins_mode t
1187                         return
1188                 if acn.namespace is NS_HTML
1189                         ins_mode t
1190                         return
1191                 if is_mathml_text_integration_point(acn)
1192                         if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
1193                                 ins_mode t
1194                                 return
1195                         if t.type is TYPE_TEXT
1196                                 ins_mode t
1197                                 return
1198                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1199                         ins_mode t
1200                         return
1201                 if is_html_integration acn
1202                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1203                                 ins_mode t
1204                                 return
1205                 if t.type is TYPE_EOF
1206                         ins_mode t
1207                         return
1208                 in_foreign_content t
1209                 return
1210
1211         # 8.2.5.1
1212         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1213         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1214         adjusted_insertion_location = (override_target = null) ->
1215                 # 1. If there was an override target specified, then let target be the
1216                 # override target.
1217                 if override_target?
1218                         target = override_target
1219                 else # Otherwise, let target be the current node.
1220                         target = open_els[0]
1221                 # 2. Determine the adjusted insertion location using the first matching
1222                 # steps from the following list:
1223                 #
1224                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1225                 # thead, or tr element Foster parenting happens when content is
1226                 # misnested in tables.
1227                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1228                         loop # once. this is here so we can ``break`` to "abort these substeps"
1229                                 # 1. Let last template be the last template element in the
1230                                 # stack of open elements, if any.
1231                                 last_template = null
1232                                 last_template_i = null
1233                                 for el, i in open_els
1234                                         if el.name is 'template' and el.namespace is NS_HTML
1235                                                 last_template = el
1236                                                 last_template_i = i
1237                                                 break
1238                                 # 2. Let last table be the last table element in the stack of
1239                                 # open elements, if any.
1240                                 last_table = null
1241                                 last_table_i
1242                                 for el, i in open_els
1243                                         if el.name is 'table' and el.namespace is NS_HTML
1244                                                 last_table = el
1245                                                 last_table_i = i
1246                                                 break
1247                                 # 3. If there is a last template and either there is no last
1248                                 # table, or there is one, but last template is lower (more
1249                                 # recently added) than last table in the stack of open
1250                                 # elements, then: let adjusted insertion location be inside
1251                                 # last template's template contents, after its last child (if
1252                                 # any), and abort these substeps.
1253                                 if last_template and (last_table is null or last_template_i < last_table_i)
1254                                         target = last_template # fixfull should be it's contents
1255                                         target_i = target.children.length
1256                                         break
1257                                 # 4. If there is no last table, then let adjusted insertion
1258                                 # location be inside the first element in the stack of open
1259                                 # elements (the html element), after its last child (if any),
1260                                 # and abort these substeps. (fragment case)
1261                                 if last_table is null
1262                                         # this is odd
1263                                         target = open_els[open_els.length - 1]
1264                                         target_i = target.children.length
1265                                         break
1266                                 # 5. If last table has a parent element, then let adjusted
1267                                 # insertion location be inside last table's parent element,
1268                                 # immediately before last table, and abort these substeps.
1269                                 if last_table.parent?
1270                                         for c, i in last_table.parent.children
1271                                                 if c is last_table
1272                                                         target = last_table.parent
1273                                                         target_i = i
1274                                                         break
1275                                         break
1276                                 # 6. Let previous element be the element immediately above last
1277                                 # table in the stack of open elements.
1278                                 #
1279                                 # huh? how could it not have a parent?
1280                                 previous_element = open_els[last_table_i + 1]
1281                                 # 7. Let adjusted insertion location be inside previous
1282                                 # element, after its last child (if any).
1283                                 target = previous_element
1284                                 target_i = target.children.length
1285                                 # Note: These steps are involved in part because it's possible
1286                                 # for elements, the table element in this case in particular,
1287                                 # to have been moved by a script around in the DOM, or indeed
1288                                 # removed from the DOM entirely, after the element was inserted
1289                                 # by the parser.
1290                                 break # don't really loop
1291                 else
1292                         # Otherwise Let adjusted insertion location be inside target, after
1293                         # its last child (if any).
1294                         target_i = target.children.length
1295
1296                 # 3. If the adjusted insertion location is inside a template element,
1297                 # let it instead be inside the template element's template contents,
1298                 # after its last child (if any).
1299                 # fixfull (template)
1300
1301                 # 4. Return the adjusted insertion location.
1302                 return [target, target_i]
1303
1304         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1305         # aka create_an_element_for_token
1306         token_to_element = (t, namespace, intended_parent) ->
1307                 # convert attributes into a hash
1308                 attrs = {}
1309                 for a in t.attrs_a
1310                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1311                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1312
1313                 # TODO 2. If the newly created element has an xmlns attribute in the
1314                 # XMLNS namespace whose value is not exactly the same as the element's
1315                 # namespace, that is a parse error. Similarly, if the newly created
1316                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1317                 # value is not the XLink Namespace, that is a parse error.
1318
1319                 # fixfull: the spec says stuff about form pointers and ownerDocument
1320
1321                 return el
1322
1323         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1324         insert_foreign_element = (token, namespace) ->
1325                 ail = adjusted_insertion_location()
1326                 ail_el = ail[0]
1327                 ail_i = ail[1]
1328                 el = token_to_element token, namespace, ail_el
1329                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1330                 el.parent = ail_el
1331                 ail_el.children.splice ail_i, 0, el
1332                 open_els.unshift el
1333                 return el
1334         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1335         insert_html_element = (token) ->
1336                 insert_foreign_element token, NS_HTML
1337
1338         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1339         # position should be [node, index_within_children]
1340         insert_comment = (t, position = null) ->
1341                 position ?= adjusted_insertion_location()
1342                 position[0].children.splice position[1], 0, t
1343
1344         # 8.2.5.2
1345         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1346         parse_generic_raw_text = (t) ->
1347                 insert_html_element t
1348                 tok_state = tok_state_rawtext
1349                 original_ins_mode = ins_mode
1350                 ins_mode = ins_mode_text
1351         parse_generic_rcdata_text = (t) ->
1352                 insert_html_element t
1353                 tok_state = tok_state_rcdata
1354                 original_ins_mode = ins_mode
1355                 ins_mode = ins_mode_text
1356
1357         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1358         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1359         generate_implied_end_tags = (except = null) ->
1360                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1361                         open_els.shift()
1362
1363         # 8.2.5.4 The rules for parsing tokens in HTML content
1364         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1365
1366         # 8.2.5.4.1 The "initial" insertion mode
1367         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1368         ins_mode_initial = (t) ->
1369                 if is_space_tok t
1370                         return
1371                 if t.type is TYPE_COMMENT
1372                         # ?fixfull
1373                         doc.children.push t
1374                         return
1375                 if t.type is TYPE_DOCTYPE
1376                         # FIXME check identifiers, set quirks, etc
1377                         # fixfull
1378                         doc.children.push t
1379                         ins_mode = ins_mode_before_html
1380                         return
1381                 # Anything else
1382                 #fixfull (iframe, quirks)
1383                 ins_mode = ins_mode_before_html
1384                 process_token t
1385                 return
1386
1387         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1388         ins_mode_before_html = (t) ->
1389                 if t.type is TYPE_DOCTYPE
1390                         parse_error()
1391                         return
1392                 if t.type is TYPE_COMMENT
1393                         doc.children.push t
1394                         return
1395                 if is_space_tok t
1396                         return
1397                 if t.type is TYPE_START_TAG and t.name is 'html'
1398                         el = token_to_element t, NS_HTML, doc
1399                         doc.children.push el
1400                         open_els.unshift(el)
1401                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1402                         ins_mode = ins_mode_before_head
1403                         return
1404                 if t.type is TYPE_END_TAG
1405                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1406                                 # fall through to "anything else"
1407                         else
1408                                 parse_error()
1409                                 return
1410                 # Anything else
1411                 html_tok = new_open_tag 'html'
1412                 el = token_to_element html_tok, NS_HTML, doc
1413                 doc.children.push el
1414                 open_els.unshift el
1415                 # ?fixfull browsing context
1416                 ins_mode = ins_mode_before_head
1417                 process_token t
1418                 return
1419
1420         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1421         ins_mode_before_head = (t) ->
1422                 if is_space_tok t
1423                         return
1424                 if t.type is TYPE_COMMENT
1425                         insert_comment t
1426                         return
1427                 if t.type is TYPE_DOCTYPE
1428                         parse_error()
1429                         return
1430                 if t.type is TYPE_START_TAG and t.name is 'html'
1431                         ins_mode_in_body t
1432                         return
1433                 if t.type is TYPE_START_TAG and t.name is 'head'
1434                         el = insert_html_element t
1435                         head_element_pointer = el
1436                         ins_mode = ins_mode_in_head
1437                         return
1438                 if t.type is TYPE_END_TAG
1439                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1440                                 # fall through to Anything else below
1441                         else
1442                                 parse_error()
1443                                 return
1444                 # Anything else
1445                 head_tok = new_open_tag 'head'
1446                 el = insert_html_element head_tok
1447                 head_element_pointer = el
1448                 ins_mode = ins_mode_in_head
1449                 process_token t
1450
1451         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1452         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1453                 open_els.shift() # spec says this will be a 'head' node
1454                 ins_mode = ins_mode_after_head
1455                 process_token t
1456         ins_mode_in_head = (t) ->
1457                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1458                         insert_character t
1459                         return
1460                 if t.type is TYPE_COMMENT
1461                         insert_comment t
1462                         return
1463                 if t.type is TYPE_DOCTYPE
1464                         parse_error()
1465                         return
1466                 if t.type is TYPE_START_TAG and t.name is 'html'
1467                         ins_mode_in_body t
1468                         return
1469                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1470                         el = insert_html_element t
1471                         open_els.shift()
1472                         t.acknowledge_self_closing()
1473                         return
1474                 if t.type is TYPE_START_TAG and t.name is 'meta'
1475                         el = insert_html_element t
1476                         open_els.shift()
1477                         t.acknowledge_self_closing()
1478                         # fixfull encoding stuff
1479                         return
1480                 if t.type is TYPE_START_TAG and t.name is 'title'
1481                         parse_generic_rcdata_text t
1482                         return
1483                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1484                         parse_generic_raw_text t
1485                         return
1486                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1487                         insert_html_element t
1488                         ins_mode = ins_mode_in_head_noscript
1489                         return
1490                 if t.type is TYPE_START_TAG and t.name is 'script'
1491                         ail = adjusted_insertion_location()
1492                         el = token_to_element t, NS_HTML, ail
1493                         el.flag 'parser-inserted', true
1494                         # fixfull frament case
1495                         ail[0].children.splice ail[1], 0, el
1496                         open_els.unshift el
1497                         tok_state = tok_state_script_data
1498                         original_ins_mode = ins_mode # make sure orig... is defined
1499                         ins_mode = ins_mode_text
1500                         return
1501                 if t.type is TYPE_END_TAG and t.name is 'head'
1502                         open_els.shift() # will be a head element... spec says so
1503                         ins_mode = ins_mode_after_head
1504                         return
1505                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1506                         ins_mode_in_head_else t
1507                         return
1508                 if t.type is TYPE_START_TAG and t.name is 'template'
1509                         insert_html_element t
1510                         afe_push_marker()
1511                         flag_frameset_ok = false
1512                         ins_mode = ins_mode_in_template
1513                         template_ins_modes.unshift ins_mode_in_template
1514                         return
1515                 if t.type is TYPE_END_TAG and t.name is 'template'
1516                         if template_tag_is_open()
1517                                 generate_implied_end_tags
1518                                 if open_els[0].name isnt 'template'
1519                                         parse_error()
1520                                 loop
1521                                         el = open_els.shift()
1522                                         if el.name is 'template' and el.namespace is NS_HTML
1523                                                 break
1524                                 clear_afe_to_marker()
1525                                 template_ins_modes.shift()
1526                                 reset_ins_mode()
1527                         else
1528                                 parse_error()
1529                         return
1530                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1531                         parse_error()
1532                         return
1533                 ins_mode_in_head_else t
1534
1535         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1536         ins_mode_in_head_noscript_else = (t) ->
1537                 parse_error()
1538                 open_els.shift()
1539                 ins_mode = ins_mode_in_head
1540                 process_token t
1541         ins_mode_in_head_noscript = (t) ->
1542                 if t.type is TYPE_DOCTYPE
1543                         parse_error()
1544                         return
1545                 if t.type is TYPE_START_TAG and t.name is 'html'
1546                         ins_mode_in_body t
1547                         return
1548                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1549                         open_els.shift()
1550                         ins_mode = ins_mode_in_head
1551                         return
1552                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1553                         ins_mode_in_head t
1554                         return
1555                 if t.type is TYPE_END_TAG and t.name is 'br'
1556                         ins_mode_in_head_noscript_else t
1557                         return
1558                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1559                         parse_error()
1560                         return
1561                 # Anything else
1562                 ins_mode_in_head_noscript_else t
1563                 return
1564
1565
1566
1567         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1568         ins_mode_after_head_else = (t) ->
1569                 body_tok = new_open_tag 'body'
1570                 insert_html_element body_tok
1571                 ins_mode = ins_mode_in_body
1572                 process_token t
1573                 return
1574         ins_mode_after_head = (t) ->
1575                 if is_space_tok t
1576                         insert_character t
1577                         return
1578                 if t.type is TYPE_COMMENT
1579                         insert_comment t
1580                         return
1581                 if t.type is TYPE_DOCTYPE
1582                         parse_error()
1583                         return
1584                 if t.type is TYPE_START_TAG and t.name is 'html'
1585                         ins_mode_in_body t
1586                         return
1587                 if t.type is TYPE_START_TAG and t.name is 'body'
1588                         insert_html_element t
1589                         flag_frameset_ok = false
1590                         ins_mode = ins_mode_in_body
1591                         return
1592                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1593                         insert_html_element t
1594                         ins_mode = ins_mode_in_frameset
1595                         return
1596                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1597                         parse_error()
1598                         open_els.unshift head_element_pointer
1599                         ins_mode_in_head t
1600                         for el, i of open_els
1601                                 if el is head_element_pointer
1602                                         open_els.splice i, 1
1603                                         return
1604                         console.log "warning: 23904 couldn't find head element in open_els"
1605                         return
1606                 if t.type is TYPE_END_TAG and t.name is 'template'
1607                         ins_mode_in_head t
1608                         return
1609                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1610                         ins_mode_after_head_else t
1611                         return
1612                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1613                         parse_error()
1614                         return
1615                 # Anything else
1616                 ins_mode_after_head_else t
1617
1618         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1619         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1620                 for el, i in open_els
1621                         if el.name is name and el.namespace is NS_HTML
1622                                 generate_implied_end_tags name # arg is exception
1623                                 parse_error() unless i is 0
1624                                 while i >= 0
1625                                         open_els.shift()
1626                                         i -= 1
1627                                 return
1628                         if special_elements[el.name] is el.namespace
1629                                 parse_error()
1630                                 return
1631                 return
1632         ins_mode_in_body = (t) ->
1633                 if t.type is TYPE_TEXT and t.text is "\u0000"
1634                         parse_error()
1635                         return
1636                 if is_space_tok t
1637                         reconstruct_afe()
1638                         insert_character t
1639                         return
1640                 if t.type is TYPE_TEXT
1641                         reconstruct_afe()
1642                         insert_character t
1643                         flag_frameset_ok = false
1644                         return
1645                 if t.type is TYPE_COMMENT
1646                         insert_comment t
1647                         return
1648                 if t.type is TYPE_DOCTYPE
1649                         parse_error()
1650                         return
1651                 if t.type is TYPE_START_TAG and t.name is 'html'
1652                         parse_error()
1653                         return if template_tag_is_open()
1654                         root_attrs = open_els[open_els.length - 1].attrs
1655                         for a of t.attrs_a
1656                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1657                         return
1658
1659                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1660                         ins_mode_in_head t
1661                         return
1662                 if t.type is TYPE_START_TAG and t.name is 'body'
1663                         parse_error()
1664                         return if open_els.length < 2
1665                         second = open_els[open_els.length - 2]
1666                         return unless second.ns is NS_HTML
1667                         return unless second.name is 'body'
1668                         return if template_tag_is_open()
1669                         frameset_ok_flag = false
1670                         for a of t.attrs_a
1671                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1672                         return
1673                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1674                         parse_error()
1675                         return if open_els.length < 2
1676                         second_i = open_els.length - 2
1677                         second = open_els[second_i]
1678                         return unless second.ns is NS_HTML
1679                         return unless second.name is 'body'
1680                         flag_frameset_ok = false
1681                         if second.parent?
1682                                 for el, i in second.parent.children
1683                                         if el is second
1684                                                 second.parent.children.splice i, 1
1685                                                 break
1686                         open_els.splice second_i, 1
1687                         # pop everything except the "root html element"
1688                         while open_els.length > 1
1689                                 open_els.shift()
1690                         insert_html_element t
1691                         ins_mode = ins_mode_in_frameset
1692                         return
1693                 if t.type is TYPE_EOF
1694                         ok_tags = {
1695                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1696                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1697                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1698                         }
1699                         for el in open_els
1700                                 unless ok_tags[t.name] is el.namespace
1701                                         parse_error()
1702                                         break
1703                         if template_ins_modes.length > 0
1704                                 ins_mode_in_template t
1705                         else
1706                                 stop_parsing()
1707                         return
1708                 if t.type is TYPE_END_TAG and t.name is 'body'
1709                         unless is_in_scope 'body', NS_HTML
1710                                 parse_error()
1711                                 return
1712                         ok_tags = {
1713                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1714                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1715                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1716                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1717                                 html:NS_HTML
1718                         }
1719                         for el in open_els
1720                                 unless ok_tags[t.name] is el.namespace
1721                                         parse_error()
1722                                         break
1723                         ins_mode = ins_mode_after_body
1724                         return
1725                 if t.type is TYPE_END_TAG and t.name is 'html'
1726                         unless is_in_scope 'body', NS_HTML
1727                                 parse_error()
1728                                 return
1729                         ok_tags = {
1730                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1731                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1732                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1733                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1734                                 html:NS_HTML
1735                         }
1736                         for el in open_els
1737                                 unless ok_tags[t.name] is el.namespace
1738                                         parse_error()
1739                                         break
1740                         ins_mode = ins_mode_after_body
1741                         process_token t
1742                         return
1743                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1744                         close_p_if_in_button_scope()
1745                         insert_html_element t
1746                         return
1747                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1748                         close_p_if_in_button_scope()
1749                         if h_tags[open_els[0].name] is open_els[0].namespace
1750                                 parse_error()
1751                                 open_els.shift()
1752                         insert_html_element t
1753                         return
1754                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1755                         close_p_if_in_button_scope()
1756                         insert_html_element t
1757                         # spec: If the next token is a "LF" (U+000A) character token, then
1758                         # ignore that token and move on to the next one. (Newlines at the
1759                         # start of pre blocks are ignored as an authoring convenience.)
1760                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1761                                 cur += 1
1762                         flag_frameset_ok = false
1763                         return
1764                 if t.type is TYPE_START_TAG and t.name is 'form'
1765                         unless form_element_pointer is null or template_tag_is_open()
1766                                 parse_error()
1767                                 return
1768                         close_p_if_in_button_scope()
1769                         el = insert_html_element t
1770                         unless template_tag_is_open()
1771                                 form_element_pointer = el
1772                         return
1773                 if t.type is TYPE_START_TAG and t.name is 'li'
1774                         flag_frameset_ok = false
1775                         for node in open_els
1776                                 if node.name is 'li' and node.namespace is NS_HTML
1777                                         generate_implied_end_tags 'li' # arg is exception
1778                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1779                                                 parse_error()
1780                                         loop
1781                                                 el = open_els.shift()
1782                                                 if el.name is 'li' and el.namespace is NS_HTML
1783                                                         break
1784                                         break
1785                                 if el_is_special_not_adp node
1786                                                 break
1787                         close_p_if_in_button_scope()
1788                         insert_html_element t
1789                         return
1790                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1791                         flag_frameset_ok = false
1792                         for node in open_els
1793                                 if node.name is 'dd' and node.namespace is NS_HTML
1794                                         generate_implied_end_tags 'dd' # arg is exception
1795                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1796                                                 parse_error()
1797                                         loop
1798                                                 el = open_els.shift()
1799                                                 if el.name is 'dd' and el.namespace is NS_HTML
1800                                                         break
1801                                         break
1802                                 if node.name is 'dt' and node.namespace is NS_HTML
1803                                         generate_implied_end_tags 'dt' # arg is exception
1804                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1805                                                 parse_error()
1806                                         loop
1807                                                 el = open_els.shift()
1808                                                 if el.name is 'dt' and el.namespace is NS_HTML
1809                                                         break
1810                                         break
1811                                 if el_is_special_not_adp node
1812                                         break
1813                         close_p_if_in_button_scope()
1814                         insert_html_element t
1815                         return
1816                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1817                         close_p_if_in_button_scope()
1818                         insert_html_element t
1819                         tok_state = tok_state_plaintext
1820                         return
1821                 if t.type is TYPE_START_TAG and t.name is 'button'
1822                         if is_in_scope 'button', NS_HTML
1823                                 parse_error()
1824                                 generate_implied_end_tags()
1825                                 loop
1826                                         el = open_els.shift()
1827                                         if el.name is 'button' and el.namespace is NS_HTML
1828                                                 break
1829                         reconstruct_afe()
1830                         insert_html_element t
1831                         flag_frameset_ok = false
1832                         return
1833                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1834                         unless is_in_scope t.name, NS_HTML
1835                                 parse_error()
1836                                 return
1837                         generate_implied_end_tags()
1838                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1839                                 parse_error()
1840                         loop
1841                                 el = open_els.shift()
1842                                 if el.name is t.name and el.namespace is NS_HTML
1843                                         return
1844                         return
1845                 if t.type is TYPE_END_TAG and t.name is 'form'
1846                         unless template_tag_is_open()
1847                                 node = form_element_pointer
1848                                 form_element_pointer = null
1849                                 if node is null or not el_is_in_scope node
1850                                         parse_error()
1851                                         return
1852                                 generate_implied_end_tags()
1853                                 if open_els[0] isnt node
1854                                         parse_error()
1855                                 for el, i in open_els
1856                                         if el is node
1857                                                 open_els.splice i, 1
1858                                                 break
1859                         else
1860                                 unless is_in_scope 'form', NS_HTML
1861                                         parse_error()
1862                                         return
1863                                 generate_implied_end_tags()
1864                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1865                                         parse_error()
1866                                 loop
1867                                         el = open_els.shift()
1868                                         if el.name is 'form' and el.namespace is NS_HTML
1869                                                 break
1870                         return
1871                 if t.type is TYPE_END_TAG and t.name is 'p'
1872                         unless is_in_button_scope 'p', NS_HTML
1873                                 parse_error()
1874                                 insert_html_element new_open_tag 'p'
1875                         close_p_element()
1876                         return
1877                 if t.type is TYPE_END_TAG and t.name is 'li'
1878                         unless is_in_li_scope 'li', NS_HTML
1879                                 parse_error()
1880                                 return
1881                         generate_implied_end_tags 'li' # arg is exception
1882                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1883                                 parse_error()
1884                         loop
1885                                 el = open_els.shift()
1886                                 if el.name is 'li' and el.namespace is NS_HTML
1887                                         break
1888                         return
1889                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1890                         unless is_in_scope t.name, NS_HTML
1891                                 parse_error()
1892                                 return
1893                         generate_implied_end_tags t.name # arg is exception
1894                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1895                                 parse_error()
1896                         loop
1897                                 el = open_els.shift()
1898                                 if el.name is t.name and el.namespace is NS_HTML
1899                                         break
1900                         return
1901                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1902                         h_in_scope = false
1903                         for el in open_els
1904                                 if h_tags[el.name] is el.namespace
1905                                         h_in_scope = true
1906                                         break
1907                                 if standard_scopers[el.name] is el.namespace
1908                                         break
1909                         unless h_in_scope
1910                                 parse_error()
1911                                 return
1912                         generate_implied_end_tags()
1913                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1914                                 parse_error()
1915                         loop
1916                                 el = open_els.shift()
1917                                 if h_tags[el.name] is el.namespace
1918                                         break
1919                         return
1920                 # deep breath!
1921                 if t.type is TYPE_START_TAG and t.name is 'a'
1922                         # If the list of active formatting elements contains an a element
1923                         # between the end of the list and the last marker on the list (or
1924                         # the start of the list if there is no marker on the list), then
1925                         # this is a parse error; run the adoption agency algorithm for the
1926                         # tag name "a", then remove that element from the list of active
1927                         # formatting elements and the stack of open elements if the
1928                         # adoption agency algorithm didn't already remove it (it might not
1929                         # have if the element is not in table scope).
1930                         found = false
1931                         for el in afe
1932                                 if el.type is TYPE_AFE_MARKER
1933                                         break
1934                                 if el.name is 'a' and el.namespace is NS_HTML
1935                                         found = el
1936                         if found?
1937                                 parse_error()
1938                                 adoption_agency 'a'
1939                                 for el, i in afe
1940                                         if el is found
1941                                                 afe.splice i, 1
1942                                 for el, i in open_els
1943                                         if el is found
1944                                                 open_els.splice i, 1
1945                         reconstruct_afe()
1946                         el = insert_html_element t
1947                         afe_push el
1948                         return
1949                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1950                         reconstruct_afe()
1951                         el = insert_html_element t
1952                         afe_push el
1953                         return
1954                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1955                         reconstruct_afe()
1956                         el = insert_html_element t
1957                         afe_push el
1958                         return
1959                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1960                         adoption_agency t.name
1961                         return
1962                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1963                         reconstruct_afe()
1964                         insert_html_element t
1965                         afe_push_marker()
1966                         flag_frameset_ok = false
1967                         return
1968                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1969                         unless is_in_scope t.name, NS_HTML
1970                                 parse_error()
1971                                 return
1972                         generate_implied_end_tags()
1973                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1974                                 parse_error()
1975                         loop
1976                                 el = open_els.shift()
1977                                 if el.name is t.name and el.namespace is NS_HTML
1978                                         break
1979                         clear_afe_to_marker()
1980                         return
1981                 if t.type is TYPE_START_TAG and t.name is 'table'
1982                         close_p_if_in_button_scope() # fixfull quirksmode thing
1983                         insert_html_element t
1984                         flag_frameset_ok = false
1985                         ins_mode = ins_mode_in_table
1986                         return
1987                 if t.type is TYPE_END_TAG and t.name is 'br'
1988                         parse_error()
1989                         t.type is TYPE_START_TAG
1990                         # fall through
1991                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
1992                         reconstruct_afe()
1993                         insert_html_element t
1994                         open_els.shift()
1995                         t.acknowledge_self_closing()
1996                         flag_frameset_ok = false
1997                         return
1998                 if t.type is TYPE_START_TAG and t.name is 'input'
1999                         reconstruct_afe()
2000                         insert_html_element t
2001                         open_els.shift()
2002                         t.acknowledge_self_closing()
2003                         unless is_input_hidden_tok t
2004                                 flag_frameset_ok = false
2005                         return
2006                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2007                         insert_html_element t
2008                         open_els.shift()
2009                         t.acknowledge_self_closing()
2010                         return
2011                 if t.type is TYPE_START_TAG and t.name is 'hr'
2012                         close_p_if_in_button_scope()
2013                         insert_html_element t
2014                         open_els.shift()
2015                         t.acknowledge_self_closing()
2016                         flag_frameset_ok = false
2017                         return
2018                 if t.type is TYPE_START_TAG and t.name is 'image'
2019                         parse_error()
2020                         t.name = 'img'
2021                         process_token t
2022                         return
2023                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2024                         parse_error()
2025                         if template_tag_is_open() is false and form_element_pointer isnt null
2026                                 return
2027                         t.acknowledge_self_closing()
2028                         flag_frameset_ok = false
2029                         close_p_if_in_button_scope()
2030                         el = insert_html_element new_open_tag 'form'
2031                         unless template_tag_is_open()
2032                                 form_element_pointer = el
2033                         for a in t.attrs_a
2034                                 if a[0] is 'action'
2035                                         el.attrs['action'] = a[1]
2036                                         break
2037                         insert_html_element new_open_tag 'hr'
2038                         open_els.shift()
2039                         reconstruct_afe()
2040                         insert_html_element new_open_tag 'label'
2041                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2042                         input_el = new_open_tag 'input'
2043                         prompt = null
2044                         for a in t.attrs_a
2045                                 if a[0] is 'prompt'
2046                                         prompt = a[1]
2047                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2048                                         input_el.attrs_a.push [a[0], a[1]]
2049                         input_el.attrs_a.push ['name', 'isindex']
2050                         # fixfull this next bit is in english... internationalize?
2051                         prompt ?= "This is a searchable index. Enter search keywords: "
2052                         insert_character new_character_token prompt # fixfull split
2053                         # TODO submit typo "balue" in spec
2054                         insert_html_element input_el
2055                         open_els.shift()
2056                         # insert_character '' # you can put chars here if promt attr missing
2057                         open_els.shift()
2058                         insert_html_element new_open_tag 'hr'
2059                         open_els.shift()
2060                         open_els.shift()
2061                         unless template_tag_is_open()
2062                                 form_element_pointer = null
2063                         return
2064                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2065                         insert_html_element t
2066                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2067                                 cur += 1
2068                         tok_state = tok_state_rcdata
2069                         original_ins_mode = ins_mode
2070                         flag_frameset_ok = false
2071                         ins_mode = ins_mode_text
2072                         return
2073                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2074                         close_p_if_in_button_scope()
2075                         reconstruct_afe()
2076                         flag_frameset_ok = false
2077                         parse_generic_raw_text t
2078                         return
2079                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2080                         flag_frameset_ok = false
2081                         parse_generic_raw_text t
2082                         return
2083                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2084                         parse_generic_raw_text t
2085                         return
2086                 if t.type is TYPE_START_TAG and t.name is 'select'
2087                         reconstruct_afe()
2088                         insert_html_element t
2089                         flag_frameset_ok = false
2090                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2091                                 ins_mode = ins_mode_in_select_in_table
2092                         else
2093                                 ins_mode = ins_mode_in_select
2094                         return
2095                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2096                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2097                                 open_els.shift()
2098                         reconstruct_afe()
2099                         insert_html_element t
2100                         return
2101                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2102                         if is_in_scope 'ruby', NS_HTML
2103                                 generate_implied_end_tags()
2104                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2105                                         parse_error()
2106                         insert_html_element t
2107                         return
2108                 if t.type is TYPE_START_TAG and t.name is 'rt'
2109                         if is_in_scope 'ruby', NS_HTML
2110                                 generate_implied_end_tags 'rtc' # arg is exception
2111                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2112                                         parse_error()
2113                         insert_html_element t
2114                         return
2115                 if t.type is TYPE_START_TAG and t.name is 'math'
2116                         reconstruct_afe()
2117                         adjust_mathml_attributes t
2118                         adjust_foreign_attributes t
2119                         insert_foreign_element t, NS_MATHML
2120                         if t.flag 'self-closing'
2121                                 open_els.shift()
2122                                 t.acknowledge_self_closing()
2123                         return
2124                 if t.type is TYPE_START_TAG and t.name is 'svg'
2125                         reconstruct_afe()
2126                         adjust_svg_attributes t
2127                         adjust_foreign_attributes t
2128                         insert_foreign_element t, NS_SVG
2129                         if t.flag 'self-closing'
2130                                 open_els.shift()
2131                                 t.acknowledge_self_closing()
2132                         return
2133                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2134                         parse_error()
2135                         return
2136                 if t.type is TYPE_START_TAG # any other start tag
2137                         reconstruct_afe()
2138                         insert_html_element t
2139                         return
2140                 if t.type is TYPE_END_TAG # any other end tag
2141                         in_body_any_other_end_tag t.name
2142                         return
2143                 return
2144
2145         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2146         ins_mode_text = (t) ->
2147                 if t.type is TYPE_TEXT
2148                         insert_character t
2149                         return
2150                 if t.type is TYPE_EOF
2151                         parse_error()
2152                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2153                                 open_els[0].flag 'already started', true
2154                         open_els.shift()
2155                         ins_mode = original_ins_mode
2156                         process_token t
2157                         return
2158                 if t.type is TYPE_END_TAG and t.name is 'script'
2159                         open_els.shift()
2160                         ins_mode = original_ins_mode
2161                         # fixfull the spec seems to assume that I'm going to run the script
2162                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2163                         return
2164                 if t.type is TYPE_END_TAG
2165                         open_els.shift()
2166                         ins_mode = original_ins_mode
2167                         return
2168                 console.log 'warning: end of ins_mode_text reached'
2169
2170         # the functions below implement the tokenizer stats described here:
2171         # http://www.w3.org/TR/html5/syntax.html#tokenization
2172
2173         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2174         ins_mode_in_table_else = (t) ->
2175                 parse_error()
2176                 flag_foster_parenting = true
2177                 ins_mode_in_body t
2178                 flag_foster_parenting = false
2179                 return
2180         ins_mode_in_table = (t) ->
2181                 switch t.type
2182                         when TYPE_TEXT
2183                                 if t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr'
2184                                         original_ins_mode = ins_mode
2185                                         ins_mode = ins_mode_in_table_text
2186                                         process_token t
2187                                 else
2188                                         ins_mode_in_table_else t
2189                         when TYPE_COMMENT
2190                                 insert_comment t
2191                         when TYPE_DOCTYPE
2192                                 parse_error()
2193                         when TYPE_START_TAG
2194                                 switch t.name
2195                                         when 'caption'
2196                                                 clear_stack_to_table_context()
2197                                                 afe_push_marker()
2198                                                 insert_html_element t
2199                                                 ins_mode = ins_mode_in_caption
2200                                         when 'colgroup'
2201                                                 clear_stack_to_table_context()
2202                                                 insert_html_element t
2203                                                 ins_mode = ins_mode_in_column_group
2204                                         when 'col'
2205                                                 clear_stack_to_table_context()
2206                                                 insert_html_element new_open_tag 'colgroup'
2207                                                 ins_mode = ins_mode_in_column_group
2208                                                 process_token t
2209                                         when 'tbody', 'tfoot', 'thead'
2210                                                 clear_stack_to_table_context()
2211                                                 insert_html_element t
2212                                                 ins_mode = ins_mode_in_table_body
2213                                         when 'td', 'th', 'tr'
2214                                                 clear_stack_to_table_context()
2215                                                 insert_html_element new_open_tag 'tbody'
2216                                                 ins_mode = ins_mode_in_table_body
2217                                                 process_token t
2218                                         when 'table'
2219                                                 parse_error()
2220                                                 if is_in_table_scope 'table', NS_HTML
2221                                                         loop
2222                                                                 el = open_els.shift()
2223                                                                 if el.name is 'table' and el.namespace is NS_HTML
2224                                                                         break
2225                                                         reset_ins_mode()
2226                                                         process_token t
2227                                         when 'style', 'script', 'template'
2228                                                 ins_mode_in_head t
2229                                         when 'input'
2230                                                 unless is_input_hidden_tok t
2231                                                         ins_mode_in_table_else t
2232                                                 else
2233                                                         parse_error()
2234                                                         el = insert_html_element t
2235                                                         open_els.shift()
2236                                                         t.acknowledge_self_closing()
2237                                         when 'form'
2238                                                 parse_error()
2239                                                 if form_element_pointer?
2240                                                         return
2241                                                 if template_tag_is_open()
2242                                                         return
2243                                                 form_element_pointer = insert_html_element t
2244                                                 open_els.shift()
2245                                         else
2246                                                 ins_mode_in_table_else t
2247                         when TYPE_END_TAG
2248                                 switch t.name
2249                                         when 'table'
2250                                                 if is_in_table_scope 'table', NS_HTML
2251                                                         loop
2252                                                                 el = open_els.shift()
2253                                                                 if el.name is 'table' and el.namespace is NS_HTML
2254                                                                         break
2255                                                         reset_ins_mode()
2256                                                 else
2257                                                         parse_error()
2258                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2259                                                 parse_error()
2260                                         when 'template'
2261                                                 ins_mode_in_head t
2262                                         else
2263                                                 ins_mode_in_table_else t
2264                         when TYPE_EOF
2265                                 ins_mode_in_body t
2266                         else
2267                                 ins_mode_in_table_else t
2268
2269
2270         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2271         ins_mode_in_table_text = (t) ->
2272                 if t.type is TYPE_TEXT and t.text is "\u0000"
2273                         # huh? I thought the tokenizer didn't emit these
2274                         parse_error()
2275                         return
2276                 if t.type is TYPE_TEXT
2277                         pending_table_character_tokens.push t
2278                         return
2279                 # Anything else
2280                 all_space = true
2281                 for old in pending_table_character_tokens
2282                         unless is_space_tok old
2283                                 all_space = false
2284                                 break
2285                 if all_space
2286                         for old in pending_table_character_tokens
2287                                 insert_character old
2288                 else
2289                         for old in pending_table_character_tokens
2290                                 ins_mode_table_else old
2291                 pending_table_character_tokens = [] # FIXME test (spec doesn't say this)
2292                 ins_mode = original_ins_mode
2293                 process_token t
2294
2295         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2296         ins_mode_in_caption = (t) ->
2297                 if t.type is TYPE_END_TAG and t.name is 'caption'
2298                         if is_in_table_scope 'caption', NS_HTML
2299                                 generate_implied_end_tags()
2300                                 if open_els[0].name isnt 'caption'
2301                                         parse_error()
2302                                 loop
2303                                         el = open_els.shift()
2304                                         if el.name is 'caption' and el.namespace is NS_HTML
2305                                                 break
2306                                 clear_afe_to_marker()
2307                                 ins_mode = ins_mode_in_table
2308                         else
2309                                 parse_error()
2310                                 # fragment case
2311                         return
2312                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2313                         parse_error()
2314                         if is_in_table_scope 'caption', NS_HTML
2315                                 loop
2316                                         el = open_els.shift()
2317                                         if el.name is 'caption' and el.namespace is NS_HTML
2318                                                 break
2319                                 clear_afe_to_marker()
2320                                 ins_mode = ins_mode_in_table
2321                                 process_token t
2322                         # else fragment case
2323                         return
2324                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2325                         parse_error()
2326                         return
2327                 # Anything else
2328                 ins_mode_in_body t
2329
2330         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2331         ins_mode_in_column_group = (t) ->
2332                 if is_space_tok t
2333                         insert_character t
2334                         return
2335                 if t.type is TYPE_COMMENT
2336                         insert_comment t
2337                         return
2338                 if t.type is TYPE_DOCTYPE
2339                         parse_error()
2340                         return
2341                 if t.type is TYPE_START_TAG and t.name is 'html'
2342                         ins_mode_in_body t
2343                         return
2344                 if t.type is TYPE_START_TAG and t.name is 'col'
2345                         el = insert_html_element t
2346                         open_els.shift()
2347                         t.acknowledge_self_closing()
2348                         return
2349                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2350                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2351                                 open_els.shift()
2352                                 ins_mode = ins_mode_in_table
2353                         else
2354                                 parse_error()
2355                         return
2356                 if t.type is TYPE_END_TAG and t.name is 'col'
2357                         parse_error()
2358                         return
2359                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2360                         ins_mode_in_head t
2361                         return
2362                 if t.type is TYPE_EOF
2363                         ins_mode_in_body t
2364                         return
2365                 # Anything else
2366                 if open_els[0].name isnt 'colgroup'
2367                         parse_error()
2368                         return
2369                 open_els.shift()
2370                 ins_mode = ins_mode_in_table
2371                 process_token t
2372                 return
2373
2374         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2375         ins_mode_in_table_body = (t) ->
2376                 if t.type is TYPE_START_TAG and t.name is 'tr'
2377                         clear_stack_to_table_body_context()
2378                         insert_html_element t
2379                         ins_mode = ins_mode_in_row
2380                         return
2381                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2382                         parse_error()
2383                         clear_stack_to_table_body_context()
2384                         insert_html_element new_open_tag 'tr'
2385                         ins_mode = ins_mode_in_row
2386                         process_token t
2387                         return
2388                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2389                         unless is_in_table_scope t.name, NS_HTML
2390                                 parse_error()
2391                                 return
2392                         clear_stack_to_table_body_context()
2393                         open_els.shift()
2394                         ins_mode = ins_mode_in_table
2395                         return
2396                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2397                         has = false
2398                         for el in open_els
2399                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2400                                         has = true
2401                                         break
2402                                 if table_scopers[el.name] is el.namespace
2403                                         break
2404                         if !has
2405                                 parse_error()
2406                                 return
2407                         clear_stack_to_table_body_context()
2408                         open_els.shift()
2409                         ins_mode = ins_mode_in_table
2410                         process_token t
2411                         return
2412                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2413                         parse_error()
2414                         return
2415                 # Anything else
2416                 ins_mode_in_table t
2417
2418         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2419         ins_mode_in_row = (t) ->
2420                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2421                         clear_stack_to_table_row_context()
2422                         insert_html_element t
2423                         ins_mode = ins_mode_in_cell
2424                         afe_push_marker()
2425                         return
2426                 if t.type is TYPE_END_TAG and t.name is 'tr'
2427                         if is_in_table_scope 'tr', NS_HTML
2428                                 clear_stack_to_table_row_context()
2429                                 open_els.shift()
2430                                 ins_mode = ins_mode_in_table_body
2431                         else
2432                                 parse_error()
2433                         return
2434                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2435                         if is_in_table_scope 'tr', NS_HTML
2436                                 clear_stack_to_table_row_context()
2437                                 open_els.shift()
2438                                 ins_mode = ins_mode_in_table_body
2439                                 process_token t
2440                         else
2441                                 parse_error()
2442                         return
2443                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2444                         if is_in_table_scope t.name, NS_HTML
2445                                 if is_in_table_scope 'tr', NS_HTML
2446                                         clear_stack_to_table_row_context()
2447                                         open_els.shift()
2448                                         ins_mode = ins_mode_in_table_body
2449                                         process_token t
2450                         else
2451                                 parse_error()
2452                         return
2453                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2454                         parse_error()
2455                         return
2456                 # Anything else
2457                 ins_mode_in_table t
2458
2459         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2460         close_the_cell = ->
2461                 generate_implied_end_tags()
2462                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2463                         parse_error()
2464                 loop
2465                         el = open_els.shift()
2466                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2467                                 break
2468                 clear_afe_to_marker()
2469                 ins_mode = ins_mode_in_row
2470
2471         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2472         ins_mode_in_cell = (t) ->
2473                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2474                         if is_in_table_scope t.name, NS_HTML
2475                                 generate_implied_end_tags()
2476                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2477                                         parse_error()
2478                                 loop
2479                                         el = open_els.shift()
2480                                         if el.name is t.name and el.namespace is NS_HTML
2481                                                 break
2482                                 clear_afe_to_marker()
2483                                 ins_mode = ins_mode_in_row
2484                         else
2485                                 parse_error()
2486                         return
2487                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2488                         has = false
2489                         for el in open_els
2490                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2491                                         has = true
2492                                         break
2493                                 if table_scopers[el.name] is el.namespace
2494                                         break
2495                         if !has
2496                                 parse_error()
2497                                 return
2498                         close_the_cell()
2499                         process_token t
2500                         return
2501                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2502                         parse_error()
2503                         return
2504                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2505                         if is_in_table_scope t.name, NS_HTML
2506                                 close_the_cell()
2507                                 process_token t
2508                         else
2509                                 parse_error()
2510                         return
2511                 # Anything Else
2512                 ins_mode_in_body t
2513
2514         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2515         ins_mode_in_select = (t) ->
2516                 if t.type is TYPE_TEXT and t.text is "\u0000"
2517                         parse_error()
2518                         return
2519                 if t.type is TYPE_TEXT
2520                         insert_character t
2521                         return
2522                 if t.type is TYPE_COMMENT
2523                         insert_comment t
2524                         return
2525                 if t.type is TYPE_DOCTYPE
2526                         parse_error()
2527                         return
2528                 if t.type is TYPE_START_TAG and t.name is 'html'
2529                         ins_mode_in_body t
2530                         return
2531                 if t.type is TYPE_START_TAG and t.name is 'option'
2532                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2533                                 open_els.shift()
2534                         insert_html_element t
2535                         return
2536                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2537                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2538                                 open_els.shift()
2539                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2540                                 open_els.shift()
2541                         insert_html_element t
2542                         return
2543                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2544                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2545                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2546                                         open_els.shift()
2547                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2548                                 open_els.shift()
2549                         else
2550                                 parse_error()
2551                         return
2552                 if t.type is TYPE_END_TAG and t.name is 'option'
2553                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2554                                 open_els.shift()
2555                         else
2556                                 parse_error()
2557                         return
2558                 if t.type is TYPE_END_TAG and t.name is 'select'
2559                         if is_in_select_scope 'select', NS_HTML
2560                                 loop
2561                                         el = open_els.shift()
2562                                         if el.name is 'select' and el.namespace is NS_HTML
2563                                                 break
2564                                 reset_ins_mode()
2565                         else
2566                                 parse_error()
2567                         return
2568                 if t.type is TYPE_START_TAG and t.name is 'select'
2569                         parse_error()
2570                         loop
2571                                 el = open_els.shift()
2572                                 if el.name is 'select' and el.namespace is NS_HTML
2573                                         break
2574                         reset_ins_mode()
2575                         # spec says that this is the same as </select> but it doesn't say
2576                         # to check scope first
2577                         return
2578                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2579                         parse_error()
2580                         if is_in_select_scope 'select', NS_HTML
2581                                 return
2582                         loop
2583                                 el = open_els.shift()
2584                                 if el.name is 'select' and el.namespace is NS_HTML
2585                                         break
2586                         reset_ins_mode()
2587                         process_token t
2588                         return
2589                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2590                         ins_mode_in_head t
2591                         return
2592                 if t.type is TYPE_EOF
2593                         ins_mode_in_body t
2594                         return
2595                 # Anything else
2596                 parse_error()
2597                 return
2598
2599         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2600         ins_mode_in_select_in_table = (t) ->
2601                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2602                         parse_error()
2603                         loop
2604                                 el = open_els.shift()
2605                                 if el.name is 'select' and el.namespace is NS_HTML
2606                                         break
2607                         reset_ins_mode()
2608                         process_token t
2609                         return
2610                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2611                         parse_error()
2612                         unless is_in_table_scope t.name, NS_HTML
2613                                 return
2614                         loop
2615                                 el = open_els.shift()
2616                                 if el.name is 'select' and el.namespace is NS_HTML
2617                                         break
2618                         reset_ins_mode()
2619                         process_token t
2620                         return
2621                 # Anything else
2622                 ins_mode_in_select t
2623                 return
2624
2625         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2626         ins_mode_in_template = (t) ->
2627                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2628                         ins_mode_in_body t
2629                         return
2630                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2631                         ins_mode_in_head t
2632                         return
2633                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2634                         template_ins_modes.shift()
2635                         template_ins_modes.unshift ins_mode_in_table
2636                         ins_mode = ins_mode_in_table
2637                         process_token t
2638                         return
2639                 if t.type is TYPE_START_TAG and t.name is 'col'
2640                         template_ins_modes.shift()
2641                         template_ins_modes.unshift ins_mode_in_column_group
2642                         ins_mode = ins_mode_in_column_group
2643                         process_token t
2644                         return
2645                 if t.type is TYPE_START_TAG and t.name is 'tr'
2646                         template_ins_modes.shift()
2647                         template_ins_modes.unshift ins_mode_in_table_body
2648                         ins_mode = ins_mode_in_table_body
2649                         process_token t
2650                         return
2651                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2652                         template_ins_modes.shift()
2653                         template_ins_modes.unshift ins_mode_in_row
2654                         ins_mode = ins_mode_in_row
2655                         process_token t
2656                         return
2657                 if t.type is TYPE_START_TAG
2658                         template_ins_modes.shift()
2659                         template_ins_modes.unshift ins_mode_in_body
2660                         ins_mode = ins_mode_in_body
2661                         process_token t
2662                         return
2663                 if t.type is TYPE_END_TAG
2664                         parse_error()
2665                         return
2666                 if t.type is TYPE_EOF
2667                         unless template_tag_is_open()
2668                                 stop_parsing()
2669                                 return
2670                         parse_error()
2671                         loop
2672                                 el = open_els.shift()
2673                                 if el.name is 'template' and el.namespace is NS_HTML
2674                                         break
2675                         clear_afe_to_marker()
2676                         template_ins_modes.shift()
2677                         reset_ins_mode()
2678                         process_token t
2679
2680         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2681         ins_mode_after_body = (t) ->
2682                 if is_space_tok t
2683                         ins_mode_in_body t
2684                         return
2685                 if t.type is TYPE_COMMENT
2686                         insert_comment t, [open_els[0], open_els[0].children.length]
2687                         return
2688                 if t.type is TYPE_DOCTYPE
2689                         parse_error()
2690                         return
2691                 if t.type is TYPE_START_TAG and t.name is 'html'
2692                         ins_mode_in_body t
2693                         return
2694                 if t.type is TYPE_END_TAG and t.name is 'html'
2695                         # fixfull fragment case
2696                         ins_mode = ins_mode_after_after_body
2697                         return
2698                 if t.type is TYPE_EOF
2699                         stop_parsing()
2700                         return
2701                 # Anything ELse
2702                 parse_error()
2703                 ins_mode = ins_mode_in_body
2704                 process_token t
2705
2706         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2707         ins_mode_in_frameset = (t) ->
2708                 if is_space_tok t
2709                         insert_character t
2710                         return
2711                 if t.type is TYPE_COMMENT
2712                         insert_comment t
2713                         return
2714                 if t.type is TYPE_DOCTYPE
2715                         parse_error()
2716                         return
2717                 if t.type is TYPE_START_TAG and t.name is 'html'
2718                         ins_mode_in_body t
2719                         return
2720                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2721                         insert_html_element t
2722                         return
2723                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2724                         if open_els.length is 1
2725                                 parse_error()
2726                                 return # fragment case
2727                         open_els.shift()
2728                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2729                                 ins_mode = ins_mode_after_frameset
2730                         return
2731                 if t.type is TYPE_START_TAG and t.name is 'frame'
2732                         insert_html_element t
2733                         open_els.shift()
2734                         t.acknowledge_self_closing()
2735                         return
2736                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2737                         ins_mode_in_head t
2738                         return
2739                 if t.type is TYPE_EOF
2740                         if open_els.length isnt 1
2741                                 parse_error()
2742                         stop_parsing()
2743                         return
2744                 # Anything else
2745                 parse_error()
2746                 return
2747
2748         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2749         ins_mode_after_frameset = (t) ->
2750                 if is_space_tok t
2751                         insert_character t
2752                         return
2753                 if t.type is TYPE_COMMENT
2754                         insert_comment t
2755                         return
2756                 if t.type is TYPE_DOCTYPE
2757                         parse_error()
2758                         return
2759                 if t.type is TYPE_START_TAG and t.name is 'html'
2760                         ins_mode_in_body t
2761                         return
2762                 if t.type is TYPE_END_TAG and t.name is 'html'
2763                         insert_mode = ins_mode_after_after_frameset
2764                         return
2765                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2766                         ins_mode_in_head t
2767                         return
2768                 if t.type is TYPE_EOF
2769                         stop_parsing()
2770                         return
2771                 # Anything else
2772                 parse_error()
2773                 return
2774
2775         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2776         ins_mode_after_after_body = (t) ->
2777                 if t.type is TYPE_COMMENT
2778                         insert_comment t, [doc, doc.children.length]
2779                         return
2780                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2781                         ins_mode_in_body t
2782                         return
2783                 if t.type is TYPE_EOF
2784                         stop_parsing()
2785                         return
2786                 # Anything else
2787                 parse_error()
2788                 ins_mode = ins_mode_in_body
2789                 return
2790
2791         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2792         ins_mode_after_after_frameset = (t) ->
2793                 if t.type is TYPE_COMMENT
2794                         insert_comment t, [doc, doc.children.length]
2795                         return
2796                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2797                         ins_mode_in_body t
2798                         return
2799                 if t.type is TYPE_EOF
2800                         stop_parsing()
2801                         return
2802                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2803                         ins_mode_in_head t
2804                         return
2805                 # Anything else
2806                 parse_error()
2807                 return
2808
2809         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2810         has_color_face_or_size = (t) ->
2811                 for a in t.attrs_a
2812                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2813                                 return true
2814                 return false
2815         in_foreign_content_end_script = ->
2816                 open_els.shift()
2817                 # fixfull
2818                 return
2819         in_foreign_content_other_start = (t) ->
2820                 acn = adjusted_current_node()
2821                 if acn.namespace is NS_MATHML
2822                         adjust_mathml_attributes t
2823                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2824                         t.name = svg_name_fixes[t.name]
2825                 if acn.namespace is NS_SVG
2826                         adjust_svg_attributes t
2827                 adjust_foreign_attributes t
2828                 insert_foreign_element t, acn.namespace
2829                 if t.flag 'self-closing'
2830                         if t.name is 'script'
2831                                 t.acknowledge_self_closing()
2832                                 in_foreign_content_end_script()
2833                         else
2834                                 open_els.shift()
2835                                 t.acknowledge_self_closing()
2836                 return
2837         in_foreign_content = (t) ->
2838                 if t.type is TYPE_TEXT and t.text is "\u0000"
2839                         parse_error()
2840                         insert_character new_character_token "\ufffd"
2841                         return
2842                 if is_space_tok t
2843                         insert_character t
2844                         return
2845                 if t.type is TYPE_TEXT
2846                         flag_frameset_ok = false
2847                         insert_character t
2848                         return
2849                 if t.type is TYPE_COMMENT
2850                         insert_comment t
2851                         return
2852                 if t.type is TYPE_DOCTYPE
2853                         parse_error()
2854                         return
2855                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2856                         parse_error()
2857                         if flag_fragment_parsing
2858                                 in_foreign_content_other_start t
2859                                 return
2860                         loop # is this safe?
2861                                 open_els.shift()
2862                                 cn = open_els[0]
2863                                 if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
2864                                         break
2865                         process_token t
2866                         return
2867                 if t.type is TYPE_START_TAG
2868                         in_foreign_content_other_start t
2869                         return
2870                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2871                         in_foreign_content_end_script()
2872                         return
2873                 if t.type is TYPE_END_TAG
2874                         if open_els[0].name.toLowerCase() isnt t.name
2875                                 parse_error()
2876                         for node in open_els
2877                                 if node is open_els[open_els.length - 1]
2878                                         return
2879                                 if node.name.toLowerCase() is t.name
2880                                         loop
2881                                                 el = open_els.shift()
2882                                                 if el is node
2883                                                         return
2884                                 if node.namespace is NS_HTML
2885                                         break
2886                         ins_mode t # explicitly call HTML insertion mode
2887
2888
2889         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2890         tok_state_data = ->
2891                 switch c = txt.charAt(cur++)
2892                         when '&'
2893                                 return new_text_node parse_character_reference()
2894                         when '<'
2895                                 tok_state = tok_state_tag_open
2896                         when "\u0000"
2897                                 parse_error()
2898                                 return new_text_node c
2899                         when '' # EOF
2900                                 return new_eof_token()
2901                         else
2902                                 return new_text_node c
2903                 return null
2904
2905         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2906         # not needed: tok_state_character_reference_in_data = ->
2907         # just call parse_character_reference()
2908
2909         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2910         tok_state_rcdata = ->
2911                 switch c = txt.charAt(cur++)
2912                         when '&'
2913                                 return new_text_node parse_character_reference()
2914                         when '<'
2915                                 tok_state = tok_state_rcdata_less_than_sign
2916                         when "\u0000"
2917                                 parse_error()
2918                                 return new_character_token "\ufffd"
2919                         when '' # EOF
2920                                 return new_eof_token()
2921                         else
2922                                 return new_character_token c
2923                 return null
2924
2925         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2926         # not needed: tok_state_character_reference_in_rcdata = ->
2927         # just call parse_character_reference()
2928
2929         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2930         tok_state_rawtext = ->
2931                 switch c = txt.charAt(cur++)
2932                         when '<'
2933                                 tok_state = tok_state_rawtext_less_than_sign
2934                         when "\u0000"
2935                                 parse_error()
2936                                 return new_character_token "\ufffd"
2937                         when '' # EOF
2938                                 return new_eof_token()
2939                         else
2940                                 return new_character_token c
2941                 return null
2942
2943         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2944         tok_state_script_data = ->
2945                 switch c = txt.charAt(cur++)
2946                         when '<'
2947                                 tok_state = tok_state_script_data_less_than_sign
2948                         when "\u0000"
2949                                 parse_error()
2950                                 return new_character_token "\ufffd"
2951                         when '' # EOF
2952                                 return new_eof_token()
2953                         else
2954                                 return new_character_token c
2955                 return null
2956
2957         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
2958         tok_state_plaintext = ->
2959                 switch c = txt.charAt(cur++)
2960                         when "\u0000"
2961                                 parse_error()
2962                                 return new_character_token "\ufffd"
2963                         when '' # EOF
2964                                 return new_eof_token()
2965                         else
2966                                 return new_character_token c
2967                 return null
2968
2969
2970         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
2971         tok_state_tag_open = ->
2972                 switch c = txt.charAt(cur++)
2973                         when '!'
2974                                 tok_state = tok_state_markup_declaration_open
2975                         when '/'
2976                                 tok_state = tok_state_end_tag_open
2977                         when '?'
2978                                 parse_error()
2979                                 tok_cur_tag = new_comment_token '?'
2980                                 tok_state = tok_state_bogus_comment
2981                         else
2982                                 if is_lc_alpha(c)
2983                                         tok_cur_tag = new_open_tag c
2984                                         tok_state = tok_state_tag_name
2985                                 else if is_uc_alpha(c)
2986                                         tok_cur_tag = new_open_tag c.toLowerCase()
2987                                         tok_state = tok_state_tag_name
2988                                 else
2989                                         parse_error()
2990                                         tok_state = tok_state_data
2991                                         cur -= 1 # we didn't parse/handle the char after <
2992                                         return new_text_node '<'
2993                 return null
2994
2995         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
2996         tok_state_end_tag_open = ->
2997                 switch c = txt.charAt(cur++)
2998                         when '>'
2999                                 parse_error()
3000                                 tok_state = tok_state_data
3001                         when '' # EOF
3002                                 parse_error()
3003                                 tok_state = tok_state_data
3004                                 return new_text_node '</'
3005                         else
3006                                 if is_uc_alpha(c)
3007                                         tok_cur_tag = new_end_tag c.toLowerCase()
3008                                         tok_state = tok_state_tag_name
3009                                 else if is_lc_alpha(c)
3010                                         tok_cur_tag = new_end_tag c
3011                                         tok_state = tok_state_tag_name
3012                                 else
3013                                         parse_error()
3014                                         tok_cur_tag = new_comment_token '/'
3015                                         tok_state = tok_state_bogus_comment
3016                 return null
3017
3018         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3019         tok_state_tag_name = ->
3020                 switch c = txt.charAt(cur++)
3021                         when "\t", "\n", "\u000c", ' '
3022                                 tok_state = tok_state_before_attribute_name
3023                         when '/'
3024                                 tok_state = tok_state_self_closing_start_tag
3025                         when '>'
3026                                 tok_state = tok_state_data
3027                                 tmp = tok_cur_tag
3028                                 tok_cur_tag = null
3029                                 return tmp
3030                         when "\u0000"
3031                                 parse_error()
3032                                 tok_cur_tag.name += "\ufffd"
3033                         when '' # EOF
3034                                 parse_error()
3035                                 tok_state = tok_state_data
3036                         else
3037                                 if is_uc_alpha(c)
3038                                         tok_cur_tag.name += c.toLowerCase()
3039                                 else
3040                                         tok_cur_tag.name += c
3041                 return null
3042
3043         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3044         tok_state_rcdata_less_than_sign = ->
3045                 c = txt.charAt(cur++)
3046                 if c is '/'
3047                         temporary_buffer = ''
3048                         tok_state = tok_state_rcdata_end_tag_open
3049                         return null
3050                 # Anything else
3051                 tok_state = tok_state_rcdata
3052                 cur -= 1 # reconsume the input character
3053                 return new_character_token '<'
3054
3055         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3056         tok_state_rcdata_end_tag_open = ->
3057                 c = txt.charAt(cur++)
3058                 if is_uc_alpha(c)
3059                         tok_cur_tag = new_end_tag c.toLowerCase()
3060                         temporary_buffer += c
3061                         tok_state = tok_state_rcdata_end_tag_name
3062                         return null
3063                 if is_lc_alpha(c)
3064                         tok_cur_tag = new_end_tag c
3065                         temporary_buffer += c
3066                         tok_state = tok_state_rcdata_end_tag_name
3067                         return null
3068                 # Anything else
3069                 tok_state = tok_state_rcdata
3070                 cur -= 1 # reconsume the input character
3071                 return new_character_token "</" # fixfull separate these
3072
3073         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3074         is_appropriate_end_tag = (t) ->
3075                 # spec says to check against "the tag name of the last start tag to
3076                 # have been emitted from this tokenizer", but this is only called from
3077                 # the various "raw" states, so it's hopefully ok to assume that
3078                 # open_els[0].name will work instead TODO: verify this after the script
3079                 # data states are implemented
3080                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3081                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3082
3083         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3084         tok_state_rcdata_end_tag_name = ->
3085                 c = txt.charAt(cur++)
3086                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3087                         if is_appropriate_end_tag tok_cur_tag
3088                                 tok_state = tok_state_before_attribute_name
3089                                 return
3090                         # else fall through to "Anything else"
3091                 if c is '/'
3092                         if is_appropriate_end_tag tok_cur_tag
3093                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3094                                 return
3095                         # else fall through to "Anything else"
3096                 if c is '>'
3097                         if is_appropriate_end_tag tok_cur_tag
3098                                 tok_state = tok_state_data
3099                                 return tok_cur_tag
3100                         # else fall through to "Anything else"
3101                 if is_uc_alpha(c)
3102                         tok_cur_tag.name += c.toLowerCase()
3103                         temporary_buffer += c
3104                         return null
3105                 if is_lc_alpha(c)
3106                         tok_cur_tag.name += c
3107                         temporary_buffer += c
3108                         return null
3109                 # Anything else
3110                 tok_state = tok_state_rcdata
3111                 cur -= 1 # reconsume the input character
3112                 return new_character_token '</' + temporary_buffer # fixfull separate these
3113
3114         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3115         tok_state_rawtext_less_than_sign = ->
3116                 c = txt.charAt(cur++)
3117                 if c is '/'
3118                         temporary_buffer = ''
3119                         tok_state = tok_state_rawtext_end_tag_open
3120                         return null
3121                 # Anything else
3122                 tok_state = tok_state_rawtext
3123                 cur -= 1 # reconsume the input character
3124                 return new_character_token '<'
3125
3126         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3127         tok_state_rawtext_end_tag_open = ->
3128                 c = txt.charAt(cur++)
3129                 if is_uc_alpha(c)
3130                         tok_cur_tag = new_end_tag c.toLowerCase()
3131                         temporary_buffer += c
3132                         tok_state = tok_state_rawtext_end_tag_name
3133                         return null
3134                 if is_lc_alpha(c)
3135                         tok_cur_tag = new_end_tag c
3136                         temporary_buffer += c
3137                         tok_state = tok_state_rawtext_end_tag_name
3138                         return null
3139                 # Anything else
3140                 tok_state = tok_state_rawtext
3141                 cur -= 1 # reconsume the input character
3142                 return new_character_token "</" # fixfull separate these
3143
3144         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3145         tok_state_rawtext_end_tag_name = ->
3146                 c = txt.charAt(cur++)
3147                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3148                         if is_appropriate_end_tag tok_cur_tag
3149                                 tok_state = tok_state_before_attribute_name
3150                                 return
3151                         # else fall through to "Anything else"
3152                 if c is '/'
3153                         if is_appropriate_end_tag tok_cur_tag
3154                                 tok_state = tok_state_self_closing_start_tag
3155                                 return
3156                         # else fall through to "Anything else"
3157                 if c is '>'
3158                         if is_appropriate_end_tag tok_cur_tag
3159                                 tok_state = tok_state_data
3160                                 return tok_cur_tag
3161                         # else fall through to "Anything else"
3162                 if is_uc_alpha(c)
3163                         tok_cur_tag.name += c.toLowerCase()
3164                         temporary_buffer += c
3165                         return null
3166                 if is_lc_alpha(c)
3167                         tok_cur_tag.name += c
3168                         temporary_buffer += c
3169                         return null
3170                 # Anything else
3171                 tok_state = tok_state_rawtext
3172                 cur -= 1 # reconsume the input character
3173                 return new_character_token '</' + temporary_buffer # fixfull separate these
3174
3175         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3176         tok_state_script_data_less_than_sign = ->
3177                 c = txt.charAt(cur++)
3178                 if c is '/'
3179                         temporary_buffer = ''
3180                         tok_state = tok_state_script_data_end_tag_open
3181                         return
3182                 if c is '!'
3183                         tok_state = tok_state_script_data_escape_start
3184                         return new_character_token '<!' # fixfull split
3185                 # Anything else
3186                 tok_state = tok_state_script_data
3187                 cur -= 1 # Reconsume
3188                 return new_character_token '<'
3189
3190         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3191         tok_state_script_data_end_tag_open = ->
3192                 c = txt.charAt(cur++)
3193                 if is_uc_alpha(c)
3194                         tok_cur_tag = new_end_tag c.toLowerCase()
3195                         temporary_buffer += c
3196                         tok_state = tok_state_script_data_end_tag_name
3197                         return
3198                 if is_lc_alpha(c)
3199                         tok_cur_tag = new_end_tag c
3200                         temporary_buffer += c
3201                         tok_state = tok_state_script_data_end_tag_name
3202                         return
3203                 # Anything else
3204                 tok_state = tok_state_script_data
3205                 cur -= 1 # Reconsume
3206                 return new_character_token '</'
3207
3208         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3209         tok_state_script_data_end_tag_name = ->
3210                 c = txt.charAt(cur++)
3211                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3212                         if is_appropriate_end_tag tok_cur_tag
3213                                 tok_state = tok_state_before_attribute_name
3214                                 return
3215                         # fall through
3216                 if c is '/'
3217                         if is_appropriate_end_tag tok_cur_tag
3218                                 tok_state = tok_state_self_closing_start_tag
3219                                 return
3220                         # fall through
3221                 if c is '>'
3222                         if is_appropriate_end_tag tok_cur_tag
3223                                 tok_state = tok_state_data
3224                                 return tok_cur_tag
3225                         # fall through
3226                 if is_uc_alpha(c)
3227                         tok_cur_tag.name += c.toLowerCase()
3228                         temporary_buffer += c
3229                         return
3230                 if is_lc_alpha(c)
3231                         tok_cur_tag.name += c
3232                         temporary_buffer += c
3233                         return
3234                 # Anything else
3235                 tok_state = tok_state_script_data
3236                 cur -= 1 # Reconsume
3237                 return new_character_token "</#{temporary_buffer}" # fixfull split
3238
3239         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3240         tok_state_script_data_escape_start = ->
3241                 c = txt.charAt(cur++)
3242                 if c is '-'
3243                         tok_state = tok_state_script_data_escape_start_dash
3244                         return new_character_token '-'
3245                 # Anything else
3246                 tok_state = tok_state_script_data
3247                 cur -= 1 # Reconsume
3248                 return
3249
3250         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3251         tok_state_script_data_escape_start_dash = ->
3252                 c = txt.charAt(cur++)
3253                 if c is '-'
3254                         tok_state = tok_state_script_data_escaped_dash_dash
3255                         return new_character_token '-'
3256                 # Anything else
3257                 tok_state = tok_state_script_data
3258                 cur -= 1 # Reconsume
3259                 return
3260
3261         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3262         tok_state_script_data_escaped = ->
3263                 c = txt.charAt(cur++)
3264                 if c is '-'
3265                         tok_state = tok_state_script_data_escaped_dash
3266                         return new_character_token '-'
3267                 if c is '<'
3268                         tok_state = tok_state_script_data_escaped_less_than_sign
3269                         return
3270                 if c is "\u0000"
3271                         parse_error()
3272                         return new_character_token "\ufffd"
3273                 if c is '' # EOF
3274                         tok_state = tok_state_data
3275                         parse_error()
3276                         cur -= 1 # Reconsume
3277                         return
3278                 # Anything else
3279                 return new_character_token c
3280
3281         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3282         tok_state_script_data_escaped_dash = ->
3283                 c = txt.charAt(cur++)
3284                 if c is '-'
3285                         tok_state = tok_state_script_data_escaped_dash_dash
3286                         return new_character_token '-'
3287                 if c is '<'
3288                         tok_state = tok_state_script_data_escaped_less_than_sign
3289                         return
3290                 if c is "\u0000"
3291                         parse_error()
3292                         tok_state = tok_state_script_data_escaped
3293                         return new_character_token "\ufffd"
3294                 if c is '' # EOF
3295                         tok_state = tok_state_data
3296                         parse_error()
3297                         cur -= 1 # Reconsume
3298                         return
3299                 # Anything else
3300                 tok_state = tok_state_script_data_escaped
3301                 return new_character_token c
3302
3303         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3304         tok_state_script_data_escaped_dash_dash = ->
3305                 c = txt.charAt(cur++)
3306                 if c is '-'
3307                         return new_character_token '-'
3308                 if c is '<'
3309                         tok_state = tok_state_script_data_escaped_less_than_sign
3310                         return
3311                 if c is '>'
3312                         tok_state = tok_state_script_data
3313                         return new_character_token '>'
3314                 if c is "\u0000"
3315                         parse_error()
3316                         tok_state = tok_state_script_data_escaped
3317                         return new_character_token "\ufffd"
3318                 if c is '' # EOF
3319                         parse_error()
3320                         tok_state = tok_state_data
3321                         cur -= 1 # Reconsume
3322                         return
3323                 # Anything else
3324                 tok_state = tok_state_script_data_escaped
3325                 return new_character_token c
3326
3327         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3328         tok_state_script_data_escaped_less_than_sign = ->
3329                 c = txt.charAt(cur++)
3330                 if c is '/'
3331                         temporary_buffer = ''
3332                         tok_state = tok_state_script_data_escaped_end_tag_open
3333                         return
3334                 if is_uc_alpha(c)
3335                         temporary_buffer = c.toLowerCase() # yes, really
3336                         tok_state = tok_state_script_data_double_escape_start
3337                         return new_character_token "<#{c}" # fixfull split
3338                 if is_lc_alpha(c)
3339                         temporary_buffer = c
3340                         tok_state = tok_state_script_data_double_escape_start
3341                         return new_character_token "<#{c}" # fixfull split
3342                 # Anything else
3343                 tok_state = tok_state_script_data_escaped
3344                 cur -= 1 # Reconsume
3345                 return new_character_token c
3346
3347         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3348         tok_state_script_data_escaped_end_tag_open = ->
3349                 c = txt.charAt(cur++)
3350                 if is_uc_alpha(c)
3351                         tok_cur_tag = new_end_tag c.toLowerCase()
3352                         temporary_buffer += c
3353                         tok_state = tok_state_script_data_escaped_end_tag_name
3354                         return
3355                 if is_lc_alpha(c)
3356                         tok_cur_tag = new_end_tag c
3357                         temporary_buffer += c
3358                         tok_state = tok_state_script_data_escaped_end_tag_name
3359                         return
3360                 # Anything else
3361                 tok_state = tok_state_script_data_escaped
3362                 cur -= 1 # Reconsume
3363                 return new_character_token '</' # fixfull split
3364
3365         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3366         tok_state_script_data_escaped_end_tag_name = ->
3367                 c = txt.charAt(cur++)
3368                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3369                         if is_appropriate_end_tag tok_cur_tag
3370                                 tok_state = tok_state_before_attribute_name
3371                                 return
3372                         # fall through
3373                 if c is '/'
3374                         if is_appropriate_end_tag tok_cur_tag
3375                                 tok_state = tok_state_self_closing_start_tag
3376                                 return
3377                         # fall through
3378                 if c is '>'
3379                         if is_appropriate_end_tag tok_cur_tag
3380                                 tok_state = tok_state_data
3381                                 return tok_cur_tag
3382                         # fall through
3383                 if is_uc_alpha(c)
3384                         tok_cur_tag.name += c.toLowerCase()
3385                         temporary_buffer += c.toLowerCase()
3386                         return
3387                 if is_lc_alpha(c)
3388                         tok_cur_tag.name += c
3389                         temporary_buffer += c.toLowerCase()
3390                         return
3391                 # Anything else
3392                 tok_state = tok_state_script_data_escaped
3393                 cur -= 1 # Reconsume
3394                 return new_character_token "</#{temporary_buffer}" # fixfull split
3395
3396         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3397         tok_state_script_data_double_escape_start = ->
3398                 c = txt.charAt(cur++)
3399                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3400                         if temporary_buffer is 'script'
3401                                 tok_state = tok_state_script_data_double_escaped
3402                         else
3403                                 tok_state = tok_state_script_data_escaped
3404                         return new_character_token c
3405                 if is_uc_alpha(c)
3406                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3407                         return new_character_token c
3408                 if is_lc_alpha(c)
3409                         temporary_buffer += c
3410                         return new_character_token c
3411                 # Anything else
3412                 tok_state = tok_state_script_data_escaped
3413                 cur -= 1 # Reconsume
3414                 return
3415
3416         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3417         tok_state_script_data_double_escaped = ->
3418                 c = txt.charAt(cur++)
3419                 if c is '-'
3420                         tok_state = tok_state_script_data_double_escaped_dash
3421                         return new_character_token '-'
3422                 if c is '<'
3423                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3424                         return new_character_token '<'
3425                 if c is "\u0000"
3426                         parse_error()
3427                         return new_character_token "\ufffd"
3428                 if c is '' # EOF
3429                         parse_error()
3430                         tok_state = tok_state_data
3431                         cur -= 1 # Reconsume
3432                         return
3433                 # Anything else
3434                 return new_character_token c
3435
3436         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3437         tok_state_script_data_double_escaped_dash = ->
3438                 c = txt.charAt(cur++)
3439                 if c is '-'
3440                         tok_state = tok_state_script_data_double_escaped_dash_dash
3441                         return new_character_token '-'
3442                 if c is '<'
3443                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3444                         return new_character_token '<'
3445                 if c is "\u0000"
3446                         parse_error()
3447                         tok_state = tok_state_script_data_double_escaped
3448                         return new_character_token "\ufffd"
3449                 if c is '' # EOF
3450                         parse_error()
3451                         tok_state = tok_state_data
3452                         cur -= 1 # Reconsume
3453                         return
3454                 # Anything else
3455                 tok_state = tok_state_script_data_double_escaped
3456                 return new_character_token c
3457
3458         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3459         tok_state_script_data_double_escaped_dash_dash = ->
3460                 c = txt.charAt(cur++)
3461                 if c is '-'
3462                         return new_character_token '-'
3463                 if c is '<'
3464                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3465                         return new_character_token '<'
3466                 if c is '>'
3467                         tok_state = tok_state_script_data
3468                         return new_character_token '>'
3469                 if c is "\u0000"
3470                         parse_error()
3471                         tok_state = tok_state_script_data_double_escaped
3472                         return new_character_token "\ufffd"
3473                 if c is '' # EOF
3474                         parse_error()
3475                         tok_state = tok_state_data
3476                         cur -= 1 # Reconsume
3477                         return
3478                 # Anything else
3479                 tok_state = tok_state_script_data_double_escaped
3480                 return new_character_token c
3481
3482         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3483         tok_state_script_data_double_escaped_less_than_sign = ->
3484                 c = txt.charAt(cur++)
3485                 if c is '/'
3486                         temporary_buffer = ''
3487                         tok_state = tok_state_script_data_double_escape_end
3488                         return new_character_token '/'
3489                 # Anything else
3490                 tok_state = tok_state_script_data_double_escaped
3491                 cur -= 1 # Reconsume
3492                 return
3493
3494         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3495         tok_state_script_data_double_escape_end = ->
3496                 c = txt.charAt(cur++)
3497                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3498                         if temporary_buffer is 'script'
3499                                 tok_state = tok_state_script_data_escaped
3500                         else
3501                                 tok_state = tok_state_script_data_double_escaped
3502                         return new_character_token c
3503                 if is_uc_alpha(c)
3504                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3505                         return new_character_token c
3506                 if is_lc_alpha(c)
3507                         temporary_buffer += c
3508                         return new_character_token c
3509                 # Anything else
3510                 tok_state = tok_state_script_data_double_escaped
3511                 cur -= 1 # Reconsume
3512                 return
3513
3514         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3515         tok_state_before_attribute_name = ->
3516                 attr_name = null
3517                 switch c = txt.charAt(cur++)
3518                         when "\t", "\n", "\u000c", ' '
3519                                 return null
3520                         when '/'
3521                                 tok_state = tok_state_self_closing_start_tag
3522                                 return null
3523                         when '>'
3524                                 tok_state = tok_state_data
3525                                 tmp = tok_cur_tag
3526                                 tok_cur_tag = null
3527                                 return tmp
3528                         when "\u0000"
3529                                 parse_error()
3530                                 attr_name = "\ufffd"
3531                         when '"', "'", '<', '='
3532                                 parse_error()
3533                                 attr_name = c
3534                         when '' # EOF
3535                                 parse_error()
3536                                 tok_state = tok_state_data
3537                         else
3538                                 if is_uc_alpha(c)
3539                                         attr_name = c.toLowerCase()
3540                                 else
3541                                         attr_name = c
3542                 if attr_name?
3543                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3544                         tok_state = tok_state_attribute_name
3545                 return null
3546
3547         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3548         tok_state_attribute_name = ->
3549                 switch c = txt.charAt(cur++)
3550                         when "\t", "\n", "\u000c", ' '
3551                                 tok_state = tok_state_after_attribute_name
3552                         when '/'
3553                                 tok_state = tok_state_self_closing_start_tag
3554                         when '='
3555                                 tok_state = tok_state_before_attribute_value
3556                         when '>'
3557                                 tok_state = tok_state_data
3558                                 tmp = tok_cur_tag
3559                                 tok_cur_tag = null
3560                                 return tmp
3561                         when "\u0000"
3562                                 parse_error()
3563                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3564                         when '"', "'", '<'
3565                                 parse_error()
3566                                 tok_cur_tag.attrs_a[0][0] += c
3567                         when '' # EOF
3568                                 parse_error()
3569                                 tok_state = tok_state_data
3570                         else
3571                                 if is_uc_alpha(c)
3572                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3573                                 else
3574                                         tok_cur_tag.attrs_a[0][0] += c
3575                 return null
3576
3577         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3578         tok_state_after_attribute_name = ->
3579                 c = txt.charAt(cur++)
3580                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3581                         return
3582                 if c is '/'
3583                         tok_state = tok_state_self_closing_start_tag
3584                         return
3585                 if c is '='
3586                         tok_state = tok_state_before_attribute_value
3587                         return
3588                 if c is '>'
3589                         tok_state = tok_state_data
3590                         return
3591                 if is_uc_alpha(c)
3592                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3593                         tok_state = tok_state_attribute_name
3594                         return
3595                 if c is "\u0000"
3596                         parse_error()
3597                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3598                         tok_state = tok_state_attribute_name
3599                         return
3600                 if c is '' # EOF
3601                         parse_error()
3602                         tok_state = tok_state_data
3603                         cur -= 1 # reconsume
3604                         return
3605                 if c is '"' or c is "'" or c is '<'
3606                         parse_error()
3607                         # fall through to Anything else
3608                 # Anything else
3609                 tok_cur_tag.attrs_a.unshift [c, '']
3610                 tok_state = tok_state_attribute_name
3611
3612         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3613         tok_state_before_attribute_value = ->
3614                 switch c = txt.charAt(cur++)
3615                         when "\t", "\n", "\u000c", ' '
3616                                 return null
3617                         when '"'
3618                                 tok_state = tok_state_attribute_value_double_quoted
3619                         when '&'
3620                                 tok_state = tok_state_attribute_value_unquoted
3621                                 cur -= 1
3622                         when "'"
3623                                 tok_state = tok_state_attribute_value_single_quoted
3624                         when "\u0000"
3625                                 # Parse error
3626                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3627                                 tok_state = tok_state_attribute_value_unquoted
3628                         when '>'
3629                                 # Parse error
3630                                 tok_state = tok_state_data
3631                                 tmp = tok_cur_tag
3632                                 tok_cur_tag = null
3633                                 return tmp
3634                         when '' # EOF
3635                                 parse_error()
3636                                 tok_state = tok_state_data
3637                         else
3638                                 tok_cur_tag.attrs_a[0][1] += c
3639                                 tok_state = tok_state_attribute_value_unquoted
3640                 return null
3641
3642         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3643         tok_state_attribute_value_double_quoted = ->
3644                 switch c = txt.charAt(cur++)
3645                         when '"'
3646                                 tok_state = tok_state_after_attribute_value_quoted
3647                         when '&'
3648                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3649                         when "\u0000"
3650                                 # Parse error
3651                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3652                         when '' # EOF
3653                                 parse_error()
3654                                 tok_state = tok_state_data
3655                         else
3656                                 tok_cur_tag.attrs_a[0][1] += c
3657                 return null
3658
3659         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3660         tok_state_attribute_value_single_quoted = ->
3661                 switch c = txt.charAt(cur++)
3662                         when "'"
3663                                 tok_state = tok_state_after_attribute_value_quoted
3664                         when '&'
3665                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3666                         when "\u0000"
3667                                 # Parse error
3668                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3669                         when '' # EOF
3670                                 parse_error()
3671                                 tok_state = tok_state_data
3672                         else
3673                                 tok_cur_tag.attrs_a[0][1] += c
3674                 return null
3675
3676         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3677         tok_state_attribute_value_unquoted = ->
3678                 switch c = txt.charAt(cur++)
3679                         when "\t", "\n", "\u000c", ' '
3680                                 tok_state = tok_state_before_attribute_name
3681                         when '&'
3682                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3683                         when '>'
3684                                 tok_state = tok_state_data
3685                                 tmp = tok_cur_tag
3686                                 tok_cur_tag = null
3687                                 return tmp
3688                         when "\u0000"
3689                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3690                         when '' # EOF
3691                                 parse_error()
3692                                 tok_state = tok_state_data
3693                         else
3694                                 # Parse Error if ', <, = or ` (backtick)
3695                                 tok_cur_tag.attrs_a[0][1] += c
3696                 return null
3697
3698         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3699         tok_state_after_attribute_value_quoted = ->
3700                 switch c = txt.charAt(cur++)
3701                         when "\t", "\n", "\u000c", ' '
3702                                 tok_state = tok_state_before_attribute_name
3703                         when '/'
3704                                 tok_state = tok_state_self_closing_start_tag
3705                         when '>'
3706                                 tok_state = tok_state_data
3707                                 tmp = tok_cur_tag
3708                                 tok_cur_tag = null
3709                                 return tmp
3710                         when '' # EOF
3711                                 parse_error()
3712                                 tok_state = tok_state_data
3713                         else
3714                                 # Parse Error
3715                                 tok_state = tok_state_before_attribute_name
3716                                 cur -= 1 # we didn't handle that char
3717                 return null
3718
3719         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3720         tok_state_self_closing_start_tag = ->
3721                 c = txt.charAt(cur++)
3722                 if c is '>'
3723                         tok_cur_tag.flag 'self-closing'
3724                         tok_state = tok_state_data
3725                         return tok_cur_tag
3726                 if c is ''
3727                         parse_error()
3728                         tok_state = tok_state_data
3729                         cur -= 1 # Reconsume
3730                         return
3731                 # Anything else
3732                 parse_error()
3733                 tok_state = tok_state_before_attribute_name
3734                 cur -= 1 # Reconsume
3735                 return
3736
3737         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3738         # WARNING: put a comment token in tok_cur_tag before setting this state
3739         tok_state_bogus_comment = ->
3740                 next_gt = txt.indexOf '>', cur
3741                 if next_gt is -1
3742                         val = txt.substr cur
3743                         cur = txt.length
3744                 else
3745                         val = txt.substr cur, (next_gt - cur)
3746                         cur = next_gt + 1
3747                 val = val.replace "\u0000", "\ufffd"
3748                 tok_cur_tag.text += val
3749                 tok_state = tok_state_data
3750                 return tok_cur_tag
3751
3752         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3753         tok_state_markup_declaration_open = ->
3754                 if txt.substr(cur, 2) is '--'
3755                         cur += 2
3756                         tok_cur_tag = new_comment_token ''
3757                         tok_state = tok_state_comment_start
3758                         return
3759                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3760                         cur += 7
3761                         tok_state = tok_state_doctype
3762                         return
3763                 acn = adjusted_current_node()
3764                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3765                         cur += 7
3766                         tok_state = tok_state_cdata_section
3767                         return
3768                 # Otherwise
3769                 parse_error()
3770                 tok_cur_tag = new_comment_token ''
3771                 tok_state = tok_state_bogus_comment
3772                 return
3773
3774         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3775         tok_state_comment_start = ->
3776                 switch c = txt.charAt(cur++)
3777                         when '-'
3778                                 tok_state = tok_state_comment_start_dash
3779                         when "\u0000"
3780                                 parse_error()
3781                                 tok_state = tok_state_comment
3782                                 return new_character_token "\ufffd"
3783                         when '>'
3784                                 parse_error()
3785                                 tok_state = tok_state_data
3786                                 return tok_cur_tag
3787                         when '' # EOF
3788                                 parse_error()
3789                                 tok_state = tok_state_data
3790                                 cur -= 1 # Reconsume
3791                                 return tok_cur_tag
3792                         else
3793                                 tok_cur_tag.text += c
3794                                 tok_state = tok_state_comment
3795                 return null
3796
3797         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3798         tok_state_comment_start_dash = ->
3799                 switch c = txt.charAt(cur++)
3800                         when '-'
3801                                 tok_state = tok_state_comment_end
3802                         when "\u0000"
3803                                 parse_error()
3804                                 tok_cur_tag.text += "-\ufffd"
3805                                 tok_state = tok_state_comment
3806                         when '>'
3807                                 parse_error()
3808                                 tok_state = tok_state_data
3809                                 return tok_cur_tag
3810                         when '' # EOF
3811                                 parse_error()
3812                                 tok_state = tok_state_data
3813                                 cur -= 1 # Reconsume
3814                                 return tok_cur_tag
3815                         else
3816                                 tok_cur_tag.text += "-#{c}"
3817                                 tok_state = tok_state_comment
3818                 return null
3819
3820         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3821         tok_state_comment = ->
3822                 switch c = txt.charAt(cur++)
3823                         when '-'
3824                                 tok_state = tok_state_comment_end_dash
3825                         when "\u0000"
3826                                 parse_error()
3827                                 tok_cur_tag.text += "\ufffd"
3828                         when '' # EOF
3829                                 parse_error()
3830                                 tok_state = tok_state_data
3831                                 cur -= 1 # Reconsume
3832                                 return tok_cur_tag
3833                         else
3834                                 tok_cur_tag.text += c
3835                 return null
3836
3837         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3838         tok_state_comment_end_dash = ->
3839                 switch c = txt.charAt(cur++)
3840                         when '-'
3841                                 tok_state = tok_state_comment_end
3842                         when "\u0000"
3843                                 parse_error()
3844                                 tok_cur_tag.text += "-\ufffd"
3845                                 tok_state = tok_state_comment
3846                         when '' # EOF
3847                                 parse_error()
3848                                 tok_state = tok_state_data
3849                                 cur -= 1 # Reconsume
3850                                 return tok_cur_tag
3851                         else
3852                                 tok_cur_tag.text += "-#{c}"
3853                                 tok_state = tok_state_comment
3854                 return null
3855
3856         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3857         tok_state_comment_end = ->
3858                 switch c = txt.charAt(cur++)
3859                         when '>'
3860                                 tok_state = tok_state_data
3861                                 return tok_cur_tag
3862                         when "\u0000"
3863                                 parse_error()
3864                                 tok_cur_tag.text += "--\ufffd"
3865                                 tok_state = tok_state_comment
3866                         when '!'
3867                                 parse_error()
3868                                 tok_state = tok_state_comment_end_bang
3869                         when '-'
3870                                 parse_error()
3871                                 tok_cur_tag.text += '-'
3872                         when '' # EOF
3873                                 parse_error()
3874                                 tok_state = tok_state_data
3875                                 cur -= 1 # Reconsume
3876                                 return tok_cur_tag
3877                         else
3878                                 parse_error()
3879                                 tok_cur_tag.text += "--#{c}"
3880                                 tok_state = tok_state_comment
3881                 return null
3882
3883         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3884         tok_state_comment_end_bang = ->
3885                 switch c = txt.charAt(cur++)
3886                         when '-'
3887                                 tok_cur_tag.text += "--!#{c}"
3888                                 tok_state = tok_state_comment_end_dash
3889                         when '>'
3890                                 tok_state = tok_state_data
3891                                 return tok_cur_tag
3892                         when "\u0000"
3893                                 parse_error()
3894                                 tok_cur_tag.text += "--!\ufffd"
3895                                 tok_state = tok_state_comment
3896                         when '' # EOF
3897                                 parse_error()
3898                                 tok_state = tok_state_data
3899                                 cur -= 1 # Reconsume
3900                                 return tok_cur_tag
3901                         else
3902                                 tok_cur_tag.text += "--!#{c}"
3903                                 tok_state = tok_state_comment
3904                 return null
3905
3906         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3907         tok_state_doctype = ->
3908                 switch c = txt.charAt(cur++)
3909                         when "\t", "\u000a", "\u000c", ' '
3910                                 tok_state = tok_state_before_doctype_name
3911                         when '' # EOF
3912                                 parse_error()
3913                                 tok_state = tok_state_data
3914                                 el = new_doctype_token ''
3915                                 el.flag 'force-quirks', true
3916                                 cur -= 1 # Reconsume
3917                                 return el
3918                         else
3919                                 parse_error()
3920                                 tok_state = tok_state_before_doctype_name
3921                                 cur -= 1 # Reconsume
3922                 return null
3923
3924         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3925         tok_state_before_doctype_name = ->
3926                 c = txt.charAt(cur++)
3927                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3928                         return
3929                 if is_uc_alpha(c)
3930                         tok_cur_tag = new_doctype_token c.toLowerCase()
3931                         tok_state = tok_state_doctype_name
3932                         return
3933                 if c is "\u0000"
3934                         parse_error()
3935                         tok_cur_tag = new_doctype_token "\ufffd"
3936                         tok_state = tok_state_doctype_name
3937                         return
3938                 if c is '>'
3939                         parse_error()
3940                         el = new_doctype_token ''
3941                         el.flag 'force-quirks', true
3942                         tok_state = tok_state_data
3943                         return el
3944                 if c is '' # EOF
3945                         parse_error()
3946                         tok_state = tok_state_data
3947                         el = new_doctype_token ''
3948                         el.flag 'force-quirks', true
3949                         cur -= 1 # Reconsume
3950                         return el
3951                 # Anything else
3952                 tok_cur_tag = new_doctype_token c
3953                 tok_state = tok_state_doctype_name
3954                 return null
3955
3956         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
3957         tok_state_doctype_name = ->
3958                 c = txt.charAt(cur++)
3959                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3960                         tok_state = tok_state_after_doctype_name
3961                         return
3962                 if c is '>'
3963                         tok_state = tok_state_data
3964                         return tok_cur_tag
3965                 if is_uc_alpha(c)
3966                         tok_cur_tag.name += c.toLowerCase()
3967                         return
3968                 if c is "\u0000"
3969                         parse_error()
3970                         tok_cur_tag.name += "\ufffd"
3971                         return
3972                 if c is '' # EOF
3973                         parse_error()
3974                         tok_state = tok_state_data
3975                         tok_cur_tag.flag 'force-quirks', true
3976                         cur -= 1 # Reconsume
3977                         return tok_cur_tag
3978                 # Anything else
3979                 tok_cur_tag.name += c
3980                 return null
3981
3982         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
3983         tok_state_after_doctype_name = ->
3984                 c = txt.charAt(cur++)
3985                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3986                         return
3987                 if c is '>'
3988                         tok_state = tok_state_data
3989                         return tok_cur_tag
3990                 if c is '' # EOF
3991                         parse_error()
3992                         tok_state = tok_state_data
3993                         tok_cur_tag.flag 'force-quirks', true
3994                         cur -= 1 # Reconsume
3995                         return tok_cur_tag
3996                 # Anything else
3997                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
3998                         cur += 5
3999                         tok_state = tok_state_after_doctype_public_keyword
4000                         return
4001                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4002                         cur += 5
4003                         tok_state = tok_state_after_doctype_system_keyword
4004                         return
4005                 parse_error()
4006                 tok_cur_tag.flag 'force-quirks', true
4007                 tok_state = tok_state_bogus_doctype
4008                 return null
4009
4010         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4011         tok_state_after_doctype_public_keyword = ->
4012                 c = txt.charAt(cur++)
4013                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4014                         tok_state = tok_state_before_doctype_public_identifier
4015                         return
4016                 if c is '"'
4017                         parse_error()
4018                         tok_cur_tag.public_identifier = ''
4019                         tok_state = tok_state_doctype_public_identifier_double_quoted
4020                         return
4021                 if c is "'"
4022                         parse_error()
4023                         tok_cur_tag.public_identifier = ''
4024                         tok_state = tok_state_doctype_public_identifier_single_quoted
4025                         return
4026                 if c is '>'
4027                         parse_error()
4028                         tok_cur_tag.flag 'force-quirks', true
4029                         tok_state = tok_state_data
4030                         return tok_cur_tag
4031                 if c is '' # EOF
4032                         parse_error()
4033                         tok_state = tok_state_data
4034                         tok_cur_tag.flag 'force-quirks', true
4035                         cur -= 1 # Reconsume
4036                         return tok_cur_tag
4037                 # Anything else
4038                 parse_error()
4039                 tok_cur_tag.flag 'force-quirks', true
4040                 tok_state = tok_state_bogus_doctype
4041                 return null
4042
4043         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4044         tok_state_before_doctype_public_identifier = ->
4045                 c = txt.charAt(cur++)
4046                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4047                         return
4048                 if c is '"'
4049                         parse_error()
4050                         tok_cur_tag.public_identifier = ''
4051                         tok_state = tok_state_doctype_public_identifier_double_quoted
4052                         return
4053                 if c is "'"
4054                         parse_error()
4055                         tok_cur_tag.public_identifier = ''
4056                         tok_state = tok_state_doctype_public_identifier_single_quoted
4057                         return
4058                 if c is '>'
4059                         parse_error()
4060                         tok_cur_tag.flag 'force-quirks', true
4061                         tok_state = tok_state_data
4062                         return tok_cur_tag
4063                 if c is '' # EOF
4064                         parse_error()
4065                         tok_state = tok_state_data
4066                         tok_cur_tag.flag 'force-quirks', true
4067                         cur -= 1 # Reconsume
4068                         return tok_cur_tag
4069                 # Anything else
4070                 parse_error()
4071                 tok_cur_tag.flag 'force-quirks', true
4072                 tok_state = tok_state_bogus_doctype
4073                 return null
4074
4075
4076         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4077         tok_state_doctype_public_identifier_double_quoted = ->
4078                 c = txt.charAt(cur++)
4079                 if c is '"'
4080                         tok_state = tok_state_after_doctype_public_identifier
4081                         return
4082                 if c is "\u0000"
4083                         parse_error()
4084                         tok_cur_tag.public_identifier += "\ufffd"
4085                         return
4086                 if c is '>'
4087                         parse_error()
4088                         tok_cur_tag.flag 'force-quirks', true
4089                         tok_state = tok_state_data
4090                         return tok_cur_tag
4091                 if c is '' # EOF
4092                         parse_error()
4093                         tok_state = tok_state_data
4094                         tok_cur_tag.flag 'force-quirks', true
4095                         cur -= 1 # Reconsume
4096                         return tok_cur_tag
4097                 # Anything else
4098                 tok_cur_tag.public_identifier += c
4099                 return null
4100
4101         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4102         tok_state_doctype_public_identifier_single_quoted = ->
4103                 c = txt.charAt(cur++)
4104                 if c is "'"
4105                         tok_state = tok_state_after_doctype_public_identifier
4106                         return
4107                 if c is "\u0000"
4108                         parse_error()
4109                         tok_cur_tag.public_identifier += "\ufffd"
4110                         return
4111                 if c is '>'
4112                         parse_error()
4113                         tok_cur_tag.flag 'force-quirks', true
4114                         tok_state = tok_state_data
4115                         return tok_cur_tag
4116                 if c is '' # EOF
4117                         parse_error()
4118                         tok_state = tok_state_data
4119                         tok_cur_tag.flag 'force-quirks', true
4120                         cur -= 1 # Reconsume
4121                         return tok_cur_tag
4122                 # Anything else
4123                 tok_cur_tag.public_identifier += c
4124                 return null
4125
4126         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4127         tok_state_after_doctype_public_identifier = ->
4128                 c = txt.charAt(cur++)
4129                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4130                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4131                         return
4132                 if c is '>'
4133                         tok_state = tok_state_data
4134                         return tok_cur_tag
4135                 if c is '"'
4136                         parse_error()
4137                         tok_cur_tag.system_identifier = ''
4138                         tok_state = tok_state_doctype_system_identifier_double_quoted
4139                         return
4140                 if c is "'"
4141                         parse_error()
4142                         tok_cur_tag.system_identifier = ''
4143                         tok_state = tok_state_doctype_system_identifier_single_quoted
4144                         return
4145                 if c is '' # EOF
4146                         parse_error()
4147                         tok_state = tok_state_data
4148                         tok_cur_tag.flag 'force-quirks', true
4149                         cur -= 1 # Reconsume
4150                         return tok_cur_tag
4151                 # Anything else
4152                 parse_error()
4153                 tok_cur_tag.flag 'force-quirks', true
4154                 tok_state = tok_state_bogus_doctype
4155                 return null
4156
4157         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4158         tok_state_between_doctype_public_and_system_identifiers = ->
4159                 c = txt.charAt(cur++)
4160                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4161                         return
4162                 if c is '>'
4163                         tok_state = tok_state_data
4164                         return tok_cur_tag
4165                 if c is '"'
4166                         parse_error()
4167                         tok_cur_tag.system_identifier = ''
4168                         tok_state = tok_state_doctype_system_identifier_double_quoted
4169                         return
4170                 if c is "'"
4171                         parse_error()
4172                         tok_cur_tag.system_identifier = ''
4173                         tok_state = tok_state_doctype_system_identifier_single_quoted
4174                         return
4175                 if c is '' # EOF
4176                         parse_error()
4177                         tok_state = tok_state_data
4178                         tok_cur_tag.flag 'force-quirks', true
4179                         cur -= 1 # Reconsume
4180                         return tok_cur_tag
4181                 # Anything else
4182                 parse_error()
4183                 tok_cur_tag.flag 'force-quirks', true
4184                 tok_state = tok_state_bogus_doctype
4185                 return null
4186
4187         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4188         tok_state_after_doctype_system_keyword = ->
4189                 c = txt.charAt(cur++)
4190                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4191                         tok_state = tok_state_before_doctype_system_identifier
4192                         return
4193                 if c is '"'
4194                         parse_error()
4195                         tok_cur_tag.system_identifier = ''
4196                         tok_state = tok_state_doctype_system_identifier_double_quoted
4197                         return
4198                 if c is "'"
4199                         parse_error()
4200                         tok_cur_tag.system_identifier = ''
4201                         tok_state = tok_state_doctype_system_identifier_single_quoted
4202                         return
4203                 if c is '>'
4204                         parse_error()
4205                         tok_cur_tag.flag 'force-quirks', true
4206                         tok_state = tok_state_data
4207                         return tok_cur_tag
4208                 if c is '' # EOF
4209                         parse_error()
4210                         tok_state = tok_state_data
4211                         tok_cur_tag.flag 'force-quirks', true
4212                         cur -= 1 # Reconsume
4213                         return tok_cur_tag
4214                 # Anything else
4215                 parse_error()
4216                 tok_cur_tag.flag 'force-quirks', true
4217                 tok_state = tok_state_bogus_doctype
4218                 return null
4219
4220         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4221         tok_state_before_doctype_system_identifier = ->
4222                 c = txt.charAt(cur++)
4223                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4224                         return
4225                 if c is '"'
4226                         tok_cur_tag.system_identifier = ''
4227                         tok_state = tok_state_doctype_system_identifier_double_quoted
4228                         return
4229                 if c is "'"
4230                         tok_cur_tag.system_identifier = ''
4231                         tok_state = tok_state_doctype_system_identifier_single_quoted
4232                         return
4233                 if c is '>'
4234                         parse_error()
4235                         tok_cur_tag.flag 'force-quirks', true
4236                         tok_state = tok_state_data
4237                         return tok_cur_tag
4238                 if c is '' # EOF
4239                         parse_error()
4240                         tok_state = tok_state_data
4241                         tok_cur_tag.flag 'force-quirks', true
4242                         cur -= 1 # Reconsume
4243                         return tok_cur_tag
4244                 # Anything else
4245                 parse_error()
4246                 tok_cur_tag.flag 'force-quirks', true
4247                 tok_state = tok_state_bogus_doctype
4248                 return null
4249
4250         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4251         tok_state_doctype_system_identifier_double_quoted = ->
4252                 c = txt.charAt(cur++)
4253                 if c is '"'
4254                         tok_state = tok_state_after_doctype_system_identifier
4255                         return
4256                 if c is "\u0000"
4257                         parse_error()
4258                         tok_cur_tag.system_identifier += "\ufffd"
4259                         return
4260                 if c is '>'
4261                         parse_error()
4262                         tok_cur_tag.flag 'force-quirks', true
4263                         tok_state = tok_state_data
4264                         return tok_cur_tag
4265                 if c is '' # EOF
4266                         parse_error()
4267                         tok_state = tok_state_data
4268                         tok_cur_tag.flag 'force-quirks', true
4269                         cur -= 1 # Reconsume
4270                         return tok_cur_tag
4271                 # Anything else
4272                 tok_cur_tag.system_identifier += c
4273                 return null
4274
4275         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4276         tok_state_doctype_system_identifier_single_quoted = ->
4277                 c = txt.charAt(cur++)
4278                 if c is "'"
4279                         tok_state = tok_state_after_doctype_system_identifier
4280                         return
4281                 if c is "\u0000"
4282                         parse_error()
4283                         tok_cur_tag.system_identifier += "\ufffd"
4284                         return
4285                 if c is '>'
4286                         parse_error()
4287                         tok_cur_tag.flag 'force-quirks', true
4288                         tok_state = tok_state_data
4289                         return tok_cur_tag
4290                 if c is '' # EOF
4291                         parse_error()
4292                         tok_state = tok_state_data
4293                         tok_cur_tag.flag 'force-quirks', true
4294                         cur -= 1 # Reconsume
4295                         return tok_cur_tag
4296                 # Anything else
4297                 tok_cur_tag.system_identifier += c
4298                 return null
4299
4300         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4301         tok_state_after_doctype_system_identifier = ->
4302                 c = txt.charAt(cur++)
4303                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4304                         return
4305                 if c is '>'
4306                         tok_state = tok_state_data
4307                         return tok_cur_tag
4308                 if c is '' # EOF
4309                         parse_error()
4310                         tok_state = tok_state_data
4311                         tok_cur_tag.flag 'force-quirks', true
4312                         cur -= 1 # Reconsume
4313                         return tok_cur_tag
4314                 # Anything else
4315                 parse_error()
4316                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4317                 tok_state = tok_state_bogus_doctype
4318                 return null
4319
4320         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4321         tok_state_bogus_doctype = ->
4322                 c = txt.charAt(cur++)
4323                 if c is '>'
4324                         tok_state = tok_state_data
4325                         return tok_cur_tag
4326                 if c is '' # EOF
4327                         tok_state = tok_state_data
4328                         cur -= 1 # Reconsume
4329                         return tok_cur_tag
4330                 # Anything else
4331                 return null
4332
4333         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4334         tok_state_cdata_section = ->
4335                 tok_state = tok_state_data
4336                 next_gt = txt.indexOf ']]>', cur
4337                 if next_gt is -1
4338                         val = txt.substr cur
4339                         cur = txt.length
4340                 else
4341                         val = txt.substr cur, (next_gt - cur)
4342                         cur = next_gt + 3
4343                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4344                 val = val.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4345                 val = val.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4346                 return new_character_token val # fixfull split
4347
4348         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4349         # Don't set this as a state, just call it
4350         # returns a string (NOT a text node)
4351         parse_character_reference = (allowed_char = null, in_attr = false) ->
4352                 if cur >= txt.length
4353                         return '&'
4354                 switch c = txt.charAt(cur)
4355                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4356                                 # explicitly not a parse error
4357                                 return '&'
4358                         when ';'
4359                                 # there has to be "one or more" alnums between & and ; to be a parse error
4360                                 return '&'
4361                         when '#'
4362                                 if cur + 1 >= txt.length
4363                                         return '&'
4364                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4365                                         base = 16
4366                                         charset = hex_chars
4367                                         start = cur + 2
4368                                 else
4369                                         charset = digits
4370                                         start = cur + 1
4371                                         base = 10
4372                                 i = 0
4373                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4374                                         i += 1
4375                                 if i is 0
4376                                         return '&'
4377                                 cur = start + i
4378                                 if txt.charAt(start + i) is ';'
4379                                         cur += 1
4380                                 else
4381                                         parse_error()
4382                                 code_point = txt.substr(start, i)
4383                                 while code_point.charAt(0) is '0' and code_point.length > 1
4384                                         code_point = code_point.substr 1
4385                                 code_point = parseInt(code_point, base)
4386                                 if unicode_fixes[code_point]?
4387                                         parse_error()
4388                                         return unicode_fixes[code_point]
4389                                 else
4390                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4391                                                 parse_error()
4392                                                 return "\ufffd"
4393                                         else
4394                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4395                                                         parse_error()
4396                                                 return from_code_point code_point
4397                                 return
4398                         else
4399                                 for i in [0...31]
4400                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4401                                                 break
4402                                 if i is 0
4403                                         # exit early, because parse_error() below needs at least one alnum
4404                                         return '&'
4405                                 if txt.charAt(cur + i) is ';'
4406                                         i += 1 # include ';' terminator in value
4407                                         decoded = decode_named_char_ref txt.substr(cur, i)
4408                                         if decoded?
4409                                                 cur += i
4410                                                 return decoded
4411                                         parse_error()
4412                                         return '&'
4413                                 else
4414                                         # no ';' terminator (only legacy char refs)
4415                                         max = i
4416                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4417                                                 c = legacy_char_refs[txt.substr(cur, i)]
4418                                                 if c?
4419                                                         if in_attr
4420                                                                 if txt.charAt(cur + i) is '='
4421                                                                         # "because some legacy user agents will
4422                                                                         # misinterpret the markup in those cases"
4423                                                                         parse_error()
4424                                                                         return '&'
4425                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4426                                                                         # this makes attributes forgiving about url args
4427                                                                         return '&'
4428                                                         # ok, and besides the weird exceptions for attributes...
4429                                                         # return the matching char
4430                                                         cur += i # consume entity chars
4431                                                         parse_error() # because no terminating ";"
4432                                                         return c
4433                                         parse_error()
4434                                         return '&'
4435                 return # never reached
4436
4437         # tree constructor initialization
4438         # see comments on TYPE_TAG/etc for the structure of this data
4439         txt = args.html
4440         cur = 0
4441         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4442         open_els = []
4443         afe = [] # active formatting elements
4444         template_ins_modes = []
4445         ins_mode = ins_mode_initial
4446         original_ins_mode = ins_mode # TODO check spec
4447         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4448         flag_frameset_ok = true
4449         flag_parsing = true
4450         flag_foster_parenting = false
4451         form_element_pointer = null
4452         temporary_buffer = null
4453         pending_table_character_tokens = []
4454         head_element_pointer = null
4455         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4456         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4457
4458         # tokenizer initialization
4459         tok_state = tok_state_data
4460
4461         if args.name is "namespace-sensitivity.dat #1"
4462                 console.log "hi"
4463         # proccess input
4464         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4465         while flag_parsing
4466                 t = tok_state()
4467                 if t?
4468                         process_token t
4469                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4470         return doc.children
4471
4472 serialize_els = (els, shallow, show_ids) ->
4473         serialized = ''
4474         sep = ''
4475         for t in els
4476                 serialized += sep
4477                 sep = ','
4478                 serialized += t.serialize shallow, show_ids
4479         return serialized
4480
4481 module.exports.parse_html = parse_html
4482 module.exports.debug_log_reset = debug_log_reset
4483 module.exports.debug_log_each = debug_log_each
4484 module.exports.TYPE_TAG = TYPE_TAG
4485 module.exports.TYPE_TEXT = TYPE_TEXT
4486 module.exports.TYPE_COMMENT = TYPE_COMMENT
4487 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4488 module.exports.NS_HTML = NS_HTML
4489 module.exports.NS_MATHML = NS_MATHML
4490 module.exports.NS_SVG = NS_SVG