JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
namespace tweaks, fix <table><input>
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor. Hence it does not attempt to parse doctypes, <html>, <head>
20 # or <body> tags, nor does it produce the top level "document" node in the dom
21 # tree, nor nodes for html, head or body. Comments containing "fixfull"
22 # indicate places where additional code is needed for full HTML document
23 # parsing.
24 #
25 # Instead, the data structure produced by this parser is an array of Nodes.
26
27
28 # stacks/lists
29 #
# the spec uses many different words to indicate which ends of lists/stacks
# they are talking about (and relative movement within the lists/stacks). This
# section explains. I'm implementing "lists" (afe and open_els) the same way
# (both as stacks)
34 #
35 # stacks grow downward (current element is index=0)
36 #
37 # example: open_els = [a, b, c, d, e, f, g]
38 #
39 # "grows downwards" means it's visualized like this: (index: el, names)
40 #
41 #   6: g "start of the list", "topmost", "first"
42 #   5: f
43 #   4: e "previous" (to d), "above", "before"
44 #   3: d   (previous/next are relative to this element)
45 #   2: c "next", "after", "lower", "below"
46 #   1: b
47 #   0: a "end of the list", "current node", "bottommost", "last"
48
49
50 # browser
51 # note: to get this to run outside a browser, you'll have to write a native
52 # implementation of decode_named_char_ref()
# No CommonJS-style `module.exports` available: publish on `window.wheic` and
# point a stand-in `module` object at it.
if not module?.exports?
        window.wheic = {}
        module = {exports: window.wheic}
56
# Each node is an object of the Node class. Its @type is one of these:
TYPE_TAG = 0          # name, {attributes}, [children]
TYPE_TEXT = 1         # "text"
TYPE_COMMENT = 2
TYPE_DOCTYPE = 3
# The following types are emitted by the tokenizer, but shouldn't end up in the tree:
TYPE_START_TAG = 4    # name, [attributes ([key,value]...) in reverse order], [children]
TYPE_END_TAG = 5      # name
TYPE_EOF = 6
TYPE_AFE_MARKER = 7   # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm

# Namespace constants (values of Node's @namespace)
[NS_HTML, NS_MATHML, NS_SVG] = [1, 2, 3]
73
# In-memory debug log: messages are collected in g_debug_log and can be
# replayed through a callback.
g_debug_log = []
# Drop everything logged so far.
debug_log_reset = -> g_debug_log = []
# Append one message.
debug_log = (str) -> g_debug_log.push str
# Call cb with each logged message, in the order they were logged.
debug_log_each = (cb) -> (cb entry for entry in g_debug_log)
82
# Counter used to hand out a fresh @id to every Node that isn't given one.
prev_node_id = 0

# Node: used both for tokens emitted by the tokenizer and for nodes in the
# resulting tree. @type (one of the TYPE_* constants) says which it is.
class Node
        # type: one of the TYPE_* constants
        # args: optional initial values for the fields below. When args.id is
        #       given, the new node's id is that id with "+" appended;
        #       otherwise the next value of prev_node_id is used.
        constructor: (type, args = {}) ->
                @type = type # one of the TYPE_* constants above
                @name = args.name ? '' # tag name
                @text = args.text ? '' # contents for text/comment nodes
                @attrs = args.attrs ? {}
                # attrs in progress, TYPE_START_TAG only. The option is named
                # after the field; the older "attr_k" spelling is still honored.
                @attrs_a = args.attrs_a ? args.attr_k ? []
                @children = args.children ? []
                @namespace = args.namespace ? NS_HTML
                @parent = args.parent ? null
                @token = args.token ? null
                @flags = args.flags ? {}
                if args.id?
                        @id = "#{args.id}+"
                else
                        @id = "#{++prev_node_id}"

        # Record that the self-closing flag was acknowledged. The flag is set on
        # @token when this node has one, otherwise on the node itself.
        acknowledge_self_closing: ->
                if @token?
                        # the value must be passed: flag() with only a key is a read
                        @token.flag 'did_self_close', true
                else
                        @flag 'did_self_close', true

        # Get or set a flag: with a non-null value it stores @flags[key] = value,
        # otherwise it returns the current @flags[key].
        flag: (key, value = null) ->
                if value?
                        @flags[key] = value
                else
                        return @flags[key]

        # Render this node (and, unless shallow, its attributes and children) as
        # a string. Attributes are emitted in sorted key order.
        # shallow:  for tags, stop after the name (and id if requested)
        # show_ids: include "#<id>," after each tag name
        serialize: (shallow = false, show_ids = false) -> # for unit tests
                ret = ''
                switch @type
                        when TYPE_TAG
                                ret += 'tag:'
                                ret += JSON.stringify @name
                                ret += ','
                                if show_ids
                                        ret += "##{@id},"
                                if shallow
                                        break
                                attr_keys = (k for k of @attrs)
                                attr_keys.sort()
                                ret += '{'
                                sep = ''
                                for k in attr_keys
                                        ret += sep
                                        sep = ','
                                        ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
                                ret += '},['
                                sep = ''
                                for c in @children
                                        ret += sep
                                        sep = ','
                                        ret += c.serialize shallow, show_ids
                                ret += ']'
                        when TYPE_TEXT
                                ret += 'text:'
                                ret += JSON.stringify @text
                        when TYPE_COMMENT
                                ret += 'comment:'
                                ret += JSON.stringify @text
                        when TYPE_DOCTYPE
                                ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
                        when TYPE_AFE_MARKER
                                ret += 'marker'
                        when TYPE_AAA_BOOKMARK
                                ret += 'aaa_bookmark'
                        else
                                ret += 'unknown:'
                                console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
                return ret
154
# Constructors for the node/token kinds the parser creates. Each takes only
# what is normally known at the point of creation.
new_open_tag = (name) -> new Node TYPE_START_TAG, {name: name}
new_end_tag = (name) -> new Node TYPE_END_TAG, {name: name}
new_element = (name) -> new Node TYPE_TAG, {name: name}
new_text_node = (txt) -> new Node TYPE_TEXT, {text: txt}
# character tokens and text nodes are the same thing here
new_character_token = new_text_node
new_comment_token = (txt) -> new Node TYPE_COMMENT, {text: txt}
new_doctype_token = (name) -> new Node TYPE_DOCTYPE, {name: name}
new_eof_token = -> new Node TYPE_EOF
new_afe_marker = -> new Node TYPE_AFE_MARKER
new_aaa_bookmark = -> new Node TYPE_AAA_BOOKMARK
175
# Character classes, each stored as a string of its member characters.
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = "#{lc_alpha}#{uc_alpha}#{digits}"
hex_chars = "#{digits}abcdefABCDEF"

# True only for a single upper-case ASCII letter.
is_uc_alpha = (str) ->
        return false unless str.length is 1
        return uc_alpha.indexOf(str) isnt -1
# True only for a single lower-case ASCII letter.
is_lc_alpha = (str) ->
        return false unless str.length is 1
        return lc_alpha.indexOf(str) isnt -1

# some SVG elements have dashes in them
tag_name_chars = "#{alnum}-"

# http://www.w3.org/TR/html5/infrastructure.html#space-character
space_chars = "\u0009\u000a\u000c\u000d\u0020"
# True only for a single character from space_chars.
is_space = (txt) ->
        return false unless txt.length is 1
        return space_chars.indexOf(txt) isnt -1
# True when token t is a text token holding exactly one character from
# space_chars.
is_space_tok = (t) ->
        return false unless t.type is TYPE_TEXT
        return t.text.length is 1 and space_chars.indexOf(t.text) > -1
196
# True when t is a start tag token with a "type" attribute whose value is
# "hidden" (compared case-insensitively). The first "type" entry found in
# t.attrs_a decides the result; any other token, or a start tag without a
# "type" attribute, gives false.
is_input_hidden_tok = (t) ->
        return false unless t.type is TYPE_START_TAG
        # attrs_a is an array of [key, value] pairs, so iterate its values
        # (`in`); iterating with `of` would yield the array indexes instead.
        for a in t.attrs_a
                if a[0] is 'type'
                        return a[1].toLowerCase() is 'hidden'
        return false
205
# Unicode whitespace characters, per
# https://en.wikipedia.org/wiki/Whitespace_character#Unicode
whitespace_chars = [
        "\u0009\u000a\u000b\u000c\u000d\u0020"
        "\u0085\u00a0\u1680"
        "\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a"
        "\u2028\u2029\u202f\u205f\u3000"
].join ''
208
# Named character references that are recognized even without a terminating
# semicolon, mapped to the text they stand for.
# Name length is between 2 and 6; no name is a prefix of another.
# (nbsp and shy are written as escapes because they are invisible.)
legacy_char_refs =
        Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´'
        AElig: 'Æ', aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&'
        Aring: 'Å', aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä'
        brvbar: '¦'
        Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©'
        curren: '¤'
        deg: '°', divide: '÷'
        Eacute: 'É', eacute: 'é', Ecirc: 'Ê', ecirc: 'ê', Egrave: 'È'
        egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë', euml: 'ë'
        frac12: '½', frac14: '¼', frac34: '¾'
        GT: '>', gt: '>'
        Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡'
        Igrave: 'Ì', igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï'
        laquo: '«', LT: '<', lt: '<'
        macr: '¯', micro: 'µ', middot: '·'
        nbsp: "\u00a0", not: '¬', Ntilde: 'Ñ', ntilde: 'ñ'
        Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô', Ograve: 'Ò'
        ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø'
        Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö'
        para: '¶', plusmn: '±', pound: '£'
        QUOT: '"', quot: '"'
        raquo: '»', REG: '®', reg: '®'
        sect: '§', shy: "\u00ad", sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß'
        THORN: 'Þ', thorn: 'þ', times: '×'
        Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù'
        ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü'
        Yacute: 'Ý', yacute: 'ý', yen: '¥', yuml: 'ÿ'
231
# Tag names grouped by content model.
void_elements = [
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input'
        'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
]
raw_text_elements = [
        'script'
        'style'
]
escapable_raw_text_elements = [
        'textarea'
        'title'
]
# SVG element names, from http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
svg_elements = [
        'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem'
        'animate', 'animateColor', 'animateMotion', 'animateTransform'
        'circle', 'clipPath', 'color-profile', 'cursor'
        'defs', 'desc'
        'ellipse'
        'feBlend', 'feColorMatrix', 'feComponentTransfer', 'feComposite'
        'feConvolveMatrix', 'feDiffuseLighting', 'feDisplacementMap'
        'feDistantLight', 'feFlood', 'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR'
        'feGaussianBlur', 'feImage', 'feMerge', 'feMergeNode', 'feMorphology'
        'feOffset', 'fePointLight', 'feSpecularLighting', 'feSpotLight'
        'feTile', 'feTurbulence', 'filter'
        'font', 'font-face', 'font-face-format', 'font-face-name'
        'font-face-src', 'font-face-uri', 'foreignObject'
        'g', 'glyph', 'glyphRef'
        'hkern'
        'image'
        'line', 'linearGradient'
        'marker', 'mask', 'metadata', 'missing-glyph', 'mpath'
        'path', 'pattern', 'polygon', 'polyline'
        'radialGradient', 'rect'
        'script', 'set', 'stop', 'style', 'svg', 'switch', 'symbol'
        'text', 'textPath', 'title', 'tref', 'tspan'
        'use'
        'view', 'vkern'
]
253
# MathML element names, from http://www.w3.org/TR/MathML/ Version 3.0 2nd
# Edition. Each name is listed once.
mathml_elements = [
        'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
        'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
        'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
        'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
        'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
        'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
        'determinant', 'diff', 'divergence', 'divide', 'domain',
        'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
        'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
        'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
        'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
        'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
        'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
        'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
        'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'min',
        'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
        'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
        'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
        'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
        'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
        'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
        'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
        'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
        'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
        'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
        'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
        'vectorproduct', 'xor'
]
285 # foreign_elements = [svg_elements..., mathml_elements...]
286 #normal_elements = All other allowed HTML elements are normal elements.
287
# Elements in the "special" category: tag name -> namespace constant.
# NOTE(review): `title` is listed twice below (HTML and SVG). An object holds
# one value per key, so presumably only one of the two entries takes effect —
# confirm how the compiler handles the duplicate key.
special_elements = {
        # HTML:
        address: NS_HTML, applet: NS_HTML, area: NS_HTML, article: NS_HTML,
        aside: NS_HTML, base: NS_HTML, basefont: NS_HTML, bgsound: NS_HTML,
        blockquote: NS_HTML, body: NS_HTML, br: NS_HTML, button: NS_HTML,
        caption: NS_HTML, center: NS_HTML, col: NS_HTML, colgroup: NS_HTML,
        dd: NS_HTML, details: NS_HTML, dir: NS_HTML, div: NS_HTML,
        dl: NS_HTML, dt: NS_HTML, embed: NS_HTML, fieldset: NS_HTML,
        figcaption: NS_HTML, figure: NS_HTML, footer: NS_HTML, form: NS_HTML,
        frame: NS_HTML, frameset: NS_HTML, h1: NS_HTML, h2: NS_HTML,
        h3: NS_HTML, h4: NS_HTML, h5: NS_HTML, h6: NS_HTML,
        head: NS_HTML, header: NS_HTML, hgroup: NS_HTML, hr: NS_HTML,
        html: NS_HTML, iframe: NS_HTML, img: NS_HTML, input: NS_HTML,
        isindex: NS_HTML, li: NS_HTML, link: NS_HTML, listing: NS_HTML,
        main: NS_HTML, marquee: NS_HTML, meta: NS_HTML, nav: NS_HTML,
        noembed: NS_HTML, noframes: NS_HTML, noscript: NS_HTML, object: NS_HTML,
        ol: NS_HTML, p: NS_HTML, param: NS_HTML, plaintext: NS_HTML,
        pre: NS_HTML, script: NS_HTML, section: NS_HTML, select: NS_HTML,
        source: NS_HTML, style: NS_HTML, summary: NS_HTML, table: NS_HTML,
        tbody: NS_HTML, td: NS_HTML, template: NS_HTML, textarea: NS_HTML,
        tfoot: NS_HTML, th: NS_HTML, thead: NS_HTML, title: NS_HTML,
        tr: NS_HTML, track: NS_HTML, ul: NS_HTML, wbr: NS_HTML,
        xmp: NS_HTML,

        # MathML:
        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
        mtext: NS_MATHML, 'annotation-xml': NS_MATHML,

        # SVG:
        foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
}
316
# Tag names of formatting elements (used as a set: name -> true).
formatting_elements =
        a: true
        b: true
        big: true
        code: true
        em: true
        font: true
        i: true
        nobr: true
        s: true
        small: true
        strike: true
        strong: true
        tt: true
        u: true
322
# MathML text integration points: tag name -> namespace constant.
mathml_text_integration = {
        mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
}
# True when el's name is in the table above and el is in the namespace listed
# for it. This is a pure lookup: it must compare with `is`, because assigning
# would overwrite the table and return el.namespace for every element.
is_mathml_text_integration_point = (el) ->
        return mathml_text_integration[el.name] is el.namespace
# True when el is an HTML integration point:
#  - a MathML annotation-xml element whose "encoding" attribute is (ignoring
#    case) "text/html" or "application/xhtml+xml", or
#  - an SVG foreignObject, desc or title element.
# Reads el.attrs, so it needs an element.
is_html_integration = (el) -> # DON'T PASS A TOKEN
        switch el.namespace
                when NS_MATHML
                        if el.name is 'annotation-xml'
                                return false unless el.attrs.encoding?
                                encoding = el.attrs.encoding.toLowerCase()
                                return encoding is 'text/html' or encoding is 'application/xhtml+xml'
                when NS_SVG
                        return el.name in ['foreignObject', 'desc', 'title']
        return false
340
# Heading elements: tag name -> namespace constant.
h_tags =
        h1: NS_HTML
        h2: NS_HTML
        h3: NS_HTML
        h4: NS_HTML
        h5: NS_HTML
        h6: NS_HTML

# FIXME namespacify
# Tag names used as a set (name -> true).
foster_parenting_targets = {table: true, tbody: true, tfoot: true, thead: true, tr: true}

# FIXME namespacify
# all html I presume
# Tag names used as a set (name -> true).
end_tag_implied = {
        dd: true, dt: true, li: true, option: true, optgroup: true,
        p: true, rb: true, rp: true, rt: true, rtc: true
}
368
# True when e is in the "special" category, i.e. special_elements lists e's
# name under e's namespace.
# special_elements stores a single namespace per tag name, yet `title` is
# listed there for both HTML and SVG, so the HTML title element is matched
# explicitly here.
el_is_special = (e) ->
        return true if e.name is 'title' and e.namespace is NS_HTML
        return special_elements[e.name] is e.namespace

# The HTML elements excluded by el_is_special_not_adp.
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
# Same as el_is_special, except HTML address, div and p give false.
el_is_special_not_adp = (el) ->
        return el_is_special(el) and adp_els[el.name] isnt el.namespace
375
# Lower-cased SVG tag name -> the same name with its mixed-case spelling.
svg_name_fixes =
        altglyph:            'altGlyph'
        altglyphdef:         'altGlyphDef'
        altglyphitem:        'altGlyphItem'
        animatecolor:        'animateColor'
        animatemotion:       'animateMotion'
        animatetransform:    'animateTransform'
        clippath:            'clipPath'
        feblend:             'feBlend'
        fecolormatrix:       'feColorMatrix'
        fecomponenttransfer: 'feComponentTransfer'
        fecomposite:         'feComposite'
        feconvolvematrix:    'feConvolveMatrix'
        fediffuselighting:   'feDiffuseLighting'
        fedisplacementmap:   'feDisplacementMap'
        fedistantlight:      'feDistantLight'
        fedropshadow:        'feDropShadow'
        feflood:             'feFlood'
        fefunca:             'feFuncA'
        fefuncb:             'feFuncB'
        fefuncg:             'feFuncG'
        fefuncr:             'feFuncR'
        fegaussianblur:      'feGaussianBlur'
        feimage:             'feImage'
        femerge:             'feMerge'
        femergenode:         'feMergeNode'
        femorphology:        'feMorphology'
        feoffset:            'feOffset'
        fepointlight:        'fePointLight'
        fespecularlighting:  'feSpecularLighting'
        fespotlight:         'feSpotLight'
        fetile:              'feTile'
        feturbulence:        'feTurbulence'
        foreignobject:       'foreignObject'
        glyphref:            'glyphRef'
        lineargradient:      'linearGradient'
        radialgradient:      'radialGradient'
        textpath:            'textPath'
# Lower-cased SVG attribute name -> the same name with its mixed-case spelling.
svg_attribute_fixes =
        attributename:             'attributeName'
        attributetype:             'attributeType'
        basefrequency:             'baseFrequency'
        baseprofile:               'baseProfile'
        calcmode:                  'calcMode'
        clippathunits:             'clipPathUnits'
        contentscripttype:         'contentScriptType'
        contentstyletype:          'contentStyleType'
        diffuseconstant:           'diffuseConstant'
        edgemode:                  'edgeMode'
        externalresourcesrequired: 'externalResourcesRequired'
        filterres:                 'filterRes'
        filterunits:               'filterUnits'
        glyphref:                  'glyphRef'
        gradienttransform:         'gradientTransform'
        gradientunits:             'gradientUnits'
        kernelmatrix:              'kernelMatrix'
        kernelunitlength:          'kernelUnitLength'
        keypoints:                 'keyPoints'
        keysplines:                'keySplines'
        keytimes:                  'keyTimes'
        lengthadjust:              'lengthAdjust'
        limitingconeangle:         'limitingConeAngle'
        markerheight:              'markerHeight'
        markerunits:               'markerUnits'
        markerwidth:               'markerWidth'
        maskcontentunits:          'maskContentUnits'
        maskunits:                 'maskUnits'
        numoctaves:                'numOctaves'
        pathlength:                'pathLength'
        patterncontentunits:       'patternContentUnits'
        patterntransform:          'patternTransform'
        patternunits:              'patternUnits'
        pointsatx:                 'pointsAtX'
        pointsaty:                 'pointsAtY'
        pointsatz:                 'pointsAtZ'
        preservealpha:             'preserveAlpha'
        preserveaspectratio:       'preserveAspectRatio'
        primitiveunits:            'primitiveUnits'
        refx:                      'refX'
        refy:                      'refY'
        repeatcount:               'repeatCount'
        repeatdur:                 'repeatDur'
        requiredextensions:        'requiredExtensions'
        requiredfeatures:          'requiredFeatures'
        specularconstant:          'specularConstant'
        specularexponent:          'specularExponent'
        spreadmethod:              'spreadMethod'
        startoffset:               'startOffset'
        stddeviation:              'stdDeviation'
        stitchtiles:               'stitchTiles'
        surfacescale:              'surfaceScale'
        systemlanguage:            'systemLanguage'
        tablevalues:               'tableValues'
        targetx:                   'targetX'
        targety:                   'targetY'
        textlength:                'textLength'
        viewbox:                   'viewBox'
        viewtarget:                'viewTarget'
        xchannelselector:          'xChannelSelector'
        ychannelselector:          'yChannelSelector'
        zoomandpan:                'zoomAndPan'
# Rename every "definitionurl" attribute of token t to "definitionURL",
# editing the [key, value] pairs of t.attrs_a in place. Returns nothing.
adjust_mathml_attributes = (t) ->
        for attr in t.attrs_a when attr[0] is 'definitionurl'
                attr[0] = 'definitionURL'
        return
# Replace each attribute name of token t that has an entry in
# svg_attribute_fixes with that entry, editing t.attrs_a in place.
# Returns nothing.
adjust_svg_attributes = (t) ->
        for attr in t.attrs_a
                fixed = svg_attribute_fixes[attr[0]]
                attr[0] = fixed if fixed?
        return
# Placeholder: currently does nothing with token t and returns undefined.
adjust_foreign_attributes = (t) ->
        # fixfull
        undefined
492
# decode_named_char_ref()
#
# The list of named character references is _huge_, so the browser is asked to
# decode them instead of shipping the table here.
#
# Pass the reference without the "&" but with the ";". Examples:
#    for "&amp" pass "amp;"
#    for "&#x2032" pass "#x2032;"
#
# Returns the decoded text, or null when the browser hands the input back
# unchanged. Successful results are memoized in g_dncr.cache.
g_dncr =
        cache: {}
        textarea: document.createElement('textarea')
# TODO test this in IE8
decode_named_char_ref = (txt) ->
        ref = "&#{txt}"
        cached = g_dncr.cache[ref]
        return cached if cached?
        # let the browser decode it: write as markup, read back as plain text
        g_dncr.textarea.innerHTML = ref
        result = g_dncr.textarea.value
        if result is ref
                return null
        g_dncr.cache[ref] = result
        return result
514
515 parse_html = (txt, parse_error_cb = null) ->
516         cur = 0 # index of next char in txt to be parsed
517         # declare doc and tokenizer variables so they're in scope below
518         doc = null
519         open_els = null # stack of open elements
520         afe = null # active formatting elements
521         template_ins_modes = null
522         ins_mode = null
523         original_ins_mode = null
524         tok_state = null
525         tok_cur_tag = null # partially parsed tag
526         flag_scripting = null
527         flag_frameset_ok = null
528         flag_parsing = null
529         flag_foster_parenting = null
530         form_element_pointer = null
531         temporary_buffer = null
532         pending_table_character_tokens = null
533         head_element_pointer = null
534         flag_fragment_parsing = null
535         context_element = null
536
537         stop_parsing = ->
538                 flag_parsing = false
539
540         parse_error = ->
541                 if parse_error_cb?
542                         parse_error_cb cur
543                 else
544                         console.log "Parse error at character #{cur} of #{txt.length}"
545
546         afe_push = (new_el) ->
547                 matches = 0
548                 for el, i in afe
549                         if el.name is new_el.name and el.namespace is new_el.namespace
550                                 for k, v of el.attrs
551                                         continue unless new_el.attrs[k] is v
552                                 for k, v of new_el.attrs
553                                         continue unless el.attrs[k] is v
554                                 matches += 1
555                                 if matches is 3
556                                         afe.splice i, 1
557                                         break
558                 afe.unshift new_el
559         afe_push_marker = ->
560                 afe.unshift new_afe_marker()
561
        # the functions below implement the Tree Construction algorithm
        # http://www.w3.org/TR/html5/syntax.html#tree-construction
564
565         # But first... the helpers
566         template_tag_is_open = ->
567                 for t in open_els
568                         if t.name is 'template' # maybe should also check: and t.namespace is 'html'
569                                 return true
570                 return false
571         is_in_scope_x = (tag_name, scope, namespace) ->
572                 for t in open_els
573                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
574                                 return true
575                         if scope[t.name] is t.namespace
576                                 return false
577                 return false
578         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
579                 for t in open_els
580                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
581                                 return true
582                         if scope[t.name] is t.namespace
583                                 return false
584                         if scope2[t.name] is t.namespace
585                                 return false
586                 return false
587         standard_scopers = {
588                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
589                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
590                 template: NS_HTML, mi: NS_MATHML,
591
592                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
593                 'annotation-xml': NS_MATHML,
594
595                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
596         }
597         button_scopers = button: NS_HTML
598         li_scopers = ol: NS_HTML, ul: NS_HTML
599         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
600         is_in_scope = (tag_name, namespace = null) ->
601                 return is_in_scope_x tag_name, standard_scopers, namespace
602         is_in_button_scope = (tag_name, namespace = null) ->
603                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
604         is_in_table_scope = (tag_name, namespace = null) ->
605                 return is_in_scope_x tag_name, table_scopers, namespace
606         # aka is_in_list_item_scope
607         is_in_li_scope = (tag_name, namespace = null) ->
608                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
609         is_in_select_scope = (tag_name, namespace = null) ->
610                 for t in open_els
611                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
612                                 return true
613                         if t.ns isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
614                                 return false
615                 return false
616         # this checks for a particular element, not by name
617         el_is_in_scope = (el) ->
618                 for t in open_els
619                         if t is el
620                                 return true
621                         if standard_scopers[t.name] is t.namespace
622                                 return false
623                 return false
624
625         clear_to_table_stopers = {
626                 'table': true
627                 'template': true
628                 'html': true
629         }
630         clear_stack_to_table_context = ->
631                 loop
632                         if clear_to_table_stopers[open_els[0].name]?
633                                 break
634                         open_els.shift()
635                 return
636         clear_to_table_body_stopers = {
637                 'tbody': true
638                 'tfoot': true
639                 'thead': true
640                 'template': true
641                 'html': true
642         }
643         clear_stack_to_table_body_context = ->
644                 loop
645                         if clear_to_table_body_stopers[open_els[0].name]?
646                                 break
647                         open_els.shift()
648                 return
649         clear_to_table_row_stopers = {
650                 'tr': true
651                 'template': true
652                 'html': true
653         }
654         clear_stack_to_table_row_context = ->
655                 loop
656                         if clear_to_table_row_stopers[open_els[0].name]?
657                                 break
658                         open_els.shift()
659                 return
660         clear_afe_to_marker = ->
661                 loop
662                         return unless afe.length > 0 # this happens in fragment case, ?spec error
663                         el = afe.shift()
664                         if el.type is TYPE_AFE_MARKER
665                                 return
666                 return
667
668         # 8.2.3.1 ...
669         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
670         reset_ins_mode = -> # assigns ins_mode based on the names of the entries in open_els, scanning from index 0 upward
671                 # 1. Let last be false.
672                 last = false
673                 # 2. Let node be the last node in the stack of open elements.
674                 node_i = 0 # the spec's "last node" (the current node) lives at index 0
675                 node = open_els[node_i]
676                 # 3. Loop: If node is the first node in the stack of open elements,
677                 # then set last to true, and, if the parser was originally created as
678                 # part of the HTML fragment parsing algorithm (fragment case) set node
679                 # to the context element.
680                 loop
681                         if node_i is open_els.length - 1 # the highest index is the spec's "first node"
682                                 last = true
683                                 # fixfull (fragment case): node is not swapped for the context element here
684
685                         # 4. If node is a select element, run these substeps:
686                         if node.name is 'select'
687                                 # 1. If last is true, jump to the step below labeled done.
688                                 unless last
689                                         # 2. Let ancestor be node.
690                                         ancestor_i = node_i
691                                         ancestor = node
692                                         # 3. Loop: If ancestor is the first node in the stack of
693                                         # open elements, jump to the step below labeled done.
694                                         loop
695                                                 if ancestor_i is open_els.length - 1
696                                                         break
697                                                 # 4. Let ancestor be the node before ancestor in the stack
698                                                 # of open elements.
699                                                 ancestor_i += 1 # "before" in spec terms is the next higher index here
700                                                 ancestor = open_els[ancestor_i]
701                                                 # 5. If ancestor is a template node, jump to the step below
702                                                 # labeled done.
703                                                 if ancestor.name is 'template'
704                                                         break
705                                                 # 6. If ancestor is a table node, switch the insertion mode
706                                                 # to "in select in table" and abort these steps.
707                                                 if ancestor.name is 'table'
708                                                         ins_mode = ins_mode_in_select_in_table
709                                                         return
710                                                 # 7. Jump back to the step labeled loop.
711                                 # 8. Done: Switch the insertion mode to "in select" and abort
712                                 # these steps.
713                                 ins_mode = ins_mode_in_select
714                                 return
715                         # 5. If node is a td or th element and last is false, then switch
716                         # the insertion mode to "in cell" and abort these steps.
717                         if (node.name is 'td' or node.name is 'th') and last is false
718                                 ins_mode = ins_mode_in_cell
719                                 return
720                         # 6. If node is a tr element, then switch the insertion mode to "in
721                         # row" and abort these steps.
722                         if node.name is 'tr'
723                                 ins_mode = ins_mode_in_row
724                                 return
725                         # 7. If node is a tbody, thead, or tfoot element, then switch the
726                         # insertion mode to "in table body" and abort these steps.
727                         if node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot'
728                                 ins_mode = ins_mode_in_table_body
729                                 return
730                         # 8. If node is a caption element, then switch the insertion mode
731                         # to "in caption" and abort these steps.
732                         if node.name is 'caption'
733                                 ins_mode = ins_mode_in_caption
734                                 return
735                         # 9. If node is a colgroup element, then switch the insertion mode
736                         # to "in column group" and abort these steps.
737                         if node.name is 'colgroup'
738                                 ins_mode = ins_mode_in_column_group
739                                 return
740                         # 10. If node is a table element, then switch the insertion mode to
741                         # "in table" and abort these steps.
742                         if node.name is 'table'
743                                 ins_mode = ins_mode_in_table
744                                 return
745                         # 11. If node is a template element, then switch the insertion mode
746                         # to the current template insertion mode and abort these steps.
747                         # fixfull (template insertion mode stack): not implemented, so template nodes fall through to the checks below
748
749                         # 12. If node is a head element and last is true, then switch the
750                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
751                         # these steps. (fragment case)
752                         if node.name is 'head' and last
753                                 ins_mode = ins_mode_in_body
754                                 return
755                         # 13. If node is a head element and last is false, then switch the
756                         # insertion mode to "in head" and abort these steps.
757                         if node.name is 'head' and last is false
758                                 ins_mode = ins_mode_in_head
759                                 return
760                         # 14. If node is a body element, then switch the insertion mode to
761                         # "in body" and abort these steps.
762                         if node.name is 'body'
763                                 ins_mode = ins_mode_in_body
764                                 return
765                         # 15. If node is a frameset element, then switch the insertion mode
766                         # to "in frameset" and abort these steps. (fragment case)
767                         if node.name is 'frameset'
768                                 ins_mode = ins_mode_in_frameset
769                                 return
770                         # 16. If node is an html element, run these substeps:
771                         if node.name is 'html'
772                                 # 1. If the head element pointer is null, switch the insertion
773                                 # mode to "before head" and abort these steps. (fragment case)
774                                 if head_element_pointer is null
775                                         ins_mode = ins_mode_before_head
776                                 else
777                                         # 2. Otherwise, the head element pointer is not null,
778                                         # switch the insertion mode to "after head" and abort these
779                                         # steps.
780                                         ins_mode = ins_mode_after_head
781                                 return # shared by both branches above
782                         # 17. If last is true, then switch the insertion mode to "in body"
783                         # and abort these steps. (fragment case)
784                         if last
785                                 ins_mode = ins_mode_in_body
786                                 return
787                         # 18. Let node now be the node before node in the stack of open
788                         # elements.
789                         node_i += 1 # one entry further away from the current node
790                         node = open_els[node_i]
791                         # 19. Return to the step labeled loop.
792
793         # 8.2.3.2
794
795         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
796         adjusted_current_node = ->
797                 # Fragment case with a lone open element: the context element stands
798                 # in for the current node; otherwise it is simply open_els[0].
799                 if flag_fragment_parsing and open_els.length is 1 then context_element else open_els[0]
800
801         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
802         # this implementation is structured (mostly) as described at the link above.
803         # capitalized comments are the "labels" described at the link above.
804         reconstruct_afe = ->
805                 # Nothing to do when the list is empty, or when its newest entry
806                 # (afe[0]) is a marker or is already on the stack of open elements.
807                 return unless afe.length > 0
808                 return if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
809                 # Rewind: step toward higher indexes until the end of the list, or
810                 # until hitting a marker / an entry that is in open_els.
811                 i = 0
812                 while i < afe.length - 1
813                         i += 1
814                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
815                                 i -= 1 # Advance: step back to the last entry that needs work
816                                 break
817                 # Create: insert an element for each entry from i down to 0, storing
818                 # the freshly inserted element in the list in place of the old entry.
819                 loop
820                         afe[i] = insert_html_element afe[i].token
821                         break if i is 0
822                         i -= 1 # Advance
823
824         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
825         # adoption agency algorithm
826         # overview here:
827         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
828         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
829         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
830         adoption_agency = (subject) -> # subject: tag name, compared with .name of entries in open_els and afe
831                 debug_log "adoption_agency()"
832                 debug_log "tree: #{serialize_els doc.children, false, true}"
833                 debug_log "open_els: #{serialize_els open_els, true, true}"
834                 debug_log "afe: #{serialize_els afe, true, true}"
835                 if open_els[0].name is subject
836                         el = open_els[0]
837                         open_els.shift()
838                         # remove it from the list of active formatting elements (if found)
839                         for t, i in afe
840                                 if t is el
841                                         afe.splice i, 1
842                                         break
843                         debug_log "aaa: starting off with subject on top of stack, exiting"
844                         return
845                 outer = 0
846                 loop
847                         if outer >= 8
848                                 return
849                         outer += 1
850                         # 5. Let formatting element be the last element in the list of
851                         # active formatting elements that: is between the end of the list
852                         # and the last scope marker in the list, if any, or the start of
853                         # the list otherwise, and has the tag name subject.
854                         fe = null
855                         for t, fe_of_afe in afe
856                                 if t.type is TYPE_AFE_MARKER
857                                         break
858                                 if t.name is subject
859                                         fe = t
860                                         break
861                         # If there is no such element, then abort these steps and instead
862                         # act as described in the "any other end tag" entry above.
863                         if fe is null
864                                 debug_log "aaa: fe not found in afe"
865                                 in_body_any_other_end_tag subject
866                                 return
867                         # 6. If formatting element is not in the stack of open elements,
868                         # then this is a parse error; remove the element from the list, and
869                         # abort these steps.
870                         in_open_els = false
871                         for t, fe_of_open_els in open_els
872                                 if t is fe
873                                         in_open_els = true
874                                         break
875                         unless in_open_els
876                                 debug_log "aaa: fe not found in open_els"
877                                 parse_error()
878                                 # "remove it from the list" must mean afe, since it's not in open_els
879                                 afe.splice fe_of_afe, 1
880                                 return
881                         # 7. If formatting element is in the stack of open elements, but
882                         # the element is not in scope, then this is a parse error; abort
883                         # these steps.
884                         unless el_is_in_scope fe
885                                 debug_log "aaa: fe not in scope"
886                                 parse_error()
887                                 return
888                         # 8. If formatting element is not the current node, this is a parse
889                         # error. (But do not abort these steps.)
890                         unless open_els[0] is fe
891                                 parse_error()
892                                 # (deliberately no return: step 8 does not abort)
893                         # 9. Let furthest block be the topmost node in the stack of open
894                         # elements that is lower in the stack than formatting element, and
895                         # is an element in the special category. There might not be one.
896                         fb = null
897                         fb_of_open_els = null
898                         for t, i in open_els
899                                 if t is fe
900                                         break
901                                 if el_is_special t
902                                         fb = t
903                                         fb_of_open_els = i
904                                         # and continue, to see if there's one that's more "topmost"
905                         # 10. If there is no furthest block, then the UA must first pop all
906                         # the nodes from the bottom of the stack of open elements, from the
907                         # current node up to and including formatting element, then remove
908                         # formatting element from the list of active formatting elements,
909                         # and finally abort these steps.
910                         if fb is null
911                                 debug_log "aaa: no fb"
912                                 loop
913                                         t = open_els.shift()
914                                         if t is fe
915                                                 afe.splice fe_of_afe, 1
916                                                 return
917                         # 11. Let common ancestor be the element immediately above
918                         # formatting element in the stack of open elements.
919                         ca = open_els[fe_of_open_els + 1] # common ancestor
920
921                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
922                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
923                         bookmark = new_aaa_bookmark()
924                         for t, i in afe
925                                 if t is fe
926                                         afe.splice i, 0, bookmark
927                                         break
928                         node = last_node = fb
929                         inner = 0
930                         loop
931                                 inner += 1
932                                 # 3. Let node be the element immediately above node in the
933                                 # stack of open elements, or if node is no longer in the stack
934                                 # of open elements (e.g. because it got removed by this
935                                 # algorithm), the element that was immediately above node in
936                                 # the stack of open elements before node was removed.
937                                 node_next = null
938                                 for t, i in open_els
939                                         if t is node
940                                                 node_next = open_els[i + 1]
941                                                 break
942                                 node = node_next ? node_above
943                                 debug_log "inner loop #{inner}"
944                                 debug_log "tree: #{serialize_els doc.children, false, true}"
945                                 debug_log "open_els: #{serialize_els open_els, true, true}"
946                                 debug_log "afe: #{serialize_els afe, true, true}"
947                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
948                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
949                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
950                                 debug_log "node: #{node.serialize true, true}"
951                                 # NOTE: node_above is re-set below each time node is removed from, or replaced in, open_els
952
953                                 # 4. If node is formatting element, then go to the next step in
954                                 # the overall algorithm.
955                                 if node is fe
956                                         break
957                                 debug_log "the meat"
958                                 # 5. If inner loop counter is greater than three and node is in
959                                 # the list of active formatting elements, then remove node from
960                                 # the list of active formatting elements.
961                                 node_in_afe = false
962                                 for t, i in afe
963                                         if t is node
964                                                 if inner > 3
965                                                         afe.splice i, 1
966                                                         debug_log "max out inner"
967                                                 else
968                                                         node_in_afe = true
969                                                         debug_log "in afe"
970                                                 break
971                                 # 6. If node is not in the list of active formatting elements,
972                                 # then remove node from the stack of open elements and then go
973                                 # back to the step labeled inner loop.
974                                 unless node_in_afe
975                                         debug_log "not in afe"
976                                         for t, i in open_els
977                                                 if t is node
978                                                         node_above = open_els[i + 1]
979                                                         open_els.splice i, 1
980                                                         break
981                                         continue
982                                 debug_log "the bones"
983                                 # 7. create an element for the token for which the element node
984                                 # was created, in the HTML namespace, with common ancestor as
985                                 # the intended parent; replace the entry for node in the list
986                                 # of active formatting elements with an entry for the new
987                                 # element, replace the entry for node in the stack of open
988                                 # elements with an entry for the new element, and let node be
989                                 # the new element.
990                                 new_node = token_to_element node.token, NS_HTML, ca
991                                 for t, i in afe
992                                         if t is node
993                                                 afe[i] = new_node
994                                                 debug_log "replaced in afe"
995                                                 break
996                                 for t, i in open_els
997                                         if t is node
998                                                 node_above = open_els[i + 1]
999                                                 open_els[i] = new_node
1000                                                 debug_log "replaced in open_els"
1001                                                 break
1002                                 node = new_node
1003                                 # 8. If last node is furthest block, then move the
1004                                 # aforementioned bookmark to be immediately after the new node
1005                                 # in the list of active formatting elements.
1006                                 if last_node is fb
1007                                         for t, i in afe
1008                                                 if t is bookmark
1009                                                         afe.splice i, 1
1010                                                         debug_log "removed bookmark"
1011                                                         break
1012                                         for t, i in afe
1013                                                 if t is node
1014                                                         # "after" means lower index in this array
1015                                                         afe.splice i, 0, bookmark # bookmark takes index i, node shifts to i + 1
1016                                                         debug_log "placed bookmark after node"
1017                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1018                                                         break
1019                                 # 9. Insert last node into node, first removing it from its
1020                                 # previous parent node if any.
1021                                 if last_node.parent?
1022                                         debug_log "last_node has parent"
1023                                         for c, i in last_node.parent.children
1024                                                 if c is last_node
1025                                                         debug_log "removing last_node from parent"
1026                                                         last_node.parent.children.splice i, 1
1027                                                         break
1028                                 node.children.push last_node
1029                                 last_node.parent = node
1030                                 # 10. Let last node be node.
1031                                 last_node = node
1032                                 debug_log "at last"
1033                                 # 11. Return to the step labeled inner loop.
1034                         # 14. Insert whatever last node ended up being in the previous step
1035                         # at the appropriate place for inserting a node, but using common
1036                         # ancestor as the override target.
1037
1038                         # In the case where fe is immediately followed by fb:
1039                         #   * inner loop exits out early (node==fe)
1040                         #   * last_node is fb
1041                         #   * last_node is still in the tree (not a duplicate)
1042                         if last_node.parent?
1043                                 debug_log "FEFIRST? last_node has parent"
1044                                 for c, i in last_node.parent.children
1045                                         if c is last_node
1046                                                 debug_log "removing last_node from parent"
1047                                                 last_node.parent.children.splice i, 1
1048                                                 break
1049
1050                         debug_log "after aaa inner loop"
1051                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1052                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1053                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1054                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1055                         debug_log "tree: #{serialize_els doc.children, false, true}"
1056
1057                         debug_log "insert"
1058
1059
1060                         # can't use standard insert token thing, because it's already in
1061                         # open_els and must stay at its current position in open_els
1062                         dest = adjusted_insertion_location ca
1063                         dest[0].children.splice dest[1], 0, last_node
1064                         last_node.parent = dest[0]
1065
1066
1067                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1068                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1069                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1070                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1071                         debug_log "tree: #{serialize_els doc.children, false, true}"
1072
1073                         # 15. Create an element for the token for which formatting element
1074                         # was created, in the HTML namespace, with furthest block as the
1075                         # intended parent.
1076                         new_element = token_to_element fe.token, NS_HTML, fb
1077                         # 16. Take all of the child nodes of furthest block and append them
1078                         # to the element created in the last step.
1079                         while fb.children.length
1080                                 t = fb.children.shift()
1081                                 t.parent = new_element
1082                                 new_element.children.push t
1083                         # 17. Append that new element to furthest block.
1084                         new_element.parent = fb
1085                         fb.children.push new_element
1086                         # 18. Remove formatting element from the list of active formatting
1087                         # elements, and insert the new element into the list of active
1088                         # formatting elements at the position of the aforementioned
1089                         # bookmark.
1090                         for t, i in afe
1091                                 if t is fe
1092                                         afe.splice i, 1
1093                                         break
1094                         for t, i in afe
1095                                 if t is bookmark
1096                                         afe[i] = new_element
1097                                         break
1098                         # 19. Remove formatting element from the stack of open elements,
1099                         # and insert the new element into the stack of open elements
1100                         # immediately below the position of furthest block in that stack.
1101                         for t, i in open_els
1102                                 if t is fe
1103                                         open_els.splice i, 1
1104                                         break
1105                         for t, i in open_els
1106                                 if t is fb
1107                                         open_els.splice i, 0, new_element
1108                                         break
1109                         # 20. Jump back to the step labeled outer loop.
1110                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1111                         debug_log "tree: #{serialize_els doc.children, false, true}"
1112                         debug_log "open_els: #{serialize_els open_els, true, true}"
1113                         debug_log "afe: #{serialize_els afe, true, true}"
1114                 debug_log "AAA DONE" # NOTE(review): looks unreachable, the loop above is only ever left via return
1115
1116         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1117         close_p_element = ->
1118                 # Generate implied end tags (p excepted), report a parse error when
1119                 # the current node is then not a p, and pop nodes through the first p.
1120                 generate_implied_end_tags 'p'
1121                 parse_error() unless open_els[0].name is 'p'
1122                 while open_els.length > 1 # just in case: the last entry is never popped
1123                         break if open_els.shift().name is 'p'
1124                 return
1125         close_p_if_in_button_scope = ->
1126                 # A p is closed only when is_in_button_scope reports one.
1127                 close_p_element() if is_in_button_scope 'p'
1128
1129         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1130         # aka insert_a_character = (t) ->
1131         insert_character = (t) ->
1132                 # dest is a [parent, index] pair. A text node sitting right before
1133                 # that index absorbs t's text; otherwise t itself is inserted there.
1134                 # fixfull check for Document node
1135                 dest = adjusted_insertion_location()
1136                 if dest[1] > 0 and (prev = dest[0].children[dest[1] - 1]).type is TYPE_TEXT
1137                         prev.text += t.text
1138                         return
1139                 dest[0].children.splice dest[1], 0, t
1140
1141
1142         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1143         process_token = (t) ->
1144                 # Tree construction dispatcher. Tokens that must be treated as HTML
1145                 # content go to the current insertion mode (ins_mode); everything
1146                 # else goes to in_foreign_content. No current node counts as HTML.
1147                 acn = adjusted_current_node()
1148                 if (not acn?) or acn.namespace is NS_HTML
1149                         ins_mode t
1150                         return
1151                 if is_mathml_text_integration_point(acn)
1152                         if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark') # spec: start tags that are NEITHER mglyph NOR malignmark
1153                                 ins_mode t
1154                                 return
1155                         if t.type is TYPE_TEXT
1156                                 ins_mode t
1157                                 return
1158                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1159                         ins_mode t
1160                         return
1161                 if is_html_integration acn
1162                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1163                                 ins_mode t
1164                                 return
1165                 if t.type is TYPE_EOF
1166                         ins_mode t
1167                         return
1168                 in_foreign_content t
1169                 return
1170
1171         # 8.2.5.1
1172         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1173         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1174         adjusted_insertion_location = (override_target = null) ->
1175                 # 1. If there was an override target specified, then let target be the
1176                 # override target.
1177                 if override_target?
1178                         target = override_target
1179                 else # Otherwise, let target be the current node.
1180                         target = open_els[0]
1181                 # 2. Determine the adjusted insertion location using the first matching
1182                 # steps from the following list:
1183                 #
1184                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1185                 # thead, or tr element Foster parenting happens when content is
1186                 # misnested in tables.
1187                 if flag_foster_parenting and foster_parenting_targets[target.name]
1188                         loop # once. this is here so we can ``break`` to "abort these substeps"
1189                                 # 1. Let last template be the last template element in the
1190                                 # stack of open elements, if any.
1191                                 last_template = null
1192                                 last_template_i = null
1193                                 for el, i in open_els
1194                                         if el.name is 'template' and el.namespace is NS_HTML
1195                                                 last_template = el
1196                                                 last_template_i = i
1197                                                 break
1198                                 # 2. Let last table be the last table element in the stack of
1199                                 # open elements, if any.
1200                                 last_table = null
1201                                 last_table_i
1202                                 for el, i in open_els
1203                                         if el.name is 'table' and el.namespace is NS_HTML
1204                                                 last_table = el
1205                                                 last_table_i = i
1206                                                 break
1207                                 # 3. If there is a last template and either there is no last
1208                                 # table, or there is one, but last template is lower (more
1209                                 # recently added) than last table in the stack of open
1210                                 # elements, then: let adjusted insertion location be inside
1211                                 # last template's template contents, after its last child (if
1212                                 # any), and abort these substeps.
1213                                 if last_template and (last_table is null or last_template_i < last_table_i)
1214                                         target = last_template # fixfull should be it's contents
1215                                         target_i = target.children.length
1216                                         break
1217                                 # 4. If there is no last table, then let adjusted insertion
1218                                 # location be inside the first element in the stack of open
1219                                 # elements (the html element), after its last child (if any),
1220                                 # and abort these substeps. (fragment case)
1221                                 if last_table is null
1222                                         # this is odd
1223                                         target = open_els[open_els.length - 1]
1224                                         target_i = target.children.length
1225                                         break
1226                                 # 5. If last table has a parent element, then let adjusted
1227                                 # insertion location be inside last table's parent element,
1228                                 # immediately before last table, and abort these substeps.
1229                                 if last_table.parent?
1230                                         for c, i in last_table.parent.children
1231                                                 if c is last_table
1232                                                         target = last_table.parent
1233                                                         target_i = i
1234                                                         break
1235                                         break
1236                                 # 6. Let previous element be the element immediately above last
1237                                 # table in the stack of open elements.
1238                                 #
1239                                 # huh? how could it not have a parent?
1240                                 previous_element = open_els[last_table_i + 1]
1241                                 # 7. Let adjusted insertion location be inside previous
1242                                 # element, after its last child (if any).
1243                                 target = previous_element
1244                                 target_i = target.children.length
1245                                 # Note: These steps are involved in part because it's possible
1246                                 # for elements, the table element in this case in particular,
1247                                 # to have been moved by a script around in the DOM, or indeed
1248                                 # removed from the DOM entirely, after the element was inserted
1249                                 # by the parser.
1250                                 break # don't really loop
1251                 else
1252                         # Otherwise Let adjusted insertion location be inside target, after
1253                         # its last child (if any).
1254                         target_i = target.children.length
1255
1256                 # 3. If the adjusted insertion location is inside a template element,
1257                 # let it instead be inside the template element's template contents,
1258                 # after its last child (if any).
1259                 # fixfull (template)
1260
1261                 # 4. Return the adjusted insertion location.
1262                 return [target, target_i]
1263
1264         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1265         # aka create_an_element_for_token
1266         token_to_element = (t, namespace, intended_parent) ->
1267                 # convert attributes into a hash
1268                 attrs = {}
1269                 for a in t.attrs_a
1270                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1271                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1272
1273                 # TODO 2. If the newly created element has an xmlns attribute in the
1274                 # XMLNS namespace whose value is not exactly the same as the element's
1275                 # namespace, that is a parse error. Similarly, if the newly created
1276                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1277                 # value is not the XLink Namespace, that is a parse error.
1278
1279                 # fixfull: the spec says stuff about form pointers and ownerDocument
1280
1281                 return el
1282
1283         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1284         insert_foreign_element = (token, namespace) ->
1285                 ail = adjusted_insertion_location()
1286                 ail_el = ail[0]
1287                 ail_i = ail[1]
1288                 el = token_to_element token, namespace, ail_el
1289                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1290                 el.parent = ail_el
1291                 ail_el.children.splice ail_i, 0, el
1292                 open_els.unshift el
1293                 return el
1294         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1295         insert_html_element = (token) ->
1296                 insert_foreign_element token, NS_HTML
1297
1298         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1299         # position should be [node, index_within_children]
1300         insert_comment = (t, position = null) ->
1301                 position ?= adjusted_insertion_location()
1302                 position[0].children.splice position[1], 0, t
1303
1304         # 8.2.5.2
1305         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1306         parse_generic_raw_text = (t) ->
1307                 insert_html_element t
1308                 tok_state = tok_state_rawtext
1309                 original_ins_mode = ins_mode
1310                 ins_mode = ins_mode_text
1311         parse_generic_rcdata_text = (t) ->
1312                 insert_html_element t
1313                 tok_state = tok_state_rcdata
1314                 original_ins_mode = ins_mode
1315                 ins_mode = ins_mode_text
1316
1317         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1318         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1319         generate_implied_end_tags = (except = null) ->
1320                 while end_tag_implied[open_els[0].name] and open_els[0].name isnt except
1321                         open_els.shift()
1322
1323         # 8.2.5.4 The rules for parsing tokens in HTML content
1324         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1325
1326         # 8.2.5.4.1 The "initial" insertion mode
1327         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1328         ins_mode_initial = (t) ->
1329                 if is_space_tok t
1330                         return
1331                 if t.type is TYPE_COMMENT
1332                         # ?fixfull
1333                         doc.children.push t
1334                         return
1335                 if t.type is TYPE_DOCTYPE
1336                         # FIXME check identifiers, set quirks, etc
1337                         # fixfull
1338                         doc.children.push t
1339                         ins_mode = ins_mode_before_html
1340                         return
1341                 # Anything else
1342                 #fixfull (iframe, quirks)
1343                 ins_mode = ins_mode_before_html
1344                 process_token t
1345                 return
1346
1347         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1348         ins_mode_before_html = (t) ->
1349                 if t.type is TYPE_DOCTYPE
1350                         parse_error()
1351                         return
1352                 if t.type is TYPE_COMMENT
1353                         doc.children.push t
1354                         return
1355                 if is_space_tok t
1356                         return
1357                 if t.type is TYPE_START_TAG and t.name is 'html'
1358                         el = token_to_element t, NS_HTML, doc
1359                         doc.children.push el
1360                         open_els.unshift(el)
1361                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1362                         ins_mode = ins_mode_before_head
1363                         return
1364                 if t.type is TYPE_END_TAG
1365                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1366                                 # fall through to "anything else"
1367                         else
1368                                 parse_error()
1369                                 return
1370                 # Anything else
1371                 html_tok = new_open_tag 'html'
1372                 el = token_to_element html_tok, NS_HTML, doc
1373                 doc.children.push el
1374                 open_els.unshift el
1375                 # ?fixfull browsing context
1376                 ins_mode = ins_mode_before_head
1377                 process_token t
1378                 return
1379
1380         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1381         ins_mode_before_head = (t) ->
1382                 if is_space_tok t
1383                         return
1384                 if t.type is TYPE_COMMENT
1385                         insert_comment t
1386                         return
1387                 if t.type is TYPE_DOCTYPE
1388                         parse_error()
1389                         return
1390                 if t.type is TYPE_START_TAG and t.name is 'html'
1391                         ins_mode_in_body t
1392                         return
1393                 if t.type is TYPE_START_TAG and t.name is 'head'
1394                         el = insert_html_element t
1395                         head_element_pointer = el
1396                         ins_mode = ins_mode_in_head
1397                 if t.type is TYPE_END_TAG
1398                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1399                                 # fall through to Anything else below
1400                         else
1401                                 parse_error()
1402                                 return
1403                 # Anything else
1404                 head_tok = new_open_tag 'head'
1405                 el = insert_html_element head_tok
1406                 head_element_pointer = el
1407                 ins_mode = ins_mode_in_head
1408                 process_token t
1409
1410         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1411         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1412                 open_els.shift() # spec says this will be a 'head' node
1413                 ins_mode = ins_mode_after_head
1414                 process_token t
1415         ins_mode_in_head = (t) ->
1416                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1417                         insert_character t
1418                         return
1419                 if t.type is TYPE_COMMENT
1420                         insert_comment t
1421                         return
1422                 if t.type is TYPE_DOCTYPE
1423                         parse_error()
1424                         return
1425                 if t.type is TYPE_START_TAG and t.name is 'html'
1426                         ins_mode_in_body t
1427                         return
1428                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1429                         el = insert_html_element t
1430                         open_els.shift()
1431                         t.acknowledge_self_closing()
1432                         return
1433                 if t.type is TYPE_START_TAG and t.name is 'meta'
1434                         el = insert_html_element t
1435                         open_els.shift()
1436                         t.acknowledge_self_closing()
1437                         # fixfull encoding stuff
1438                         return
1439                 if t.type is TYPE_START_TAG and t.name is 'title'
1440                         parse_generic_rcdata_text t
1441                         return
1442                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or (t.name is 'noframes' or t.name is 'style'))
1443                         parse_generic_raw_text t
1444                         return
1445                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1446                         insert_html_element t
1447                         ins_mode = ins_mode_in_head_noscript
1448                         return
1449                 if t.type is TYPE_START_TAG and t.name is 'script'
1450                         ail = adjusted_insertion_location()
1451                         el = token_to_element t, NS_HTML, ail
1452                         el.flag 'parser-inserted', true
1453                         # fixfull frament case
1454                         ail[0].children.splice ail[1], 0, el
1455                         open_els.unshift el
1456                         tok_state = tok_state_script_data
1457                         original_ins_mode = ins_mode # make sure orig... is defined
1458                         ins_mode = ins_mode_text
1459                         return
1460                 if t.type is TYPE_END_TAG and t.name is 'head'
1461                         open_els.shift() # will be a head element... spec says so
1462                         ins_mode = ins_mode_after_head
1463                         return
1464                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1465                         ins_mode_in_head_else t
1466                         return
1467                 if t.type is TYPE_START_TAG and t.name is 'template'
1468                         insert_html_element t
1469                         afe_push_marker()
1470                         flag_frameset_ok = false
1471                         ins_mode = ins_mode_in_template
1472                         template_ins_modes.unshift ins_mode_in_template
1473                         return
1474                 if t.type is TYPE_END_TAG and t.name is 'template'
1475                         if template_tag_is_open()
1476                                 generate_implied_end_tags
1477                                 if open_els[0].name isnt 'template'
1478                                         parse_error()
1479                                 loop
1480                                         el = open_els.shift()
1481                                         if el.name is 'template'
1482                                                 break
1483                                 clear_afe_to_marker()
1484                                 template_ins_modes.shift()
1485                                 reset_ins_mode()
1486                         else
1487                                 parse_error()
1488                         return
1489                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1490                         parse_error()
1491                         return
1492                 ins_mode_in_head_else t
1493
1494         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1495         ins_mode_in_head_noscript_else = (t) ->
1496                 parse_error()
1497                 open_els.shift()
1498                 ins_mode = ins_mode_in_head
1499                 process_token t
1500         ins_mode_in_head_noscript = (t) ->
1501                 if t.type is TYPE_DOCTYPE
1502                         parse_error()
1503                         return
1504                 if t.type is TYPE_START_TAG
1505                         ins_mode_in_body t
1506                         return
1507                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1508                         open_els.shift()
1509                         ins_mode = ins_mode_in_head
1510                         return
1511                 if (t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\u000a" or t.text is "\u000c" or t.text is "\u000d" or t.text is ' ')) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1512                         ins_mode_in_head t
1513                         return
1514                 if t.type is TYPE_END_TAG and t.name is 'br'
1515                         ins_mode_in_head_noscript_else t
1516                         return
1517                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1518                         parse_error()
1519                         return
1520                 # Anything else
1521                 ins_mode_in_head_noscript_else t
1522                 return
1523
1524
1525
1526         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1527         ins_mode_after_head_else = (t) ->
1528                 body_tok = new_open_tag 'body'
1529                 insert_html_element body_tok
1530                 ins_mode = ins_mode_in_body
1531                 process_token t
1532                 return
1533         ins_mode_after_head = (t) ->
1534                 if is_space_tok t
1535                         insert_character t
1536                         return
1537                 if t.type is TYPE_COMMENT
1538                         insert_comment t
1539                         return
1540                 if t.type is TYPE_DOCTYPE
1541                         parse_error()
1542                         return
1543                 if t.type is TYPE_START_TAG and t.name is 'html'
1544                         ins_mode_in_body t
1545                         return
1546                 if t.type is TYPE_START_TAG and t.name is 'body'
1547                         insert_html_element t
1548                         flag_frameset_ok = false
1549                         ins_mode = ins_mode_in_body
1550                         return
1551                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1552                         insert_html_element t
1553                         ins_mode = ins_mode_in_frameset
1554                         return
1555                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1556                         parse_error()
1557                         open_els.unshift head_element_pointer
1558                         ins_mode_in_head t
1559                         for el, i of open_els
1560                                 if el is head_element_pointer
1561                                         open_els.splice i, 1
1562                                         return
1563                         console.log "warning: 23904 couldn't find head element in open_els"
1564                         return
1565                 if t.type is TYPE_END_TAG and t.name is 'template'
1566                         ins_mode_in_head t
1567                         return
1568                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1569                         ins_mode_after_head_else t
1570                         return
1571                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1572                         parse_error()
1573                         return
1574                 # Anything else
1575                 ins_mode_after_head_else t
1576
1577         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1578         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1579                 for el, i in open_els
1580                         if el.namespace is NS_HTML and el.name is name
1581                                 generate_implied_end_tags name # arg is exception
1582                                 parse_error() unless i is 0
1583                                 while i >= 0
1584                                         open_els.shift()
1585                                         i -= 1
1586                                 return
1587                         if special_elements[el.name] is el.namespace
1588                                 parse_error()
1589                                 return
1590                 return
1591         ins_mode_in_body = (t) ->
1592                 if t.type is TYPE_TEXT and t.text is "\u0000"
1593                         parse_error()
1594                         return
1595                 if is_space_tok t
1596                         reconstruct_afe()
1597                         insert_character t
1598                         return
1599                 if t.type is TYPE_TEXT
1600                         reconstruct_afe()
1601                         insert_character t
1602                         flag_frameset_ok = false
1603                         return
1604                 if t.type is TYPE_COMMENT
1605                         insert_comment t
1606                         return
1607                 if t.type is TYPE_DOCTYPE
1608                         parse_error()
1609                         return
1610                 if t.type is TYPE_START_TAG and t.name is 'html'
1611                         parse_error()
1612                         return if template_tag_is_open()
1613                         root_attrs = open_els[open_els.length - 1].attrs
1614                         for a of t.attrs_a
1615                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1616                         return
1617
1618                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1619                         ins_mode_in_head t
1620                         return
1621                 if t.type is TYPE_START_TAG and t.name is 'body'
1622                         parse_error()
1623                         return if open_els.length < 2
1624                         second = open_els[open_els.length - 2]
1625                         return unless second.ns is NS_HTML
1626                         return unless second.name is 'body'
1627                         return if template_tag_is_open()
1628                         frameset_ok_flag = false
1629                         for a of t.attrs_a
1630                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1631                         return
1632                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1633                         parse_error()
1634                         return if open_els.length < 2
1635                         second_i = open_els.length - 2
1636                         second = open_els[second_i]
1637                         return unless second.ns is NS_HTML
1638                         return unless second.name is 'body'
1639                         flag_frameset_ok = false
1640                         if second.parent?
1641                                 for el, i in second.parent.children
1642                                         if el is second
1643                                                 second.parent.children.splice i, 1
1644                                                 break
1645                         open_els.splice second_i, 1
1646                         # pop everything except the "root html element"
1647                         while open_els.length > 1
1648                                 open_els.shift()
1649                         insert_html_element t
1650                         ins_mode = ins_mode_in_frameset
1651                         return
1652                 if t.type is TYPE_EOF
1653                         ok_tags = {
1654                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1655                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1656                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1657                         }
1658                         for el in open_els
1659                                 unless ok_tags[t.name] is el.namespace
1660                                         parse_error()
1661                                         break
1662                         if template_ins_modes.length > 0
1663                                 ins_mode_in_template t
1664                         else
1665                                 stop_parsing()
1666                         return
1667                 if t.type is TYPE_END_TAG and t.name is 'body'
1668                         unless is_in_scope 'body'
1669                                 parse_error()
1670                                 return
1671                         ok_tags = {
1672                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1673                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1674                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1675                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1676                                 html:NS_HTML
1677                         }
1678                         for el in open_els
1679                                 unless ok_tags[t.name] is el.namespace
1680                                         parse_error()
1681                                         break
1682                         ins_mode = ins_mode_after_body
1683                         return
1684                 if t.type is TYPE_END_TAG and t.name is 'html'
1685                         unless is_in_scope 'body'
1686                                 parse_error()
1687                                 return
1688                         ok_tags = {
1689                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1690                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1691                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1692                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1693                                 html:NS_HTML
1694                         }
1695                         for el in open_els
1696                                 unless ok_tags[t.name] is el.namespace
1697                                         parse_error()
1698                                         break
1699                         ins_mode = ins_mode_after_body
1700                         process_token t
1701                         return
1702                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1703                         close_p_if_in_button_scope()
1704                         insert_html_element t
1705                         return
1706                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1707                         close_p_if_in_button_scope()
1708                         if h_tags[open_els[0]] is NS_HTML
1709                                 parse_error()
1710                                 open_els.shift()
1711                         insert_html_element t
1712                         return
1713                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1714                         close_p_if_in_button_scope()
1715                         insert_html_element t
1716                         # spec: If the next token is a "LF" (U+000A) character token, then
1717                         # ignore that token and move on to the next one. (Newlines at the
1718                         # start of pre blocks are ignored as an authoring convenience.)
1719                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1720                                 cur += 1
1721                         flag_frameset_ok = false
1722                         return
1723                 if t.type is TYPE_START_TAG and t.name is 'form'
1724                         unless form_element_pointer is null or template_tag_is_open()
1725                                 parse_error()
1726                                 return
1727                         close_p_if_in_button_scope()
1728                         el = insert_html_element t
1729                         unless template_tag_is_open()
1730                                 form_element_pointer = el
1731                         return
1732                 if t.type is TYPE_START_TAG and t.name is 'li'
1733                         flag_frameset_ok = false
1734                         for node in open_els
1735                                 if node.name is 'li' and node.namespace is NS_HTML
1736                                         generate_implied_end_tags 'li' # arg is exception
1737                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1738                                                 parse_error()
1739                                         loop
1740                                                 el = open_els.shift()
1741                                                 if el.name is 'li' and el.namespace is NS_HTML
1742                                                         break
1743                                         break
1744                                 if el_is_special_not_adp node
1745                                                 break
1746                         close_p_if_in_button_scope()
1747                         insert_html_element t
1748                         return
1749                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1750                         flag_frameset_ok = false
1751                         for node in open_els
1752                                 if node.name is 'dd' and node.namespace is NS_HTML
1753                                         generate_implied_end_tags 'dd' # arg is exception
1754                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1755                                                 parse_error()
1756                                         loop
1757                                                 el = open_els.shift()
1758                                                 if el.name is 'dd' and el.namespace is NS_HTML
1759                                                         break
1760                                         break
1761                                 if node.name is 'dt' and node.namespace is NS_HTML
1762                                         generate_implied_end_tags 'dt' # arg is exception
1763                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1764                                                 parse_error()
1765                                         loop
1766                                                 el = open_els.shift()
1767                                                 if el.name is 'dt' and el.namespace is NS_HTML
1768                                                         break
1769                                         break
1770                                 if el_is_special_not_adp node
1771                                         break
1772                         close_p_if_in_button_scope()
1773                         insert_html_element t
1774                         return
1775                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1776                         close_p_if_in_button_scope()
1777                         insert_html_element t
1778                         tok_state = tok_state_plaintext
1779                         return
1780                 if t.type is TYPE_START_TAG and t.name is 'button'
1781                         if is_in_scope 'button', NS_HTML
1782                                 parse_error()
1783                                 generate_implied_end_tags()
1784                                 loop
1785                                         el = open_els.shift()
1786                                         if el.name is 'button' and el.namespace is NS_HTML
1787                                                 break
1788                         reconstruct_afe()
1789                         insert_html_element t
1790                         flag_frameset_ok = false
1791                         return
1792                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1793                         unless is_in_scope t.name, NS_HTML
1794                                 parse_error()
1795                                 return
1796                         generate_implied_end_tags()
1797                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1798                                 parse_error()
1799                         loop
1800                                 el = open_els.shift()
1801                                 if el.name is t.name and el.namespace is NS_HTML
1802                                         return
1803                         return
1804                 if t.type is TYPE_END_TAG and t.name is 'form'
1805                         unless template_tag_is_open()
1806                                 node = form_element_pointer
1807                                 form_element_pointer = null
1808                                 if node is null or not el_is_in_scope node
1809                                         parse_error()
1810                                         return
1811                                 generate_implied_end_tags()
1812                                 if open_els[0] isnt node
1813                                         parse_error()
1814                                 for el, i in open_els
1815                                         if el is node
1816                                                 open_els.splice i, 1
1817                                                 break
1818                         else
1819                                 unless is_in_scope 'form', NS_HTML
1820                                         parse_error()
1821                                         return
1822                                 generate_implied_end_tags()
1823                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1824                                         parse_error()
1825                                 loop
1826                                         el = open_els.shift()
1827                                         if el.name is 'form' and el.namespace is NS_HTML
1828                                                 break
1829                         return
1830                 if t.type is TYPE_END_TAG and t.name is 'p'
1831                         unless is_in_button_scope 'p', NS_HTML
1832                                 parse_error()
1833                                 insert_html_element new_open_tag 'p'
1834                         close_p_element()
1835                         return
1836                 if t.type is TYPE_END_TAG and t.name is 'li'
1837                         unless is_in_li_scope 'li', NS_HTML
1838                                 parse_error()
1839                                 return
1840                         generate_implied_end_tags 'li' # arg is exception
1841                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1842                                 parse_error()
1843                         loop
1844                                 el = open_els.shift()
1845                                 if el.name is 'li' and el.namespace is NS_HTML
1846                                         break
1847                         return
1848                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1849                         unless is_in_scope t.name, NS_HTML
1850                                 parse_error()
1851                                 return
1852                         generate_implied_end_tags t.name # arg is exception
1853                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1854                                 parse_error()
1855                         loop
1856                                 el = open_els.shift()
1857                                 if el.name is t.name and el.namespace is NS_HTML
1858                                         break
1859                         return
1860                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1861                         h_in_scope = false
1862                         for el in open_els
1863                                 if h_tags[el.name] is el.namespace
1864                                         h_in_scope = true
1865                                         break
1866                                 if standard_scopers[el.name] is el.namespace
1867                                         break
1868                         unless h_in_scope
1869                                 parse_error()
1870                                 return
1871                         generate_implied_end_tags()
1872                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1873                                 parse_error()
1874                         loop
1875                                 el = open_els.shift()
1876                                 if h_tags[el.name] is el.namespace
1877                                         break
1878                         return
1879                 # deep breath!
1880                 if t.type is TYPE_START_TAG and t.name is 'a'
1881                         # If the list of active formatting elements contains an a element
1882                         # between the end of the list and the last marker on the list (or
1883                         # the start of the list if there is no marker on the list), then
1884                         # this is a parse error; run the adoption agency algorithm for the
1885                         # tag name "a", then remove that element from the list of active
1886                         # formatting elements and the stack of open elements if the
1887                         # adoption agency algorithm didn't already remove it (it might not
1888                         # have if the element is not in table scope).
1889                         found = false
1890                         for el in afe
1891                                 if el.type is TYPE_AFE_MARKER
1892                                         break
1893                                 if el.name is 'a' and el.namespace is NS_HTML
1894                                         found = el
1895                         if found?
1896                                 parse_error()
1897                                 adoption_agency 'a'
1898                                 for el, i in afe
1899                                         if el is found
1900                                                 afe.splice i, 1
1901                                 for el, i in open_els
1902                                         if el is found
1903                                                 open_els.splice i, 1
1904                         reconstruct_afe()
1905                         el = insert_html_element t
1906                         afe_push el
1907                         return
1908                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1909                         reconstruct_afe()
1910                         el = insert_html_element t
1911                         afe_push el
1912                         return
1913                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1914                         reconstruct_afe()
1915                         el = insert_html_element t
1916                         afe_push el
1917                         return
1918                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1919                         adoption_agency t.name
1920                         return
1921                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1922                         reconstruct_afe()
1923                         insert_html_element t
1924                         afe_push_marker()
1925                         flag_frameset_ok = false
1926                         return
1927                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1928                         unless is_in_scope t.name, NS_HTML
1929                                 parse_error()
1930                                 return
1931                         generate_implied_end_tags()
1932                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1933                                 parse_error()
1934                         loop
1935                                 el = open_els.shift()
1936                                 if el.name is t.name and el.namespace is NS_HTML
1937                                         break
1938                         clear_afe_to_marker()
1939                         return
1940                 if t.type is TYPE_START_TAG and t.name is 'table'
1941                         close_p_if_in_button_scope() # fixfull quirksmode thing
1942                         insert_html_element t
1943                         flag_frameset_ok = false
1944                         ins_mode = ins_mode_in_table
1945                         return
1946                 if t.type is TYPE_END_TAG and t.name is 'br'
1947                         parse_error()
1948                         t.type is TYPE_START_TAG
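1949                         # intended to fall through to the 'br' start tag branch below; NOTE(review): the line above uses `is` (a comparison, not an assignment), so t.type is never actually changed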
1950                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
1951                         reconstruct_afe()
1952                         insert_html_element t
1953                         open_els.shift()
1954                         t.acknowledge_self_closing()
1955                         flag_frameset_ok = false
1956                         return
1957                 if t.type is TYPE_START_TAG and t.name is 'input'
1958                         reconstruct_afe()
1959                         insert_html_element t
1960                         open_els.shift()
1961                         t.acknowledge_self_closing()
1962                         unless is_input_hidden_tok t
1963                                 flag_frameset_ok = false
1964                         return
1965                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
1966                         insert_html_element t
1967                         open_els.shift()
1968                         t.acknowledge_self_closing()
1969                         return
1970                 if t.type is TYPE_START_TAG and t.name is 'hr'
1971                         close_p_if_in_button_scope()
1972                         insert_html_element t
1973                         open_els.shift()
1974                         t.acknowledge_self_closing()
1975                         flag_frameset_ok = false
1976                         return
1977                 if t.type is TYPE_START_TAG and t.name is 'image'
1978                         parse_error()
1979                         t.name = 'img'
1980                         process_token t
1981                         return
1982                 if t.type is TYPE_START_TAG and t.name is 'isindex'
1983                         parse_error()
1984                         if template_tag_is_open() is false and form_element_pointer isnt null
1985                                 return
1986                         t.acknowledge_self_closing()
1987                         flag_frameset_ok = false
1988                         close_p_if_in_button_scope()
1989                         el = insert_html_element new_open_tag 'form'
1990                         unless template_tag_is_open()
1991                                 form_element_pointer = el
1992                         for a in t.attrs_a
1993                                 if a[0] is 'action'
1994                                         el.attrs['action'] = a[1]
1995                                         break
1996                         insert_html_element new_open_tag 'hr'
1997                         open_els.shift()
1998                         reconstruct_afe()
1999                         insert_html_element new_open_tag 'label'
2000                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2001                         input_el = new_open_tag 'input'
2002                         prompt = null
2003                         for a in t.attrs_a
2004                                 if a[0] is 'prompt'
2005                                         prompt = a[1]
2006                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2007                                         input_el.attrs_a.push [a[0], a[1]]
2008                         input_el.attrs_a.push ['name', 'isindex']
2009                         # fixfull this next bit is in english... internationalize?
2010                         prompt ?= "This is a searchable index. Enter search keywords: "
2011                         insert_character new_character_token prompt # fixfull split
2012                         # TODO submit typo "balue" in spec
2013                         insert_html_element input_el
2014                         open_els.shift()
2015                         # insert_character '' # you can put chars here if prompt attr missing
2016                         open_els.shift()
2017                         insert_html_element new_open_tag 'hr'
2018                         open_els.shift()
2019                         open_els.shift()
2020                         unless template_tag_is_open()
2021                                 form_element_pointer = null
2022                         return
2023                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2024                         insert_html_element t
2025                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2026                                 cur += 1
2027                         tok_state = tok_state_rcdata
2028                         original_ins_mode = ins_mode
2029                         flag_frameset_ok = false
2030                         ins_mode = ins_mode_text
2031                         return
2032                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2033                         close_p_if_in_button_scope()
2034                         reconstruct_afe()
2035                         flag_frameset_ok = false
2036                         parse_generic_raw_text t
2037                         return
2038                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2039                         flag_frameset_ok = false
2040                         parse_generic_raw_text t
2041                         return
2042                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2043                         parse_generic_raw_text t
2044                         return
2045                 if t.type is TYPE_START_TAG and t.name is 'select'
2046                         reconstruct_afe()
2047                         insert_html_element t
2048                         flag_frameset_ok = false
2049                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2050                                 ins_mode = ins_mode_in_select_in_table
2051                         else
2052                                 ins_mode = ins_mode_in_select
2053                         return
2054                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2055                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2056                                 open_els.shift()
2057                         reconstruct_afe()
2058                         insert_html_element t
2059                         return
2060                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2061                         if is_in_scope 'ruby', NS_HTML
2062                                 generate_implied_end_tags()
2063                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2064                                         parse_error()
2065                         insert_html_element t
2066                         return
2067                 if t.type is TYPE_START_TAG and t.name is 'rt'
2068                         if is_in_scope 'ruby', NS_HTML
2069                                 generate_implied_end_tags 'rtc' # arg is exception
2070                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2071                                         parse_error()
2072                         insert_html_element t
2073                         return
2074                 if t.type is TYPE_START_TAG and t.name is 'math'
2075                         reconstruct_afe()
2076                         adjust_mathml_attributes t
2077                         adjust_foreign_attributes t
2078                         insert_foreign_element t, NS_MATHML
2079                         if t.flag 'self-closing'
2080                                 open_els.shift()
2081                                 t.acknowledge_self_closing()
2082                         return
2083                 if t.type is TYPE_START_TAG and t.name is 'svg'
2084                         reconstruct_afe()
2085                         adjust_svg_attributes t
2086                         adjust_foreign_attributes t
2087                         insert_foreign_element t, NS_SVG
2088                         if t.flag 'self-closing'
2089                                 open_els.shift()
2090                                 t.acknowledge_self_closing()
2091                         return
2092                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2093                         parse_error()
2094                         return
2095                 if t.type is TYPE_START_TAG # any other start tag
2096                         reconstruct_afe()
2097                         insert_html_element t
2098                         return
2099                 if t.type is TYPE_END_TAG # any other end tag
2100                         in_body_any_other_end_tag t.name
2101                         return
2102                 return
2103
2104         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2105         ins_mode_text = (t) ->
2106                 # "text" insertion mode. A handler that switches to this mode first
2107                 # saves the mode to come back to in original_ins_mode (e.g. the
2108                 # textarea start tag handler earlier in this file).
2109                 switch t.type
2110                         when TYPE_TEXT
2111                                 insert_character t
2112                         when TYPE_EOF
2113                                 # input ended inside the element: pop it and hand the EOF
2114                                 # to the restored mode
2115                                 parse_error()
2116                                 open_els[0].flag 'already started', true if open_els[0].name is 'script'
2117                                 open_els.shift()
2118                                 ins_mode = original_ins_mode
2119                                 process_token t
2120                         when TYPE_END_TAG
2121                                 # fixfull the spec also wants </script> to run the script:
2122                                 # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2123                                 open_els.shift()
2124                                 ins_mode = original_ins_mode
2125                         else
2126                                 console.log 'warning: end of ins_mode_text reached'
2127                 return
2128
2129         # the functions below implement the tokenizer states described here:
2130         # http://www.w3.org/TR/html5/syntax.html#tokenization
2131
2132         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2133         ins_mode_in_table_else = (token) ->
2134                 parse_error() # every token routed here counts as a parse error
2135                 flag_foster_parenting = true # raised only while the in-body rules handle this token
2136                 ins_mode_in_body(token)
2137                 flag_foster_parenting = false
2138                 return
2139         # names ins_mode_in_table looks up for text tokens (FIXME do this inline like everywhere else)
2140         can_in_table =
2141                 table: true
2142                 tbody: true
2143                 tfoot: true
2144                 thead: true
2145                 tr: true
2146         ins_mode_in_table = (t) ->
2147                 # "in table" insertion mode: handle token t by type, then by tag name.
2148                 # Anything without a table-specific rule goes to ins_mode_in_table_else.
2149                 switch t.type
2150                         when TYPE_TEXT
2151                                 if can_in_table[open_els[0].name] # spec: test the current node, not the text token
2152                                         original_ins_mode = ins_mode
2153                                         ins_mode = ins_mode_in_table_text
2154                                         process_token t
2155                                 else
2156                                         ins_mode_in_table_else t
2157                         when TYPE_COMMENT
2158                                 insert_comment t
2159                         when TYPE_DOCTYPE
2160                                 parse_error()
2161                         when TYPE_START_TAG
2162                                 switch t.name
2163                                         when 'caption'
2164                                                 clear_stack_to_table_context()
2165                                                 afe_push_marker()
2166                                                 insert_html_element t
2167                                                 ins_mode = ins_mode_in_caption
2168                                         when 'colgroup'
2169                                                 clear_stack_to_table_context()
2170                                                 insert_html_element t
2171                                                 ins_mode = ins_mode_in_column_group
2172                                         when 'col'
2173                                                 clear_stack_to_table_context()
2174                                                 insert_html_element new_open_tag 'colgroup' # implied <colgroup>
2175                                                 ins_mode = ins_mode_in_column_group
2176                                                 process_token t
2177                                         when 'tbody', 'tfoot', 'thead'
2178                                                 clear_stack_to_table_context()
2179                                                 insert_html_element t
2180                                                 ins_mode = ins_mode_in_table_body
2181                                         when 'td', 'th', 'tr'
2182                                                 clear_stack_to_table_context()
2183                                                 insert_html_element new_open_tag 'tbody' # implied <tbody>
2184                                                 ins_mode = ins_mode_in_table_body
2185                                                 process_token t
2186                                         when 'table'
2187                                                 parse_error()
2188                                                 if is_in_table_scope 'table' # nested <table>: close the open one, then reprocess t
2189                                                         loop
2190                                                                 el = open_els.shift()
2191                                                                 if el.name is 'table'
2192                                                                         break
2193                                                         reset_ins_mode()
2194                                                         process_token t
2195                                         when 'style', 'script', 'template'
2196                                                 ins_mode_in_head t
2197                                         when 'input'
2198                                                 if is_input_hidden_tok t
2199                                                         parse_error()
2200                                                         insert_html_element t
2201                                                         open_els.shift() # popped right away: the input is not left on the stack
2202                                                         t.acknowledge_self_closing()
2203                                                 else
2204                                                         ins_mode_in_table_else t
2205                                         when 'form'
2206                                                 parse_error()
2207                                                 if form_element_pointer? or template_tag_is_open()
2208                                                         return
2209                                                 form_element_pointer = insert_html_element t
2210                                                 open_els.shift()
2211                                         else
2212                                                 ins_mode_in_table_else t
2213                         when TYPE_END_TAG
2214                                 switch t.name
2215                                         when 'table'
2216                                                 if is_in_table_scope 'table'
2217                                                         loop
2218                                                                 el = open_els.shift()
2219                                                                 if el.name is 'table'
2220                                                                         break
2221                                                         reset_ins_mode()
2222                                                 else
2223                                                         parse_error() # no table in table scope: report it and ignore the token
2224                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2225                                                 parse_error()
2226                                         when 'template'
2227                                                 ins_mode_in_head t
2228                                         else
2229                                                 ins_mode_in_table_else t
2230                         when TYPE_EOF
2231                                 ins_mode_in_body t
2232                         else
2233                                 ins_mode_in_table_else t
2234
2235
2236         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2237         ins_mode_in_table_text = (t) ->
2238                 if t.type is TYPE_TEXT and t.text is "\u0000"
2239                         # huh? I thought the tokenizer didn't emit these
2240                         parse_error()
2241                         return
2242                 if t.type is TYPE_TEXT
2243                         pending_table_character_tokens.push t
2244                         return
2245                 # Anything else
2246                 all_space = true
2247                 for old in pending_table_character_tokens
2248                         unless is_space_tok old
2249                                 all_space = false
2250                                 break
2251                 if all_space
2252                         for old in pending_table_character_tokens
2253                                 insert_character old
2254                 else
2255                         for old in pending_table_character_tokens
2256                                 ins_mode_table_else old
2257                 pending_table_character_tokens = [] # FIXME test (spec doesn't say this)
2258                 ins_mode = original_ins_mode
2259                 process_token t
2260
2261         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2262         ins_mode_in_caption = (t) ->
2263                 if t.type is TYPE_END_TAG and t.name is 'caption'
2264                         if is_in_table_scope 'caption'
2265                                 generate_implied_end_tags()
2266                                 if open_els[0].name isnt 'caption'
2267                                         parse_error()
2268                                 loop
2269                                         el = open_els.shift()
2270                                         if el.name is 'caption'
2271                                                 break
2272                                 clear_afe_to_marker()
2273                                 ins_mode = ins_mode_in_table
2274                         else
2275                                 parse_error()
2276                                 # fragment case
2277                         return
2278                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2279                         parse_error()
2280                         if is_in_table_scope 'caption'
2281                                 loop
2282                                         el = open_els.shift()
2283                                         if el.name is 'caption'
2284                                                 break
2285                                 clear_afe_to_marker()
2286                                 ins_mode = ins_mode_in_table
2287                                 process_token t
2288                         # else fragment case
2289                         return
2290                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2291                         parse_error()
2292                         return
2293                 # Anything else
2294                 ins_mode_in_body t
2295
2296         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2297         ins_mode_in_column_group = (t) ->
2298                 if is_space_tok t
2299                         insert_character t
2300                         return
2301                 if t.type is TYPE_COMMENT
2302                         insert_comment t
2303                         return
2304                 if t.type is TYPE_DOCTYPE
2305                         parse_error()
2306                         return
2307                 if t.type is TYPE_START_TAG and t.name is 'html'
2308                         ins_mode_in_body t
2309                         return
2310                 if t.type is TYPE_START_TAG and t.name is 'col'
2311                         el = insert_html_element t
2312                         open_els.shift()
2313                         t.acknowledge_self_closing()
2314                         return
2315                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2316                         if open_els[0].name is 'colgroup'
2317                                 open_els.shift()
2318                                 ins_mode = ins_mode_in_table
2319                         else
2320                                 parse_error()
2321                         return
2322                 if t.type is TYPE_END_TAG and t.name is 'col'
2323                         parse_error()
2324                         return
2325                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2326                         ins_mode_in_head t
2327                         return
2328                 if t.type is TYPE_EOF
2329                         ins_mode_in_body t
2330                         return
2331                 # Anything else
2332                 if open_els[0].name isnt 'colgroup'
2333                         parse_error()
2334                         return
2335                 open_els.shift()
2336                 ins_mode = ins_mode_in_table
2337                 process_token t
2338                 return
2339
2340         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2341         ins_mode_in_table_body = (t) ->
2342                 if t.type is TYPE_START_TAG and t.name is 'tr'
2343                         clear_stack_to_table_body_context()
2344                         insert_html_element t
2345                         ins_mode = ins_mode_in_row
2346                         return
2347                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2348                         parse_error()
2349                         clear_stack_to_table_body_context()
2350                         insert_html_element new_open_tag 'tr'
2351                         ins_mode = ins_mode_in_row
2352                         process_token t
2353                         return
2354                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2355                         unless is_in_table_scope t.name # fixfull check namespace
2356                                 parse_error()
2357                                 return
2358                         clear_stack_to_table_body_context()
2359                         open_els.shift()
2360                         ins_mode = ins_mode_in_table
2361                         return
2362                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2363                         has = false
2364                         for el in open_els
2365                                 if el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead'
2366                                         has = true
2367                                         break
2368                                 if table_scopers[el.name]
2369                                         break
2370                         if !has
2371                                 parse_error()
2372                                 return
2373                         clear_stack_to_table_body_context()
2374                         open_els.shift()
2375                         ins_mode = ins_mode_in_table
2376                         process_token t
2377                         return
2378                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2379                         parse_error()
2380                         return
2381                 # Anything else
2382                 ins_mode_in_table t
2383
2384         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2385         ins_mode_in_row = (t) ->
2386                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2387                         clear_stack_to_table_row_context()
2388                         insert_html_element t
2389                         ins_mode = ins_mode_in_cell
2390                         afe_push_marker()
2391                         return
2392                 if t.type is TYPE_END_TAG and t.name is 'tr'
2393                         if is_in_table_scope 'tr'
2394                                 clear_stack_to_table_row_context()
2395                                 open_els.shift()
2396                                 ins_mode = ins_mode_in_table_body
2397                         else
2398                                 parse_error()
2399                         return
2400                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2401                         if is_in_table_scope 'tr'
2402                                 clear_stack_to_table_row_context()
2403                                 open_els.shift()
2404                                 ins_mode = ins_mode_in_table_body
2405                                 process_token t
2406                         else
2407                                 parse_error()
2408                         return
2409                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2410                         if is_in_table_scope t.name # fixfull namespace
2411                                 if is_in_table_scope 'tr'
2412                                         clear_stack_to_table_row_context()
2413                                         open_els.shift()
2414                                         ins_mode = ins_mode_in_table_body
2415                                         process_token t
2416                         else
2417                                 parse_error()
2418                         return
2419                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2420                         parse_error()
2421                         return
2422                 # Anything else
2423                 ins_mode_in_table t
2424
2425         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2426         close_the_cell = ->
2427                 generate_implied_end_tags()
2428                 unless open_els[0].name is 'td' or open_els[0] is 'th'
2429                         parse_error()
2430                 loop
2431                         el = open_els.shift()
2432                         if el.name is 'td' or el.name is 'th'
2433                                 break
2434                 clear_afe_to_marker()
2435                 ins_mode = ins_mode_in_row
2436
2437         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2438         ins_mode_in_cell = (t) ->
2439                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2440                         if is_in_table_scope t.name
2441                                 generate_implied_end_tags()
2442                                 if open_els[0].name isnt t.name
2443                                         parse_error
2444                                 loop
2445                                         el = open_els.shift()
2446                                         if el.name is t.name
2447                                                 break
2448                                 clear_afe_to_marker()
2449                                 ins_mode = ins_mode_in_row
2450                         else
2451                                 parse_error()
2452                         return
2453                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2454                         has = false
2455                         for el in open_els
2456                                 if el.name is 'td' or el.name is 'th'
2457                                         has = true
2458                                         break
2459                                 if table_scopers[el.name]
2460                                         break
2461                         if !has
2462                                 parse_error()
2463                                 return
2464                         close_the_cell()
2465                         process_token t
2466                         return
2467                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2468                         parse_error()
2469                         return
2470                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2471                         if is_in_table_scope t.name # fixfull namespace
2472                                 close_the_cell()
2473                                 process_token t
2474                         else
2475                                 parse_error()
2476                         return
2477                 # Anything Else
2478                 ins_mode_in_body t
2479
2480         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2481         ins_mode_in_select = (t) ->
2482                 if t.type is TYPE_TEXT and t.text is "\u0000"
2483                         parse_error()
2484                         return
2485                 if t.type is TYPE_TEXT
2486                         insert_character t
2487                         return
2488                 if t.type is TYPE_COMMENT
2489                         insert_comment t
2490                         return
2491                 if t.type is TYPE_DOCTYPE
2492                         parse_error()
2493                         return
2494                 if t.type is TYPE_START_TAG and t.name is 'html'
2495                         ins_mode_in_body t
2496                         return
2497                 if t.type is TYPE_START_TAG and t.name is 'option'
2498                         if open_els[0].name is 'option'
2499                                 open_els.shift()
2500                         insert_html_element t
2501                         return
2502                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2503                         if open_els[0].name is 'option'
2504                                 open_els.shift()
2505                         if open_els[0].name is 'optgroup'
2506                                 open_els.shift()
2507                         insert_html_element t
2508                         return
2509                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2510                         if open_els[0].name is 'option' and open_els[1].name is 'optgroup'
2511                                 open_els.shift()
2512                         if open_els[0].name is 'optgroup'
2513                                 open_els.shift()
2514                         else
2515                                 parse_error()
2516                         return
2517                 if t.type is TYPE_END_TAG and t.name is 'option'
2518                         if open_els[0].name is 'option'
2519                                 open_els.shift()
2520                         else
2521                                 parse_error()
2522                         return
2523                 if t.type is TYPE_END_TAG and t.name is 'select'
2524                         if is_in_select_scope 'select'
2525                                 loop
2526                                         el = open_els.shift()
2527                                         if el.name is 'select'
2528                                                 break
2529                                 reset_ins_mode()
2530                         else
2531                                 parse_error()
2532                         return
2533                 if t.type is TYPE_START_TAG and t.name is 'select'
2534                         parse_error()
2535                         loop
2536                                 el = open_els.shift()
2537                                 if el.name is 'select'
2538                                         break
2539                         reset_ins_mode()
2540                         # spec says that this is the same as </select> but it doesn't say
2541                         # to check scope first
2542                         return
2543                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2544                         parse_error()
2545                         if is_in_select_scope 'select'
2546                                 return
2547                         loop
2548                                 el = open_els.shift()
2549                                 if el.name is 'select'
2550                                         break
2551                         reset_ins_mode()
2552                         process_token t
2553                         return
2554                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2555                         ins_mode_in_head t
2556                         return
2557                 if t.type is TYPE_EOF
2558                         ins_mode_in_body t
2559                         return
2560                 # Anything else
2561                 parse_error()
2562                 return
2563
2564         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2565         ins_mode_in_select_in_table = (t) ->
2566                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2567                         parse_error()
2568                         loop
2569                                 el = open_els.shift()
2570                                 if el.name is 'select'
2571                                         break
2572                         reset_ins_mode()
2573                         process_token t
2574                         return
2575                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2576                         parse_error()
2577                         unless is_in_table_scope t.name, NS_HTML
2578                                 return
2579                         loop
2580                                 el = open_els.shift()
2581                                 if el.name is 'select'
2582                                         break
2583                         reset_ins_mode()
2584                         process_token t
2585                         return
2586                 # Anything else
2587                 ins_mode_in_select t
2588                 return
2589
2590         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2591         ins_mode_in_template = (t) ->
2592                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2593                         ins_mode_in_body t
2594                         return
2595                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2596                         ins_mode_in_head t
2597                         return
2598                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2599                         template_ins_modes.shift()
2600                         template_ins_modes.unshift ins_mode_in_table
2601                         ins_mode = ins_mode_in_table
2602                         process_token t
2603                         return
2604                 if t.type is TYPE_START_TAG and t.name is 'col'
2605                         template_ins_modes.shift()
2606                         template_ins_modes.unshift ins_mode_in_column_group
2607                         ins_mode = ins_mode_in_column_group
2608                         process_token t
2609                         return
2610                 if t.type is TYPE_START_TAG and t.name is 'tr'
2611                         template_ins_modes.shift()
2612                         template_ins_modes.unshift ins_mode_in_table_body
2613                         ins_mode = ins_mode_in_table_body
2614                         process_token t
2615                         return
2616                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2617                         template_ins_modes.shift()
2618                         template_ins_modes.unshift ins_mode_in_row
2619                         ins_mode = ins_mode_in_row
2620                         process_token t
2621                         return
2622                 if t.type is TYPE_START_TAG
2623                         template_ins_modes.shift()
2624                         template_ins_modes.unshift ins_mode_in_body
2625                         ins_mode = ins_mode_in_body
2626                         process_token t
2627                         return
2628                 if t.type is TYPE_END_TAG
2629                         parse_error()
2630                         return
2631                 if t.type is TYPE_EOF
2632                         unless template_tag_is_open()
2633                                 stop_parsing()
2634                                 return
2635                         parse_error()
2636                         loop
2637                                 el = open_els.shift()
2638                                 if el.name is 'template' # fixfull check namespace
2639                                         break
2640                         clear_afe_to_marker()
2641                         template_ins_modes.shift()
2642                         reset_ins_mode()
2643                         process_token t
2644
2645         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2646         ins_mode_after_body = (t) ->
2647                 if is_space_tok t
2648                         ins_mode_in_body t
2649                         return
2650                 if t.type is TYPE_COMMENT
2651                         insert_comment t, [open_els[0], open_els[0].children.length]
2652                         return
2653                 if t.type is TYPE_DOCTYPE
2654                         parse_error()
2655                         return
2656                 if t.type is TYPE_START_TAG and t.name is 'html'
2657                         ins_mode_in_body t
2658                         return
2659                 if t.type is TYPE_END_TAG and t.name is 'html'
2660                         # fixfull fragment case
2661                         ins_mode = ins_mode_after_after_body
2662                         return
2663                 if t.type is TYPE_EOF
2664                         stop_parsing()
2665                         return
2666                 # Anything ELse
2667                 parse_error()
2668                 ins_mode = ins_mode_in_body
2669                 process_token t
2670
2671         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2672         ins_mode_in_frameset = (t) ->
2673                 if is_space_tok t
2674                         insert_character t
2675                         return
2676                 if t.type is TYPE_COMMENT
2677                         insert_comment t
2678                         return
2679                 if t.type is TYPE_DOCTYPE
2680                         parse_error()
2681                         return
2682                 if t.type is TYPE_START_TAG and t.name is 'html'
2683                         ins_mode_in_body t
2684                         return
2685                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2686                         insert_html_element t
2687                         return
2688                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2689                         # TODO ?correct for: "if the current node is the root html element"
2690                         if open_els.length is 1
2691                                 parse_error()
2692                                 return # fragment case
2693                         open_els.shift()
2694                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2695                                 ins_mode = ins_mode_after_frameset
2696                         return
2697                 if t.type is TYPE_START_TAG and t.name is 'frame'
2698                         insert_html_element t
2699                         open_els.shift()
2700                         t.acknowledge_self_closing()
2701                         return
2702                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2703                         ins_mode_in_head t
2704                         return
2705                 if t.type is TYPE_EOF
2706                         # TODO ?correct for: "if the current node is not the root html element"
2707                         if open_els.length isnt 1
2708                                 parse_error()
2709                         stop_parsing()
2710                         return
2711                 # Anything else
2712                 parse_error()
2713                 return
2714
2715         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2716         ins_mode_after_frameset = (t) ->
2717                 if is_space_tok t
2718                         insert_character t
2719                         return
2720                 if t.type is TYPE_COMMENT
2721                         insert_comment t
2722                         return
2723                 if t.type is TYPE_DOCTYPE
2724                         parse_error()
2725                         return
2726                 if t.type is TYPE_START_TAG and t.name is 'html'
2727                         ins_mode_in_body t
2728                         return
2729                 if t.type is TYPE_END_TAG and t.name is 'html'
2730                         insert_mode = ins_mode_after_after_frameset
2731                         return
2732                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2733                         ins_mode_in_head t
2734                         return
2735                 if t.type is TYPE_EOF
2736                         stop_parsing()
2737                         return
2738                 # Anything else
2739                 parse_error()
2740                 return
2741
2742         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2743         ins_mode_after_after_body = (t) ->
2744                 if t.type is TYPE_COMMENT
2745                         insert_comment t, [doc, doc.children.length]
2746                         return
2747                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2748                         ins_mode_in_body t
2749                         return
2750                 if t.type is TYPE_EOF
2751                         stop_parsing()
2752                         return
2753                 # Anything else
2754                 parse_error()
2755                 ins_mode = ins_mode_in_body
2756                 return
2757
2758         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2759         ins_mode_after_after_frameset = (t) ->
2760                 if t.type is TYPE_COMMENT
2761                         insert_comment t, [doc, doc.children.length]
2762                         return
2763                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2764                         ins_mode_in_body t
2765                         return
2766                 if t.type is TYPE_EOF
2767                         stop_parsing()
2768                         return
2769                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2770                         ins_mode_in_head t
2771                         return
2772                 # Anything else
2773                 parse_error()
2774                 return
2775
2776         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2777         has_color_face_or_size = (t) ->
2778                 for a in t.attrs_a
2779                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2780                                 return true
2781                 return false
2782         in_foreign_content_end_script = ->
2783                 open_els.shift()
2784                 # fixfull
2785                 return
2786         in_foreign_content_other_start = (t) -> # "any other start tag" handling for foreign (MathML/SVG) content
2787                 acn = adjusted_current_node()
2788                 switch acn.namespace
2789                         when NS_MATHML
2790                                 adjust_mathml_attributes t
2791                         when NS_SVG
2792                                 t.name = svg_name_fixes[t.name] if svg_name_fixes[t.name]? # apply the svg_name_fixes table
2793                                 adjust_svg_attributes t
2794                 adjust_foreign_attributes t
2795                 insert_foreign_element t, acn.namespace
2796                 return unless t.flag 'self-closing'
2797                 if t.name is 'script'
2798                         t.acknowledge_self_closing()
2799                         in_foreign_content_end_script()
2800                 else
2801                         open_els.shift() # pop the current node
2802                         t.acknowledge_self_closing()
2803                 return
2804         in_foreign_content = (t) -> # tree-construction rules applied to token t while in foreign (MathML/SVG) content
2805                 if t.type is TYPE_TEXT and t.text is "\u0000"
2806                         parse_error()
2807                         insert_character new_character_token "\ufffd" # NUL is replaced by U+FFFD
2808                         return
2809                 if is_space_tok t
2810                         insert_character t
2811                         return
2812                 if t.type is TYPE_TEXT
2813                         flag_frameset_ok = false
2814                         insert_character t
2815                         return
2816                 if t.type is TYPE_COMMENT
2817                         insert_comment t
2818                         return
2819                 if t.type is TYPE_DOCTYPE
2820                         parse_error()
2821                         return
2822                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2823                         parse_error()
2824                         if flag_fragment_parsing
2825                                 in_foreign_content_other_start t
2826                                 return
2827                         loop # is this safe? NOTE(review): cn is undefined (and the test below throws) if the stack empties first
2828                                 open_els.shift()
2829                                 cn = open_els[0]
2830                                 if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
2831                                         break
2832                         process_token t # reprocess the token now that the foreign elements are popped
2833                         return
2834                 if t.type is TYPE_START_TAG
2835                         in_foreign_content_other_start t
2836                         return
2837                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2838                         in_foreign_content_end_script()
2839                         return
2840                 if t.type is TYPE_END_TAG
2841                         if open_els[0].name.toLowerCase() isnt t.name
2842                                 parse_error()
2843                         for node, node_idx in open_els # walk from the current node (index 0) towards the topmost element
2844                                 if node is open_els[open_els.length - 1]
2845                                         return # reached the topmost element: ignore the token
2846                                 if node.name.toLowerCase() is t.name
2847                                         loop
2848                                                 el = open_els.shift()
2849                                                 if el is node
2850                                                         return
2851                                 if open_els[node_idx + 1].namespace is NS_HTML # spec steps 5-6: test the NEXT entry before examining it
2852                                         break
2853                         ins_mode t # explicitly call HTML insertion mode
2854
2855
2856         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2857         tok_state_data = -> # one step of the data state: consumes one character, returns a token or null
2858                 c = txt.charAt(cur++)
2859                 if c is '&'
2860                         return new_text_node parse_character_reference()
2861                 if c is '<'
2862                         tok_state = tok_state_tag_open
2863                         return null
2864                 if c is "\u0000"
2865                         parse_error()
2866                         return new_text_node c
2867                 if c is '' # EOF
2868                         return new_eof_token()
2869                 # Anything else
2870                 return new_text_node c
2871
2872         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2873         # not needed: tok_state_character_reference_in_data = ->
2874         # just call parse_character_reference()
2875
2876         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2877         tok_state_rcdata = -> # one step of the RCDATA state: consumes one character, returns a token or null
2878                 c = txt.charAt(cur++)
2879                 if c is '&'
2880                         return new_text_node parse_character_reference()
2881                 if c is '<'
2882                         tok_state = tok_state_rcdata_less_than_sign
2883                         return null
2884                 if c is "\u0000"
2885                         parse_error()
2886                         return new_character_token "\ufffd"
2887                 if c is '' # EOF
2888                         return new_eof_token()
2889                 # Anything else
2890                 return new_character_token c
2891
2892         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2893         # not needed: tok_state_character_reference_in_rcdata = ->
2894         # just call parse_character_reference()
2895
2896         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2897         tok_state_rawtext = -> # one step of the RAWTEXT state: consumes one character, returns a token or null
2898                 c = txt.charAt(cur++)
2899                 if c is '<'
2900                         tok_state = tok_state_rawtext_less_than_sign
2901                         return null
2902                 if c is "\u0000"
2903                         parse_error()
2904                         return new_character_token "\ufffd"
2905                 if c is '' # EOF
2906                         return new_eof_token()
2907                 # Anything else
2908                 return new_character_token c
2909
2910         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2911         tok_state_script_data = -> # one step of the script data state: consumes one character, returns a token or null
2912                 c = txt.charAt(cur++)
2913                 if c is '<'
2914                         tok_state = tok_state_script_data_less_than_sign
2915                         return null
2916                 if c is "\u0000"
2917                         parse_error()
2918                         return new_character_token "\ufffd"
2919                 if c is '' # EOF
2920                         return new_eof_token()
2921                 # Anything else
2922                 return new_character_token c
2923
2924         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
2925         tok_state_plaintext = -> # one step of the PLAINTEXT state: always returns a token
2926                 c = txt.charAt(cur++)
2927                 # only NUL and EOF are special here; every other character becomes text
2928                 if c is "\u0000"
2929                         parse_error()
2930                         return new_character_token "\ufffd"
2931                 if c is '' # EOF
2932                         return new_eof_token()
2933                 # Anything else
2934                 return new_character_token c
2935
2936
2937         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
2938         tok_state_tag_open = -> # classifies the character following a '<' (entered from tok_state_data)
2939                 c = txt.charAt(cur++)
2940                 if c is '!'
2941                         tok_state = tok_state_markup_declaration_open
2942                 else if c is '/'
2943                         tok_state = tok_state_end_tag_open
2944                 else if c is '?'
2945                         parse_error()
2946                         tok_cur_tag = new_comment_token '?'
2947                         tok_state = tok_state_bogus_comment
2948                 else if is_lc_alpha(c)
2949                         tok_cur_tag = new_open_tag c
2950                         tok_state = tok_state_tag_name
2951                 else if is_uc_alpha(c)
2952                         tok_cur_tag = new_open_tag c.toLowerCase()
2953                         tok_state = tok_state_tag_name
2954                 else
2955                         # Anything else: the '<' is emitted as literal text
2956                         parse_error()
2957                         tok_state = tok_state_data
2958                         cur -= 1 # we didn't parse/handle the char after <
2959                         return new_text_node '<'
2960                 return null
2961
2962         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
2963         tok_state_end_tag_open = -> # classifies the character following "</"; returns a token or null
2964                 switch c = txt.charAt(cur++)
2965                         when '>'
2966                                 parse_error() # "</>" is dropped entirely
2967                                 tok_state = tok_state_data
2968                         when '' # EOF
2969                                 parse_error()
2970                                 tok_state = tok_state_data
2971                                 return new_text_node '</'
2972                         else
2973                                 if is_uc_alpha(c)
2974                                         tok_cur_tag = new_end_tag c.toLowerCase()
2975                                         tok_state = tok_state_tag_name
2976                                 else if is_lc_alpha(c)
2977                                         tok_cur_tag = new_end_tag c
2978                                         tok_state = tok_state_tag_name
2979                                 else
2980                                         parse_error()
2981                                         tok_cur_tag = new_comment_token c # bogus comment starts with the char that caused the switch, not '/'
2982                                         tok_state = tok_state_bogus_comment
2983                 return null
2984
2985         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
2986         tok_state_tag_name = -> # appends characters to tok_cur_tag.name until the name ends
2987                 c = txt.charAt(cur++)
2988                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
2989                         tok_state = tok_state_before_attribute_name
2990                         return null
2991                 if c is '/'
2992                         tok_state = tok_state_self_closing_start_tag
2993                         return null
2994                 if c is '>'
2995                         tok_state = tok_state_data
2996                         tmp = tok_cur_tag
2997                         tok_cur_tag = null
2998                         return tmp # the finished tag token
2999                 if c is "\u0000"
3000                         parse_error()
3001                         tok_cur_tag.name += "\ufffd"
3002                         return null
3003                 if c is '' # EOF
3004                         parse_error()
3005                         tok_state = tok_state_data
3006                         return null
3007                 tok_cur_tag.name += if is_uc_alpha(c) then c.toLowerCase() else c # Anything else
3008                 return null
3009
3010         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3011         tok_state_rcdata_less_than_sign = -> # a '<' was seen in RCDATA: start of an end tag, or literal text?
3012                 switch c = txt.charAt(cur++)
3013                         when '/'
3014                                 temporary_buffer = ''
3015                                 tok_state = tok_state_rcdata_end_tag_open
3016                                 return null
3017                         else # Anything else
3018                                 tok_state = tok_state_rcdata
3019                                 cur -= 1 # reconsume the input character
3020                                 return new_character_token '<'
3021
3022         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3023         tok_state_rcdata_end_tag_open = -> # "</" was seen in RCDATA: a letter starts a candidate end tag
3024                 c = txt.charAt(cur++)
3025                 if is_uc_alpha(c)
3026                         tok_cur_tag = new_end_tag c.toLowerCase()
3027                 else if is_lc_alpha(c)
3028                         tok_cur_tag = new_end_tag c
3029                 else
3030                         # Anything else
3031                         tok_state = tok_state_rcdata
3032                         cur -= 1 # reconsume the input character
3033                         return new_character_token "</" # fixfull separate these
3034                 # shared tail for both letter cases: the character is also appended,
3035                 # unmodified, to temporary_buffer
3036                 temporary_buffer += c
3037                 tok_state = tok_state_rcdata_end_tag_name
3038                 return null
3039
3040         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3041         is_appropriate_end_tag = (t) ->
3042                 # The spec compares against the name of the last start tag emitted by
3043                 # the tokenizer; here the current node (open_els[0]) stands in for it,
3044                 # on the assumption that every "raw" state pushed its start tag onto
3045                 # open_els. TODO: verify this after the script data states are implemented
3046                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3047                 return false unless t.type is TYPE_END_TAG
3048                 return t.name is open_els[0].name
3049
3050         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3051         tok_state_rcdata_end_tag_name = -> # reads the rest of a candidate end tag name inside RCDATA
3052                 c = txt.charAt(cur++)
3053                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3054                         if is_appropriate_end_tag tok_cur_tag
3055                                 tok_state = tok_state_before_attribute_name
3056                                 return
3057                         # else fall through to "Anything else"
3058                 if c is '/'
3059                         if is_appropriate_end_tag tok_cur_tag
3060                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3061                                 return
3062                         # else fall through to "Anything else"
3063                 if c is '>'
3064                         if is_appropriate_end_tag tok_cur_tag
3065                                 tok_state = tok_state_data
3066                                 return tok_cur_tag # the end tag token is emitted
3067                         # else fall through to "Anything else"
3068                 if is_uc_alpha(c)
3069                         tok_cur_tag.name += c.toLowerCase() # tag name is lowercased...
3070                         temporary_buffer += c # ...while the buffer keeps the character as typed
3071                         return null
3072                 if is_lc_alpha(c)
3073                         tok_cur_tag.name += c
3074                         temporary_buffer += c
3075                         return null
3076                 # Anything else
3077                 tok_state = tok_state_rcdata
3078                 cur -= 1 # reconsume the input character
3079                 return new_character_token '</' + temporary_buffer # fixfull separate these
3080
3081         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3082         tok_state_rawtext_less_than_sign = -> # a '<' was seen in RAWTEXT: start of an end tag, or literal text?
3083                 switch c = txt.charAt(cur++)
3084                         when '/'
3085                                 temporary_buffer = ''
3086                                 tok_state = tok_state_rawtext_end_tag_open
3087                                 return null
3088                         else # Anything else
3089                                 tok_state = tok_state_rawtext
3090                                 cur -= 1 # reconsume the input character
3091                                 return new_character_token '<'
3092
3093         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3094         tok_state_rawtext_end_tag_open = -> # "</" was seen in RAWTEXT: a letter starts a candidate end tag
3095                 c = txt.charAt(cur++)
3096                 if is_uc_alpha(c)
3097                         tok_cur_tag = new_end_tag c.toLowerCase()
3098                 else if is_lc_alpha(c)
3099                         tok_cur_tag = new_end_tag c
3100                 else
3101                         # Anything else
3102                         tok_state = tok_state_rawtext
3103                         cur -= 1 # reconsume the input character
3104                         return new_character_token "</" # fixfull separate these
3105                 # shared tail for both letter cases: the character is also appended,
3106                 # unmodified, to temporary_buffer
3107                 temporary_buffer += c
3108                 tok_state = tok_state_rawtext_end_tag_name
3109                 return null
3110
3111         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3112         tok_state_rawtext_end_tag_name = -> # reads the rest of a candidate end tag name inside RAWTEXT
3113                 c = txt.charAt(cur++)
3114                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3115                         if is_appropriate_end_tag tok_cur_tag
3116                                 tok_state = tok_state_before_attribute_name
3117                                 return
3118                         # else fall through to "Anything else"
3119                 if c is '/'
3120                         if is_appropriate_end_tag tok_cur_tag
3121                                 tok_state = tok_state_self_closing_start_tag
3122                                 return
3123                         # else fall through to "Anything else"
3124                 if c is '>'
3125                         if is_appropriate_end_tag tok_cur_tag
3126                                 tok_state = tok_state_data
3127                                 return tok_cur_tag # the end tag token is emitted
3128                         # else fall through to "Anything else"
3129                 if is_uc_alpha(c)
3130                         tok_cur_tag.name += c.toLowerCase() # tag name is lowercased...
3131                         temporary_buffer += c # ...while the buffer keeps the character as typed
3132                         return null
3133                 if is_lc_alpha(c)
3134                         tok_cur_tag.name += c
3135                         temporary_buffer += c
3136                         return null
3137                 # Anything else
3138                 tok_state = tok_state_rawtext
3139                 cur -= 1 # reconsume the input character
3140                 return new_character_token '</' + temporary_buffer # fixfull separate these
3141
3142         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3143         tok_state_script_data_less_than_sign = -> # a '<' was seen in script data: end tag, "<!" escape, or literal text?
3144                 switch c = txt.charAt(cur++)
3145                         when '/'
3146                                 temporary_buffer = ''
3147                                 tok_state = tok_state_script_data_end_tag_open
3148                                 return
3149                         when '!'
3150                                 tok_state = tok_state_script_data_escape_start
3151                                 return new_character_token '<!' # fixfull split
3152                         else # Anything else
3153                                 tok_state = tok_state_script_data
3154                                 cur -= 1 # Reconsume
3155                                 return new_character_token '<'
3156
3157         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3158         tok_state_script_data_end_tag_open = -> # "</" was seen in script data: a letter starts a candidate end tag
3159                 c = txt.charAt(cur++)
3160                 if is_uc_alpha(c)
3161                         tok_cur_tag = new_end_tag c.toLowerCase()
3162                 else if is_lc_alpha(c)
3163                         tok_cur_tag = new_end_tag c
3164                 else
3165                         # Anything else
3166                         tok_state = tok_state_script_data
3167                         cur -= 1 # Reconsume
3168                         return new_character_token '</'
3169                 # shared tail for both letter cases: the character is also appended,
3170                 # unmodified, to temporary_buffer
3171                 temporary_buffer += c
3172                 tok_state = tok_state_script_data_end_tag_name
3173                 return
3174
3175         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
3176         tok_state_script_data_end_tag_name = -> # reads the rest of a candidate end tag name inside script data
3177                 c = txt.charAt(cur++)
3178                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3179                         if is_appropriate_end_tag tok_cur_tag
3180                                 tok_state = tok_state_before_attribute_name
3181                                 return
3182                         # fall through
3183                 if c is '/'
3184                         if is_appropriate_end_tag tok_cur_tag
3185                                 tok_state = tok_state_self_closing_start_tag
3186                                 return
3187                         # fall through
3188                 if c is '>' # without this branch "</script>" is never emitted as an end tag
3189                         if is_appropriate_end_tag tok_cur_tag
3190                                 tok_state = tok_state_data
3191                                 return tok_cur_tag # otherwise fall through
3192                 if is_uc_alpha(c) or is_lc_alpha(c)
3193                         tok_cur_tag.name += c.toLowerCase() # a lowercase letter is unchanged by toLowerCase
3194                         temporary_buffer += c # buffer keeps the character as typed
3195                         return
3196                 # Anything else
3197                 tok_state = tok_state_script_data
3198                 cur -= 1 # Reconsume
3199                 return new_character_token "</#{temporary_buffer}" # fixfull split
3200
3201         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3202         tok_state_script_data_escape_start = -> # after "<!" in script data: a '-' continues towards "<!--"
3203                 switch c = txt.charAt(cur++)
3204                         when '-'
3205                                 tok_state = tok_state_script_data_escape_start_dash
3206                                 return new_character_token '-'
3207                         else # Anything else
3208                                 tok_state = tok_state_script_data
3209                                 cur -= 1 # Reconsume
3210                                 return
3211
3212         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3213         tok_state_script_data_escape_start_dash = -> # after "<!-" in script data: a second '-' enters the escaped states
3214                 switch c = txt.charAt(cur++)
3215                         when '-'
3216                                 tok_state = tok_state_script_data_escaped_dash_dash
3217                                 return new_character_token '-'
3218                         else # Anything else
3219                                 tok_state = tok_state_script_data
3220                                 cur -= 1 # Reconsume
3221                                 return
3222
3223         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3224         tok_state_script_data_escaped = -> # one step of the script data escaped state
3225                 switch c = txt.charAt(cur++)
3226                         when '-'
3227                                 tok_state = tok_state_script_data_escaped_dash
3228                                 return new_character_token '-'
3229                         when '<'
3230                                 tok_state = tok_state_script_data_escaped_less_than_sign
3231                                 return
3232                         when "\u0000"
3233                                 parse_error()
3234                                 return new_character_token "\ufffd"
3235                         when '' # EOF
3236                                 tok_state = tok_state_data
3237                                 parse_error()
3238                                 cur -= 1 # Reconsume
3239                                 return
3240                         else # Anything else
3241                                 return new_character_token c
3242
3243         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3244         tok_state_script_data_escaped_dash = -> # one '-' has been seen in the script data escaped state
3245                 switch c = txt.charAt(cur++)
3246                         when '-'
3247                                 tok_state = tok_state_script_data_escaped_dash_dash
3248                                 return new_character_token '-'
3249                         when '<'
3250                                 tok_state = tok_state_script_data_escaped_less_than_sign
3251                                 return
3252                         when "\u0000"
3253                                 parse_error()
3254                                 tok_state = tok_state_script_data_escaped
3255                                 return new_character_token "\ufffd"
3256                         when '' # EOF
3257                                 tok_state = tok_state_data
3258                                 parse_error()
3259                                 cur -= 1 # Reconsume
3260                                 return
3261                         else # Anything else
3262                                 tok_state = tok_state_script_data_escaped
3263                                 return new_character_token c
3264
3265         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3266         tok_state_script_data_escaped_dash_dash = -> # "--" has been seen in the script data escaped state
3267                 switch c = txt.charAt(cur++)
3268                         when '-'
3269                                 return new_character_token '-'
3270                         when '<'
3271                                 tok_state = tok_state_script_data_escaped_less_than_sign
3272                                 return
3273                         when '>'
3274                                 tok_state = tok_state_script_data
3275                                 return new_character_token '>'
3276                         when "\u0000"
3277                                 parse_error()
3278                                 tok_state = tok_state_script_data_escaped
3279                                 return new_character_token "\ufffd"
3280                         when '' # EOF
3281                                 parse_error()
3282                                 tok_state = tok_state_data
3283                                 cur -= 1 # Reconsume
3284                                 return
3285                         else # Anything else
3286                                 tok_state = tok_state_script_data_escaped
3287                                 return new_character_token c
3288
3289         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3290         tok_state_script_data_escaped_less_than_sign = -> # a '<' was seen in the script data escaped state
3291                 c = txt.charAt(cur++)
3292                 if c is '/'
3293                         temporary_buffer = ''
3294                         tok_state = tok_state_script_data_escaped_end_tag_open
3295                         return
3296                 if is_uc_alpha(c)
3297                         temporary_buffer = c.toLowerCase() # yes, really
3298                         tok_state = tok_state_script_data_double_escape_start
3299                         return new_character_token "<#{c}" # fixfull split
3300                 if is_lc_alpha(c)
3301                         temporary_buffer = c
3302                         tok_state = tok_state_script_data_double_escape_start
3303                         return new_character_token "<#{c}" # fixfull split
3304                 # Anything else
3305                 tok_state = tok_state_script_data_escaped
3306                 cur -= 1 # Reconsume
3307                 return new_character_token '<' # emit the pending '<'; c itself is reconsumed by the next state
3308
3309         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3310         tok_state_script_data_escaped_end_tag_open = -> # "</" was seen in escaped script data: a letter starts a candidate end tag
3311                 c = txt.charAt(cur++)
3312                 if is_uc_alpha(c)
3313                         tok_cur_tag = new_end_tag c.toLowerCase()
3314                 else if is_lc_alpha(c)
3315                         tok_cur_tag = new_end_tag c
3316                 else
3317                         # Anything else
3318                         tok_state = tok_state_script_data_escaped
3319                         cur -= 1 # Reconsume
3320                         return new_character_token '</' # fixfull split
3321                 # shared tail for both letter cases: the character is also appended,
3322                 # unmodified, to temporary_buffer
3323                 temporary_buffer += c
3324                 tok_state = tok_state_script_data_escaped_end_tag_name
3325                 return
3326
3327         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3328         tok_state_script_data_escaped_end_tag_name = -> # reads the rest of a candidate end tag name inside escaped script data
3329                 c = txt.charAt(cur++)
3330                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3331                         if is_appropriate_end_tag tok_cur_tag
3332                                 tok_state = tok_state_before_attribute_name
3333                                 return
3334                         # fall through
3335                 if c is '/'
3336                         if is_appropriate_end_tag tok_cur_tag
3337                                 tok_state = tok_state_self_closing_start_tag
3338                                 return
3339                         # fall through
3340                 if c is '>' # without this branch "</script>" is never emitted as an end tag
3341                         if is_appropriate_end_tag tok_cur_tag
3342                                 tok_state = tok_state_data
3343                                 return tok_cur_tag # otherwise fall through
3344                 if is_uc_alpha(c) or is_lc_alpha(c)
3345                         tok_cur_tag.name += c.toLowerCase() # a lowercase letter is unchanged by toLowerCase
3346                         temporary_buffer += c # keep original case: the buffer is re-emitted as text below
3347                         return
3348                 # Anything else
3349                 tok_state = tok_state_script_data_escaped
3350                 cur -= 1 # Reconsume
3351                 return new_character_token "</#{temporary_buffer}" # fixfull split
3352
3353         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3354         tok_state_script_data_double_escape_start = -> # collects a tag name after '<' in escaped script data; "script" enters double escaping
3355                 switch c = txt.charAt(cur++)
3356                         when "\t", "\u000a", "\u000c", ' ', '/', '>'
3357                                 if temporary_buffer is 'script'
3358                                         tok_state = tok_state_script_data_double_escaped
3359                                 else
3360                                         tok_state = tok_state_script_data_escaped
3361                                 return new_character_token c
3362                 if is_uc_alpha(c)
3363                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3364                 else if is_lc_alpha(c)
3365                         temporary_buffer += c
3366                 else
3367                         # Anything else
3368                         tok_state = tok_state_script_data_escaped
3369                         cur -= 1 # Reconsume
3370                         return
3371                 return new_character_token c
3372
3373         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3374         tok_state_script_data_double_escaped = -> # one step of the script data double escaped state
3375                 switch c = txt.charAt(cur++)
3376                         when '-'
3377                                 tok_state = tok_state_script_data_double_escaped_dash
3378                                 return new_character_token '-'
3379                         when '<'
3380                                 tok_state = tok_state_script_data_double_escaped_less_than_sign
3381                                 return new_character_token '<'
3382                         when "\u0000"
3383                                 parse_error()
3384                                 return new_character_token "\ufffd"
3385                         when '' # EOF
3386                                 parse_error()
3387                                 tok_state = tok_state_data
3388                                 cur -= 1 # Reconsume
3389                                 return
3390                         else # Anything else
3391                                 return new_character_token c
3392
3393         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3394         tok_state_script_data_double_escaped_dash = ->
3395                 c = txt.charAt(cur++)
3396                 if c is '-'
3397                         tok_state = tok_state_script_data_double_escaped_dash_dash
3398                         return new_character_token '-'
3399                 if c is '<'
3400                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3401                         return new_character_token '<'
3402                 if c is "\u0000"
3403                         parse_error()
3404                         tok_state = tok_state_script_data_double_escaped
3405                         return new_character_token "\ufffd"
3406                 if c is '' # EOF
3407                         parse_error()
3408                         tok_state = tok_state_data
3409                         cur -= 1 # Reconsume
3410                         return
3411                 # Anything else
3412                 tok_state = tok_state_script_data_double_escaped
3413                 return new_character_token c
3414
3415         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3416         tok_state_script_data_double_escaped_dash_dash = ->
3417                 c = txt.charAt(cur++)
3418                 if c is '-'
3419                         return new_character_token '-'
3420                 if c is '<'
3421                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3422                         return new_character_token '<'
3423                 if c is '>'
3424                         tok_state = tok_state_script_data
3425                         return new_character_token '>'
3426                 if c is "\u0000"
3427                         parse_error()
3428                         tok_state = tok_state_script_data_double_escaped
3429                         return new_character_token "\ufffd"
3430                 if c is '' # EOF
3431                         parse_error()
3432                         tok_state = tok_state_data
3433                         cur -= 1 # Reconsume
3434                         return
3435                 # Anything else
3436                 tok_state = tok_state_script_data_double_escaped
3437                 return new_character_token c
3438
3439         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3440         tok_state_script_data_double_escaped_less_than_sign = ->
3441                 c = txt.charAt(cur++)
3442                 if c is '/'
3443                         temporary_buffer = ''
3444                         tok_state = tok_state_script_data_double_escape_end
3445                         return new_character_token '/'
3446                 # Anything else
3447                 tok_state = tok_state_script_data_double_escaped
3448                 cur -= 1 # Reconsume
3449                 return
3450
3451         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3452         tok_state_script_data_double_escape_end = ->
3453                 c = txt.charAt(cur++)
3454                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3455                         if temporary_buffer is 'script'
3456                                 tok_state = tok_state_script_data_escaped
3457                         else
3458                                 tok_state = tok_state_script_data_double_escaped
3459                         return new_character_token c
3460                 if is_uc_alpha(c)
3461                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3462                         return new_character_token c
3463                 if is_lc_alpha(c)
3464                         temporary_buffer += c
3465                         return new_character_token c
3466                 # Anything else
3467                 tok_state = tok_state_script_data_double_escaped
3468                 cur -= 1 # Reconsume
3469                 return
3470
3471         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3472         tok_state_before_attribute_name = ->
3473                 attr_name = null
3474                 switch c = txt.charAt(cur++)
3475                         when "\t", "\n", "\u000c", ' '
3476                                 return null
3477                         when '/'
3478                                 tok_state = tok_state_self_closing_start_tag
3479                                 return null
3480                         when '>'
3481                                 tok_state = tok_state_data
3482                                 tmp = tok_cur_tag
3483                                 tok_cur_tag = null
3484                                 return tmp
3485                         when "\u0000"
3486                                 parse_error()
3487                                 attr_name = "\ufffd"
3488                         when '"', "'", '<', '='
3489                                 parse_error()
3490                                 attr_name = c
3491                         when '' # EOF
3492                                 parse_error()
3493                                 tok_state = tok_state_data
3494                         else
3495                                 if is_uc_alpha(c)
3496                                         attr_name = c.toLowerCase()
3497                                 else
3498                                         attr_name = c
3499                 if attr_name?
3500                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3501                         tok_state = tok_state_attribute_name
3502                 return null
3503
3504         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3505         tok_state_attribute_name = ->
3506                 switch c = txt.charAt(cur++)
3507                         when "\t", "\n", "\u000c", ' '
3508                                 tok_state = tok_state_after_attribute_name
3509                         when '/'
3510                                 tok_state = tok_state_self_closing_start_tag
3511                         when '='
3512                                 tok_state = tok_state_before_attribute_value
3513                         when '>'
3514                                 tok_state = tok_state_data
3515                                 tmp = tok_cur_tag
3516                                 tok_cur_tag = null
3517                                 return tmp
3518                         when "\u0000"
3519                                 parse_error()
3520                                 tok_cur_tag.attrs_a[0][0] = "\ufffd"
3521                         when '"', "'", '<'
3522                                 parse_error()
3523                                 tok_cur_tag.attrs_a[0][0] = c
3524                         when '' # EOF
3525                                 parse_error()
3526                                 tok_state = tok_state_data
3527                         else
3528                                 if is_uc_alpha(c)
3529                                         tok_cur_tag.attrs_a[0][0] = c.toLowerCase()
3530                                 else
3531                                         tok_cur_tag.attrs_a[0][0] += c
3532                 return null
3533
3534         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3535         tok_state_after_attribute_name = ->
3536                 c = txt.charAt(cur++)
3537                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3538                         return
3539                 if c is '/'
3540                         tok_state = tok_state_self_closing_start_tag
3541                         return
3542                 if c is '='
3543                         tok_state = tok_state_before_attribute_value
3544                         return
3545                 if c is '>'
3546                         tok_state = tok_state_data
3547                         return
3548                 if is_uc_alpha(c)
3549                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3550                         tok_state = tok_state_attribute_name
3551                         return
3552                 if c is "\u0000"
3553                         parse_error()
3554                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3555                         tok_state = tok_state_attribute_name
3556                         return
3557                 if c is '' # EOF
3558                         parse_error()
3559                         tok_state = tok_state_data
3560                         cur -= 1 # reconsume
3561                         return
3562                 if c is '"' or c is "'" or c is '<'
3563                         parse_error()
3564                         # fall through to Anything else
3565                 # Anything else
3566                 tok_cur_tag.attrs_a.unshift [c, '']
3567                 tok_state = tok_state_attribute_name
3568
3569         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3570         tok_state_before_attribute_value = ->
3571                 switch c = txt.charAt(cur++)
3572                         when "\t", "\n", "\u000c", ' '
3573                                 return null
3574                         when '"'
3575                                 tok_state = tok_state_attribute_value_double_quoted
3576                         when '&'
3577                                 tok_state = tok_state_attribute_value_unquoted
3578                                 cur -= 1
3579                         when "'"
3580                                 tok_state = tok_state_attribute_value_single_quoted
3581                         when "\u0000"
3582                                 # Parse error
3583                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3584                                 tok_state = tok_state_attribute_value_unquoted
3585                         when '>'
3586                                 # Parse error
3587                                 tok_state = tok_state_data
3588                                 tmp = tok_cur_tag
3589                                 tok_cur_tag = null
3590                                 return tmp
3591                         when '' # EOF
3592                                 parse_error()
3593                                 tok_state = tok_state_data
3594                         else
3595                                 tok_cur_tag.attrs_a[0][1] += c
3596                                 tok_state = tok_state_attribute_value_unquoted
3597                 return null
3598
3599         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3600         tok_state_attribute_value_double_quoted = ->
3601                 switch c = txt.charAt(cur++)
3602                         when '"'
3603                                 tok_state = tok_state_after_attribute_value_quoted
3604                         when '&'
3605                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3606                         when "\u0000"
3607                                 # Parse error
3608                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3609                         when '' # EOF
3610                                 parse_error()
3611                                 tok_state = tok_state_data
3612                         else
3613                                 tok_cur_tag.attrs_a[0][1] += c
3614                 return null
3615
3616         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3617         tok_state_attribute_value_single_quoted = ->
3618                 switch c = txt.charAt(cur++)
3619                         when "'"
3620                                 tok_state = tok_state_after_attribute_value_quoted
3621                         when '&'
3622                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3623                         when "\u0000"
3624                                 # Parse error
3625                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3626                         when '' # EOF
3627                                 parse_error()
3628                                 tok_state = tok_state_data
3629                         else
3630                                 tok_cur_tag.attrs_a[0][1] += c
3631                 return null
3632
3633         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3634         tok_state_attribute_value_unquoted = ->
3635                 switch c = txt.charAt(cur++)
3636                         when "\t", "\n", "\u000c", ' '
3637                                 tok_state = tok_state_before_attribute_name
3638                         when '&'
3639                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3640                         when '>'
3641                                 tok_state = tok_state_data
3642                                 tmp = tok_cur_tag
3643                                 tok_cur_tag = null
3644                                 return tmp
3645                         when "\u0000"
3646                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3647                         when '' # EOF
3648                                 parse_error()
3649                                 tok_state = tok_state_data
3650                         else
3651                                 # Parse Error if ', <, = or ` (backtick)
3652                                 tok_cur_tag.attrs_a[0][1] += c
3653                 return null
3654
3655         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3656         tok_state_after_attribute_value_quoted = ->
3657                 switch c = txt.charAt(cur++)
3658                         when "\t", "\n", "\u000c", ' '
3659                                 tok_state = tok_state_before_attribute_name
3660                         when '/'
3661                                 tok_state = tok_state_self_closing_start_tag
3662                         when '>'
3663                                 tok_state = tok_state_data
3664                                 tmp = tok_cur_tag
3665                                 tok_cur_tag = null
3666                                 return tmp
3667                         when '' # EOF
3668                                 parse_error()
3669                                 tok_state = tok_state_data
3670                         else
3671                                 # Parse Error
3672                                 tok_state = tok_state_before_attribute_name
3673                                 cur -= 1 # we didn't handle that char
3674                 return null
3675
3676         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3677         tok_state_self_closing_start_tag = ->
3678                 c = txt.charAt(cur++)
3679                 if c is '>'
3680                         tok_cur_tag.flag 'self-closing'
3681                         tok_state = tok_state_data
3682                         return tok_cur_tag
3683                 if c is ''
3684                         parse_error()
3685                         tok_state = tok_state_data
3686                         cur -= 1 # Reconsume
3687                         return
3688                 # Anything else
3689                 parse_error()
3690                 tok_state = tok_state_before_attribute_name
3691                 cur -= 1 # Reconsume
3692                 return
3693
3694         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3695         # WARNING: put a comment token in tok_cur_tag before setting this state
3696         tok_state_bogus_comment = ->
3697                 next_gt = txt.indexOf '>', cur
3698                 if next_gt is -1
3699                         val = txt.substr cur
3700                         cur = txt.length
3701                 else
3702                         val = txt.substr cur, (next_gt - cur)
3703                         cur = next_gt + 1
3704                 val = val.replace "\u0000", "\ufffd"
3705                 tok_cur_tag.text += val
3706                 tok_state = tok_state_data
3707                 return tok_cur_tag
3708
3709         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3710         tok_state_markup_declaration_open = ->
3711                 if txt.substr(cur, 2) is '--'
3712                         cur += 2
3713                         tok_cur_tag = new_comment_token ''
3714                         tok_state = tok_state_comment_start
3715                         return
3716                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3717                         cur += 7
3718                         tok_state = tok_state_doctype
3719                         return
3720                 acn = adjusted_current_node()
3721                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3722                         cur += 7
3723                         tok_state = tok_state_cdata_section
3724                         return
3725                 # Otherwise
3726                 parse_error()
3727                 tok_cur_tag = new_comment_token '!' # TODO test ("!" right?)
3728                 tok_state = tok_state_bogus_comment
3729                 return
3730
3731         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3732         tok_state_comment_start = ->
3733                 switch c = txt.charAt(cur++)
3734                         when '-'
3735                                 tok_state = tok_state_comment_start_dash
3736                         when "\u0000"
3737                                 parse_error()
3738                                 return new_character_token "\ufffd"
3739                         when '>'
3740                                 parse_error()
3741                                 tok_state = tok_state_data
3742                                 return tok_cur_tag
3743                         when '' # EOF
3744                                 parse_error()
3745                                 tok_state = tok_state_data
3746                                 cur -= 1 # Reconsume
3747                                 return tok_cur_tag
3748                         else
3749                                 tok_cur_tag.text += c
3750                 return null
3751
3752         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3753         tok_state_comment_start_dash = ->
3754                 switch c = txt.charAt(cur++)
3755                         when '-'
3756                                 tok_state = tok_state_comment_end
3757                         when "\u0000"
3758                                 parse_error()
3759                                 tok_cur_tag.text += "-\ufffd"
3760                                 tok_state = tok_state_comment
3761                         when '>'
3762                                 parse_error()
3763                                 tok_state = tok_state_data
3764                                 return tok_cur_tag
3765                         when '' # EOF
3766                                 parse_error()
3767                                 tok_state = tok_state_data
3768                                 cur -= 1 # Reconsume
3769                                 return tok_cur_tag
3770                         else
3771                                 tok_cur_tag.text += "-#{c}"
3772                                 tok_state = tok_state_comment
3773                 return null
3774
3775         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3776         tok_state_comment = ->
3777                 switch c = txt.charAt(cur++)
3778                         when '-'
3779                                 tok_state = tok_state_comment_end_dash
3780                         when "\u0000"
3781                                 parse_error()
3782                                 tok_cur_tag.text += "\ufffd"
3783                         when '' # EOF
3784                                 parse_error()
3785                                 tok_state = tok_state_data
3786                                 cur -= 1 # Reconsume
3787                                 return tok_cur_tag
3788                         else
3789                                 tok_cur_tag.text += c
3790                 return null
3791
3792         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3793         tok_state_comment_end_dash = ->
3794                 switch c = txt.charAt(cur++)
3795                         when '-'
3796                                 tok_state = tok_state_comment_end
3797                         when "\u0000"
3798                                 parse_error()
3799                                 tok_cur_tag.text += "-\ufffd"
3800                                 tok_state = tok_state_comment
3801                         when '' # EOF
3802                                 parse_error()
3803                                 tok_state = tok_state_data
3804                                 cur -= 1 # Reconsume
3805                                 return tok_cur_tag
3806                         else
3807                                 tok_cur_tag.text += "-#{c}"
3808                                 tok_state = tok_state_comment
3809                 return null
3810
3811         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3812         tok_state_comment_end = ->
3813                 switch c = txt.charAt(cur++)
3814                         when '>'
3815                                 tok_state = tok_state_data
3816                                 return tok_cur_tag
3817                         when "\u0000"
3818                                 parse_error()
3819                                 tok_cur_tag.text += "--\ufffd"
3820                                 tok_state = tok_state_comment
3821                         when '!'
3822                                 parse_error()
3823                                 tok_state = tok_state_comment_end_bang
3824                         when '-'
3825                                 parse_error()
3826                                 tok_cur_tag.text += '-'
3827                         when '' # EOF
3828                                 parse_error()
3829                                 tok_state = tok_state_data
3830                                 cur -= 1 # Reconsume
3831                                 return tok_cur_tag
3832                         else
3833                                 parse_error()
3834                                 tok_cur_tag.text += "--#{c}"
3835                                 tok_state = tok_state_comment
3836                 return null
3837
3838         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3839         tok_state_comment_end_bang = ->
3840                 switch c = txt.charAt(cur++)
3841                         when '-'
3842                                 tok_cur_tag.text += "--!#{c}"
3843                                 tok_state = tok_state_comment_end_dash
3844                         when '>'
3845                                 tok_state = tok_state_data
3846                                 return tok_cur_tag
3847                         when "\u0000"
3848                                 parse_error()
3849                                 tok_cur_tag.text += "--!\ufffd"
3850                                 tok_state = tok_state_comment
3851                         when '' # EOF
3852                                 parse_error()
3853                                 tok_state = tok_state_data
3854                                 cur -= 1 # Reconsume
3855                                 return tok_cur_tag
3856                         else
3857                                 tok_cur_tag.text += "--!#{c}"
3858                                 tok_state = tok_state_comment
3859                 return null
3860
3861         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3862         tok_state_doctype = ->
3863                 switch c = txt.charAt(cur++)
3864                         when "\t", "\u000a", "\u000c", ' '
3865                                 tok_state = tok_state_before_doctype_name
3866                         when '' # EOF
3867                                 parse_error()
3868                                 tok_state = tok_state_data
3869                                 el = new_doctype_token ''
3870                                 el.flag 'force-quirks', true
3871                                 cur -= 1 # Reconsume
3872                                 return el
3873                         else
3874                                 parse_error()
3875                                 tok_state = tok_state_before_doctype_name
3876                                 cur -= 1 # Reconsume
3877                 return null
3878
        # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
3880         tok_state_before_doctype_name = ->
3881                 c = txt.charAt(cur++)
3882                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3883                         return
3884                 if is_uc_alpha(c)
3885                         tok_cur_tag = new_doctype_token c.toLowerCase()
3886                         tok_state = tok_state_doctype_name
3887                         return
3888                 if c is "\u0000"
3889                         parse_error()
3890                         tok_cur_tag = new_doctype_token "\ufffd"
3891                         tok_state = tok_state_doctype_name
3892                         return
3893                 if c is '>'
3894                         parse_error()
3895                         el = new_doctype_token ''
3896                         el.flag 'force-quirks', true
3897                         tok_state = tok_state_data
3898                         return el
3899                 if c is '' # EOF
3900                         parse_error()
3901                         tok_state = tok_state_data
3902                         el = new_doctype_token ''
3903                         el.flag 'force-quirks', true
3904                         cur -= 1 # Reconsume
3905                         return el
3906                 # Anything else
3907                 tok_cur_tag = new_doctype_token c
3908                 tok_state = tok_state_doctype_name
3909                 return null
3910
3911         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
3912         tok_state_doctype_name = ->
3913                 c = txt.charAt(cur++)
3914                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3915                         tok_state = tok_state_after_doctype_name
3916                         return
3917                 if c is '>'
3918                         tok_state = tok_state_data
3919                         return tok_cur_tag
3920                 if is_uc_alpha(c)
3921                         tok_cur_tag.name += c.toLowerCase()
3922                         return
3923                 if c is "\u0000"
3924                         parse_error()
3925                         tok_cur_tag.name += "\ufffd"
3926                         return
3927                 if c is '' # EOF
3928                         parse_error()
3929                         tok_state = tok_state_data
3930                         tok_cur_tag.flag 'force-quirks', true
3931                         cur -= 1 # Reconsume
3932                         return tok_cur_tag
3933                 # Anything else
3934                 tok_cur_tag.name += c
3935                 return null
3936
3937         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
3938         tok_state_after_doctype_name = ->
3939                 c = txt.charAt(cur++)
3940                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3941                         return
3942                 if c is '>'
3943                         tok_state = tok_state_data
3944                         return tok_cur_tag
3945                 if c is '' # EOF
3946                         parse_error()
3947                         tok_state = tok_state_data
3948                         tok_cur_tag.flag 'force-quirks', true
3949                         cur -= 1 # Reconsume
3950                         return tok_cur_tag
3951                 # Anything else
3952                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
3953                         cur += 5
3954                         tok_state = tok_state_after_doctype_public_keyword
3955                         return
3956                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
3957                         cur += 5
3958                         tok_state = tok_state_after_doctype_system_keyword
3959                         return
3960                 parse_error()
3961                 tok_cur_tag.flag 'force-quirks', true
3962                 tok_state = tok_state_bogus_doctype
3963                 return null
3964
3965         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
3966         tok_state_after_doctype_public_keyword = ->
3967                 c = txt.charAt(cur++)
3968                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3969                         tok_state = tok_state_before_doctype_public_identifier
3970                         return
3971                 if c is '"'
3972                         parse_error()
3973                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
3974                         tok_state = tok_state_doctype_public_identifier_double_quoted
3975                         return
3976                 if c is "'"
3977                         parse_error()
3978                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
3979                         tok_state = tok_state_doctype_public_identifier_single_quoted
3980                         return
3981                 if c is '>'
3982                         parse_error()
3983                         tok_cur_tag.flag 'force-quirks', true
3984                         tok_state = tok_state_data
3985                         return tok_cur_tag
3986                 if c is '' # EOF
3987                         parse_error()
3988                         tok_state = tok_state_data
3989                         tok_cur_tag.flag 'force-quirks', true
3990                         cur -= 1 # Reconsume
3991                         return tok_cur_tag
3992                 # Anything else
3993                 parse_error()
3994                 tok_cur_tag.flag 'force-quirks', true
3995                 tok_state = tok_state_bogus_doctype
3996                 return null
3997
3998         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
3999         tok_state_before_doctype_public_identifier = ->
4000                 c = txt.charAt(cur++)
4001                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4002                         return
4003                 if c is '"'
4004                         parse_error()
4005                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
4006                         tok_state = tok_state_doctype_public_identifier_double_quoted
4007                         return
4008                 if c is "'"
4009                         parse_error()
4010                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
4011                         tok_state = tok_state_doctype_public_identifier_single_quoted
4012                         return
4013                 if c is '>'
4014                         parse_error()
4015                         tok_cur_tag.flag 'force-quirks', true
4016                         tok_state = tok_state_data
4017                         return tok_cur_tag
4018                 if c is '' # EOF
4019                         parse_error()
4020                         tok_state = tok_state_data
4021                         tok_cur_tag.flag 'force-quirks', true
4022                         cur -= 1 # Reconsume
4023                         return tok_cur_tag
4024                 # Anything else
4025                 parse_error()
4026                 tok_cur_tag.flag 'force-quirks', true
4027                 tok_state = tok_state_bogus_doctype
4028                 return null
4029
4030
4031         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4032         tok_state_doctype_public_identifier_double_quoted = ->
4033                 c = txt.charAt(cur++)
4034                 if c is '"'
4035                         tok_state = tok_state_after_doctype_public_identifier
4036                         return
4037                 if c is "\u0000"
4038                         parse_error()
4039                         tok_cur_tag.public_identifier += "\ufffd"
4040                         return
4041                 if c is '>'
4042                         parse_error()
4043                         tok_cur_tag.flag 'force-quirks', true
4044                         tok_state = tok_state_data
4045                         return tok_cur_tag
4046                 if c is '' # EOF
4047                         parse_error()
4048                         tok_state = tok_state_data
4049                         tok_cur_tag.flag 'force-quirks', true
4050                         cur -= 1 # Reconsume
4051                         return tok_cur_tag
4052                 # Anything else
4053                 tok_cur_tag.public_identifier += c
4054                 return null
4055
4056         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4057         tok_state_doctype_public_identifier_single_quoted = ->
4058                 c = txt.charAt(cur++)
4059                 if c is "'"
4060                         tok_state = tok_state_after_doctype_public_identifier
4061                         return
4062                 if c is "\u0000"
4063                         parse_error()
4064                         tok_cur_tag.public_identifier += "\ufffd"
4065                         return
4066                 if c is '>'
4067                         parse_error()
4068                         tok_cur_tag.flag 'force-quirks', true
4069                         tok_state = tok_state_data
4070                         return tok_cur_tag
4071                 if c is '' # EOF
4072                         parse_error()
4073                         tok_state = tok_state_data
4074                         tok_cur_tag.flag 'force-quirks', true
4075                         cur -= 1 # Reconsume
4076                         return tok_cur_tag
4077                 # Anything else
4078                 tok_cur_tag.public_identifier += c
4079                 return null
4080
4081         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4082         tok_state_after_doctype_public_identifier = ->
4083                 c = txt.charAt(cur++)
4084                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4085                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4086                         return
4087                 if c is '>'
4088                         tok_state = tok_state_data
4089                         return tok_cur_tag
4090                 if c is '"'
4091                         parse_error()
4092                         tok_cur_tag.system_identifier = ''
4093                         tok_state = tok_state_doctype_system_identifier_double_quoted
4094                         return
4095                 if c is "'"
4096                         parse_error()
4097                         tok_cur_tag.system_identifier = ''
4098                         tok_state = tok_state_doctype_system_identifier_single_quoted
4099                         return
4100                 if c is '' # EOF
4101                         parse_error()
4102                         tok_state = tok_state_data
4103                         tok_cur_tag.flag 'force-quirks', true
4104                         cur -= 1 # Reconsume
4105                         return tok_cur_tag
4106                 # Anything else
4107                 parse_error()
4108                 tok_cur_tag.flag 'force-quirks', true
4109                 tok_state = tok_state_bogus_doctype
4110                 return null
4111
4112         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4113         tok_state_between_doctype_public_and_system_identifiers = ->
4114                 c = txt.charAt(cur++)
4115                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4116                         return
4117                 if c is '>'
4118                         tok_state = tok_state_data
4119                         return tok_cur_tag
4120                 if c is '"'
4121                         parse_error()
4122                         tok_cur_tag.system_identifier = ''
4123                         tok_state = tok_state_doctype_system_identifier_double_quoted
4124                         return
4125                 if c is "'"
4126                         parse_error()
4127                         tok_cur_tag.system_identifier = ''
4128                         tok_state = tok_state_doctype_system_identifier_single_quoted
4129                         return
4130                 if c is '' # EOF
4131                         parse_error()
4132                         tok_state = tok_state_data
4133                         tok_cur_tag.flag 'force-quirks', true
4134                         cur -= 1 # Reconsume
4135                         return tok_cur_tag
4136                 # Anything else
4137                 parse_error()
4138                 tok_cur_tag.flag 'force-quirks', true
4139                 tok_state = tok_state_bogus_doctype
4140                 return null
4141
4142         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4143         tok_state_after_doctype_system_keyword = ->
4144                 c = txt.charAt(cur++)
4145                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4146                         tok_state = tok_state_before_doctype_system_identifier
4147                         return
4148                 if c is '"'
4149                         parse_error()
4150                         tok_cur_tag.system_identifier = ''
4151                         tok_state = tok_state_doctype_system_identifier_double_quoted
4152                         return
4153                 if c is "'"
4154                         parse_error()
4155                         tok_cur_tag.system_identifier = ''
4156                         tok_state = tok_state_doctype_system_identifier_single_quoted
4157                         return
4158                 if c is '>'
4159                         parse_error()
4160                         tok_cur_tag.flag 'force-quirks', true
4161                         tok_state = tok_state_data
4162                         return tok_cur_tag
4163                 if c is '' # EOF
4164                         parse_error()
4165                         tok_state = tok_state_data
4166                         tok_cur_tag.flag 'force-quirks', true
4167                         cur -= 1 # Reconsume
4168                         return tok_cur_tag
4169                 # Anything else
4170                 parse_error()
4171                 tok_cur_tag.flag 'force-quirks', true
4172                 tok_state = tok_state_bogus_doctype
4173                 return null
4174
4175         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4176         tok_state_before_doctype_system_identifier = ->
4177                 c = txt.charAt(cur++)
4178                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4179                         return
4180                 if c is '"'
4181                         tok_cur_tag.system_identifier = ''
4182                         tok_state = tok_state_doctype_system_identifier_double_quoted
4183                         return
4184                 if c is "'"
4185                         tok_cur_tag.system_identifier = ''
4186                         tok_state = tok_state_doctype_system_identifier_single_quoted
4187                         return
4188                 if c is '>'
4189                         parse_error()
4190                         tok_cur_tag.flag 'force-quirks', true
4191                         tok_state = tok_state_data
4192                         return tok_cur_tag
4193                 if c is '' # EOF
4194                         parse_error()
4195                         tok_state = tok_state_data
4196                         tok_cur_tag.flag 'force-quirks', true
4197                         cur -= 1 # Reconsume
4198                         return tok_cur_tag
4199                 # Anything else
4200                 parse_error()
4201                 tok_cur_tag.flag 'force-quirks', true
4202                 tok_state = tok_state_bogus_doctype
4203                 return null
4204
4205         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4206         tok_state_doctype_system_identifier_double_quoted = ->
4207                 c = txt.charAt(cur++)
4208                 if c is '"'
4209                         tok_state = tok_state_after_doctype_system_identifier
4210                         return
4211                 if c is "\u0000"
4212                         parse_error()
4213                         tok_cur_tag.system_identifier += "\ufffd"
4214                         return
4215                 if c is '>'
4216                         parse_error()
4217                         tok_cur_tag.flag 'force-quirks', true
4218                         tok_state = tok_state_data
4219                         return tok_cur_tag
4220                 if c is '' # EOF
4221                         parse_error()
4222                         tok_state = tok_state_data
4223                         tok_cur_tag.flag 'force-quirks', true
4224                         cur -= 1 # Reconsume
4225                         return tok_cur_tag
4226                 # Anything else
4227                 tok_cur_tag.system_identifier += c
4228                 return null
4229
4230         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4231         tok_state_doctype_system_identifier_single_quoted = ->
4232                 c = txt.charAt(cur++)
4233                 if c is "'"
4234                         tok_state = tok_state_after_doctype_system_identifier
4235                         return
4236                 if c is "\u0000"
4237                         parse_error()
4238                         tok_cur_tag.system_identifier += "\ufffd"
4239                         return
4240                 if c is '>'
4241                         parse_error()
4242                         tok_cur_tag.flag 'force-quirks', true
4243                         tok_state = tok_state_data
4244                         return tok_cur_tag
4245                 if c is '' # EOF
4246                         parse_error()
4247                         tok_state = tok_state_data
4248                         tok_cur_tag.flag 'force-quirks', true
4249                         cur -= 1 # Reconsume
4250                         return tok_cur_tag
4251                 # Anything else
4252                 tok_cur_tag.system_identifier += c
4253                 return null
4254
4255         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4256         tok_state_after_doctype_system_identifier = ->
4257                 c = txt.charAt(cur++)
4258                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4259                         return
4260                 if c is '>'
4261                         tok_state = tok_state_data
4262                         return tok_cur_tag
4263                 if c is '' # EOF
4264                         parse_error()
4265                         tok_state = tok_state_data
4266                         tok_cur_tag.flag 'force-quirks', true
4267                         cur -= 1 # Reconsume
4268                         return tok_cur_tag
4269                 # Anything else
4270                 parse_error()
4271                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4272                 tok_state = tok_state_bogus_doctype
4273                 return null
4274
4275         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4276         tok_state_bogus_doctype = ->
4277                 c = txt.charAt(cur++)
4278                 if c is '>'
4279                         tok_state = tok_state_data
4280                         return tok_cur_tag
4281                 if c is '' # EOF
4282                         tok_state = tok_state_data
4283                         cur -= 1 # Reconsume
4284                         return tok_cur_tag
4285                 # Anything else
4286                 return null
4287
4288         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4289         tok_state_cdata_section = ->
4290                 tok_state = tok_state_data
4291                 next_gt = txt.indexOf ']]>', cur
4292                 if next_gt is -1
4293                         val = txt.substr cur
4294                         cur = txt.length
4295                 else
4296                         val = txt.substr cur, (next_gt - cur)
4297                         cur = next_gt + 3
4298                 val = val.replace "\u0000", "\ufffd" # fixfull spec doesn't say this
4299                 return new_character_token val # fixfull split
4300
4301         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4302         # Don't set this as a state, just call it
4303         # returns a string (NOT a text node)
4304         parse_character_reference = (allowed_char = null, in_attr = false) ->
4305                 if cur >= txt.length
4306                         return '&'
4307                 switch c = txt.charAt(cur)
4308                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4309                                 # explicitly not a parse error
4310                                 return '&'
4311                         when ';'
4312                                 # there has to be "one or more" alnums between & and ; to be a parse error
4313                                 return '&'
4314                         when '#'
4315                                 if cur + 1 >= txt.length
4316                                         return '&'
4317                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4318                                         prefix = '#x'
4319                                         charset = hex_chars
4320                                         start = cur + 2
4321                                 else
4322                                         charset = digits
4323                                         start = cur + 1
4324                                         prefix = '#'
4325                                 i = 0
4326                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4327                                         i += 1
4328                                 if i is 0
4329                                         return '&'
4330                                 if txt.charAt(start + i) is ';'
4331                                         i += 1
4332                                 # FIXME This is supposed to generate parse errors for some chars
4333                                 decoded = decode_named_char_ref(prefix + txt.substr(start, i).toLowerCase())
4334                                 if decoded?
4335                                         cur = start + i
4336                                         return decoded
4337                                 return '&'
4338                         else
4339                                 for i in [0...31]
4340                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4341                                                 break
4342                                 if i is 0
4343                                         # exit early, because parse_error() below needs at least one alnum
4344                                         return '&'
4345                                 if txt.charAt(cur + i) is ';'
4346                                         i += 1 # include ';' terminator in value
4347                                         decoded = decode_named_char_ref txt.substr(cur, i)
4348                                         if decoded?
4349                                                 cur += i
4350                                                 return decoded
4351                                         parse_error()
4352                                         return '&'
4353                                 else
4354                                         # no ';' terminator (only legacy char refs)
4355                                         max = i
4356                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4357                                                 c = legacy_char_refs[txt.substr(cur, i)]
4358                                                 if c?
4359                                                         if in_attr
4360                                                                 if txt.charAt(cur + i) is '='
4361                                                                         # "because some legacy user agents will
4362                                                                         # misinterpret the markup in those cases"
4363                                                                         parse_error()
4364                                                                         return '&'
4365                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4366                                                                         # this makes attributes forgiving about url args
4367                                                                         return '&'
4368                                                         # ok, and besides the weird exceptions for attributes...
4369                                                         # return the matching char
4370                                                         cur += i # consume entity chars
4371                                                         parse_error() # because no terminating ";"
4372                                                         return c
4373                                         parse_error()
4374                                         return '&'
4375                 return # never reached
4376
        # tree constructor initialization
        # see comments on TYPE_TAG/etc for the structure of this data
        #
        # doc is a synthetic <html> root node; only its children are returned
        # to the caller (see the return at the bottom).
        doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
        open_els = []
        afe = [] # active formatting elements
        template_ins_modes = []
        ins_mode = ins_mode_initial
        original_ins_mode = ins_mode # TODO check spec
        flag_scripting = true # TODO might need an extra flag to get <noscript> to parse correctly
        flag_frameset_ok = true
        flag_parsing = true # the main loop below runs for as long as this stays true
        flag_foster_parenting = false
        form_element_pointer = null
        temporary_buffer = null
        pending_table_character_tokens = []
        head_element_pointer = null
        flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
        context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments

        # tokenizer initialization
        tok_state = tok_state_data

        # process input
        # http://www.w3.org/TR/html5/syntax.html#tree-construction
        #
        # Each call to the current tokenizer state function consumes some input
        # and returns either a token or null/undefined (no token ready yet).
        # Tokens are handed to process_token.
        while flag_parsing
                t = tok_state()
                if t?
                        process_token t
                        # fixfull parse error if has self-closing flag, but it wasn't acknowledged
        return doc.children
4407
# Serializes every element of els by calling its serialize method with
# (shallow, show_ids) and joins the results with commas.
# Returns '' for an empty list.
serialize_els = (els, shallow, show_ids) ->
        return ("#{t.serialize(shallow, show_ids)}" for t in els).join ','
4416
# Public API of this module: the parser entry point, the debug-log helpers,
# the four node-type constants and the three namespace constants.
# TODO export any remaining TYPE_* constants (only the four below are
# exported so far)
module.exports.parse_html = parse_html
module.exports.debug_log_reset = debug_log_reset
module.exports.debug_log_each = debug_log_each
module.exports.TYPE_TAG = TYPE_TAG
module.exports.TYPE_TEXT = TYPE_TEXT
module.exports.TYPE_COMMENT = TYPE_COMMENT
module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
module.exports.NS_HTML = NS_HTML
module.exports.NS_MATHML = NS_MATHML
module.exports.NS_SVG = NS_SVG