JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
namespace checking cleanup
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor. Hence it does not attempt to parse doctypes, <html>, <head>
20 # or <body> tags, nor does it produce the top level "document" node in the dom
21 # tree, nor nodes for html, head or body. Comments containing "fixfull"
22 # indicate places where additional code is needed for full HTML document
23 # parsing.
24 #
25 # Instead, the data structure produced by this parser is an array of Nodes.
26
27
28 # stacks/lists
29 #
30 # the spec uses a many different words do indicate which ends of lists/stacks
31 # they are talking about (and relative movement within the lists/stacks). This
32 # section splains. I'm implementing "lists" (afe and open_els) the same way
33 # (both as stacks)
34 #
35 # stacks grow downward (current element is index=0)
36 #
37 # example: open_els = [a, b, c, d, e, f, g]
38 #
39 # "grows downwards" means it's visualized like this: (index: el, names)
40 #
41 #   6: g "start of the list", "topmost", "first"
42 #   5: f
43 #   4: e "previous" (to d), "above", "before"
44 #   3: d   (previous/next are relative to this element)
45 #   2: c "next", "after", "lower", "below"
46 #   1: b
47 #   0: a "end of the list", "current node", "bottommost", "last"
48
49
50 # browser
51 # note: to get this to run outside a browser, you'll have to write a native
52 # implementation of decode_named_char_ref()
53 unless module?.exports?
54         window.wheic = {}
55         module = exports: window.wheic
56
57 # Each node is an obect of the Node class. Here are the Node types:
58 TYPE_TAG = 0 # name, {attributes}, [children]
59 TYPE_TEXT = 1 # "text"
60 TYPE_COMMENT = 2
61 TYPE_DOCTYPE = 3
62 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
63 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
64 TYPE_END_TAG = 5 # name
65 TYPE_EOF = 6
66 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
67 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
68
69 # namespace constants
70 NS_HTML = 1
71 NS_MATHML = 2
72 NS_SVG = 3
73
74 g_debug_log = []
75 debug_log_reset = ->
76         g_debug_log = []
77 debug_log = (str) ->
78         g_debug_log.push str
79 debug_log_each = (cb) ->
80         for str in g_debug_log
81                 cb str
82
83 prev_node_id = 0
84 class Node
85         constructor: (type, args = {}) ->
86                 @type = type # one of the TYPE_* constants above
87                 @name = args.name ? '' # tag name
88                 @text = args.text ? '' # contents for text/comment nodes
89                 @attrs = args.attrs ? {}
90                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
91                 @children = args.children ? []
92                 @namespace = args.namespace ? NS_HTML
93                 @parent = args.parent ? null
94                 @token = args.token ? null
95                 @flags = args.flags ? {}
96                 if args.id?
97                         @id = "#{args.id}+"
98                 else
99                         @id = "#{++prev_node_id}"
100         acknowledge_self_closing: ->
101                 if @token?
102                         @token.flag 'did_self_close'
103                 else
104                         @flag 'did_self_close', true
105         flag: (key, value = null) ->
106                 if value?
107                         @flags[key] = value
108                 else
109                         return @flags[key]
110         serialize: (shallow = false, show_ids = false) -> # for unit tests
111                 ret = ''
112                 switch @type
113                         when TYPE_TAG
114                                 ret += 'tag:'
115                                 ret += JSON.stringify @name
116                                 ret += ','
117                                 if show_ids
118                                         ret += "##{@id},"
119                                 if shallow
120                                         break
121                                 attr_keys = []
122                                 for k of @attrs
123                                         attr_keys.push k
124                                 attr_keys.sort()
125                                 ret += '{'
126                                 sep = ''
127                                 for k in attr_keys
128                                         ret += sep
129                                         sep = ','
130                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
131                                 ret += '},['
132                                 sep = ''
133                                 for c in @children
134                                         ret += sep
135                                         sep = ','
136                                         ret += c.serialize shallow, show_ids
137                                 ret += ']'
138                         when TYPE_TEXT
139                                 ret += 'text:'
140                                 ret += JSON.stringify @text
141                         when TYPE_COMMENT
142                                 ret += 'comment:'
143                                 ret += JSON.stringify @text
144                         when TYPE_DOCTYPE
145                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
146                         when TYPE_AFE_MARKER
147                                 ret += 'marker'
148                         when TYPE_AAA_BOOKMARK
149                                 ret += 'aaa_bookmark'
150                         else
151                                 ret += 'unknown:'
152                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
153                 return ret
154
155 # helpers: (only take args that are normally known when parser creates nodes)
156 new_open_tag = (name) ->
157         return new Node TYPE_START_TAG, name: name
158 new_end_tag = (name) ->
159         return new Node TYPE_END_TAG, name: name
160 new_element = (name) ->
161         return new Node TYPE_TAG, name: name
162 new_text_node = (txt) ->
163         return new Node TYPE_TEXT, text: txt
164 new_character_token = new_text_node
165 new_comment_token = (txt) ->
166         return new Node TYPE_COMMENT, text: txt
167 new_doctype_token = (name) ->
168         return new Node TYPE_DOCTYPE, name: name
169 new_eof_token = ->
170         return new Node TYPE_EOF
171 new_afe_marker = ->
172         return new Node TYPE_AFE_MARKER
173 new_aaa_bookmark = ->
174         return new Node TYPE_AAA_BOOKMARK
175
176 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
177 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
178 digits = "0123456789"
179 alnum = lc_alpha + uc_alpha + digits
180 hex_chars = digits + "abcdefABCDEF"
181
182 is_uc_alpha = (str) ->
183         return str.length is 1 and uc_alpha.indexOf(str) > -1
184 is_lc_alpha = (str) ->
185         return str.length is 1 and lc_alpha.indexOf(str) > -1
186
187 # some SVG elements have dashes in them
188 tag_name_chars = alnum + "-"
189
190 # http://www.w3.org/TR/html5/infrastructure.html#space-character
191 space_chars = "\u0009\u000a\u000c\u000d\u0020"
192 is_space = (txt) ->
193         return txt.length is 1 and space_chars.indexOf(txt) > -1
194 is_space_tok = (t) ->
195         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
196
197 is_input_hidden_tok = (t) ->
198         return false unless t.type is TYPE_START_TAG
199         for a in t.attrs_a
200                 if a[0] is 'type'
201                         if a[1].toLowerCase() is 'hidden'
202                                 return true
203                         return false
204         return false
205
206 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
207 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
208
209 # These are the character references that don't need a terminating semicolon
210 # min length: 2, max: 6, none are a prefix of any other.
211 legacy_char_refs = {
212         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
213         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
214         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
215         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
216         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
217         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
218         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
219         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
220         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
221         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
222         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
223         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
224         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
225         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
226         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
227         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
228         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
229         yen: '¥', yuml: 'ÿ'
230 }
231
232 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
233 raw_text_elements = ['script', 'style']
234 escapable_raw_text_elements = ['textarea', 'title']
235 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
236 svg_elements = [
237         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
238         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
239         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
240         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
241         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
242         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
243         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
244         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
245         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
246         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
247         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
248         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
249         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
250         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
251         'view', 'vkern'
252 ]
253
254 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
255 mathml_elements = [
256         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
257         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
258         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
259         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
260         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
261         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
262         'determinant', 'diff', 'divergence', 'divide', 'domain',
263         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
264         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
265         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
266         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
267         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
268         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
269         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
270         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
271         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
272         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
273         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
274         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
275         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
276         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
277         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
278         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
279         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
280         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
281         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
282         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
283         'vectorproduct', 'xor'
284 ]
285 # foreign_elements = [svg_elements..., mathml_elements...]
286 #normal_elements = All other allowed HTML elements are normal elements.
287
288 special_elements = {
289         # HTML:
290         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
291         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
292         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
293         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
294         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
295         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
296         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
297         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
298         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
299         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
300         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML, meta:NS_HTML, nav:NS_HTML,
301         noembed:NS_HTML, noframes:NS_HTML, noscript:NS_HTML, object:NS_HTML,
302         ol:NS_HTML, p:NS_HTML, param:NS_HTML, plaintext:NS_HTML, pre:NS_HTML,
303         script:NS_HTML, section:NS_HTML, select:NS_HTML, source:NS_HTML,
304         style:NS_HTML, summary:NS_HTML, table:NS_HTML, tbody:NS_HTML, td:NS_HTML,
305         template:NS_HTML, textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML,
306         thead:NS_HTML, title:NS_HTML, tr:NS_HTML, track:NS_HTML, ul:NS_HTML,
307         wbr:NS_HTML, xmp:NS_HTML,
308
309         # MathML:
310         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
311         'annotation-xml':NS_MATHML,
312
313         # SVG:
314         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
315 }
316
317 formatting_elements = {
318          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
319          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
320          u: true
321 }
322
323 mathml_text_integration = {
324         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
325 }
326 is_mathml_text_integration_point = (el) ->
327         return mathml_text_integration[el.name] is el.namespace
328 is_html_integration = (el) -> # DON'T PASS A TOKEN
329         if el.namespace is NS_MATHML
330                 if el.name is 'annotation-xml'
331                         if el.attrs.encoding?
332                                 if el.attrs.encoding.toLowerCase() is 'text/html'
333                                         return true
334                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
335                                         return true
336                 return false
337         if el.namespace is NS_SVG
338                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
339                         return true
340         return false
341
342 h_tags = {
343         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
344 }
345
346 foster_parenting_targets = {
347         table: NS_HTML
348         tbody: NS_HTML
349         tfoot: NS_HTML
350         thead: NS_HTML
351         tr: NS_HTML
352 }
353
354 end_tag_implied = {
355         dd: NS_HTML
356         dt: NS_HTML
357         li: NS_HTML
358         option: NS_HTML
359         optgroup: NS_HTML
360         p: NS_HTML
361         rb: NS_HTML
362         rp: NS_HTML
363         rt: NS_HTML
364         rtc: NS_HTML
365 }
366
367 el_is_special = (e) ->
368         return special_elements[e.name] is e.namespace
369
370 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
371 el_is_special_not_adp = (el) ->
372         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
373
374 svg_name_fixes = {
375         altglyph: 'altGlyph'
376         altglyphdef: 'altGlyphDef'
377         altglyphitem: 'altGlyphItem'
378         animatecolor: 'animateColor'
379         animatemotion: 'animateMotion'
380         animatetransform: 'animateTransform'
381         clippath: 'clipPath'
382         feblend: 'feBlend'
383         fecolormatrix: 'feColorMatrix'
384         fecomponenttransfer: 'feComponentTransfer'
385         fecomposite: 'feComposite'
386         feconvolvematrix: 'feConvolveMatrix'
387         fediffuselighting: 'feDiffuseLighting'
388         fedisplacementmap: 'feDisplacementMap'
389         fedistantlight: 'feDistantLight'
390         fedropshadow: 'feDropShadow'
391         feflood: 'feFlood'
392         fefunca: 'feFuncA'
393         fefuncb: 'feFuncB'
394         fefuncg: 'feFuncG'
395         fefuncr: 'feFuncR'
396         fegaussianblur: 'feGaussianBlur'
397         feimage: 'feImage'
398         femerge: 'feMerge'
399         femergenode: 'feMergeNode'
400         femorphology: 'feMorphology'
401         feoffset: 'feOffset'
402         fepointlight: 'fePointLight'
403         fespecularlighting: 'feSpecularLighting'
404         fespotlight: 'feSpotLight'
405         fetile: 'feTile'
406         feturbulence: 'feTurbulence'
407         foreignobject: 'foreignObject'
408         glyphref: 'glyphRef'
409         lineargradient: 'linearGradient'
410         radialgradient: 'radialGradient'
411         textpath: 'textPath'
412 }
413 svg_attribute_fixes = {
414         attributename: 'attributeName'
415         attributetype: 'attributeType'
416         basefrequency: 'baseFrequency'
417         baseprofile: 'baseProfile'
418         calcmode: 'calcMode'
419         clippathunits: 'clipPathUnits'
420         contentscripttype: 'contentScriptType'
421         contentstyletype: 'contentStyleType'
422         diffuseconstant: 'diffuseConstant'
423         edgemode: 'edgeMode'
424         externalresourcesrequired: 'externalResourcesRequired'
425         filterres: 'filterRes'
426         filterunits: 'filterUnits'
427         glyphref: 'glyphRef'
428         gradienttransform: 'gradientTransform'
429         gradientunits: 'gradientUnits'
430         kernelmatrix: 'kernelMatrix'
431         kernelunitlength: 'kernelUnitLength'
432         keypoints: 'keyPoints'
433         keysplines: 'keySplines'
434         keytimes: 'keyTimes'
435         lengthadjust: 'lengthAdjust'
436         limitingconeangle: 'limitingConeAngle'
437         markerheight: 'markerHeight'
438         markerunits: 'markerUnits'
439         markerwidth: 'markerWidth'
440         maskcontentunits: 'maskContentUnits'
441         maskunits: 'maskUnits'
442         numoctaves: 'numOctaves'
443         pathlength: 'pathLength'
444         patterncontentunits: 'patternContentUnits'
445         patterntransform: 'patternTransform'
446         patternunits: 'patternUnits'
447         pointsatx: 'pointsAtX'
448         pointsaty: 'pointsAtY'
449         pointsatz: 'pointsAtZ'
450         preservealpha: 'preserveAlpha'
451         preserveaspectratio: 'preserveAspectRatio'
452         primitiveunits: 'primitiveUnits'
453         refx: 'refX'
454         refy: 'refY'
455         repeatcount: 'repeatCount'
456         repeatdur: 'repeatDur'
457         requiredextensions: 'requiredExtensions'
458         requiredfeatures: 'requiredFeatures'
459         specularconstant: 'specularConstant'
460         specularexponent: 'specularExponent'
461         spreadmethod: 'spreadMethod'
462         startoffset: 'startOffset'
463         stddeviation: 'stdDeviation'
464         stitchtiles: 'stitchTiles'
465         surfacescale: 'surfaceScale'
466         systemlanguage: 'systemLanguage'
467         tablevalues: 'tableValues'
468         targetx: 'targetX'
469         targety: 'targetY'
470         textlength: 'textLength'
471         viewbox: 'viewBox'
472         viewtarget: 'viewTarget'
473         xchannelselector: 'xChannelSelector'
474         ychannelselector: 'yChannelSelector'
475         zoomandpan: 'zoomAndPan'
476 }
477 adjust_mathml_attributes = (t) ->
478         for a in t.attrs_a
479                 if a[0] is 'definitionurl'
480                         a[0] = 'definitionURL'
481         return
482 adjust_svg_attributes = (t) ->
483         for a in t.attrs_a
484                 if svg_attribute_fixes[a[0]]?
485                         a[0] = svg_attribute_fixes[a[0]]
486         return
487 adjust_foreign_attributes = (t) ->
488         # fixfull
489         return
490
491 # decode_named_char_ref()
492 #
493 # The list of named character references is _huge_ so ask the browser to decode
494 # for us instead of wasting bandwidth/space on including the table here.
495 #
496 # Pass without the "&" but with the ";" examples:
497 #    for "&amp" pass "amp;"
498 #    for "&#x2032" pass "x2032;"
499 g_dncr = {
500         cache: {}
501         textarea: document.createElement('textarea')
502 }
503 # TODO test this in IE8
504 decode_named_char_ref = (txt) ->
505         txt = "&#{txt}"
506         decoded = g_dncr.cache[txt]
507         return decoded if decoded?
508         g_dncr.textarea.innerHTML = txt
509         decoded = g_dncr.textarea.value
510         return null if decoded is txt
511         return g_dncr.cache[txt] = decoded
512
513 parse_html = (args) ->
514         txt = null
515         cur = null # index of next char in txt to be parsed
516         # declare doc and tokenizer variables so they're in scope below
517         doc = null
518         open_els = null # stack of open elements
519         afe = null # active formatting elements
520         template_ins_modes = null
521         ins_mode = null
522         original_ins_mode = null
523         tok_state = null
524         tok_cur_tag = null # partially parsed tag
525         flag_scripting = null
526         flag_frameset_ok = null
527         flag_parsing = null
528         flag_foster_parenting = null
529         form_element_pointer = null
530         temporary_buffer = null
531         pending_table_character_tokens = null
532         head_element_pointer = null
533         flag_fragment_parsing = null
534         context_element = null
535
536         stop_parsing = ->
537                 flag_parsing = false
538
539         parse_error = ->
540                 if args.error_cb?
541                         args.error_cb cur
542                 else
543                         console.log "Parse error at character #{cur} of #{txt.length}"
544
545         afe_push = (new_el) ->
546                 matches = 0
547                 for el, i in afe
548                         if el.name is new_el.name and el.namespace is new_el.namespace
549                                 for k, v of el.attrs
550                                         continue unless new_el.attrs[k] is v
551                                 for k, v of new_el.attrs
552                                         continue unless el.attrs[k] is v
553                                 matches += 1
554                                 if matches is 3
555                                         afe.splice i, 1
556                                         break
557                 afe.unshift new_el
558         afe_push_marker = ->
559                 afe.unshift new_afe_marker()
560
561         # the functions below impliment the Tree Contstruction algorithm
562         # http://www.w3.org/TR/html5/syntax.html#tree-construction
563
564         # But first... the helpers
565         template_tag_is_open = ->
566                 for t in open_els
567                         if t.name is 'template' and t.namespace is NS_HTML
568                                 return true
569                 return false
570         is_in_scope_x = (tag_name, scope, namespace) ->
571                 for t in open_els
572                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
573                                 return true
574                         if scope[t.name] is t.namespace
575                                 return false
576                 return false
577         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
578                 for t in open_els
579                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
580                                 return true
581                         if scope[t.name] is t.namespace
582                                 return false
583                         if scope2[t.name] is t.namespace
584                                 return false
585                 return false
586         standard_scopers = {
587                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
588                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
589                 template: NS_HTML, mi: NS_MATHML,
590
591                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
592                 'annotation-xml': NS_MATHML,
593
594                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
595         }
596         button_scopers = button: NS_HTML
597         li_scopers = ol: NS_HTML, ul: NS_HTML
598         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
599         is_in_scope = (tag_name, namespace = null) ->
600                 return is_in_scope_x tag_name, standard_scopers, namespace
601         is_in_button_scope = (tag_name, namespace = null) ->
602                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
603         is_in_table_scope = (tag_name, namespace = null) ->
604                 return is_in_scope_x tag_name, table_scopers, namespace
605         # aka is_in_list_item_scope
606         is_in_li_scope = (tag_name, namespace = null) ->
607                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
608         is_in_select_scope = (tag_name, namespace = null) ->
609                 for t in open_els
610                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
611                                 return true
612                         if t.ns isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
613                                 return false
614                 return false
615         # this checks for a particular element, not by name
616         # this requires a namespace match
617         el_is_in_scope = (needle) ->
618                 for el in open_els
619                         if el is needle
620                                 return true
621                         if standard_scopers[el.name] is el.namespace
622                                 return false
623                 return false
624
625         clear_to_table_stopers = {
626                 'table': true
627                 'template': true
628                 'html': true
629         }
630         clear_stack_to_table_context = ->
631                 loop
632                         if clear_to_table_stopers[open_els[0].name]?
633                                 break
634                         open_els.shift()
635                 return
636         clear_to_table_body_stopers = {
637                 tbody: NS_HTML
638                 tfoot: NS_HTML
639                 thead: NS_HTML
640                 template: NS_HTML
641                 html: NS_HTML
642         }
643         clear_stack_to_table_body_context = ->
644                 loop
645                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
646                                 break
647                         open_els.shift()
648                 return
649         clear_to_table_row_stopers = {
650                 'tr': true
651                 'template': true
652                 'html': true
653         }
654         clear_stack_to_table_row_context = ->
655                 loop
656                         if clear_to_table_row_stopers[open_els[0].name]?
657                                 break
658                         open_els.shift()
659                 return
660         clear_afe_to_marker = ->
661                 loop
662                         return unless afe.length > 0 # this happens in fragment case, ?spec error
663                         el = afe.shift()
664                         if el.type is TYPE_AFE_MARKER
665                                 return
666                 return
667
668         # 8.2.3.1 ...
669         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
670         reset_ins_mode = ->
671                 # 1. Let last be false.
672                 last = false
673                 # 2. Let node be the last node in the stack of open elements.
674                 node_i = 0
675                 node = open_els[node_i]
676                 # 3. Loop: If node is the first node in the stack of open elements,
677                 # then set last to true, and, if the parser was originally created as
678                 # part of the HTML fragment parsing algorithm (fragment case) set node
679                 # to the context element.
680                 loop
681                         if node_i is open_els.length - 1
682                                 last = true
683                                 # fixfull (fragment case)
684
685                         # 4. If node is a select element, run these substeps:
686                         if node.name is 'select'
687                                 # 1. If last is true, jump to the step below labeled done.
688                                 unless last
689                                         # 2. Let ancestor be node.
690                                         ancestor_i = node_i
691                                         ancestor = node
692                                         # 3. Loop: If ancestor is the first node in the stack of
693                                         # open elements, jump to the step below labeled done.
694                                         loop
695                                                 if ancestor_i is open_els.length - 1
696                                                         break
697                                                 # 4. Let ancestor be the node before ancestor in the stack
698                                                 # of open elements.
699                                                 ancestor_i += 1
700                                                 ancestor = open_els[ancestor_i]
701                                                 # 5. If ancestor is a template node, jump to the step below
702                                                 # labeled done.
703                                                 if ancestor.name is 'template'
704                                                         break
705                                                 # 6. If ancestor is a table node, switch the insertion mode
706                                                 # to "in select in table" and abort these steps.
707                                                 if ancestor.name is 'table'
708                                                         ins_mode = ins_mode_in_select_in_table
709                                                         return
710                                                 # 7. Jump back to the step labeled loop.
711                                 # 8. Done: Switch the insertion mode to "in select" and abort
712                                 # these steps.
713                                 ins_mode = ins_mode_in_select
714                                 return
715                         # 5. If node is a td or th element and last is false, then switch
716                         # the insertion mode to "in cell" and abort these steps.
717                         if (node.name is 'td' or node.name is 'th') and last is false
718                                 ins_mode = ins_mode_in_cell
719                                 return
720                         # 6. If node is a tr element, then switch the insertion mode to "in
721                         # row" and abort these steps.
722                         if node.name is 'tr'
723                                 ins_mode = ins_mode_in_row
724                                 return
725                         # 7. If node is a tbody, thead, or tfoot element, then switch the
726                         # insertion mode to "in table body" and abort these steps.
727                         if node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot'
728                                 ins_mode = ins_mode_in_table_body
729                                 return
730                         # 8. If node is a caption element, then switch the insertion mode
731                         # to "in caption" and abort these steps.
732                         if node.name is 'caption'
733                                 ins_mode = ins_mode_in_caption
734                                 return
735                         # 9. If node is a colgroup element, then switch the insertion mode
736                         # to "in column group" and abort these steps.
737                         if node.name is 'colgroup'
738                                 ins_mode = ins_mode_in_column_group
739                                 return
740                         # 10. If node is a table element, then switch the insertion mode to
741                         # "in table" and abort these steps.
742                         if node.name is 'table'
743                                 ins_mode = ins_mode_in_table
744                                 return
745                         # 11. If node is a template element, then switch the insertion mode
746                         # to the current template insertion mode and abort these steps.
747                         # fixfull (template insertion mode stack)
748
749                         # 12. If node is a head element and last is true, then switch the
750                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
751                         # these steps. (fragment case)
752                         if node.name is 'head' and last
753                                 ins_mode = ins_mode_in_body
754                                 return
755                         # 13. If node is a head element and last is false, then switch the
756                         # insertion mode to "in head" and abort these steps.
757                         if node.name is 'head' and last is false
758                                 ins_mode = ins_mode_in_head
759                                 return
760                         # 14. If node is a body element, then switch the insertion mode to
761                         # "in body" and abort these steps.
762                         if node.name is 'body'
763                                 ins_mode = ins_mode_in_body
764                                 return
765                         # 15. If node is a frameset element, then switch the insertion mode
766                         # to "in frameset" and abort these steps. (fragment case)
767                         if node.name is 'frameset'
768                                 ins_mode = ins_mode_in_frameset
769                                 return
770                         # 16. If node is an html element, run these substeps:
771                         if node.name is 'html'
772                                 # 1. If the head element pointer is null, switch the insertion
773                                 # mode to "before head" and abort these steps. (fragment case)
774                                 if head_element_pointer is null
775                                         ins_mode = ins_mode_before_head
776                                 else
777                                         # 2. Otherwise, the head element pointer is not null,
778                                         # switch the insertion mode to "after head" and abort these
779                                         # steps.
780                                         ins_mode = ins_mode_after_head
781                                 return
782                         # 17. If last is true, then switch the insertion mode to "in body"
783                         # and abort these steps. (fragment case)
784                         if last
785                                 ins_mode = ins_mode_in_body
786                                 return
787                         # 18. Let node now be the node before node in the stack of open
788                         # elements.
789                         node_i += 1
790                         node = open_els[node_i]
791                         # 19. Return to the step labeled loop.
792
793         # 8.2.3.2
794
795         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
796         adjusted_current_node = ->
797                 if open_els.length is 1 and flag_fragment_parsing
798                         return context_element
799                 return open_els[0]
800
801         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
802         # this implementation is structured (mostly) as described at the link above.
803         # capitalized comments are the "labels" described at the link above.
804         reconstruct_afe = ->
805                 return if afe.length is 0
806                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
807                         return
808                 # Rewind
809                 i = 0
810                 loop
811                         if i is afe.length - 1
812                                 break
813                         i += 1
814                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
815                                 i -= 1 # Advance
816                                 break
817                 # Create
818                 loop
819                         el = insert_html_element afe[i].token
820                         afe[i] = el
821                         break if i is 0
822                         i -= 1 # Advance
823
824         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
825         # adoption agency algorithm
826         # overview here:
827         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
828         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
829         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
830         adoption_agency = (subject) ->
831                 debug_log "adoption_agency()"
832                 debug_log "tree: #{serialize_els doc.children, false, true}"
833                 debug_log "open_els: #{serialize_els open_els, true, true}"
834                 debug_log "afe: #{serialize_els afe, true, true}"
835                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
836                         el = open_els[0]
837                         open_els.shift()
838                         # remove it from the list of active formatting elements (if found)
839                         for t, i in afe
840                                 if t is el
841                                         afe.splice i, 1
842                                         break
843                         debug_log "aaa: starting off with subject on top of stack, exiting"
844                         return
845                 outer = 0
846                 loop
847                         if outer >= 8
848                                 return
849                         outer += 1
850                         # 5. Let formatting element be the last element in the list of
851                         # active formatting elements that: is between the end of the list
852                         # and the last scope marker in the list, if any, or the start of
853                         # the list otherwise, and  has the tag name subject.
854                         fe = null
855                         for t, fe_of_afe in afe
856                                 if t.type is TYPE_AFE_MARKER
857                                         break
858                                 if t.name is subject
859                                         fe = t
860                                         break
861                         # If there is no such element, then abort these steps and instead
862                         # act as described in the "any other end tag" entry above.
863                         if fe is null
864                                 debug_log "aaa: fe not found in afe"
865                                 in_body_any_other_end_tag subject
866                                 return
867                         # 6. If formatting element is not in the stack of open elements,
868                         # then this is a parse error; remove the element from the list, and
869                         # abort these steps.
870                         in_open_els = false
871                         for t, fe_of_open_els in open_els
872                                 if t is fe
873                                         in_open_els = true
874                                         break
875                         unless in_open_els
876                                 debug_log "aaa: fe not found in open_els"
877                                 parse_error()
878                                 # "remove it from the list" must mean afe, since it's not in open_els
879                                 afe.splice fe_of_afe, 1
880                                 return
881                         # 7. If formatting element is in the stack of open elements, but
882                         # the element is not in scope, then this is a parse error; abort
883                         # these steps.
884                         unless el_is_in_scope fe
885                                 debug_log "aaa: fe not in scope"
886                                 parse_error()
887                                 return
888                         # 8. If formatting element is not the current node, this is a parse
889                         # error. (But do not abort these steps.)
890                         unless open_els[0] is fe
891                                 parse_error()
892                                 # continue
893                         # 9. Let furthest block be the topmost node in the stack of open
894                         # elements that is lower in the stack than formatting element, and
895                         # is an element in the special category. There might not be one.
896                         fb = null
897                         fb_of_open_els = null
898                         for t, i in open_els
899                                 if t is fe
900                                         break
901                                 if el_is_special t
902                                         fb = t
903                                         fb_of_open_els = i
904                                         # and continue, to see if there's one that's more "topmost"
905                         # 10. If there is no furthest block, then the UA must first pop all
906                         # the nodes from the bottom of the stack of open elements, from the
907                         # current node up to and including formatting element, then remove
908                         # formatting element from the list of active formatting elements,
909                         # and finally abort these steps.
910                         if fb is null
911                                 debug_log "aaa: no fb"
912                                 loop
913                                         t = open_els.shift()
914                                         if t is fe
915                                                 afe.splice fe_of_afe, 1
916                                                 return
917                         # 11. Let common ancestor be the element immediately above
918                         # formatting element in the stack of open elements.
919                         ca = open_els[fe_of_open_els + 1] # common ancestor
920
921                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
922                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
923                         bookmark = new_aaa_bookmark()
924                         for t, i in afe
925                                 if t is fe
926                                         afe.splice i, 0, bookmark
927                                         break
928                         node = last_node = fb
929                         inner = 0
930                         loop
931                                 inner += 1
932                                 # 3. Let node be the element immediately above node in the
933                                 # stack of open elements, or if node is no longer in the stack
934                                 # of open elements (e.g. because it got removed by this
935                                 # algorithm), the element that was immediately above node in
936                                 # the stack of open elements before node was removed.
937                                 node_next = null
938                                 for t, i in open_els
939                                         if t is node
940                                                 node_next = open_els[i + 1]
941                                                 break
942                                 node = node_next ? node_above
943                                 debug_log "inner loop #{inner}"
944                                 debug_log "tree: #{serialize_els doc.children, false, true}"
945                                 debug_log "open_els: #{serialize_els open_els, true, true}"
946                                 debug_log "afe: #{serialize_els afe, true, true}"
947                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
948                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
949                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
950                                 debug_log "node: #{node.serialize true, true}"
951                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
952
953                                 # 4. If node is formatting element, then go to the next step in
954                                 # the overall algorithm.
955                                 if node is fe
956                                         break
957                                 debug_log "the meat"
958                                 # 5. If inner loop counter is greater than three and node is in
959                                 # the list of active formatting elements, then remove node from
960                                 # the list of active formatting elements.
961                                 node_in_afe = false
962                                 for t, i in afe
963                                         if t is node
964                                                 if inner > 3
965                                                         afe.splice i, 1
966                                                         debug_log "max out inner"
967                                                 else
968                                                         node_in_afe = true
969                                                         debug_log "in afe"
970                                                 break
971                                 # 6. If node is not in the list of active formatting elements,
972                                 # then remove node from the stack of open elements and then go
973                                 # back to the step labeled inner loop.
974                                 unless node_in_afe
975                                         debug_log "not in afe"
976                                         for t, i in open_els
977                                                 if t is node
978                                                         node_above = open_els[i + 1]
979                                                         open_els.splice i, 1
980                                                         break
981                                         continue
982                                 debug_log "the bones"
983                                 # 7. create an element for the token for which the element node
984                                 # was created, in the HTML namespace, with common ancestor as
985                                 # the intended parent; replace the entry for node in the list
986                                 # of active formatting elements with an entry for the new
987                                 # element, replace the entry for node in the stack of open
988                                 # elements with an entry for the new element, and let node be
989                                 # the new element.
990                                 new_node = token_to_element node.token, NS_HTML, ca
991                                 for t, i in afe
992                                         if t is node
993                                                 afe[i] = new_node
994                                                 debug_log "replaced in afe"
995                                                 break
996                                 for t, i in open_els
997                                         if t is node
998                                                 node_above = open_els[i + 1]
999                                                 open_els[i] = new_node
1000                                                 debug_log "replaced in open_els"
1001                                                 break
1002                                 node = new_node
1003                                 # 8. If last node is furthest block, then move the
1004                                 # aforementioned bookmark to be immediately after the new node
1005                                 # in the list of active formatting elements.
1006                                 if last_node is fb
1007                                         for t, i in afe
1008                                                 if t is bookmark
1009                                                         afe.splice i, 1
1010                                                         debug_log "removed bookmark"
1011                                                         break
1012                                         for t, i in afe
1013                                                 if t is node
1014                                                         # "after" means lower
1015                                                         afe.splice i, 0, bookmark # "after as <-
1016                                                         debug_log "placed bookmark after node"
1017                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1018                                                         break
1019                                 # 9. Insert last node into node, first removing it from its
1020                                 # previous parent node if any.
1021                                 if last_node.parent?
1022                                         debug_log "last_node has parent"
1023                                         for c, i in last_node.parent.children
1024                                                 if c is last_node
1025                                                         debug_log "removing last_node from parent"
1026                                                         last_node.parent.children.splice i, 1
1027                                                         break
1028                                 node.children.push last_node
1029                                 last_node.parent = node
1030                                 # 10. Let last node be node.
1031                                 last_node = node
1032                                 debug_log "at last"
1033                                 # 11. Return to the step labeled inner loop.
1034                         # 14. Insert whatever last node ended up being in the previous step
1035                         # at the appropriate place for inserting a node, but using common
1036                         # ancestor as the override target.
1037
1038                         # In the case where fe is immediately followed by fb:
1039                         #   * inner loop exits out early (node==fe)
1040                         #   * last_node is fb
1041                         #   * last_node is still in the tree (not a duplicate)
1042                         if last_node.parent?
1043                                 debug_log "FEFIRST? last_node has parent"
1044                                 for c, i in last_node.parent.children
1045                                         if c is last_node
1046                                                 debug_log "removing last_node from parent"
1047                                                 last_node.parent.children.splice i, 1
1048                                                 break
1049
1050                         debug_log "after aaa inner loop"
1051                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1052                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1053                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1054                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1055                         debug_log "tree: #{serialize_els doc.children, false, true}"
1056
1057                         debug_log "insert"
1058
1059
1060                         # can't use standard insert token thing, because it's already in
1061                         # open_els and must stay at it's current position in open_els
1062                         dest = adjusted_insertion_location ca
1063                         dest[0].children.splice dest[1], 0, last_node
1064                         last_node.parent = dest[0]
1065
1066
1067                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1068                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1069                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1070                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1071                         debug_log "tree: #{serialize_els doc.children, false, true}"
1072
1073                         # 15. Create an element for the token for which formatting element
1074                         # was created, in the HTML namespace, with furthest block as the
1075                         # intended parent.
1076                         new_element = token_to_element fe.token, NS_HTML, fb
1077                         # 16. Take all of the child nodes of furthest block and append them
1078                         # to the element created in the last step.
1079                         while fb.children.length
1080                                 t = fb.children.shift()
1081                                 t.parent = new_element
1082                                 new_element.children.push t
1083                         # 17. Append that new element to furthest block.
1084                         new_element.parent = fb
1085                         fb.children.push new_element
1086                         # 18. Remove formatting element from the list of active formatting
1087                         # elements, and insert the new element into the list of active
1088                         # formatting elements at the position of the aforementioned
1089                         # bookmark.
1090                         for t, i in afe
1091                                 if t is fe
1092                                         afe.splice i, 1
1093                                         break
1094                         for t, i in afe
1095                                 if t is bookmark
1096                                         afe[i] = new_element
1097                                         break
1098                         # 19. Remove formatting element from the stack of open elements,
1099                         # and insert the new element into the stack of open elements
1100                         # immediately below the position of furthest block in that stack.
1101                         for t, i in open_els
1102                                 if t is fe
1103                                         open_els.splice i, 1
1104                                         break
1105                         for t, i in open_els
1106                                 if t is fb
1107                                         open_els.splice i, 0, new_element
1108                                         break
1109                         # 20. Jump back to the step labeled outer loop.
1110                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1111                         debug_log "tree: #{serialize_els doc.children, false, true}"
1112                         debug_log "open_els: #{serialize_els open_els, true, true}"
1113                         debug_log "afe: #{serialize_els afe, true, true}"
1114                 debug_log "AAA DONE"
1115
1116         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1117         close_p_element = ->
1118                 generate_implied_end_tags 'p' # arg is exception
1119                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1120                         parse_error()
1121                 while open_els.length > 1 # just in case
1122                         el = open_els.shift()
1123                         if el.name is 'p' and el.namespace is NS_HTML
1124                                 return
1125         close_p_if_in_button_scope = ->
1126                 if is_in_button_scope 'p', NS_HTML
1127                         close_p_element()
1128
1129         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1130         # aka insert_a_character = (t) ->
1131         insert_character = (t) ->
1132                 dest = adjusted_insertion_location()
1133                 # fixfull check for Document node
1134                 if dest[1] > 0
1135                         prev = dest[0].children[dest[1] - 1]
1136                         if prev.type is TYPE_TEXT
1137                                 prev.text += t.text
1138                                 return
1139                 dest[0].children.splice dest[1], 0, t
1140
1141
1142         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1143         process_token = (t) ->
1144                 acn = adjusted_current_node()
1145                 unless acn?
1146                         ins_mode t
1147                         return
1148                 if acn.namespace is NS_HTML
1149                         ins_mode t
1150                         return
1151                 if is_mathml_text_integration_point(acn)
1152                         if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
1153                                 ins_mode t
1154                                 return
1155                         if t.type is TYPE_TEXT
1156                                 ins_mode t
1157                                 return
1158                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1159                         ins_mode t
1160                         return
1161                 if is_html_integration acn
1162                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1163                                 ins_mode t
1164                                 return
1165                 if t.type is TYPE_EOF
1166                         ins_mode t
1167                         return
1168                 in_foreign_content t
1169                 return
1170
1171         # 8.2.5.1
1172         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1173         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1174         adjusted_insertion_location = (override_target = null) ->
1175                 # 1. If there was an override target specified, then let target be the
1176                 # override target.
1177                 if override_target?
1178                         target = override_target
1179                 else # Otherwise, let target be the current node.
1180                         target = open_els[0]
1181                 # 2. Determine the adjusted insertion location using the first matching
1182                 # steps from the following list:
1183                 #
1184                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1185                 # thead, or tr element Foster parenting happens when content is
1186                 # misnested in tables.
1187                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1188                         loop # once. this is here so we can ``break`` to "abort these substeps"
1189                                 # 1. Let last template be the last template element in the
1190                                 # stack of open elements, if any.
1191                                 last_template = null
1192                                 last_template_i = null
1193                                 for el, i in open_els
1194                                         if el.name is 'template' and el.namespace is NS_HTML
1195                                                 last_template = el
1196                                                 last_template_i = i
1197                                                 break
1198                                 # 2. Let last table be the last table element in the stack of
1199                                 # open elements, if any.
1200                                 last_table = null
1201                                 last_table_i
1202                                 for el, i in open_els
1203                                         if el.name is 'table' and el.namespace is NS_HTML
1204                                                 last_table = el
1205                                                 last_table_i = i
1206                                                 break
1207                                 # 3. If there is a last template and either there is no last
1208                                 # table, or there is one, but last template is lower (more
1209                                 # recently added) than last table in the stack of open
1210                                 # elements, then: let adjusted insertion location be inside
1211                                 # last template's template contents, after its last child (if
1212                                 # any), and abort these substeps.
1213                                 if last_template and (last_table is null or last_template_i < last_table_i)
1214                                         target = last_template # fixfull should be it's contents
1215                                         target_i = target.children.length
1216                                         break
1217                                 # 4. If there is no last table, then let adjusted insertion
1218                                 # location be inside the first element in the stack of open
1219                                 # elements (the html element), after its last child (if any),
1220                                 # and abort these substeps. (fragment case)
1221                                 if last_table is null
1222                                         # this is odd
1223                                         target = open_els[open_els.length - 1]
1224                                         target_i = target.children.length
1225                                         break
1226                                 # 5. If last table has a parent element, then let adjusted
1227                                 # insertion location be inside last table's parent element,
1228                                 # immediately before last table, and abort these substeps.
1229                                 if last_table.parent?
1230                                         for c, i in last_table.parent.children
1231                                                 if c is last_table
1232                                                         target = last_table.parent
1233                                                         target_i = i
1234                                                         break
1235                                         break
1236                                 # 6. Let previous element be the element immediately above last
1237                                 # table in the stack of open elements.
1238                                 #
1239                                 # huh? how could it not have a parent?
1240                                 previous_element = open_els[last_table_i + 1]
1241                                 # 7. Let adjusted insertion location be inside previous
1242                                 # element, after its last child (if any).
1243                                 target = previous_element
1244                                 target_i = target.children.length
1245                                 # Note: These steps are involved in part because it's possible
1246                                 # for elements, the table element in this case in particular,
1247                                 # to have been moved by a script around in the DOM, or indeed
1248                                 # removed from the DOM entirely, after the element was inserted
1249                                 # by the parser.
1250                                 break # don't really loop
1251                 else
1252                         # Otherwise Let adjusted insertion location be inside target, after
1253                         # its last child (if any).
1254                         target_i = target.children.length
1255
1256                 # 3. If the adjusted insertion location is inside a template element,
1257                 # let it instead be inside the template element's template contents,
1258                 # after its last child (if any).
1259                 # fixfull (template)
1260
1261                 # 4. Return the adjusted insertion location.
1262                 return [target, target_i]
1263
1264         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1265         # aka create_an_element_for_token
1266         token_to_element = (t, namespace, intended_parent) ->
1267                 # convert attributes into a hash
1268                 attrs = {}
1269                 for a in t.attrs_a
1270                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1271                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1272
1273                 # TODO 2. If the newly created element has an xmlns attribute in the
1274                 # XMLNS namespace whose value is not exactly the same as the element's
1275                 # namespace, that is a parse error. Similarly, if the newly created
1276                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1277                 # value is not the XLink Namespace, that is a parse error.
1278
1279                 # fixfull: the spec says stuff about form pointers and ownerDocument
1280
1281                 return el
1282
1283         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1284         insert_foreign_element = (token, namespace) ->
1285                 ail = adjusted_insertion_location()
1286                 ail_el = ail[0]
1287                 ail_i = ail[1]
1288                 el = token_to_element token, namespace, ail_el
1289                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1290                 el.parent = ail_el
1291                 ail_el.children.splice ail_i, 0, el
1292                 open_els.unshift el
1293                 return el
1294         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1295         insert_html_element = (token) ->
1296                 insert_foreign_element token, NS_HTML
1297
1298         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1299         # position should be [node, index_within_children]
1300         insert_comment = (t, position = null) ->
1301                 position ?= adjusted_insertion_location()
1302                 position[0].children.splice position[1], 0, t
1303
1304         # 8.2.5.2
1305         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1306         parse_generic_raw_text = (t) ->
1307                 insert_html_element t
1308                 tok_state = tok_state_rawtext
1309                 original_ins_mode = ins_mode
1310                 ins_mode = ins_mode_text
1311         parse_generic_rcdata_text = (t) ->
1312                 insert_html_element t
1313                 tok_state = tok_state_rcdata
1314                 original_ins_mode = ins_mode
1315                 ins_mode = ins_mode_text
1316
1317         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1318         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1319         generate_implied_end_tags = (except = null) ->
1320                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1321                         open_els.shift()
1322
1323         # 8.2.5.4 The rules for parsing tokens in HTML content
1324         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1325
1326         # 8.2.5.4.1 The "initial" insertion mode
1327         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1328         ins_mode_initial = (t) ->
1329                 if is_space_tok t
1330                         return
1331                 if t.type is TYPE_COMMENT
1332                         # ?fixfull
1333                         doc.children.push t
1334                         return
1335                 if t.type is TYPE_DOCTYPE
1336                         # FIXME check identifiers, set quirks, etc
1337                         # fixfull
1338                         doc.children.push t
1339                         ins_mode = ins_mode_before_html
1340                         return
1341                 # Anything else
1342                 #fixfull (iframe, quirks)
1343                 ins_mode = ins_mode_before_html
1344                 process_token t
1345                 return
1346
1347         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1348         ins_mode_before_html = (t) ->
1349                 if t.type is TYPE_DOCTYPE
1350                         parse_error()
1351                         return
1352                 if t.type is TYPE_COMMENT
1353                         doc.children.push t
1354                         return
1355                 if is_space_tok t
1356                         return
1357                 if t.type is TYPE_START_TAG and t.name is 'html'
1358                         el = token_to_element t, NS_HTML, doc
1359                         doc.children.push el
1360                         open_els.unshift(el)
1361                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1362                         ins_mode = ins_mode_before_head
1363                         return
1364                 if t.type is TYPE_END_TAG
1365                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1366                                 # fall through to "anything else"
1367                         else
1368                                 parse_error()
1369                                 return
1370                 # Anything else
1371                 html_tok = new_open_tag 'html'
1372                 el = token_to_element html_tok, NS_HTML, doc
1373                 doc.children.push el
1374                 open_els.unshift el
1375                 # ?fixfull browsing context
1376                 ins_mode = ins_mode_before_head
1377                 process_token t
1378                 return
1379
1380         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1381         ins_mode_before_head = (t) ->
1382                 if is_space_tok t
1383                         return
1384                 if t.type is TYPE_COMMENT
1385                         insert_comment t
1386                         return
1387                 if t.type is TYPE_DOCTYPE
1388                         parse_error()
1389                         return
1390                 if t.type is TYPE_START_TAG and t.name is 'html'
1391                         ins_mode_in_body t
1392                         return
1393                 if t.type is TYPE_START_TAG and t.name is 'head'
1394                         el = insert_html_element t
1395                         head_element_pointer = el
1396                         ins_mode = ins_mode_in_head
1397                         return
1398                 if t.type is TYPE_END_TAG
1399                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1400                                 # fall through to Anything else below
1401                         else
1402                                 parse_error()
1403                                 return
1404                 # Anything else
1405                 head_tok = new_open_tag 'head'
1406                 el = insert_html_element head_tok
1407                 head_element_pointer = el
1408                 ins_mode = ins_mode_in_head
1409                 process_token t
1410
1411         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1412         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1413                 open_els.shift() # spec says this will be a 'head' node
1414                 ins_mode = ins_mode_after_head
1415                 process_token t
1416         ins_mode_in_head = (t) ->
1417                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1418                         insert_character t
1419                         return
1420                 if t.type is TYPE_COMMENT
1421                         insert_comment t
1422                         return
1423                 if t.type is TYPE_DOCTYPE
1424                         parse_error()
1425                         return
1426                 if t.type is TYPE_START_TAG and t.name is 'html'
1427                         ins_mode_in_body t
1428                         return
1429                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1430                         el = insert_html_element t
1431                         open_els.shift()
1432                         t.acknowledge_self_closing()
1433                         return
1434                 if t.type is TYPE_START_TAG and t.name is 'meta'
1435                         el = insert_html_element t
1436                         open_els.shift()
1437                         t.acknowledge_self_closing()
1438                         # fixfull encoding stuff
1439                         return
1440                 if t.type is TYPE_START_TAG and t.name is 'title'
1441                         parse_generic_rcdata_text t
1442                         return
1443                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1444                         parse_generic_raw_text t
1445                         return
1446                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1447                         insert_html_element t
1448                         ins_mode = ins_mode_in_head_noscript
1449                         return
1450                 if t.type is TYPE_START_TAG and t.name is 'script'
1451                         ail = adjusted_insertion_location()
1452                         el = token_to_element t, NS_HTML, ail
1453                         el.flag 'parser-inserted', true
1454                         # fixfull frament case
1455                         ail[0].children.splice ail[1], 0, el
1456                         open_els.unshift el
1457                         tok_state = tok_state_script_data
1458                         original_ins_mode = ins_mode # make sure orig... is defined
1459                         ins_mode = ins_mode_text
1460                         return
1461                 if t.type is TYPE_END_TAG and t.name is 'head'
1462                         open_els.shift() # will be a head element... spec says so
1463                         ins_mode = ins_mode_after_head
1464                         return
1465                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1466                         ins_mode_in_head_else t
1467                         return
1468                 if t.type is TYPE_START_TAG and t.name is 'template'
1469                         insert_html_element t
1470                         afe_push_marker()
1471                         flag_frameset_ok = false
1472                         ins_mode = ins_mode_in_template
1473                         template_ins_modes.unshift ins_mode_in_template
1474                         return
1475                 if t.type is TYPE_END_TAG and t.name is 'template'
1476                         if template_tag_is_open()
1477                                 generate_implied_end_tags
1478                                 if open_els[0].name isnt 'template'
1479                                         parse_error()
1480                                 loop
1481                                         el = open_els.shift()
1482                                         if el.name is 'template' and el.namespace is NS_HTML
1483                                                 break
1484                                 clear_afe_to_marker()
1485                                 template_ins_modes.shift()
1486                                 reset_ins_mode()
1487                         else
1488                                 parse_error()
1489                         return
1490                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1491                         parse_error()
1492                         return
1493                 ins_mode_in_head_else t
1494
1495         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1496         ins_mode_in_head_noscript_else = (t) ->
1497                 parse_error()
1498                 open_els.shift()
1499                 ins_mode = ins_mode_in_head
1500                 process_token t
1501         ins_mode_in_head_noscript = (t) ->
1502                 if t.type is TYPE_DOCTYPE
1503                         parse_error()
1504                         return
1505                 if t.type is TYPE_START_TAG and t.name is 'html'
1506                         ins_mode_in_body t
1507                         return
1508                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1509                         open_els.shift()
1510                         ins_mode = ins_mode_in_head
1511                         return
1512                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1513                         ins_mode_in_head t
1514                         return
1515                 if t.type is TYPE_END_TAG and t.name is 'br'
1516                         ins_mode_in_head_noscript_else t
1517                         return
1518                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1519                         parse_error()
1520                         return
1521                 # Anything else
1522                 ins_mode_in_head_noscript_else t
1523                 return
1524
1525
1526
1527         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1528         ins_mode_after_head_else = (t) ->
1529                 body_tok = new_open_tag 'body'
1530                 insert_html_element body_tok
1531                 ins_mode = ins_mode_in_body
1532                 process_token t
1533                 return
1534         ins_mode_after_head = (t) ->
1535                 if is_space_tok t
1536                         insert_character t
1537                         return
1538                 if t.type is TYPE_COMMENT
1539                         insert_comment t
1540                         return
1541                 if t.type is TYPE_DOCTYPE
1542                         parse_error()
1543                         return
1544                 if t.type is TYPE_START_TAG and t.name is 'html'
1545                         ins_mode_in_body t
1546                         return
1547                 if t.type is TYPE_START_TAG and t.name is 'body'
1548                         insert_html_element t
1549                         flag_frameset_ok = false
1550                         ins_mode = ins_mode_in_body
1551                         return
1552                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1553                         insert_html_element t
1554                         ins_mode = ins_mode_in_frameset
1555                         return
1556                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1557                         parse_error()
1558                         open_els.unshift head_element_pointer
1559                         ins_mode_in_head t
1560                         for el, i of open_els
1561                                 if el is head_element_pointer
1562                                         open_els.splice i, 1
1563                                         return
1564                         console.log "warning: 23904 couldn't find head element in open_els"
1565                         return
1566                 if t.type is TYPE_END_TAG and t.name is 'template'
1567                         ins_mode_in_head t
1568                         return
1569                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1570                         ins_mode_after_head_else t
1571                         return
1572                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1573                         parse_error()
1574                         return
1575                 # Anything else
1576                 ins_mode_after_head_else t
1577
1578         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1579         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1580                 for el, i in open_els
1581                         if el.name is name and el.namespace is NS_HTML
1582                                 generate_implied_end_tags name # arg is exception
1583                                 parse_error() unless i is 0
1584                                 while i >= 0
1585                                         open_els.shift()
1586                                         i -= 1
1587                                 return
1588                         if special_elements[el.name] is el.namespace
1589                                 parse_error()
1590                                 return
1591                 return
1592         ins_mode_in_body = (t) ->
1593                 if t.type is TYPE_TEXT and t.text is "\u0000"
1594                         parse_error()
1595                         return
1596                 if is_space_tok t
1597                         reconstruct_afe()
1598                         insert_character t
1599                         return
1600                 if t.type is TYPE_TEXT
1601                         reconstruct_afe()
1602                         insert_character t
1603                         flag_frameset_ok = false
1604                         return
1605                 if t.type is TYPE_COMMENT
1606                         insert_comment t
1607                         return
1608                 if t.type is TYPE_DOCTYPE
1609                         parse_error()
1610                         return
1611                 if t.type is TYPE_START_TAG and t.name is 'html'
1612                         parse_error()
1613                         return if template_tag_is_open()
1614                         root_attrs = open_els[open_els.length - 1].attrs
1615                         for a of t.attrs_a
1616                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1617                         return
1618
1619                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1620                         ins_mode_in_head t
1621                         return
1622                 if t.type is TYPE_START_TAG and t.name is 'body'
1623                         parse_error()
1624                         return if open_els.length < 2
1625                         second = open_els[open_els.length - 2]
1626                         return unless second.ns is NS_HTML
1627                         return unless second.name is 'body'
1628                         return if template_tag_is_open()
1629                         frameset_ok_flag = false
1630                         for a of t.attrs_a
1631                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1632                         return
1633                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1634                         parse_error()
1635                         return if open_els.length < 2
1636                         second_i = open_els.length - 2
1637                         second = open_els[second_i]
1638                         return unless second.ns is NS_HTML
1639                         return unless second.name is 'body'
1640                         flag_frameset_ok = false
1641                         if second.parent?
1642                                 for el, i in second.parent.children
1643                                         if el is second
1644                                                 second.parent.children.splice i, 1
1645                                                 break
1646                         open_els.splice second_i, 1
1647                         # pop everything except the "root html element"
1648                         while open_els.length > 1
1649                                 open_els.shift()
1650                         insert_html_element t
1651                         ins_mode = ins_mode_in_frameset
1652                         return
1653                 if t.type is TYPE_EOF
1654                         ok_tags = {
1655                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1656                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1657                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1658                         }
1659                         for el in open_els
1660                                 unless ok_tags[t.name] is el.namespace
1661                                         parse_error()
1662                                         break
1663                         if template_ins_modes.length > 0
1664                                 ins_mode_in_template t
1665                         else
1666                                 stop_parsing()
1667                         return
1668                 if t.type is TYPE_END_TAG and t.name is 'body'
1669                         unless is_in_scope 'body', NS_HTML
1670                                 parse_error()
1671                                 return
1672                         ok_tags = {
1673                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1674                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1675                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1676                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1677                                 html:NS_HTML
1678                         }
1679                         for el in open_els
1680                                 unless ok_tags[t.name] is el.namespace
1681                                         parse_error()
1682                                         break
1683                         ins_mode = ins_mode_after_body
1684                         return
1685                 if t.type is TYPE_END_TAG and t.name is 'html'
1686                         unless is_in_scope 'body', NS_HTML
1687                                 parse_error()
1688                                 return
1689                         ok_tags = {
1690                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1691                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1692                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1693                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1694                                 html:NS_HTML
1695                         }
1696                         for el in open_els
1697                                 unless ok_tags[t.name] is el.namespace
1698                                         parse_error()
1699                                         break
1700                         ins_mode = ins_mode_after_body
1701                         process_token t
1702                         return
1703                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1704                         close_p_if_in_button_scope()
1705                         insert_html_element t
1706                         return
1707                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1708                         close_p_if_in_button_scope()
1709                         if h_tags[open_els[0].name] is open_els[0].namespace
1710                                 parse_error()
1711                                 open_els.shift()
1712                         insert_html_element t
1713                         return
1714                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1715                         close_p_if_in_button_scope()
1716                         insert_html_element t
1717                         # spec: If the next token is a "LF" (U+000A) character token, then
1718                         # ignore that token and move on to the next one. (Newlines at the
1719                         # start of pre blocks are ignored as an authoring convenience.)
1720                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1721                                 cur += 1
1722                         flag_frameset_ok = false
1723                         return
1724                 if t.type is TYPE_START_TAG and t.name is 'form'
1725                         unless form_element_pointer is null or template_tag_is_open()
1726                                 parse_error()
1727                                 return
1728                         close_p_if_in_button_scope()
1729                         el = insert_html_element t
1730                         unless template_tag_is_open()
1731                                 form_element_pointer = el
1732                         return
1733                 if t.type is TYPE_START_TAG and t.name is 'li'
1734                         flag_frameset_ok = false
1735                         for node in open_els
1736                                 if node.name is 'li' and node.namespace is NS_HTML
1737                                         generate_implied_end_tags 'li' # arg is exception
1738                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1739                                                 parse_error()
1740                                         loop
1741                                                 el = open_els.shift()
1742                                                 if el.name is 'li' and el.namespace is NS_HTML
1743                                                         break
1744                                         break
1745                                 if el_is_special_not_adp node
1746                                                 break
1747                         close_p_if_in_button_scope()
1748                         insert_html_element t
1749                         return
1750                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1751                         flag_frameset_ok = false
1752                         for node in open_els
1753                                 if node.name is 'dd' and node.namespace is NS_HTML
1754                                         generate_implied_end_tags 'dd' # arg is exception
1755                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1756                                                 parse_error()
1757                                         loop
1758                                                 el = open_els.shift()
1759                                                 if el.name is 'dd' and el.namespace is NS_HTML
1760                                                         break
1761                                         break
1762                                 if node.name is 'dt' and node.namespace is NS_HTML
1763                                         generate_implied_end_tags 'dt' # arg is exception
1764                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1765                                                 parse_error()
1766                                         loop
1767                                                 el = open_els.shift()
1768                                                 if el.name is 'dt' and el.namespace is NS_HTML
1769                                                         break
1770                                         break
1771                                 if el_is_special_not_adp node
1772                                         break
1773                         close_p_if_in_button_scope()
1774                         insert_html_element t
1775                         return
1776                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1777                         close_p_if_in_button_scope()
1778                         insert_html_element t
1779                         tok_state = tok_state_plaintext
1780                         return
1781                 if t.type is TYPE_START_TAG and t.name is 'button'
1782                         if is_in_scope 'button', NS_HTML
1783                                 parse_error()
1784                                 generate_implied_end_tags()
1785                                 loop
1786                                         el = open_els.shift()
1787                                         if el.name is 'button' and el.namespace is NS_HTML
1788                                                 break
1789                         reconstruct_afe()
1790                         insert_html_element t
1791                         flag_frameset_ok = false
1792                         return
1793                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1794                         unless is_in_scope t.name, NS_HTML
1795                                 parse_error()
1796                                 return
1797                         generate_implied_end_tags()
1798                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1799                                 parse_error()
1800                         loop
1801                                 el = open_els.shift()
1802                                 if el.name is t.name and el.namespace is NS_HTML
1803                                         return
1804                         return
1805                 if t.type is TYPE_END_TAG and t.name is 'form'
1806                         unless template_tag_is_open()
1807                                 node = form_element_pointer
1808                                 form_element_pointer = null
1809                                 if node is null or not el_is_in_scope node
1810                                         parse_error()
1811                                         return
1812                                 generate_implied_end_tags()
1813                                 if open_els[0] isnt node
1814                                         parse_error()
1815                                 for el, i in open_els
1816                                         if el is node
1817                                                 open_els.splice i, 1
1818                                                 break
1819                         else
1820                                 unless is_in_scope 'form', NS_HTML
1821                                         parse_error()
1822                                         return
1823                                 generate_implied_end_tags()
1824                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1825                                         parse_error()
1826                                 loop
1827                                         el = open_els.shift()
1828                                         if el.name is 'form' and el.namespace is NS_HTML
1829                                                 break
1830                         return
1831                 if t.type is TYPE_END_TAG and t.name is 'p'
1832                         unless is_in_button_scope 'p', NS_HTML
1833                                 parse_error()
1834                                 insert_html_element new_open_tag 'p'
1835                         close_p_element()
1836                         return
1837                 if t.type is TYPE_END_TAG and t.name is 'li'
1838                         unless is_in_li_scope 'li', NS_HTML
1839                                 parse_error()
1840                                 return
1841                         generate_implied_end_tags 'li' # arg is exception
1842                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1843                                 parse_error()
1844                         loop
1845                                 el = open_els.shift()
1846                                 if el.name is 'li' and el.namespace is NS_HTML
1847                                         break
1848                         return
1849                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1850                         unless is_in_scope t.name, NS_HTML
1851                                 parse_error()
1852                                 return
1853                         generate_implied_end_tags t.name # arg is exception
1854                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1855                                 parse_error()
1856                         loop
1857                                 el = open_els.shift()
1858                                 if el.name is t.name and el.namespace is NS_HTML
1859                                         break
1860                         return
1861                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1862                         h_in_scope = false
1863                         for el in open_els
1864                                 if h_tags[el.name] is el.namespace
1865                                         h_in_scope = true
1866                                         break
1867                                 if standard_scopers[el.name] is el.namespace
1868                                         break
1869                         unless h_in_scope
1870                                 parse_error()
1871                                 return
1872                         generate_implied_end_tags()
1873                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1874                                 parse_error()
1875                         loop
1876                                 el = open_els.shift()
1877                                 if h_tags[el.name] is el.namespace
1878                                         break
1879                         return
1880                 # deep breath!
1881                 if t.type is TYPE_START_TAG and t.name is 'a'
1882                         # If the list of active formatting elements contains an a element
1883                         # between the end of the list and the last marker on the list (or
1884                         # the start of the list if there is no marker on the list), then
1885                         # this is a parse error; run the adoption agency algorithm for the
1886                         # tag name "a", then remove that element from the list of active
1887                         # formatting elements and the stack of open elements if the
1888                         # adoption agency algorithm didn't already remove it (it might not
1889                         # have if the element is not in table scope).
1890                         found = false
1891                         for el in afe
1892                                 if el.type is TYPE_AFE_MARKER
1893                                         break
1894                                 if el.name is 'a' and el.namespace is NS_HTML
1895                                         found = el
1896                         if found?
1897                                 parse_error()
1898                                 adoption_agency 'a'
1899                                 for el, i in afe
1900                                         if el is found
1901                                                 afe.splice i, 1
1902                                 for el, i in open_els
1903                                         if el is found
1904                                                 open_els.splice i, 1
1905                         reconstruct_afe()
1906                         el = insert_html_element t
1907                         afe_push el
1908                         return
1909                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1910                         reconstruct_afe()
1911                         el = insert_html_element t
1912                         afe_push el
1913                         return
1914                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1915                         reconstruct_afe()
1916                         el = insert_html_element t
1917                         afe_push el
1918                         return
1919                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1920                         adoption_agency t.name
1921                         return
1922                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1923                         reconstruct_afe()
1924                         insert_html_element t
1925                         afe_push_marker()
1926                         flag_frameset_ok = false
1927                         return
1928                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1929                         unless is_in_scope t.name, NS_HTML
1930                                 parse_error()
1931                                 return
1932                         generate_implied_end_tags()
1933                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1934                                 parse_error()
1935                         loop
1936                                 el = open_els.shift()
1937                                 if el.name is t.name and el.namespace is NS_HTML
1938                                         break
1939                         clear_afe_to_marker()
1940                         return
1941                 if t.type is TYPE_START_TAG and t.name is 'table'
1942                         close_p_if_in_button_scope() # fixfull quirksmode thing
1943                         insert_html_element t
1944                         flag_frameset_ok = false
1945                         ins_mode = ins_mode_in_table
1946                         return
1947                 if t.type is TYPE_END_TAG and t.name is 'br'
1948                         parse_error()
1949                         t.type is TYPE_START_TAG
1950                         # fall through
1951                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
1952                         reconstruct_afe()
1953                         insert_html_element t
1954                         open_els.shift()
1955                         t.acknowledge_self_closing()
1956                         flag_frameset_ok = false
1957                         return
1958                 if t.type is TYPE_START_TAG and t.name is 'input'
1959                         reconstruct_afe()
1960                         insert_html_element t
1961                         open_els.shift()
1962                         t.acknowledge_self_closing()
1963                         unless is_input_hidden_tok t
1964                                 flag_frameset_ok = false
1965                         return
1966                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
1967                         insert_html_element t
1968                         open_els.shift()
1969                         t.acknowledge_self_closing()
1970                         return
1971                 if t.type is TYPE_START_TAG and t.name is 'hr'
1972                         close_p_if_in_button_scope()
1973                         insert_html_element t
1974                         open_els.shift()
1975                         t.acknowledge_self_closing()
1976                         flag_frameset_ok = false
1977                         return
1978                 if t.type is TYPE_START_TAG and t.name is 'image'
1979                         parse_error()
1980                         t.name = 'img'
1981                         process_token t
1982                         return
1983                 if t.type is TYPE_START_TAG and t.name is 'isindex'
1984                         parse_error()
1985                         if template_tag_is_open() is false and form_element_pointer isnt null
1986                                 return
1987                         t.acknowledge_self_closing()
1988                         flag_frameset_ok = false
1989                         close_p_if_in_button_scope()
1990                         el = insert_html_element new_open_tag 'form'
1991                         unless template_tag_is_open()
1992                                 form_element_pointer = el
1993                         for a in t.attrs_a
1994                                 if a[0] is 'action'
1995                                         el.attrs['action'] = a[1]
1996                                         break
1997                         insert_html_element new_open_tag 'hr'
1998                         open_els.shift()
1999                         reconstruct_afe()
2000                         insert_html_element new_open_tag 'label'
2001                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2002                         input_el = new_open_tag 'input'
2003                         prompt = null
2004                         for a in t.attrs_a
2005                                 if a[0] is 'prompt'
2006                                         prompt = a[1]
2007                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2008                                         input_el.attrs_a.push [a[0], a[1]]
2009                         input_el.attrs_a.push ['name', 'isindex']
2010                         # fixfull this next bit is in english... internationalize?
2011                         prompt ?= "This is a searchable index. Enter search keywords: "
2012                         insert_character new_character_token prompt # fixfull split
2013                         # TODO submit typo "balue" in spec
2014                         insert_html_element input_el
2015                         open_els.shift()
2016                         # insert_character '' # you can put chars here if promt attr missing
2017                         open_els.shift()
2018                         insert_html_element new_open_tag 'hr'
2019                         open_els.shift()
2020                         open_els.shift()
2021                         unless template_tag_is_open()
2022                                 form_element_pointer = null
2023                         return
2024                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2025                         insert_html_element t
2026                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2027                                 cur += 1
2028                         tok_state = tok_state_rcdata
2029                         original_ins_mode = ins_mode
2030                         flag_frameset_ok = false
2031                         ins_mode = ins_mode_text
2032                         return
2033                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2034                         close_p_if_in_button_scope()
2035                         reconstruct_afe()
2036                         flag_frameset_ok = false
2037                         parse_generic_raw_text t
2038                         return
2039                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2040                         flag_frameset_ok = false
2041                         parse_generic_raw_text t
2042                         return
2043                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2044                         parse_generic_raw_text t
2045                         return
2046                 if t.type is TYPE_START_TAG and t.name is 'select'
2047                         reconstruct_afe()
2048                         insert_html_element t
2049                         flag_frameset_ok = false
2050                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2051                                 ins_mode = ins_mode_in_select_in_table
2052                         else
2053                                 ins_mode = ins_mode_in_select
2054                         return
2055                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2056                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2057                                 open_els.shift()
2058                         reconstruct_afe()
2059                         insert_html_element t
2060                         return
2061                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2062                         if is_in_scope 'ruby', NS_HTML
2063                                 generate_implied_end_tags()
2064                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2065                                         parse_error()
2066                         insert_html_element t
2067                         return
2068                 if t.type is TYPE_START_TAG and t.name is 'rt'
2069                         if is_in_scope 'ruby', NS_HTML
2070                                 generate_implied_end_tags 'rtc' # arg is exception
2071                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2072                                         parse_error()
2073                         insert_html_element t
2074                         return
2075                 if t.type is TYPE_START_TAG and t.name is 'math'
2076                         reconstruct_afe()
2077                         adjust_mathml_attributes t
2078                         adjust_foreign_attributes t
2079                         insert_foreign_element t, NS_MATHML
2080                         if t.flag 'self-closing'
2081                                 open_els.shift()
2082                                 t.acknowledge_self_closing()
2083                         return
2084                 if t.type is TYPE_START_TAG and t.name is 'svg'
2085                         reconstruct_afe()
2086                         adjust_svg_attributes t
2087                         adjust_foreign_attributes t
2088                         insert_foreign_element t, NS_SVG
2089                         if t.flag 'self-closing'
2090                                 open_els.shift()
2091                                 t.acknowledge_self_closing()
2092                         return
2093                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2094                         parse_error()
2095                         return
2096                 if t.type is TYPE_START_TAG # any other start tag
2097                         reconstruct_afe()
2098                         insert_html_element t
2099                         return
2100                 if t.type is TYPE_END_TAG # any other end tag
2101                         in_body_any_other_end_tag t.name
2102                         return
2103                 return
2104
2105         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2106         ins_mode_text = (t) ->
2107                 if t.type is TYPE_TEXT
2108                         insert_character t
2109                         return
2110                 if t.type is TYPE_EOF
2111                         parse_error()
2112                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2113                                 open_els[0].flag 'already started', true
2114                         open_els.shift()
2115                         ins_mode = original_ins_mode
2116                         process_token t
2117                         return
2118                 if t.type is TYPE_END_TAG and t.name is 'script'
2119                         open_els.shift()
2120                         ins_mode = original_ins_mode
2121                         # fixfull the spec seems to assume that I'm going to run the script
2122                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2123                         return
2124                 if t.type is TYPE_END_TAG
2125                         open_els.shift()
2126                         ins_mode = original_ins_mode
2127                         return
2128                 console.log 'warning: end of ins_mode_text reached'
2129
2130         # the functions below implement the tokenizer stats described here:
2131         # http://www.w3.org/TR/html5/syntax.html#tokenization
2132
2133         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2134         ins_mode_in_table_else = (t) ->
2135                 parse_error()
2136                 flag_foster_parenting = true
2137                 ins_mode_in_body t
2138                 flag_foster_parenting = false
2139                 return
2140         ins_mode_in_table = (t) ->
2141                 switch t.type
2142                         when TYPE_TEXT
2143                                 if t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr'
2144                                         original_ins_mode = ins_mode
2145                                         ins_mode = ins_mode_in_table_text
2146                                         process_token t
2147                                 else
2148                                         ins_mode_in_table_else t
2149                         when TYPE_COMMENT
2150                                 insert_comment t
2151                         when TYPE_DOCTYPE
2152                                 parse_error()
2153                         when TYPE_START_TAG
2154                                 switch t.name
2155                                         when 'caption'
2156                                                 clear_stack_to_table_context()
2157                                                 afe_push_marker()
2158                                                 insert_html_element t
2159                                                 ins_mode = ins_mode_in_caption
2160                                         when 'colgroup'
2161                                                 clear_stack_to_table_context()
2162                                                 insert_html_element t
2163                                                 ins_mode = ins_mode_in_column_group
2164                                         when 'col'
2165                                                 clear_stack_to_table_context()
2166                                                 insert_html_element new_open_tag 'colgroup'
2167                                                 ins_mode = ins_mode_in_column_group
2168                                                 process_token t
2169                                         when 'tbody', 'tfoot', 'thead'
2170                                                 clear_stack_to_table_context()
2171                                                 insert_html_element t
2172                                                 ins_mode = ins_mode_in_table_body
2173                                         when 'td', 'th', 'tr'
2174                                                 clear_stack_to_table_context()
2175                                                 insert_html_element new_open_tag 'tbody'
2176                                                 ins_mode = ins_mode_in_table_body
2177                                                 process_token t
2178                                         when 'table'
2179                                                 parse_error()
2180                                                 if is_in_table_scope 'table', NS_HTML
2181                                                         loop
2182                                                                 el = open_els.shift()
2183                                                                 if el.name is 'table' and el.namespace is NS_HTML
2184                                                                         break
2185                                                         reset_ins_mode()
2186                                                         process_token t
2187                                         when 'style', 'script', 'template'
2188                                                 ins_mode_in_head t
2189                                         when 'input'
2190                                                 unless is_input_hidden_tok t
2191                                                         ins_mode_in_table_else t
2192                                                 else
2193                                                         parse_error()
2194                                                         el = insert_html_element t
2195                                                         open_els.shift()
2196                                                         t.acknowledge_self_closing()
2197                                         when 'form'
2198                                                 parse_error()
2199                                                 if form_element_pointer?
2200                                                         return
2201                                                 if template_tag_is_open()
2202                                                         return
2203                                                 form_element_pointer = insert_html_element t
2204                                                 open_els.shift()
2205                                         else
2206                                                 ins_mode_in_table_else t
2207                         when TYPE_END_TAG
2208                                 switch t.name
2209                                         when 'table'
2210                                                 if is_in_table_scope 'table', NS_HTML
2211                                                         loop
2212                                                                 el = open_els.shift()
2213                                                                 if el.name is 'table' and el.namespace is NS_HTML
2214                                                                         break
2215                                                         reset_ins_mode()
2216                                                 else
2217                                                         parse_error()
2218                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2219                                                 parse_error()
2220                                         when 'template'
2221                                                 ins_mode_in_head t
2222                                         else
2223                                                 ins_mode_in_table_else t
2224                         when TYPE_EOF
2225                                 ins_mode_in_body t
2226                         else
2227                                 ins_mode_in_table_else t
2228
2229
2230         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2231         ins_mode_in_table_text = (t) ->
2232                 if t.type is TYPE_TEXT and t.text is "\u0000"
2233                         # huh? I thought the tokenizer didn't emit these
2234                         parse_error()
2235                         return
2236                 if t.type is TYPE_TEXT
2237                         pending_table_character_tokens.push t
2238                         return
2239                 # Anything else
2240                 all_space = true
2241                 for old in pending_table_character_tokens
2242                         unless is_space_tok old
2243                                 all_space = false
2244                                 break
2245                 if all_space
2246                         for old in pending_table_character_tokens
2247                                 insert_character old
2248                 else
2249                         for old in pending_table_character_tokens
2250                                 ins_mode_table_else old
2251                 pending_table_character_tokens = [] # FIXME test (spec doesn't say this)
2252                 ins_mode = original_ins_mode
2253                 process_token t
2254
2255         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2256         ins_mode_in_caption = (t) ->
2257                 if t.type is TYPE_END_TAG and t.name is 'caption'
2258                         if is_in_table_scope 'caption', NS_HTML
2259                                 generate_implied_end_tags()
2260                                 if open_els[0].name isnt 'caption'
2261                                         parse_error()
2262                                 loop
2263                                         el = open_els.shift()
2264                                         if el.name is 'caption' and el.namespace is NS_HTML
2265                                                 break
2266                                 clear_afe_to_marker()
2267                                 ins_mode = ins_mode_in_table
2268                         else
2269                                 parse_error()
2270                                 # fragment case
2271                         return
2272                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2273                         parse_error()
2274                         if is_in_table_scope 'caption', NS_HTML
2275                                 loop
2276                                         el = open_els.shift()
2277                                         if el.name is 'caption' and el.namespace is NS_HTML
2278                                                 break
2279                                 clear_afe_to_marker()
2280                                 ins_mode = ins_mode_in_table
2281                                 process_token t
2282                         # else fragment case
2283                         return
2284                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2285                         parse_error()
2286                         return
2287                 # Anything else
2288                 ins_mode_in_body t
2289
2290         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2291         ins_mode_in_column_group = (t) ->
2292                 if is_space_tok t
2293                         insert_character t
2294                         return
2295                 if t.type is TYPE_COMMENT
2296                         insert_comment t
2297                         return
2298                 if t.type is TYPE_DOCTYPE
2299                         parse_error()
2300                         return
2301                 if t.type is TYPE_START_TAG and t.name is 'html'
2302                         ins_mode_in_body t
2303                         return
2304                 if t.type is TYPE_START_TAG and t.name is 'col'
2305                         el = insert_html_element t
2306                         open_els.shift()
2307                         t.acknowledge_self_closing()
2308                         return
2309                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2310                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2311                                 open_els.shift()
2312                                 ins_mode = ins_mode_in_table
2313                         else
2314                                 parse_error()
2315                         return
2316                 if t.type is TYPE_END_TAG and t.name is 'col'
2317                         parse_error()
2318                         return
2319                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2320                         ins_mode_in_head t
2321                         return
2322                 if t.type is TYPE_EOF
2323                         ins_mode_in_body t
2324                         return
2325                 # Anything else
2326                 if open_els[0].name isnt 'colgroup'
2327                         parse_error()
2328                         return
2329                 open_els.shift()
2330                 ins_mode = ins_mode_in_table
2331                 process_token t
2332                 return
2333
2334         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2335         ins_mode_in_table_body = (t) ->
2336                 if t.type is TYPE_START_TAG and t.name is 'tr'
2337                         clear_stack_to_table_body_context()
2338                         insert_html_element t
2339                         ins_mode = ins_mode_in_row
2340                         return
2341                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2342                         parse_error()
2343                         clear_stack_to_table_body_context()
2344                         insert_html_element new_open_tag 'tr'
2345                         ins_mode = ins_mode_in_row
2346                         process_token t
2347                         return
2348                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2349                         unless is_in_table_scope t.name, NS_HTML
2350                                 parse_error()
2351                                 return
2352                         clear_stack_to_table_body_context()
2353                         open_els.shift()
2354                         ins_mode = ins_mode_in_table
2355                         return
2356                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2357                         has = false
2358                         for el in open_els
2359                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2360                                         has = true
2361                                         break
2362                                 if table_scopers[el.name] is el.namespace
2363                                         break
2364                         if !has
2365                                 parse_error()
2366                                 return
2367                         clear_stack_to_table_body_context()
2368                         open_els.shift()
2369                         ins_mode = ins_mode_in_table
2370                         process_token t
2371                         return
2372                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2373                         parse_error()
2374                         return
2375                 # Anything else
2376                 ins_mode_in_table t
2377
2378         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2379         ins_mode_in_row = (t) ->
2380                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2381                         clear_stack_to_table_row_context()
2382                         insert_html_element t
2383                         ins_mode = ins_mode_in_cell
2384                         afe_push_marker()
2385                         return
2386                 if t.type is TYPE_END_TAG and t.name is 'tr'
2387                         if is_in_table_scope 'tr', NS_HTML
2388                                 clear_stack_to_table_row_context()
2389                                 open_els.shift()
2390                                 ins_mode = ins_mode_in_table_body
2391                         else
2392                                 parse_error()
2393                         return
2394                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2395                         if is_in_table_scope 'tr', NS_HTML
2396                                 clear_stack_to_table_row_context()
2397                                 open_els.shift()
2398                                 ins_mode = ins_mode_in_table_body
2399                                 process_token t
2400                         else
2401                                 parse_error()
2402                         return
2403                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2404                         if is_in_table_scope t.name, NS_HTML
2405                                 if is_in_table_scope 'tr', NS_HTML
2406                                         clear_stack_to_table_row_context()
2407                                         open_els.shift()
2408                                         ins_mode = ins_mode_in_table_body
2409                                         process_token t
2410                         else
2411                                 parse_error()
2412                         return
2413                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2414                         parse_error()
2415                         return
2416                 # Anything else
2417                 ins_mode_in_table t
2418
2419         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2420         close_the_cell = ->
2421                 generate_implied_end_tags()
2422                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2423                         parse_error()
2424                 loop
2425                         el = open_els.shift()
2426                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2427                                 break
2428                 clear_afe_to_marker()
2429                 ins_mode = ins_mode_in_row
2430
2431         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2432         ins_mode_in_cell = (t) ->
2433                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2434                         if is_in_table_scope t.name, NS_HTML
2435                                 generate_implied_end_tags()
2436                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2437                                         parse_error()
2438                                 loop
2439                                         el = open_els.shift()
2440                                         if el.name is t.name and el.namespace is NS_HTML
2441                                                 break
2442                                 clear_afe_to_marker()
2443                                 ins_mode = ins_mode_in_row
2444                         else
2445                                 parse_error()
2446                         return
2447                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2448                         has = false
2449                         for el in open_els
2450                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2451                                         has = true
2452                                         break
2453                                 if table_scopers[el.name] is el.namespace
2454                                         break
2455                         if !has
2456                                 parse_error()
2457                                 return
2458                         close_the_cell()
2459                         process_token t
2460                         return
2461                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2462                         parse_error()
2463                         return
2464                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2465                         if is_in_table_scope t.name, NS_HTML
2466                                 close_the_cell()
2467                                 process_token t
2468                         else
2469                                 parse_error()
2470                         return
2471                 # Anything Else
2472                 ins_mode_in_body t
2473
2474         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2475         ins_mode_in_select = (t) ->
2476                 if t.type is TYPE_TEXT and t.text is "\u0000"
2477                         parse_error()
2478                         return
2479                 if t.type is TYPE_TEXT
2480                         insert_character t
2481                         return
2482                 if t.type is TYPE_COMMENT
2483                         insert_comment t
2484                         return
2485                 if t.type is TYPE_DOCTYPE
2486                         parse_error()
2487                         return
2488                 if t.type is TYPE_START_TAG and t.name is 'html'
2489                         ins_mode_in_body t
2490                         return
2491                 if t.type is TYPE_START_TAG and t.name is 'option'
2492                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2493                                 open_els.shift()
2494                         insert_html_element t
2495                         return
2496                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2497                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2498                                 open_els.shift()
2499                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2500                                 open_els.shift()
2501                         insert_html_element t
2502                         return
2503                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2504                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2505                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2506                                         open_els.shift()
2507                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2508                                 open_els.shift()
2509                         else
2510                                 parse_error()
2511                         return
2512                 if t.type is TYPE_END_TAG and t.name is 'option'
2513                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2514                                 open_els.shift()
2515                         else
2516                                 parse_error()
2517                         return
2518                 if t.type is TYPE_END_TAG and t.name is 'select'
2519                         if is_in_select_scope 'select', NS_HTML
2520                                 loop
2521                                         el = open_els.shift()
2522                                         if el.name is 'select' and el.namespace is NS_HTML
2523                                                 break
2524                                 reset_ins_mode()
2525                         else
2526                                 parse_error()
2527                         return
2528                 if t.type is TYPE_START_TAG and t.name is 'select'
2529                         parse_error()
2530                         loop
2531                                 el = open_els.shift()
2532                                 if el.name is 'select' and el.namespace is NS_HTML
2533                                         break
2534                         reset_ins_mode()
2535                         # spec says that this is the same as </select> but it doesn't say
2536                         # to check scope first
2537                         return
2538                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2539                         parse_error()
2540                         if is_in_select_scope 'select', NS_HTML
2541                                 return
2542                         loop
2543                                 el = open_els.shift()
2544                                 if el.name is 'select' and el.namespace is NS_HTML
2545                                         break
2546                         reset_ins_mode()
2547                         process_token t
2548                         return
2549                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2550                         ins_mode_in_head t
2551                         return
2552                 if t.type is TYPE_EOF
2553                         ins_mode_in_body t
2554                         return
2555                 # Anything else
2556                 parse_error()
2557                 return
2558
2559         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2560         ins_mode_in_select_in_table = (t) ->
2561                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2562                         parse_error()
2563                         loop
2564                                 el = open_els.shift()
2565                                 if el.name is 'select' and el.namespace is NS_HTML
2566                                         break
2567                         reset_ins_mode()
2568                         process_token t
2569                         return
2570                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2571                         parse_error()
2572                         unless is_in_table_scope t.name, NS_HTML
2573                                 return
2574                         loop
2575                                 el = open_els.shift()
2576                                 if el.name is 'select' and el.namespace is NS_HTML
2577                                         break
2578                         reset_ins_mode()
2579                         process_token t
2580                         return
2581                 # Anything else
2582                 ins_mode_in_select t
2583                 return
2584
2585         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2586         ins_mode_in_template = (t) ->
2587                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2588                         ins_mode_in_body t
2589                         return
2590                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2591                         ins_mode_in_head t
2592                         return
2593                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2594                         template_ins_modes.shift()
2595                         template_ins_modes.unshift ins_mode_in_table
2596                         ins_mode = ins_mode_in_table
2597                         process_token t
2598                         return
2599                 if t.type is TYPE_START_TAG and t.name is 'col'
2600                         template_ins_modes.shift()
2601                         template_ins_modes.unshift ins_mode_in_column_group
2602                         ins_mode = ins_mode_in_column_group
2603                         process_token t
2604                         return
2605                 if t.type is TYPE_START_TAG and t.name is 'tr'
2606                         template_ins_modes.shift()
2607                         template_ins_modes.unshift ins_mode_in_table_body
2608                         ins_mode = ins_mode_in_table_body
2609                         process_token t
2610                         return
2611                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2612                         template_ins_modes.shift()
2613                         template_ins_modes.unshift ins_mode_in_row
2614                         ins_mode = ins_mode_in_row
2615                         process_token t
2616                         return
2617                 if t.type is TYPE_START_TAG
2618                         template_ins_modes.shift()
2619                         template_ins_modes.unshift ins_mode_in_body
2620                         ins_mode = ins_mode_in_body
2621                         process_token t
2622                         return
2623                 if t.type is TYPE_END_TAG
2624                         parse_error()
2625                         return
2626                 if t.type is TYPE_EOF
2627                         unless template_tag_is_open()
2628                                 stop_parsing()
2629                                 return
2630                         parse_error()
2631                         loop
2632                                 el = open_els.shift()
2633                                 if el.name is 'template' and el.namespace is NS_HTML
2634                                         break
2635                         clear_afe_to_marker()
2636                         template_ins_modes.shift()
2637                         reset_ins_mode()
2638                         process_token t
2639
2640         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2641         ins_mode_after_body = (t) ->
2642                 if is_space_tok t
2643                         ins_mode_in_body t
2644                         return
2645                 if t.type is TYPE_COMMENT
2646                         insert_comment t, [open_els[0], open_els[0].children.length]
2647                         return
2648                 if t.type is TYPE_DOCTYPE
2649                         parse_error()
2650                         return
2651                 if t.type is TYPE_START_TAG and t.name is 'html'
2652                         ins_mode_in_body t
2653                         return
2654                 if t.type is TYPE_END_TAG and t.name is 'html'
2655                         # fixfull fragment case
2656                         ins_mode = ins_mode_after_after_body
2657                         return
2658                 if t.type is TYPE_EOF
2659                         stop_parsing()
2660                         return
2661                 # Anything ELse
2662                 parse_error()
2663                 ins_mode = ins_mode_in_body
2664                 process_token t
2665
2666         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2667         ins_mode_in_frameset = (t) ->
2668                 if is_space_tok t
2669                         insert_character t
2670                         return
2671                 if t.type is TYPE_COMMENT
2672                         insert_comment t
2673                         return
2674                 if t.type is TYPE_DOCTYPE
2675                         parse_error()
2676                         return
2677                 if t.type is TYPE_START_TAG and t.name is 'html'
2678                         ins_mode_in_body t
2679                         return
2680                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2681                         insert_html_element t
2682                         return
2683                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2684                         if open_els.length is 1
2685                                 parse_error()
2686                                 return # fragment case
2687                         open_els.shift()
2688                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2689                                 ins_mode = ins_mode_after_frameset
2690                         return
2691                 if t.type is TYPE_START_TAG and t.name is 'frame'
2692                         insert_html_element t
2693                         open_els.shift()
2694                         t.acknowledge_self_closing()
2695                         return
2696                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2697                         ins_mode_in_head t
2698                         return
2699                 if t.type is TYPE_EOF
2700                         if open_els.length isnt 1
2701                                 parse_error()
2702                         stop_parsing()
2703                         return
2704                 # Anything else
2705                 parse_error()
2706                 return
2707
2708         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2709         ins_mode_after_frameset = (t) ->
2710                 if is_space_tok t
2711                         insert_character t
2712                         return
2713                 if t.type is TYPE_COMMENT
2714                         insert_comment t
2715                         return
2716                 if t.type is TYPE_DOCTYPE
2717                         parse_error()
2718                         return
2719                 if t.type is TYPE_START_TAG and t.name is 'html'
2720                         ins_mode_in_body t
2721                         return
2722                 if t.type is TYPE_END_TAG and t.name is 'html'
2723                         insert_mode = ins_mode_after_after_frameset
2724                         return
2725                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2726                         ins_mode_in_head t
2727                         return
2728                 if t.type is TYPE_EOF
2729                         stop_parsing()
2730                         return
2731                 # Anything else
2732                 parse_error()
2733                 return
2734
2735         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2736         ins_mode_after_after_body = (t) ->
2737                 if t.type is TYPE_COMMENT
2738                         insert_comment t, [doc, doc.children.length]
2739                         return
2740                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2741                         ins_mode_in_body t
2742                         return
2743                 if t.type is TYPE_EOF
2744                         stop_parsing()
2745                         return
2746                 # Anything else
2747                 parse_error()
2748                 ins_mode = ins_mode_in_body
2749                 return
2750
2751         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2752         ins_mode_after_after_frameset = (t) ->
2753                 if t.type is TYPE_COMMENT
2754                         insert_comment t, [doc, doc.children.length]
2755                         return
2756                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2757                         ins_mode_in_body t
2758                         return
2759                 if t.type is TYPE_EOF
2760                         stop_parsing()
2761                         return
2762                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2763                         ins_mode_in_head t
2764                         return
2765                 # Anything else
2766                 parse_error()
2767                 return
2768
2769         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2770         has_color_face_or_size = (t) ->
2771                 for a in t.attrs_a
2772                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2773                                 return true
2774                 return false
2775         in_foreign_content_end_script = ->
2776                 open_els.shift()
2777                 # fixfull
2778                 return
2779         in_foreign_content_other_start = (t) ->
2780                 acn = adjusted_current_node()
2781                 if acn.namespace is NS_MATHML
2782                         adjust_mathml_attributes t
2783                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2784                         t.name = svg_name_fixes[t.name]
2785                 if acn.namespace is NS_SVG
2786                         adjust_svg_attributes t
2787                 adjust_foreign_attributes t
2788                 insert_foreign_element t, acn.namespace
2789                 if t.flag 'self-closing'
2790                         if t.name is 'script'
2791                                 t.acknowledge_self_closing()
2792                                 in_foreign_content_end_script()
2793                         else
2794                                 open_els.shift()
2795                                 t.acknowledge_self_closing()
2796                 return
2797         in_foreign_content = (t) ->
2798                 if t.type is TYPE_TEXT and t.text is "\u0000"
2799                         parse_error()
2800                         insert_character new_character_token "\ufffd"
2801                         return
2802                 if is_space_tok t
2803                         insert_character t
2804                         return
2805                 if t.type is TYPE_TEXT
2806                         flag_frameset_ok = false
2807                         insert_character t
2808                         return
2809                 if t.type is TYPE_COMMENT
2810                         insert_comment t
2811                         return
2812                 if t.type is TYPE_DOCTYPE
2813                         parse_error()
2814                         return
2815                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2816                         parse_error()
2817                         if flag_fragment_parsing
2818                                 in_foreign_content_other_start t
2819                                 return
2820                         loop # is this safe?
2821                                 open_els.shift()
2822                                 cn = open_els[0]
2823                                 if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
2824                                         break
2825                         process_token t
2826                         return
2827                 if t.type is TYPE_START_TAG
2828                         in_foreign_content_other_start t
2829                         return
2830                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2831                         in_foreign_content_end_script()
2832                         return
2833                 if t.type is TYPE_END_TAG
2834                         if open_els[0].name.toLowerCase() isnt t.name
2835                                 parse_error()
2836                         for node in open_els
2837                                 if node is open_els[open_els.length - 1]
2838                                         return
2839                                 if node.name.toLowerCase() is t.name
2840                                         loop
2841                                                 el = open_els.shift()
2842                                                 if el is node
2843                                                         return
2844                                 if node.namespace is NS_HTML
2845                                         break
2846                         ins_mode t # explicitly call HTML insertion mode
2847
2848
2849         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2850         tok_state_data = ->
2851                 switch c = txt.charAt(cur++)
2852                         when '&'
2853                                 return new_text_node parse_character_reference()
2854                         when '<'
2855                                 tok_state = tok_state_tag_open
2856                         when "\u0000"
2857                                 parse_error()
2858                                 return new_text_node c
2859                         when '' # EOF
2860                                 return new_eof_token()
2861                         else
2862                                 return new_text_node c
2863                 return null
2864
2865         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2866         # not needed: tok_state_character_reference_in_data = ->
2867         # just call parse_character_reference()
2868
2869         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2870         tok_state_rcdata = ->
2871                 switch c = txt.charAt(cur++)
2872                         when '&'
2873                                 return new_text_node parse_character_reference()
2874                         when '<'
2875                                 tok_state = tok_state_rcdata_less_than_sign
2876                         when "\u0000"
2877                                 parse_error()
2878                                 return new_character_token "\ufffd"
2879                         when '' # EOF
2880                                 return new_eof_token()
2881                         else
2882                                 return new_character_token c
2883                 return null
2884
2885         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2886         # not needed: tok_state_character_reference_in_rcdata = ->
2887         # just call parse_character_reference()
2888
2889         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2890         tok_state_rawtext = ->
2891                 switch c = txt.charAt(cur++)
2892                         when '<'
2893                                 tok_state = tok_state_rawtext_less_than_sign
2894                         when "\u0000"
2895                                 parse_error()
2896                                 return new_character_token "\ufffd"
2897                         when '' # EOF
2898                                 return new_eof_token()
2899                         else
2900                                 return new_character_token c
2901                 return null
2902
2903         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2904         tok_state_script_data = ->
2905                 switch c = txt.charAt(cur++)
2906                         when '<'
2907                                 tok_state = tok_state_script_data_less_than_sign
2908                         when "\u0000"
2909                                 parse_error()
2910                                 return new_character_token "\ufffd"
2911                         when '' # EOF
2912                                 return new_eof_token()
2913                         else
2914                                 return new_character_token c
2915                 return null
2916
2917         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
2918         tok_state_plaintext = ->
2919                 switch c = txt.charAt(cur++)
2920                         when "\u0000"
2921                                 parse_error()
2922                                 return new_character_token "\ufffd"
2923                         when '' # EOF
2924                                 return new_eof_token()
2925                         else
2926                                 return new_character_token c
2927                 return null
2928
2929
2930         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
2931         tok_state_tag_open = ->
2932                 switch c = txt.charAt(cur++)
2933                         when '!'
2934                                 tok_state = tok_state_markup_declaration_open
2935                         when '/'
2936                                 tok_state = tok_state_end_tag_open
2937                         when '?'
2938                                 parse_error()
2939                                 tok_cur_tag = new_comment_token '?'
2940                                 tok_state = tok_state_bogus_comment
2941                         else
2942                                 if is_lc_alpha(c)
2943                                         tok_cur_tag = new_open_tag c
2944                                         tok_state = tok_state_tag_name
2945                                 else if is_uc_alpha(c)
2946                                         tok_cur_tag = new_open_tag c.toLowerCase()
2947                                         tok_state = tok_state_tag_name
2948                                 else
2949                                         parse_error()
2950                                         tok_state = tok_state_data
2951                                         cur -= 1 # we didn't parse/handle the char after <
2952                                         return new_text_node '<'
2953                 return null
2954
2955         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
2956         tok_state_end_tag_open = ->
2957                 switch c = txt.charAt(cur++)
2958                         when '>'
2959                                 parse_error()
2960                                 tok_state = tok_state_data
2961                         when '' # EOF
2962                                 parse_error()
2963                                 tok_state = tok_state_data
2964                                 return new_text_node '</'
2965                         else
2966                                 if is_uc_alpha(c)
2967                                         tok_cur_tag = new_end_tag c.toLowerCase()
2968                                         tok_state = tok_state_tag_name
2969                                 else if is_lc_alpha(c)
2970                                         tok_cur_tag = new_end_tag c
2971                                         tok_state = tok_state_tag_name
2972                                 else
2973                                         parse_error()
2974                                         tok_cur_tag = new_comment_token '/'
2975                                         tok_state = tok_state_bogus_comment
2976                 return null
2977
2978         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
2979         tok_state_tag_name = ->
2980                 switch c = txt.charAt(cur++)
2981                         when "\t", "\n", "\u000c", ' '
2982                                 tok_state = tok_state_before_attribute_name
2983                         when '/'
2984                                 tok_state = tok_state_self_closing_start_tag
2985                         when '>'
2986                                 tok_state = tok_state_data
2987                                 tmp = tok_cur_tag
2988                                 tok_cur_tag = null
2989                                 return tmp
2990                         when "\u0000"
2991                                 parse_error()
2992                                 tok_cur_tag.name += "\ufffd"
2993                         when '' # EOF
2994                                 parse_error()
2995                                 tok_state = tok_state_data
2996                         else
2997                                 if is_uc_alpha(c)
2998                                         tok_cur_tag.name += c.toLowerCase()
2999                                 else
3000                                         tok_cur_tag.name += c
3001                 return null
3002
3003         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3004         tok_state_rcdata_less_than_sign = ->
3005                 c = txt.charAt(cur++)
3006                 if c is '/'
3007                         temporary_buffer = ''
3008                         tok_state = tok_state_rcdata_end_tag_open
3009                         return null
3010                 # Anything else
3011                 tok_state = tok_state_rcdata
3012                 cur -= 1 # reconsume the input character
3013                 return new_character_token '<'
3014
3015         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3016         tok_state_rcdata_end_tag_open = ->
3017                 c = txt.charAt(cur++)
3018                 if is_uc_alpha(c)
3019                         tok_cur_tag = new_end_tag c.toLowerCase()
3020                         temporary_buffer += c
3021                         tok_state = tok_state_rcdata_end_tag_name
3022                         return null
3023                 if is_lc_alpha(c)
3024                         tok_cur_tag = new_end_tag c
3025                         temporary_buffer += c
3026                         tok_state = tok_state_rcdata_end_tag_name
3027                         return null
3028                 # Anything else
3029                 tok_state = tok_state_rcdata
3030                 cur -= 1 # reconsume the input character
3031                 return new_character_token "</" # fixfull separate these
3032
3033         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3034         is_appropriate_end_tag = (t) ->
3035                 # spec says to check against "the tag name of the last start tag to
3036                 # have been emitted from this tokenizer", but this is only called from
3037                 # the various "raw" states, so it's hopefully ok to assume that
3038                 # open_els[0].name will work instead TODO: verify this after the script
3039                 # data states are implemented
3040                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3041                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3042
3043         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3044         tok_state_rcdata_end_tag_name = ->
3045                 c = txt.charAt(cur++)
3046                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3047                         if is_appropriate_end_tag tok_cur_tag
3048                                 tok_state = tok_state_before_attribute_name
3049                                 return
3050                         # else fall through to "Anything else"
3051                 if c is '/'
3052                         if is_appropriate_end_tag tok_cur_tag
3053                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3054                                 return
3055                         # else fall through to "Anything else"
3056                 if c is '>'
3057                         if is_appropriate_end_tag tok_cur_tag
3058                                 tok_state = tok_state_data
3059                                 return tok_cur_tag
3060                         # else fall through to "Anything else"
3061                 if is_uc_alpha(c)
3062                         tok_cur_tag.name += c.toLowerCase()
3063                         temporary_buffer += c
3064                         return null
3065                 if is_lc_alpha(c)
3066                         tok_cur_tag.name += c
3067                         temporary_buffer += c
3068                         return null
3069                 # Anything else
3070                 tok_state = tok_state_rcdata
3071                 cur -= 1 # reconsume the input character
3072                 return new_character_token '</' + temporary_buffer # fixfull separate these
3073
3074         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3075         tok_state_rawtext_less_than_sign = ->
3076                 c = txt.charAt(cur++)
3077                 if c is '/'
3078                         temporary_buffer = ''
3079                         tok_state = tok_state_rawtext_end_tag_open
3080                         return null
3081                 # Anything else
3082                 tok_state = tok_state_rawtext
3083                 cur -= 1 # reconsume the input character
3084                 return new_character_token '<'
3085
3086         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3087         tok_state_rawtext_end_tag_open = ->
3088                 c = txt.charAt(cur++)
3089                 if is_uc_alpha(c)
3090                         tok_cur_tag = new_end_tag c.toLowerCase()
3091                         temporary_buffer += c
3092                         tok_state = tok_state_rawtext_end_tag_name
3093                         return null
3094                 if is_lc_alpha(c)
3095                         tok_cur_tag = new_end_tag c
3096                         temporary_buffer += c
3097                         tok_state = tok_state_rawtext_end_tag_name
3098                         return null
3099                 # Anything else
3100                 tok_state = tok_state_rawtext
3101                 cur -= 1 # reconsume the input character
3102                 return new_character_token "</" # fixfull separate these
3103
3104         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3105         tok_state_rawtext_end_tag_name = ->
3106                 c = txt.charAt(cur++)
3107                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3108                         if is_appropriate_end_tag tok_cur_tag
3109                                 tok_state = tok_state_before_attribute_name
3110                                 return
3111                         # else fall through to "Anything else"
3112                 if c is '/'
3113                         if is_appropriate_end_tag tok_cur_tag
3114                                 tok_state = tok_state_self_closing_start_tag
3115                                 return
3116                         # else fall through to "Anything else"
3117                 if c is '>'
3118                         if is_appropriate_end_tag tok_cur_tag
3119                                 tok_state = tok_state_data
3120                                 return tok_cur_tag
3121                         # else fall through to "Anything else"
3122                 if is_uc_alpha(c)
3123                         tok_cur_tag.name += c.toLowerCase()
3124                         temporary_buffer += c
3125                         return null
3126                 if is_lc_alpha(c)
3127                         tok_cur_tag.name += c
3128                         temporary_buffer += c
3129                         return null
3130                 # Anything else
3131                 tok_state = tok_state_rawtext
3132                 cur -= 1 # reconsume the input character
3133                 return new_character_token '</' + temporary_buffer # fixfull separate these
3134
3135         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3136         tok_state_script_data_less_than_sign = ->
3137                 c = txt.charAt(cur++)
3138                 if c is '/'
3139                         temporary_buffer = ''
3140                         tok_state = tok_state_script_data_end_tag_open
3141                         return
3142                 if c is '!'
3143                         tok_state = tok_state_script_data_escape_start
3144                         return new_character_token '<!' # fixfull split
3145                 # Anything else
3146                 tok_state = tok_state_script_data
3147                 cur -= 1 # Reconsume
3148                 return new_character_token '<'
3149
3150         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3151         tok_state_script_data_end_tag_open = ->
3152                 c = txt.charAt(cur++)
3153                 if is_uc_alpha(c)
3154                         tok_cur_tag = new_end_tag c.toLowerCase()
3155                         temporary_buffer += c
3156                         tok_state = tok_state_script_data_end_tag_name
3157                         return
3158                 if is_lc_alpha(c)
3159                         tok_cur_tag = new_end_tag c
3160                         temporary_buffer += c
3161                         tok_state = tok_state_script_data_end_tag_name
3162                         return
3163                 # Anything else
3164                 tok_state = tok_state_script_data
3165                 cur -= 1 # Reconsume
3166                 return new_character_token '</'
3167
3168         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3169         tok_state_script_data_end_tag_name = ->
3170                 c = txt.charAt(cur++)
3171                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3172                         if is_appropriate_end_tag tok_cur_tag
3173                                 tok_state = tok_state_before_attribute_name
3174                                 return
3175                         # fall through
3176                 if c is '/'
3177                         if is_appropriate_end_tag tok_cur_tag
3178                                 tok_state = tok_state_self_closing_start_tag
3179                                 return
3180                         # fall through
3181                 if c is '>'
3182                         if is_appropriate_end_tag tok_cur_tag
3183                                 tok_state = tok_state_data
3184                                 return tok_cur_tag
3185                         # fall through
3186                 if is_uc_alpha(c)
3187                         tok_cur_tag.name += c.toLowerCase()
3188                         temporary_buffer += c
3189                         return
3190                 if is_lc_alpha(c)
3191                         tok_cur_tag.name += c
3192                         temporary_buffer += c
3193                         return
3194                 # Anything else
3195                 tok_state = tok_state_script_data
3196                 cur -= 1 # Reconsume
3197                 return new_character_token "</#{temporary_buffer}" # fixfull split
3198
3199         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3200         tok_state_script_data_escape_start = ->
3201                 c = txt.charAt(cur++)
3202                 if c is '-'
3203                         tok_state = tok_state_script_data_escape_start_dash
3204                         return new_character_token '-'
3205                 # Anything else
3206                 tok_state = tok_state_script_data
3207                 cur -= 1 # Reconsume
3208                 return
3209
3210         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3211         tok_state_script_data_escape_start_dash = ->
3212                 c = txt.charAt(cur++)
3213                 if c is '-'
3214                         tok_state = tok_state_script_data_escaped_dash_dash
3215                         return new_character_token '-'
3216                 # Anything else
3217                 tok_state = tok_state_script_data
3218                 cur -= 1 # Reconsume
3219                 return
3220
3221         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3222         tok_state_script_data_escaped = ->
3223                 c = txt.charAt(cur++)
3224                 if c is '-'
3225                         tok_state = tok_state_script_data_escaped_dash
3226                         return new_character_token '-'
3227                 if c is '<'
3228                         tok_state = tok_state_script_data_escaped_less_than_sign
3229                         return
3230                 if c is "\u0000"
3231                         parse_error()
3232                         return new_character_token "\ufffd"
3233                 if c is '' # EOF
3234                         tok_state = tok_state_data
3235                         parse_error()
3236                         cur -= 1 # Reconsume
3237                         return
3238                 # Anything else
3239                 return new_character_token c
3240
3241         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3242         tok_state_script_data_escaped_dash = ->
3243                 c = txt.charAt(cur++)
3244                 if c is '-'
3245                         tok_state = tok_state_script_data_escaped_dash_dash
3246                         return new_character_token '-'
3247                 if c is '<'
3248                         tok_state = tok_state_script_data_escaped_less_than_sign
3249                         return
3250                 if c is "\u0000"
3251                         parse_error()
3252                         tok_state = tok_state_script_data_escaped
3253                         return new_character_token "\ufffd"
3254                 if c is '' # EOF
3255                         tok_state = tok_state_data
3256                         parse_error()
3257                         cur -= 1 # Reconsume
3258                         return
3259                 # Anything else
3260                 tok_state = tok_state_script_data_escaped
3261                 return new_character_token c
3262
3263         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3264         tok_state_script_data_escaped_dash_dash = ->
3265                 c = txt.charAt(cur++)
3266                 if c is '-'
3267                         return new_character_token '-'
3268                 if c is '<'
3269                         tok_state = tok_state_script_data_escaped_less_than_sign
3270                         return
3271                 if c is '>'
3272                         tok_state = tok_state_script_data
3273                         return new_character_token '>'
3274                 if c is "\u0000"
3275                         parse_error()
3276                         tok_state = tok_state_script_data_escaped
3277                         return new_character_token "\ufffd"
3278                 if c is '' # EOF
3279                         parse_error()
3280                         tok_state = tok_state_data
3281                         cur -= 1 # Reconsume
3282                         return
3283                 # Anything else
3284                 tok_state = tok_state_script_data_escaped
3285                 return new_character_token c
3286
3287         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3288         tok_state_script_data_escaped_less_than_sign = ->
3289                 c = txt.charAt(cur++)
3290                 if c is '/'
3291                         temporary_buffer = ''
3292                         tok_state = tok_state_script_data_escaped_end_tag_open
3293                         return
3294                 if is_uc_alpha(c)
3295                         temporary_buffer = c.toLowerCase() # yes, really
3296                         tok_state = tok_state_script_data_double_escape_start
3297                         return new_character_token "<#{c}" # fixfull split
3298                 if is_lc_alpha(c)
3299                         temporary_buffer = c
3300                         tok_state = tok_state_script_data_double_escape_start
3301                         return new_character_token "<#{c}" # fixfull split
3302                 # Anything else
3303                 tok_state = tok_state_script_data_escaped
3304                 cur -= 1 # Reconsume
3305                 return new_character_token c
3306
3307         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3308         tok_state_script_data_escaped_end_tag_open = ->
3309                 c = txt.charAt(cur++)
3310                 if is_uc_alpha(c)
3311                         tok_cur_tag = new_end_tag c.toLowerCase()
3312                         temporary_buffer += c
3313                         tok_state = tok_state_script_data_escaped_end_tag_name
3314                         return
3315                 if is_lc_alpha(c)
3316                         tok_cur_tag = new_end_tag c
3317                         temporary_buffer += c
3318                         tok_state = tok_state_script_data_escaped_end_tag_name
3319                         return
3320                 # Anything else
3321                 tok_state = tok_state_script_data_escaped
3322                 cur -= 1 # Reconsume
3323                 return new_character_token '</' # fixfull split
3324
3325         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3326         tok_state_script_data_escaped_end_tag_name = ->
3327                 c = txt.charAt(cur++)
3328                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3329                         if is_appropriate_end_tag tok_cur_tag
3330                                 tok_state = tok_state_before_attribute_name
3331                                 return
3332                         # fall through
3333                 if c is '/'
3334                         if is_appropriate_end_tag tok_cur_tag
3335                                 tok_state = tok_state_self_closing_start_tag
3336                                 return
3337                         # fall through
3338                 if c is '>'
3339                         if is_appropriate_end_tag tok_cur_tag
3340                                 tok_state = tok_state_data
3341                                 return tok_cur_tag
3342                         # fall through
3343                 if is_uc_alpha(c)
3344                         tok_cur_tag.name += c.toLowerCase()
3345                         temporary_buffer += c.toLowerCase()
3346                         return
3347                 if is_lc_alpha(c)
3348                         tok_cur_tag.name += c
3349                         temporary_buffer += c.toLowerCase()
3350                         return
3351                 # Anything else
3352                 tok_state = tok_state_script_data_escaped
3353                 cur -= 1 # Reconsume
3354                 return new_character_token "</#{temporary_buffer}" # fixfull split
3355
3356         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3357         tok_state_script_data_double_escape_start = ->
3358                 c = txt.charAt(cur++)
3359                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3360                         if temporary_buffer is 'script'
3361                                 tok_state = tok_state_script_data_double_escaped
3362                         else
3363                                 tok_state = tok_state_script_data_escaped
3364                         return new_character_token c
3365                 if is_uc_alpha(c)
3366                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3367                         return new_character_token c
3368                 if is_lc_alpha(c)
3369                         temporary_buffer += c
3370                         return new_character_token c
3371                 # Anything else
3372                 tok_state = tok_state_script_data_escaped
3373                 cur -= 1 # Reconsume
3374                 return
3375
3376         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3377         tok_state_script_data_double_escaped = ->
3378                 c = txt.charAt(cur++)
3379                 if c is '-'
3380                         tok_state = tok_state_script_data_double_escaped_dash
3381                         return new_character_token '-'
3382                 if c is '<'
3383                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3384                         return new_character_token '<'
3385                 if c is "\u0000"
3386                         parse_error()
3387                         return new_character_token "\ufffd"
3388                 if c is '' # EOF
3389                         parse_error()
3390                         tok_state = tok_state_data
3391                         cur -= 1 # Reconsume
3392                         return
3393                 # Anything else
3394                 return new_character_token c
3395
3396         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3397         tok_state_script_data_double_escaped_dash = ->
3398                 c = txt.charAt(cur++)
3399                 if c is '-'
3400                         tok_state = tok_state_script_data_double_escaped_dash_dash
3401                         return new_character_token '-'
3402                 if c is '<'
3403                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3404                         return new_character_token '<'
3405                 if c is "\u0000"
3406                         parse_error()
3407                         tok_state = tok_state_script_data_double_escaped
3408                         return new_character_token "\ufffd"
3409                 if c is '' # EOF
3410                         parse_error()
3411                         tok_state = tok_state_data
3412                         cur -= 1 # Reconsume
3413                         return
3414                 # Anything else
3415                 tok_state = tok_state_script_data_double_escaped
3416                 return new_character_token c
3417
3418         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3419         tok_state_script_data_double_escaped_dash_dash = ->
3420                 c = txt.charAt(cur++)
3421                 if c is '-'
3422                         return new_character_token '-'
3423                 if c is '<'
3424                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3425                         return new_character_token '<'
3426                 if c is '>'
3427                         tok_state = tok_state_script_data
3428                         return new_character_token '>'
3429                 if c is "\u0000"
3430                         parse_error()
3431                         tok_state = tok_state_script_data_double_escaped
3432                         return new_character_token "\ufffd"
3433                 if c is '' # EOF
3434                         parse_error()
3435                         tok_state = tok_state_data
3436                         cur -= 1 # Reconsume
3437                         return
3438                 # Anything else
3439                 tok_state = tok_state_script_data_double_escaped
3440                 return new_character_token c
3441
3442         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3443         tok_state_script_data_double_escaped_less_than_sign = ->
3444                 c = txt.charAt(cur++)
3445                 if c is '/'
3446                         temporary_buffer = ''
3447                         tok_state = tok_state_script_data_double_escape_end
3448                         return new_character_token '/'
3449                 # Anything else
3450                 tok_state = tok_state_script_data_double_escaped
3451                 cur -= 1 # Reconsume
3452                 return
3453
3454         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3455         tok_state_script_data_double_escape_end = ->
3456                 c = txt.charAt(cur++)
3457                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3458                         if temporary_buffer is 'script'
3459                                 tok_state = tok_state_script_data_escaped
3460                         else
3461                                 tok_state = tok_state_script_data_double_escaped
3462                         return new_character_token c
3463                 if is_uc_alpha(c)
3464                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3465                         return new_character_token c
3466                 if is_lc_alpha(c)
3467                         temporary_buffer += c
3468                         return new_character_token c
3469                 # Anything else
3470                 tok_state = tok_state_script_data_double_escaped
3471                 cur -= 1 # Reconsume
3472                 return
3473
3474         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3475         tok_state_before_attribute_name = ->
3476                 attr_name = null
3477                 switch c = txt.charAt(cur++)
3478                         when "\t", "\n", "\u000c", ' '
3479                                 return null
3480                         when '/'
3481                                 tok_state = tok_state_self_closing_start_tag
3482                                 return null
3483                         when '>'
3484                                 tok_state = tok_state_data
3485                                 tmp = tok_cur_tag
3486                                 tok_cur_tag = null
3487                                 return tmp
3488                         when "\u0000"
3489                                 parse_error()
3490                                 attr_name = "\ufffd"
3491                         when '"', "'", '<', '='
3492                                 parse_error()
3493                                 attr_name = c
3494                         when '' # EOF
3495                                 parse_error()
3496                                 tok_state = tok_state_data
3497                         else
3498                                 if is_uc_alpha(c)
3499                                         attr_name = c.toLowerCase()
3500                                 else
3501                                         attr_name = c
3502                 if attr_name?
3503                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3504                         tok_state = tok_state_attribute_name
3505                 return null
3506
3507         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3508         tok_state_attribute_name = ->
3509                 switch c = txt.charAt(cur++)
3510                         when "\t", "\n", "\u000c", ' '
3511                                 tok_state = tok_state_after_attribute_name
3512                         when '/'
3513                                 tok_state = tok_state_self_closing_start_tag
3514                         when '='
3515                                 tok_state = tok_state_before_attribute_value
3516                         when '>'
3517                                 tok_state = tok_state_data
3518                                 tmp = tok_cur_tag
3519                                 tok_cur_tag = null
3520                                 return tmp
3521                         when "\u0000"
3522                                 parse_error()
3523                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3524                         when '"', "'", '<'
3525                                 parse_error()
3526                                 tok_cur_tag.attrs_a[0][0] += c
3527                         when '' # EOF
3528                                 parse_error()
3529                                 tok_state = tok_state_data
3530                         else
3531                                 if is_uc_alpha(c)
3532                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3533                                 else
3534                                         tok_cur_tag.attrs_a[0][0] += c
3535                 return null
3536
3537         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3538         tok_state_after_attribute_name = ->
3539                 c = txt.charAt(cur++)
3540                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3541                         return
3542                 if c is '/'
3543                         tok_state = tok_state_self_closing_start_tag
3544                         return
3545                 if c is '='
3546                         tok_state = tok_state_before_attribute_value
3547                         return
3548                 if c is '>'
3549                         tok_state = tok_state_data
3550                         return
3551                 if is_uc_alpha(c)
3552                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3553                         tok_state = tok_state_attribute_name
3554                         return
3555                 if c is "\u0000"
3556                         parse_error()
3557                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3558                         tok_state = tok_state_attribute_name
3559                         return
3560                 if c is '' # EOF
3561                         parse_error()
3562                         tok_state = tok_state_data
3563                         cur -= 1 # reconsume
3564                         return
3565                 if c is '"' or c is "'" or c is '<'
3566                         parse_error()
3567                         # fall through to Anything else
3568                 # Anything else
3569                 tok_cur_tag.attrs_a.unshift [c, '']
3570                 tok_state = tok_state_attribute_name
3571
3572         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3573         tok_state_before_attribute_value = ->
3574                 switch c = txt.charAt(cur++)
3575                         when "\t", "\n", "\u000c", ' '
3576                                 return null
3577                         when '"'
3578                                 tok_state = tok_state_attribute_value_double_quoted
3579                         when '&'
3580                                 tok_state = tok_state_attribute_value_unquoted
3581                                 cur -= 1
3582                         when "'"
3583                                 tok_state = tok_state_attribute_value_single_quoted
3584                         when "\u0000"
3585                                 # Parse error
3586                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3587                                 tok_state = tok_state_attribute_value_unquoted
3588                         when '>'
3589                                 # Parse error
3590                                 tok_state = tok_state_data
3591                                 tmp = tok_cur_tag
3592                                 tok_cur_tag = null
3593                                 return tmp
3594                         when '' # EOF
3595                                 parse_error()
3596                                 tok_state = tok_state_data
3597                         else
3598                                 tok_cur_tag.attrs_a[0][1] += c
3599                                 tok_state = tok_state_attribute_value_unquoted
3600                 return null
3601
3602         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3603         tok_state_attribute_value_double_quoted = ->
3604                 switch c = txt.charAt(cur++)
3605                         when '"'
3606                                 tok_state = tok_state_after_attribute_value_quoted
3607                         when '&'
3608                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3609                         when "\u0000"
3610                                 # Parse error
3611                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3612                         when '' # EOF
3613                                 parse_error()
3614                                 tok_state = tok_state_data
3615                         else
3616                                 tok_cur_tag.attrs_a[0][1] += c
3617                 return null
3618
3619         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3620         tok_state_attribute_value_single_quoted = ->
3621                 switch c = txt.charAt(cur++)
3622                         when "'"
3623                                 tok_state = tok_state_after_attribute_value_quoted
3624                         when '&'
3625                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3626                         when "\u0000"
3627                                 # Parse error
3628                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3629                         when '' # EOF
3630                                 parse_error()
3631                                 tok_state = tok_state_data
3632                         else
3633                                 tok_cur_tag.attrs_a[0][1] += c
3634                 return null
3635
3636         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3637         tok_state_attribute_value_unquoted = ->
3638                 switch c = txt.charAt(cur++)
3639                         when "\t", "\n", "\u000c", ' '
3640                                 tok_state = tok_state_before_attribute_name
3641                         when '&'
3642                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3643                         when '>'
3644                                 tok_state = tok_state_data
3645                                 tmp = tok_cur_tag
3646                                 tok_cur_tag = null
3647                                 return tmp
3648                         when "\u0000"
3649                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3650                         when '' # EOF
3651                                 parse_error()
3652                                 tok_state = tok_state_data
3653                         else
3654                                 # Parse Error if ', <, = or ` (backtick)
3655                                 tok_cur_tag.attrs_a[0][1] += c
3656                 return null
3657
3658         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3659         tok_state_after_attribute_value_quoted = ->
3660                 switch c = txt.charAt(cur++)
3661                         when "\t", "\n", "\u000c", ' '
3662                                 tok_state = tok_state_before_attribute_name
3663                         when '/'
3664                                 tok_state = tok_state_self_closing_start_tag
3665                         when '>'
3666                                 tok_state = tok_state_data
3667                                 tmp = tok_cur_tag
3668                                 tok_cur_tag = null
3669                                 return tmp
3670                         when '' # EOF
3671                                 parse_error()
3672                                 tok_state = tok_state_data
3673                         else
3674                                 # Parse Error
3675                                 tok_state = tok_state_before_attribute_name
3676                                 cur -= 1 # we didn't handle that char
3677                 return null
3678
3679         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3680         tok_state_self_closing_start_tag = ->
3681                 c = txt.charAt(cur++)
3682                 if c is '>'
3683                         tok_cur_tag.flag 'self-closing'
3684                         tok_state = tok_state_data
3685                         return tok_cur_tag
3686                 if c is ''
3687                         parse_error()
3688                         tok_state = tok_state_data
3689                         cur -= 1 # Reconsume
3690                         return
3691                 # Anything else
3692                 parse_error()
3693                 tok_state = tok_state_before_attribute_name
3694                 cur -= 1 # Reconsume
3695                 return
3696
3697         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3698         # WARNING: put a comment token in tok_cur_tag before setting this state
3699         tok_state_bogus_comment = ->
3700                 next_gt = txt.indexOf '>', cur
3701                 if next_gt is -1
3702                         val = txt.substr cur
3703                         cur = txt.length
3704                 else
3705                         val = txt.substr cur, (next_gt - cur)
3706                         cur = next_gt + 1
3707                 val = val.replace "\u0000", "\ufffd"
3708                 tok_cur_tag.text += val
3709                 tok_state = tok_state_data
3710                 return tok_cur_tag
3711
3712         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3713         tok_state_markup_declaration_open = ->
3714                 if txt.substr(cur, 2) is '--'
3715                         cur += 2
3716                         tok_cur_tag = new_comment_token ''
3717                         tok_state = tok_state_comment_start
3718                         return
3719                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3720                         cur += 7
3721                         tok_state = tok_state_doctype
3722                         return
3723                 acn = adjusted_current_node()
3724                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3725                         cur += 7
3726                         tok_state = tok_state_cdata_section
3727                         return
3728                 # Otherwise
3729                 parse_error()
3730                 tok_cur_tag = new_comment_token ''
3731                 tok_state = tok_state_bogus_comment
3732                 return
3733
3734         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3735         tok_state_comment_start = ->
3736                 switch c = txt.charAt(cur++)
3737                         when '-'
3738                                 tok_state = tok_state_comment_start_dash
3739                         when "\u0000"
3740                                 parse_error()
3741                                 tok_state = tok_state_comment
3742                                 return new_character_token "\ufffd"
3743                         when '>'
3744                                 parse_error()
3745                                 tok_state = tok_state_data
3746                                 return tok_cur_tag
3747                         when '' # EOF
3748                                 parse_error()
3749                                 tok_state = tok_state_data
3750                                 cur -= 1 # Reconsume
3751                                 return tok_cur_tag
3752                         else
3753                                 tok_cur_tag.text += c
3754                                 tok_state = tok_state_comment
3755                 return null
3756
3757         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3758         tok_state_comment_start_dash = ->
3759                 switch c = txt.charAt(cur++)
3760                         when '-'
3761                                 tok_state = tok_state_comment_end
3762                         when "\u0000"
3763                                 parse_error()
3764                                 tok_cur_tag.text += "-\ufffd"
3765                                 tok_state = tok_state_comment
3766                         when '>'
3767                                 parse_error()
3768                                 tok_state = tok_state_data
3769                                 return tok_cur_tag
3770                         when '' # EOF
3771                                 parse_error()
3772                                 tok_state = tok_state_data
3773                                 cur -= 1 # Reconsume
3774                                 return tok_cur_tag
3775                         else
3776                                 tok_cur_tag.text += "-#{c}"
3777                                 tok_state = tok_state_comment
3778                 return null
3779
3780         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3781         tok_state_comment = ->
3782                 switch c = txt.charAt(cur++)
3783                         when '-'
3784                                 tok_state = tok_state_comment_end_dash
3785                         when "\u0000"
3786                                 parse_error()
3787                                 tok_cur_tag.text += "\ufffd"
3788                         when '' # EOF
3789                                 parse_error()
3790                                 tok_state = tok_state_data
3791                                 cur -= 1 # Reconsume
3792                                 return tok_cur_tag
3793                         else
3794                                 tok_cur_tag.text += c
3795                 return null
3796
3797         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3798         tok_state_comment_end_dash = ->
3799                 switch c = txt.charAt(cur++)
3800                         when '-'
3801                                 tok_state = tok_state_comment_end
3802                         when "\u0000"
3803                                 parse_error()
3804                                 tok_cur_tag.text += "-\ufffd"
3805                                 tok_state = tok_state_comment
3806                         when '' # EOF
3807                                 parse_error()
3808                                 tok_state = tok_state_data
3809                                 cur -= 1 # Reconsume
3810                                 return tok_cur_tag
3811                         else
3812                                 tok_cur_tag.text += "-#{c}"
3813                                 tok_state = tok_state_comment
3814                 return null
3815
3816         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3817         tok_state_comment_end = ->
3818                 switch c = txt.charAt(cur++)
3819                         when '>'
3820                                 tok_state = tok_state_data
3821                                 return tok_cur_tag
3822                         when "\u0000"
3823                                 parse_error()
3824                                 tok_cur_tag.text += "--\ufffd"
3825                                 tok_state = tok_state_comment
3826                         when '!'
3827                                 parse_error()
3828                                 tok_state = tok_state_comment_end_bang
3829                         when '-'
3830                                 parse_error()
3831                                 tok_cur_tag.text += '-'
3832                         when '' # EOF
3833                                 parse_error()
3834                                 tok_state = tok_state_data
3835                                 cur -= 1 # Reconsume
3836                                 return tok_cur_tag
3837                         else
3838                                 parse_error()
3839                                 tok_cur_tag.text += "--#{c}"
3840                                 tok_state = tok_state_comment
3841                 return null
3842
3843         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3844         tok_state_comment_end_bang = ->
3845                 switch c = txt.charAt(cur++)
3846                         when '-'
3847                                 tok_cur_tag.text += "--!#{c}"
3848                                 tok_state = tok_state_comment_end_dash
3849                         when '>'
3850                                 tok_state = tok_state_data
3851                                 return tok_cur_tag
3852                         when "\u0000"
3853                                 parse_error()
3854                                 tok_cur_tag.text += "--!\ufffd"
3855                                 tok_state = tok_state_comment
3856                         when '' # EOF
3857                                 parse_error()
3858                                 tok_state = tok_state_data
3859                                 cur -= 1 # Reconsume
3860                                 return tok_cur_tag
3861                         else
3862                                 tok_cur_tag.text += "--!#{c}"
3863                                 tok_state = tok_state_comment
3864                 return null
3865
3866         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3867         tok_state_doctype = ->
3868                 switch c = txt.charAt(cur++)
3869                         when "\t", "\u000a", "\u000c", ' '
3870                                 tok_state = tok_state_before_doctype_name
3871                         when '' # EOF
3872                                 parse_error()
3873                                 tok_state = tok_state_data
3874                                 el = new_doctype_token ''
3875                                 el.flag 'force-quirks', true
3876                                 cur -= 1 # Reconsume
3877                                 return el
3878                         else
3879                                 parse_error()
3880                                 tok_state = tok_state_before_doctype_name
3881                                 cur -= 1 # Reconsume
3882                 return null
3883
3884         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3885         tok_state_before_doctype_name = ->
3886                 c = txt.charAt(cur++)
3887                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3888                         return
3889                 if is_uc_alpha(c)
3890                         tok_cur_tag = new_doctype_token c.toLowerCase()
3891                         tok_state = tok_state_doctype_name
3892                         return
3893                 if c is "\u0000"
3894                         parse_error()
3895                         tok_cur_tag = new_doctype_token "\ufffd"
3896                         tok_state = tok_state_doctype_name
3897                         return
3898                 if c is '>'
3899                         parse_error()
3900                         el = new_doctype_token ''
3901                         el.flag 'force-quirks', true
3902                         tok_state = tok_state_data
3903                         return el
3904                 if c is '' # EOF
3905                         parse_error()
3906                         tok_state = tok_state_data
3907                         el = new_doctype_token ''
3908                         el.flag 'force-quirks', true
3909                         cur -= 1 # Reconsume
3910                         return el
3911                 # Anything else
3912                 tok_cur_tag = new_doctype_token c
3913                 tok_state = tok_state_doctype_name
3914                 return null
3915
3916         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
3917         tok_state_doctype_name = ->
3918                 c = txt.charAt(cur++)
3919                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3920                         tok_state = tok_state_after_doctype_name
3921                         return
3922                 if c is '>'
3923                         tok_state = tok_state_data
3924                         return tok_cur_tag
3925                 if is_uc_alpha(c)
3926                         tok_cur_tag.name += c.toLowerCase()
3927                         return
3928                 if c is "\u0000"
3929                         parse_error()
3930                         tok_cur_tag.name += "\ufffd"
3931                         return
3932                 if c is '' # EOF
3933                         parse_error()
3934                         tok_state = tok_state_data
3935                         tok_cur_tag.flag 'force-quirks', true
3936                         cur -= 1 # Reconsume
3937                         return tok_cur_tag
3938                 # Anything else
3939                 tok_cur_tag.name += c
3940                 return null
3941
3942         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
3943         tok_state_after_doctype_name = ->
3944                 c = txt.charAt(cur++)
3945                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3946                         return
3947                 if c is '>'
3948                         tok_state = tok_state_data
3949                         return tok_cur_tag
3950                 if c is '' # EOF
3951                         parse_error()
3952                         tok_state = tok_state_data
3953                         tok_cur_tag.flag 'force-quirks', true
3954                         cur -= 1 # Reconsume
3955                         return tok_cur_tag
3956                 # Anything else
3957                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
3958                         cur += 5
3959                         tok_state = tok_state_after_doctype_public_keyword
3960                         return
3961                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
3962                         cur += 5
3963                         tok_state = tok_state_after_doctype_system_keyword
3964                         return
3965                 parse_error()
3966                 tok_cur_tag.flag 'force-quirks', true
3967                 tok_state = tok_state_bogus_doctype
3968                 return null
3969
3970         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
3971         tok_state_after_doctype_public_keyword = ->
3972                 c = txt.charAt(cur++)
3973                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3974                         tok_state = tok_state_before_doctype_public_identifier
3975                         return
3976                 if c is '"'
3977                         parse_error()
3978                         tok_cur_tag.public_identifier = ''
3979                         tok_state = tok_state_doctype_public_identifier_double_quoted
3980                         return
3981                 if c is "'"
3982                         parse_error()
3983                         tok_cur_tag.public_identifier = ''
3984                         tok_state = tok_state_doctype_public_identifier_single_quoted
3985                         return
3986                 if c is '>'
3987                         parse_error()
3988                         tok_cur_tag.flag 'force-quirks', true
3989                         tok_state = tok_state_data
3990                         return tok_cur_tag
3991                 if c is '' # EOF
3992                         parse_error()
3993                         tok_state = tok_state_data
3994                         tok_cur_tag.flag 'force-quirks', true
3995                         cur -= 1 # Reconsume
3996                         return tok_cur_tag
3997                 # Anything else
3998                 parse_error()
3999                 tok_cur_tag.flag 'force-quirks', true
4000                 tok_state = tok_state_bogus_doctype
4001                 return null
4002
4003         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4004         tok_state_before_doctype_public_identifier = ->
4005                 c = txt.charAt(cur++)
4006                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4007                         return
4008                 if c is '"'
4009                         parse_error()
4010                         tok_cur_tag.public_identifier = ''
4011                         tok_state = tok_state_doctype_public_identifier_double_quoted
4012                         return
4013                 if c is "'"
4014                         parse_error()
4015                         tok_cur_tag.public_identifier = ''
4016                         tok_state = tok_state_doctype_public_identifier_single_quoted
4017                         return
4018                 if c is '>'
4019                         parse_error()
4020                         tok_cur_tag.flag 'force-quirks', true
4021                         tok_state = tok_state_data
4022                         return tok_cur_tag
4023                 if c is '' # EOF
4024                         parse_error()
4025                         tok_state = tok_state_data
4026                         tok_cur_tag.flag 'force-quirks', true
4027                         cur -= 1 # Reconsume
4028                         return tok_cur_tag
4029                 # Anything else
4030                 parse_error()
4031                 tok_cur_tag.flag 'force-quirks', true
4032                 tok_state = tok_state_bogus_doctype
4033                 return null
4034
4035
4036         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4037         tok_state_doctype_public_identifier_double_quoted = ->
4038                 c = txt.charAt(cur++)
4039                 if c is '"'
4040                         tok_state = tok_state_after_doctype_public_identifier
4041                         return
4042                 if c is "\u0000"
4043                         parse_error()
4044                         tok_cur_tag.public_identifier += "\ufffd"
4045                         return
4046                 if c is '>'
4047                         parse_error()
4048                         tok_cur_tag.flag 'force-quirks', true
4049                         tok_state = tok_state_data
4050                         return tok_cur_tag
4051                 if c is '' # EOF
4052                         parse_error()
4053                         tok_state = tok_state_data
4054                         tok_cur_tag.flag 'force-quirks', true
4055                         cur -= 1 # Reconsume
4056                         return tok_cur_tag
4057                 # Anything else
4058                 tok_cur_tag.public_identifier += c
4059                 return null
4060
4061         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4062         tok_state_doctype_public_identifier_single_quoted = ->
4063                 c = txt.charAt(cur++)
4064                 if c is "'"
4065                         tok_state = tok_state_after_doctype_public_identifier
4066                         return
4067                 if c is "\u0000"
4068                         parse_error()
4069                         tok_cur_tag.public_identifier += "\ufffd"
4070                         return
4071                 if c is '>'
4072                         parse_error()
4073                         tok_cur_tag.flag 'force-quirks', true
4074                         tok_state = tok_state_data
4075                         return tok_cur_tag
4076                 if c is '' # EOF
4077                         parse_error()
4078                         tok_state = tok_state_data
4079                         tok_cur_tag.flag 'force-quirks', true
4080                         cur -= 1 # Reconsume
4081                         return tok_cur_tag
4082                 # Anything else
4083                 tok_cur_tag.public_identifier += c
4084                 return null
4085
4086         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4087         tok_state_after_doctype_public_identifier = ->
4088                 c = txt.charAt(cur++)
4089                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4090                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4091                         return
4092                 if c is '>'
4093                         tok_state = tok_state_data
4094                         return tok_cur_tag
4095                 if c is '"'
4096                         parse_error()
4097                         tok_cur_tag.system_identifier = ''
4098                         tok_state = tok_state_doctype_system_identifier_double_quoted
4099                         return
4100                 if c is "'"
4101                         parse_error()
4102                         tok_cur_tag.system_identifier = ''
4103                         tok_state = tok_state_doctype_system_identifier_single_quoted
4104                         return
4105                 if c is '' # EOF
4106                         parse_error()
4107                         tok_state = tok_state_data
4108                         tok_cur_tag.flag 'force-quirks', true
4109                         cur -= 1 # Reconsume
4110                         return tok_cur_tag
4111                 # Anything else
4112                 parse_error()
4113                 tok_cur_tag.flag 'force-quirks', true
4114                 tok_state = tok_state_bogus_doctype
4115                 return null
4116
4117         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4118         tok_state_between_doctype_public_and_system_identifiers = ->
4119                 c = txt.charAt(cur++)
4120                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4121                         return
4122                 if c is '>'
4123                         tok_state = tok_state_data
4124                         return tok_cur_tag
4125                 if c is '"'
4126                         parse_error()
4127                         tok_cur_tag.system_identifier = ''
4128                         tok_state = tok_state_doctype_system_identifier_double_quoted
4129                         return
4130                 if c is "'"
4131                         parse_error()
4132                         tok_cur_tag.system_identifier = ''
4133                         tok_state = tok_state_doctype_system_identifier_single_quoted
4134                         return
4135                 if c is '' # EOF
4136                         parse_error()
4137                         tok_state = tok_state_data
4138                         tok_cur_tag.flag 'force-quirks', true
4139                         cur -= 1 # Reconsume
4140                         return tok_cur_tag
4141                 # Anything else
4142                 parse_error()
4143                 tok_cur_tag.flag 'force-quirks', true
4144                 tok_state = tok_state_bogus_doctype
4145                 return null
4146
4147         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4148         tok_state_after_doctype_system_keyword = ->
4149                 c = txt.charAt(cur++)
4150                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4151                         tok_state = tok_state_before_doctype_system_identifier
4152                         return
4153                 if c is '"'
4154                         parse_error()
4155                         tok_cur_tag.system_identifier = ''
4156                         tok_state = tok_state_doctype_system_identifier_double_quoted
4157                         return
4158                 if c is "'"
4159                         parse_error()
4160                         tok_cur_tag.system_identifier = ''
4161                         tok_state = tok_state_doctype_system_identifier_single_quoted
4162                         return
4163                 if c is '>'
4164                         parse_error()
4165                         tok_cur_tag.flag 'force-quirks', true
4166                         tok_state = tok_state_data
4167                         return tok_cur_tag
4168                 if c is '' # EOF
4169                         parse_error()
4170                         tok_state = tok_state_data
4171                         tok_cur_tag.flag 'force-quirks', true
4172                         cur -= 1 # Reconsume
4173                         return tok_cur_tag
4174                 # Anything else
4175                 parse_error()
4176                 tok_cur_tag.flag 'force-quirks', true
4177                 tok_state = tok_state_bogus_doctype
4178                 return null
4179
4180         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4181         tok_state_before_doctype_system_identifier = ->
4182                 c = txt.charAt(cur++)
4183                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4184                         return
4185                 if c is '"'
4186                         tok_cur_tag.system_identifier = ''
4187                         tok_state = tok_state_doctype_system_identifier_double_quoted
4188                         return
4189                 if c is "'"
4190                         tok_cur_tag.system_identifier = ''
4191                         tok_state = tok_state_doctype_system_identifier_single_quoted
4192                         return
4193                 if c is '>'
4194                         parse_error()
4195                         tok_cur_tag.flag 'force-quirks', true
4196                         tok_state = tok_state_data
4197                         return tok_cur_tag
4198                 if c is '' # EOF
4199                         parse_error()
4200                         tok_state = tok_state_data
4201                         tok_cur_tag.flag 'force-quirks', true
4202                         cur -= 1 # Reconsume
4203                         return tok_cur_tag
4204                 # Anything else
4205                 parse_error()
4206                 tok_cur_tag.flag 'force-quirks', true
4207                 tok_state = tok_state_bogus_doctype
4208                 return null
4209
4210         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4211         tok_state_doctype_system_identifier_double_quoted = ->
4212                 c = txt.charAt(cur++)
4213                 if c is '"'
4214                         tok_state = tok_state_after_doctype_system_identifier
4215                         return
4216                 if c is "\u0000"
4217                         parse_error()
4218                         tok_cur_tag.system_identifier += "\ufffd"
4219                         return
4220                 if c is '>'
4221                         parse_error()
4222                         tok_cur_tag.flag 'force-quirks', true
4223                         tok_state = tok_state_data
4224                         return tok_cur_tag
4225                 if c is '' # EOF
4226                         parse_error()
4227                         tok_state = tok_state_data
4228                         tok_cur_tag.flag 'force-quirks', true
4229                         cur -= 1 # Reconsume
4230                         return tok_cur_tag
4231                 # Anything else
4232                 tok_cur_tag.system_identifier += c
4233                 return null
4234
4235         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4236         tok_state_doctype_system_identifier_single_quoted = ->
4237                 c = txt.charAt(cur++)
4238                 if c is "'"
4239                         tok_state = tok_state_after_doctype_system_identifier
4240                         return
4241                 if c is "\u0000"
4242                         parse_error()
4243                         tok_cur_tag.system_identifier += "\ufffd"
4244                         return
4245                 if c is '>'
4246                         parse_error()
4247                         tok_cur_tag.flag 'force-quirks', true
4248                         tok_state = tok_state_data
4249                         return tok_cur_tag
4250                 if c is '' # EOF
4251                         parse_error()
4252                         tok_state = tok_state_data
4253                         tok_cur_tag.flag 'force-quirks', true
4254                         cur -= 1 # Reconsume
4255                         return tok_cur_tag
4256                 # Anything else
4257                 tok_cur_tag.system_identifier += c
4258                 return null
4259
4260         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4261         tok_state_after_doctype_system_identifier = ->
4262                 c = txt.charAt(cur++)
4263                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4264                         return
4265                 if c is '>'
4266                         tok_state = tok_state_data
4267                         return tok_cur_tag
4268                 if c is '' # EOF
4269                         parse_error()
4270                         tok_state = tok_state_data
4271                         tok_cur_tag.flag 'force-quirks', true
4272                         cur -= 1 # Reconsume
4273                         return tok_cur_tag
4274                 # Anything else
4275                 parse_error()
4276                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4277                 tok_state = tok_state_bogus_doctype
4278                 return null
4279
4280         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4281         tok_state_bogus_doctype = ->
4282                 c = txt.charAt(cur++)
4283                 if c is '>'
4284                         tok_state = tok_state_data
4285                         return tok_cur_tag
4286                 if c is '' # EOF
4287                         tok_state = tok_state_data
4288                         cur -= 1 # Reconsume
4289                         return tok_cur_tag
4290                 # Anything else
4291                 return null
4292
4293         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4294         tok_state_cdata_section = ->
4295                 tok_state = tok_state_data
4296                 next_gt = txt.indexOf ']]>', cur
4297                 if next_gt is -1
4298                         val = txt.substr cur
4299                         cur = txt.length
4300                 else
4301                         val = txt.substr cur, (next_gt - cur)
4302                         cur = next_gt + 3
4303                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4304                 val = val.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4305                 val = val.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4306                 return new_character_token val # fixfull split
4307
4308         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4309         # Don't set this as a state, just call it
4310         # returns a string (NOT a text node)
4311         parse_character_reference = (allowed_char = null, in_attr = false) ->
4312                 if cur >= txt.length
4313                         return '&'
4314                 switch c = txt.charAt(cur)
4315                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4316                                 # explicitly not a parse error
4317                                 return '&'
4318                         when ';'
4319                                 # there has to be "one or more" alnums between & and ; to be a parse error
4320                                 return '&'
4321                         when '#'
4322                                 if cur + 1 >= txt.length
4323                                         return '&'
4324                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4325                                         prefix = '#x'
4326                                         charset = hex_chars
4327                                         start = cur + 2
4328                                 else
4329                                         charset = digits
4330                                         start = cur + 1
4331                                         prefix = '#'
4332                                 i = 0
4333                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4334                                         i += 1
4335                                 if i is 0
4336                                         return '&'
4337                                 if txt.charAt(start + i) is ';'
4338                                         i += 1
4339                                 # FIXME This is supposed to generate parse errors for some chars
4340                                 decoded = decode_named_char_ref(prefix + txt.substr(start, i).toLowerCase())
4341                                 if decoded?
4342                                         cur = start + i
4343                                         return decoded
4344                                 return '&'
4345                         else
4346                                 for i in [0...31]
4347                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4348                                                 break
4349                                 if i is 0
4350                                         # exit early, because parse_error() below needs at least one alnum
4351                                         return '&'
4352                                 if txt.charAt(cur + i) is ';'
4353                                         i += 1 # include ';' terminator in value
4354                                         decoded = decode_named_char_ref txt.substr(cur, i)
4355                                         if decoded?
4356                                                 cur += i
4357                                                 return decoded
4358                                         parse_error()
4359                                         return '&'
4360                                 else
4361                                         # no ';' terminator (only legacy char refs)
4362                                         max = i
4363                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4364                                                 c = legacy_char_refs[txt.substr(cur, i)]
4365                                                 if c?
4366                                                         if in_attr
4367                                                                 if txt.charAt(cur + i) is '='
4368                                                                         # "because some legacy user agents will
4369                                                                         # misinterpret the markup in those cases"
4370                                                                         parse_error()
4371                                                                         return '&'
4372                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4373                                                                         # this makes attributes forgiving about url args
4374                                                                         return '&'
4375                                                         # ok, and besides the weird exceptions for attributes...
4376                                                         # return the matching char
4377                                                         cur += i # consume entity chars
4378                                                         parse_error() # because no terminating ";"
4379                                                         return c
4380                                         parse_error()
4381                                         return '&'
4382                 return # never reached
4383
4384         # tree constructor initialization
4385         # see comments on TYPE_TAG/etc for the structure of this data
4386         txt = args.html
4387         cur = 0
4388         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4389         open_els = []
4390         afe = [] # active formatting elements
4391         template_ins_modes = []
4392         ins_mode = ins_mode_initial
4393         original_ins_mode = ins_mode # TODO check spec
4394         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4395         flag_frameset_ok = true
4396         flag_parsing = true
4397         flag_foster_parenting = false
4398         form_element_pointer = null
4399         temporary_buffer = null
4400         pending_table_character_tokens = []
4401         head_element_pointer = null
4402         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4403         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4404
4405         # tokenizer initialization
4406         tok_state = tok_state_data
4407
4408         if args.name is "namespace-sensitivity.dat #1"
4409                 console.log "hi"
4410         # proccess input
4411         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4412         while flag_parsing
4413                 t = tok_state()
4414                 if t?
4415                         process_token t
4416                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4417         return doc.children
4418
4419 serialize_els = (els, shallow, show_ids) ->
4420         serialized = ''
4421         sep = ''
4422         for t in els
4423                 serialized += sep
4424                 sep = ','
4425                 serialized += t.serialize shallow, show_ids
4426         return serialized
4427
4428 module.exports.parse_html = parse_html
4429 module.exports.debug_log_reset = debug_log_reset
4430 module.exports.debug_log_each = debug_log_each
4431 module.exports.TYPE_TAG = TYPE_TAG
4432 module.exports.TYPE_TEXT = TYPE_TEXT
4433 module.exports.TYPE_COMMENT = TYPE_COMMENT
4434 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4435 module.exports.NS_HTML = NS_HTML
4436 module.exports.NS_MATHML = NS_MATHML
4437 module.exports.NS_SVG = NS_SVG