JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
fix self-closing and mathml ints
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
24 #
25 # Deviations from that spec:
26 #
27 #   Purposeful: search this file for "WTAG"
28 #
29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
30
31
32 # stacks/lists
33 #
34 # the spec uses many different words to indicate which ends of lists/stacks
35 # it is talking about (and relative movement within the lists/stacks). This
36 # section explains. I'm implementing "lists" (afe and open_els) the same way
37 # (both as stacks)
38 #
39 # stacks grow downward (current element is index=0)
40 #
41 # example: open_els = [a, b, c, d, e, f, g]
42 #
43 # "grows downwards" means it's visualized like this: (index: el, names)
44 #
45 #   6: g "start of the list", "topmost", "first"
46 #   5: f
47 #   4: e "previous" (to d), "above", "before"
48 #   3: d   (previous/next are relative to this element)
49 #   2: c "next", "after", "lower", "below"
50 #   1: b
51 #   0: a "end of the list", "current node", "bottommost", "last"
52
53
54 # browser
55 # note: to get this to run outside a browser, you'll have to write a native
56 # implementation of decode_named_char_ref()
# When no CommonJS-style `module.exports` is visible, publish the API on
# window.wheic and point a stand-in `module.exports` at that same object.
# NOTE(review): `module` is assigned below, so CoffeeScript declares it as a
# variable of this file; with the default function wrapper that declaration
# presumably shadows Node's own `module`, which would make this branch run
# under Node as well -- confirm.
unless module?.exports?
        window.wheic = {}
        module = exports: window.wheic
60
# Turn a Unicode code point into a string. Uses the native
# String.fromCodePoint when the runtime has one; otherwise builds the UTF-16
# representation by hand.
from_code_point = (x) ->
        return String.fromCodePoint x if String.fromCodePoint?
        # a single UTF-16 code unit is enough up to U+FFFF
        return String.fromCharCode x if x <= 0xffff
        # above that: split the offset from U+10000 into a surrogate pair
        x -= 0x10000
        high = (x >> 10) + 0xd800
        low = (x % 0x400) + 0xdc00
        return String.fromCharCode high, low
69
70 # Each node is an object of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
73 TYPE_COMMENT = 2
74 TYPE_DOCTYPE = 3
75 # the following types are emitted by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
78 TYPE_EOF = 6
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
81
82 # namespace constants
83 NS_HTML = 1
84 NS_MATHML = 2
85 NS_SVG = 3
86
# In-memory debug log; messages pile up here until debug_log_reset is called.
g_debug_log = []
# Throw away everything logged so far.
debug_log_reset = -> g_debug_log = []
# Append one message to the log.
debug_log = (str) -> g_debug_log.push str
# Call cb once for each logged message, in the order they were logged.
debug_log_each = (cb) ->
        cb message for message in g_debug_log
95
# Counter behind auto-assigned node ids (incremented before each use).
prev_node_id = 0

# A Node represents either a token produced by the tokenizer or a node of the
# resulting tree; @type (one of the TYPE_* constants) says which.
class Node
        # type: one of the TYPE_* constants
        # args: optional initial values. Recognised keys: name, text, attrs,
        #       attrs_a (legacy spelling: attr_k), children, namespace, parent,
        #       token, flags, id
        constructor: (type, args = {}) ->
                @type = type # one of the TYPE_* constants above
                @name = args.name ? '' # tag name
                @text = args.text ? '' # contents for text/comment nodes
                @attrs = args.attrs ? {}
                # attrs in progress, TYPE_START_TAG only.
                # This used to read only args.attr_k, so a caller passing the
                # key that matches the property name (attrs_a) was silently
                # ignored. Both spellings are honoured now.
                @attrs_a = args.attrs_a ? (args.attr_k ? [])
                @children = args.children ? []
                @namespace = args.namespace ? NS_HTML
                @parent = args.parent ? null
                @token = args.token ? null
                @flags = args.flags ? {}
                if args.id?
                        # an id supplied by the caller gets a "+" suffix
                        @id = "#{args.id}+"
                else
                        @id = "#{++prev_node_id}"

        # Record that the self-closing flag was acknowledged: on the
        # associated token when there is one, otherwise on this node itself.
        acknowledge_self_closing: ->
                if @token?
                        @token.flag 'did_self_close', true
                else
                        @flag 'did_self_close', true

        # Flag accessor: with a non-null value it stores the flag, without one
        # it returns the stored value (undefined when the flag was never set).
        flag: (key, value = null) ->
                if value?
                        @flags[key] = value
                else
                        return @flags[key]

        # Build a compact string form of this node, for unit tests.
        # shallow: for tags, stop after the name (and id); no attributes or
        #          children
        # show_ids: include "#<id>," after each tag name
        serialize: (shallow = false, show_ids = false) -> # for unit tests
                ret = ''
                switch @type
                        when TYPE_TAG
                                ret += 'tag:'
                                ret += JSON.stringify @name
                                ret += ','
                                if show_ids
                                        ret += "##{@id},"
                                if shallow
                                        break
                                # sort attribute names so the output is stable
                                attr_keys = []
                                for k of @attrs
                                        attr_keys.push k
                                attr_keys.sort()
                                ret += '{'
                                sep = ''
                                for k in attr_keys
                                        ret += sep
                                        sep = ','
                                        ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
                                ret += '},['
                                sep = ''
                                for c in @children
                                        ret += sep
                                        sep = ','
                                        ret += c.serialize shallow, show_ids
                                ret += ']'
                        when TYPE_TEXT
                                ret += 'text:'
                                ret += JSON.stringify @text
                        when TYPE_COMMENT
                                ret += 'comment:'
                                ret += JSON.stringify @text
                        when TYPE_DOCTYPE
                                # the identifiers are not set by the constructor;
                                # missing ones serialize as ""
                                ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
                        when TYPE_AFE_MARKER
                                ret += 'marker'
                        when TYPE_AAA_BOOKMARK
                                ret += 'aaa_bookmark'
                        else
                                ret += 'unknown:'
                                console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
                return ret
167
168 # helpers: (only take args that are normally known when parser creates nodes)
# Factories for each kind of Node/token the parser creates.
new_open_tag = (name) -> new Node(TYPE_START_TAG, {name: name})
new_end_tag = (name) -> new Node(TYPE_END_TAG, {name: name})
new_element = (name) -> new Node(TYPE_TAG, {name: name})
new_text_node = (txt) -> new Node(TYPE_TEXT, {text: txt})
# character tokens and text nodes are the same thing
new_character_token = new_text_node
new_comment_token = (txt) -> new Node(TYPE_COMMENT, {text: txt})
new_doctype_token = (name) -> new Node(TYPE_DOCTYPE, {name: name})
new_eof_token = -> new Node(TYPE_EOF)
new_afe_marker = -> new Node(TYPE_AFE_MARKER)
new_aaa_bookmark = -> new Node(TYPE_AAA_BOOKMARK)
188
# character classes used by the tokenizer
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
digits = "0123456789"
alnum = lc_alpha + uc_alpha + digits
hex_chars = digits + "abcdefABCDEF"

# true only for a one-character string holding an ASCII uppercase letter
is_uc_alpha = (str) ->
        return false unless str.length is 1
        return uc_alpha.indexOf(str) isnt -1
# true only for a one-character string holding an ASCII lowercase letter
is_lc_alpha = (str) ->
        return false unless str.length is 1
        return lc_alpha.indexOf(str) isnt -1
199
200 # some SVG elements have dashes in them
201 tag_name_chars = alnum + "-"
202
203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
# tab, line feed, form feed, carriage return, space
space_chars = "\u0009\u000a\u000c\u000d\u0020"
# true when txt is exactly one space character
is_space = (txt) ->
        return false if txt.length isnt 1
        return space_chars.indexOf(txt) isnt -1
# true when t is a text token made of exactly one space character
is_space_tok = (t) ->
        return false unless t.type is TYPE_TEXT
        return is_space t.text
209
# True when t is a start tag token whose first "type" entry in attrs_a has the
# value "hidden" (compared case-insensitively). Only that first entry is
# examined.
is_input_hidden_tok = (t) ->
        if t.type isnt TYPE_START_TAG
                return false
        for [key, value] in t.attrs_a when key is 'type'
                return value.toLowerCase() is 'hidden'
        return false
218
219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
221
222 unicode_fixes = {}
223 unicode_fixes[0x00] = "\uFFFD"
224 unicode_fixes[0x80] = "\u20AC"
225 unicode_fixes[0x82] = "\u201A"
226 unicode_fixes[0x83] = "\u0192"
227 unicode_fixes[0x84] = "\u201E"
228 unicode_fixes[0x85] = "\u2026"
229 unicode_fixes[0x86] = "\u2020"
230 unicode_fixes[0x87] = "\u2021"
231 unicode_fixes[0x88] = "\u02C6"
232 unicode_fixes[0x89] = "\u2030"
233 unicode_fixes[0x8A] = "\u0160"
234 unicode_fixes[0x8B] = "\u2039"
235 unicode_fixes[0x8C] = "\u0152"
236 unicode_fixes[0x8E] = "\u017D"
237 unicode_fixes[0x91] = "\u2018"
238 unicode_fixes[0x92] = "\u2019"
239 unicode_fixes[0x93] = "\u201C"
240 unicode_fixes[0x94] = "\u201D"
241 unicode_fixes[0x95] = "\u2022"
242 unicode_fixes[0x96] = "\u2013"
243 unicode_fixes[0x97] = "\u2014"
244 unicode_fixes[0x98] = "\u02DC"
245 unicode_fixes[0x99] = "\u2122"
246 unicode_fixes[0x9A] = "\u0161"
247 unicode_fixes[0x9B] = "\u203A"
248 unicode_fixes[0x9C] = "\u0153"
249 unicode_fixes[0x9E] = "\u017E"
250 unicode_fixes[0x9F] = "\u0178"
251
252 # These are the character references that don't need a terminating semicolon
253 # min length: 2, max: 6, none are a prefix of any other.
254 legacy_char_refs = {
255         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
256         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
257         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
258         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
259         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
260         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
261         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
262         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
263         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
264         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
265         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
266         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
267         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
268         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
269         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
270         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
271         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
272         yen: '¥', yuml: 'ÿ'
273 }
274
275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
276 raw_text_elements = ['script', 'style']
277 escapable_raw_text_elements = ['textarea', 'title']
278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
279 svg_elements = [
280         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
281         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
282         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
283         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
284         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
285         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
286         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
287         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
288         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
289         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
290         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
291         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
292         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
293         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
294         'view', 'vkern'
295 ]
296
297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
298 mathml_elements = [
299         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
300         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
301         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
302         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
303         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
304         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
305         'determinant', 'diff', 'divergence', 'divide', 'domain',
306         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
307         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
308         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
309         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
310         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
311         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
312         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
313         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
314         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
315         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
316         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
317         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
318         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
319         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
320         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
321         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
322         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
323         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
324         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
325         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
326         'vectorproduct', 'xor'
327 ]
328 # foreign_elements = [svg_elements..., mathml_elements...]
329 #normal_elements = All other allowed HTML elements are normal elements.
330
# Elements in the "special" category, as a map from tag name to the namespace
# in which that name is special (see el_is_special).
# NOTE(review): `title` is listed twice below (NS_HTML and NS_SVG). A plain
# object holds one value per key, so presumably only the later NS_SVG entry
# survives and an HTML <title> is not reported as special -- confirm.
special_elements = {
        # HTML:
        address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
        aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
        blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
        caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
        details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
        embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
        footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
        h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
        header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
        img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
        listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,

        menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these

        meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
        noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
        plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
        select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
        table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
        textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
        tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,

        # MathML:
        mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
        'annotation-xml':NS_MATHML,

        # SVG:
        foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
}
362
363 formatting_elements = {
364          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
365          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
366          u: true
367 }
368
# MathML elements that act as text integration points
mathml_text_integration = {
        mi: NS_MATHML
        mo: NS_MATHML
        mn: NS_MATHML
        ms: NS_MATHML
        mtext: NS_MATHML
}
# true when el's name is in the table above under el's own namespace
is_mathml_text_integration_point = (el) ->
        expected_namespace = mathml_text_integration[el.name]
        return expected_namespace is el.namespace
# True when el is an HTML integration point: a MathML annotation-xml element
# whose encoding attribute is text/html or application/xhtml+xml (compared
# case-insensitively), or an SVG foreignObject, desc or title element.
# Reads el.attrs, so it must be given an element, not a token.
is_html_integration = (el) -> # DON'T PASS A TOKEN
        switch el.namespace
                when NS_MATHML
                        return false unless el.name is 'annotation-xml'
                        return false unless el.attrs.encoding?
                        return true if el.attrs.encoding.toLowerCase() is 'text/html'
                        return true if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
                        return false
                when NS_SVG
                        return el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
        return false
387
388 h_tags = {
389         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
390 }
391
392 foster_parenting_targets = {
393         table: NS_HTML
394         tbody: NS_HTML
395         tfoot: NS_HTML
396         thead: NS_HTML
397         tr: NS_HTML
398 }
399
400 end_tag_implied = {
401         dd: NS_HTML
402         dt: NS_HTML
403         li: NS_HTML
404         option: NS_HTML
405         optgroup: NS_HTML
406         p: NS_HTML
407         rb: NS_HTML
408         rp: NS_HTML
409         rt: NS_HTML
410         rtc: NS_HTML
411 }
412
# true when e's name is listed in special_elements under e's namespace
el_is_special = (e) -> special_elements[e.name] is e.namespace

# address, div and p: the exceptions made by el_is_special_not_adp
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
# like el_is_special, except that HTML address, div and p don't count
el_is_special_not_adp = (el) ->
        return false unless special_elements[el.name] is el.namespace
        return adp_els[el.name] isnt el.namespace
419
420 svg_name_fixes = {
421         altglyph: 'altGlyph'
422         altglyphdef: 'altGlyphDef'
423         altglyphitem: 'altGlyphItem'
424         animatecolor: 'animateColor'
425         animatemotion: 'animateMotion'
426         animatetransform: 'animateTransform'
427         clippath: 'clipPath'
428         feblend: 'feBlend'
429         fecolormatrix: 'feColorMatrix'
430         fecomponenttransfer: 'feComponentTransfer'
431         fecomposite: 'feComposite'
432         feconvolvematrix: 'feConvolveMatrix'
433         fediffuselighting: 'feDiffuseLighting'
434         fedisplacementmap: 'feDisplacementMap'
435         fedistantlight: 'feDistantLight'
436         fedropshadow: 'feDropShadow'
437         feflood: 'feFlood'
438         fefunca: 'feFuncA'
439         fefuncb: 'feFuncB'
440         fefuncg: 'feFuncG'
441         fefuncr: 'feFuncR'
442         fegaussianblur: 'feGaussianBlur'
443         feimage: 'feImage'
444         femerge: 'feMerge'
445         femergenode: 'feMergeNode'
446         femorphology: 'feMorphology'
447         feoffset: 'feOffset'
448         fepointlight: 'fePointLight'
449         fespecularlighting: 'feSpecularLighting'
450         fespotlight: 'feSpotLight'
451         fetile: 'feTile'
452         feturbulence: 'feTurbulence'
453         foreignobject: 'foreignObject'
454         glyphref: 'glyphRef'
455         lineargradient: 'linearGradient'
456         radialgradient: 'radialGradient'
457         textpath: 'textPath'
458 }
459 svg_attribute_fixes = {
460         attributename: 'attributeName'
461         attributetype: 'attributeType'
462         basefrequency: 'baseFrequency'
463         baseprofile: 'baseProfile'
464         calcmode: 'calcMode'
465         clippathunits: 'clipPathUnits'
466         contentscripttype: 'contentScriptType'
467         contentstyletype: 'contentStyleType'
468         diffuseconstant: 'diffuseConstant'
469         edgemode: 'edgeMode'
470         externalresourcesrequired: 'externalResourcesRequired'
471         filterres: 'filterRes'
472         filterunits: 'filterUnits'
473         glyphref: 'glyphRef'
474         gradienttransform: 'gradientTransform'
475         gradientunits: 'gradientUnits'
476         kernelmatrix: 'kernelMatrix'
477         kernelunitlength: 'kernelUnitLength'
478         keypoints: 'keyPoints'
479         keysplines: 'keySplines'
480         keytimes: 'keyTimes'
481         lengthadjust: 'lengthAdjust'
482         limitingconeangle: 'limitingConeAngle'
483         markerheight: 'markerHeight'
484         markerunits: 'markerUnits'
485         markerwidth: 'markerWidth'
486         maskcontentunits: 'maskContentUnits'
487         maskunits: 'maskUnits'
488         numoctaves: 'numOctaves'
489         pathlength: 'pathLength'
490         patterncontentunits: 'patternContentUnits'
491         patterntransform: 'patternTransform'
492         patternunits: 'patternUnits'
493         pointsatx: 'pointsAtX'
494         pointsaty: 'pointsAtY'
495         pointsatz: 'pointsAtZ'
496         preservealpha: 'preserveAlpha'
497         preserveaspectratio: 'preserveAspectRatio'
498         primitiveunits: 'primitiveUnits'
499         refx: 'refX'
500         refy: 'refY'
501         repeatcount: 'repeatCount'
502         repeatdur: 'repeatDur'
503         requiredextensions: 'requiredExtensions'
504         requiredfeatures: 'requiredFeatures'
505         specularconstant: 'specularConstant'
506         specularexponent: 'specularExponent'
507         spreadmethod: 'spreadMethod'
508         startoffset: 'startOffset'
509         stddeviation: 'stdDeviation'
510         stitchtiles: 'stitchTiles'
511         surfacescale: 'surfaceScale'
512         systemlanguage: 'systemLanguage'
513         tablevalues: 'tableValues'
514         targetx: 'targetX'
515         targety: 'targetY'
516         textlength: 'textLength'
517         viewbox: 'viewBox'
518         viewtarget: 'viewTarget'
519         xchannelselector: 'xChannelSelector'
520         ychannelselector: 'yChannelSelector'
521         zoomandpan: 'zoomAndPan'
522 }
523 foreign_attr_fixes = {
524         'xlink:actuate': 'xlink actuate'
525         'xlink:arcrole': 'xlink arcrole'
526         'xlink:href': 'xlink href'
527         'xlink:role': 'xlink role'
528         'xlink:show': 'xlink show'
529         'xlink:title': 'xlink title'
530         'xlink:type': 'xlink type'
531         'xml:base': 'xml base'
532         'xml:lang': 'xml lang'
533         'xml:space': 'xml space'
534         'xmlns': 'xmlns'
535         'xmlns:xlink': 'xmlns xlink'
536 }
# Restore the mixed-case spelling of MathML's definitionURL attribute on start
# tag token t. Edits the [name, value] pairs in t.attrs_a in place; returns
# nothing.
adjust_mathml_attributes = (t) ->
        for attr in t.attrs_a when attr[0] is 'definitionurl'
                attr[0] = 'definitionURL'
        return
# Restore the mixed-case spelling of SVG attribute names on start tag token t,
# using svg_attribute_fixes. Edits the [name, value] pairs in t.attrs_a in
# place; returns nothing.
adjust_svg_attributes = (t) ->
        for a in t.attrs_a
                # Look only at the table's own keys. A plain `table[name]?`
                # test also succeeds for members inherited from
                # Object.prototype, so an attribute called "constructor" or
                # "toString" used to have its name replaced by a function.
                if Object::hasOwnProperty.call svg_attribute_fixes, a[0]
                        a[0] = svg_attribute_fixes[a[0]]
        return
# Rewrite the namespaced attribute names listed in foreign_attr_fixes (xlink:*,
# xml:*, xmlns*) on start tag token t. Edits the [name, value] pairs in
# t.attrs_a in place; returns nothing.
adjust_foreign_attributes = (t) ->
        # fixfull
        for a in t.attrs_a
                # Look only at the table's own keys. A plain `table[name]?`
                # test also succeeds for members inherited from
                # Object.prototype (e.g. an attribute called "constructor"),
                # which used to replace the attribute name with a function.
                if Object::hasOwnProperty.call foreign_attr_fixes, a[0]
                        a[0] = foreign_attr_fixes[a[0]]
        return
553
554 # decode_named_char_ref()
555 #
556 # The list of named character references is _huge_ so ask the browser to decode
557 # for us instead of wasting bandwidth/space on including the table here.
558 #
559 # Pass without the "&" but with the ";" examples:
560 #    for "&amp" pass "amp;"
561 #    for "&#x2032" pass "x2032;"
# state for decode_named_char_ref: results seen so far, plus the scratch
# textarea element used for decoding
g_dncr = {
        cache: {}
        textarea: document.createElement('textarea')
}
# TODO test this in IE8
# Decode one character reference given without its leading "&". Returns the
# decoded text, or null when the textarea hands the input back unchanged.
# Successful results are cached.
decode_named_char_ref = (txt) ->
        reference = "&#{txt}"
        cached = g_dncr.cache[reference]
        if cached?
                return cached
        g_dncr.textarea.innerHTML = reference
        result = g_dncr.textarea.value
        if result is reference
                return null
        g_dncr.cache[reference] = result
        return result
575
576 parse_html = (args) ->
577         txt = null
578         cur = null # index of next char in txt to be parsed
579         # declare doc and tokenizer variables so they're in scope below
580         doc = null
581         open_els = null # stack of open elements
582         afe = null # active formatting elements
583         template_ins_modes = null
584         ins_mode = null
585         original_ins_mode = null
586         tok_state = null
587         tok_cur_tag = null # partially parsed tag
588         flag_scripting = null
589         flag_frameset_ok = null
590         flag_parsing = null
591         flag_foster_parenting = null
592         form_element_pointer = null
593         temporary_buffer = null
594         pending_table_character_tokens = null
595         head_element_pointer = null
596         flag_fragment_parsing = null
597         context_element = null
598
        # Clear the flag_parsing flag.
        stop_parsing = ->
                flag_parsing = false
601
602         parse_error = ->
603                 if args.error_cb?
604                         args.error_cb cur
605                 else
606                         console.log "Parse error at character #{cur} of #{txt.length}"
607
608         afe_push = (new_el) ->
609                 matches = 0
610                 for el, i in afe
611                         if el.name is new_el.name and el.namespace is new_el.namespace
612                                 for k, v of el.attrs
613                                         continue unless new_el.attrs[k] is v
614                                 for k, v of new_el.attrs
615                                         continue unless el.attrs[k] is v
616                                 matches += 1
617                                 if matches is 3
618                                         afe.splice i, 1
619                                         break
620                 afe.unshift new_el
        # Put a fresh marker at the front of afe (index 0).
        afe_push_marker = ->
                afe.unshift new_afe_marker()
623
624         # the functions below implement the Tree Construction algorithm
625         # http://www.w3.org/TR/html5/syntax.html#tree-construction
626
627         # But first... the helpers
628         template_tag_is_open = ->
629                 for t in open_els
630                         if t.name is 'template' and t.namespace is NS_HTML
631                                 return true
632                 return false
633         is_in_scope_x = (tag_name, scope, namespace) ->
634                 for t in open_els
635                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
636                                 return true
637                         if scope[t.name] is t.namespace
638                                 return false
639                 return false
640         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
641                 for t in open_els
642                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
643                                 return true
644                         if scope[t.name] is t.namespace
645                                 return false
646                         if scope2[t.name] is t.namespace
647                                 return false
648                 return false
649         standard_scopers = {
650                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
651                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
652                 template: NS_HTML,
653
654                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
655                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
656
657                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
658         }
659         button_scopers = button: NS_HTML
660         li_scopers = ol: NS_HTML, ul: NS_HTML
661         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
662         is_in_scope = (tag_name, namespace = null) ->
663                 return is_in_scope_x tag_name, standard_scopers, namespace
664         is_in_button_scope = (tag_name, namespace = null) ->
665                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
666         is_in_table_scope = (tag_name, namespace = null) ->
667                 return is_in_scope_x tag_name, table_scopers, namespace
668         # aka is_in_list_item_scope
669         is_in_li_scope = (tag_name, namespace = null) ->
670                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
671         is_in_select_scope = (tag_name, namespace = null) ->
672                 for t in open_els
673                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
674                                 return true
675                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
676                                 return false
677                 return false
678         # this checks for a particular element, not by name
679         # this requires a namespace match
680         el_is_in_scope = (needle) ->
681                 for el in open_els
682                         if el is needle
683                                 return true
684                         if standard_scopers[el.name] is el.namespace
685                                 return false
686                 return false
687
688         clear_to_table_stopers = {
689                 'table': true
690                 'template': true
691                 'html': true
692         }
693         clear_stack_to_table_context = ->
694                 loop
695                         if clear_to_table_stopers[open_els[0].name]?
696                                 break
697                         open_els.shift()
698                 return
699         clear_to_table_body_stopers = {
700                 tbody: NS_HTML
701                 tfoot: NS_HTML
702                 thead: NS_HTML
703                 template: NS_HTML
704                 html: NS_HTML
705         }
706         clear_stack_to_table_body_context = ->
707                 loop
708                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
709                                 break
710                         open_els.shift()
711                 return
712         clear_to_table_row_stopers = {
713                 'tr': true
714                 'template': true
715                 'html': true
716         }
717         clear_stack_to_table_row_context = ->
718                 loop
719                         if clear_to_table_row_stopers[open_els[0].name]?
720                                 break
721                         open_els.shift()
722                 return
723         clear_afe_to_marker = ->
724                 loop
725                         return unless afe.length > 0 # this happens in fragment case, ?spec error
726                         el = afe.shift()
727                         if el.type is TYPE_AFE_MARKER
728                                 return
729                 return
730
731         # 8.2.3.1 ...
732         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
733         reset_ins_mode = ->
734                 # 1. Let last be false.
735                 last = false
736                 # 2. Let node be the last node in the stack of open elements.
737                 node_i = 0
738                 node = open_els[node_i]
739                 # 3. Loop: If node is the first node in the stack of open elements,
740                 # then set last to true, and, if the parser was originally created as
741                 # part of the HTML fragment parsing algorithm (fragment case) set node
742                 # to the context element.
743                 loop
744                         if node_i is open_els.length - 1
745                                 last = true
746                                 # fixfull (fragment case)
747
748                         # 4. If node is a select element, run these substeps:
749                         if node.name is 'select' and node.namespace is NS_HTML
750                                 # 1. If last is true, jump to the step below labeled done.
751                                 unless last
752                                         # 2. Let ancestor be node.
753                                         ancestor_i = node_i
754                                         ancestor = node
755                                         # 3. Loop: If ancestor is the first node in the stack of
756                                         # open elements, jump to the step below labeled done.
757                                         loop
758                                                 if ancestor_i is open_els.length - 1
759                                                         break
760                                                 # 4. Let ancestor be the node before ancestor in the stack
761                                                 # of open elements.
762                                                 ancestor_i += 1
763                                                 ancestor = open_els[ancestor_i]
764                                                 # 5. If ancestor is a template node, jump to the step below
765                                                 # labeled done.
766                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
767                                                         break
768                                                 # 6. If ancestor is a table node, switch the insertion mode
769                                                 # to "in select in table" and abort these steps.
770                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
771                                                         ins_mode = ins_mode_in_select_in_table
772                                                         return
773                                                 # 7. Jump back to the step labeled loop.
774                                 # 8. Done: Switch the insertion mode to "in select" and abort
775                                 # these steps.
776                                 ins_mode = ins_mode_in_select
777                                 return
778                         # 5. If node is a td or th element and last is false, then switch
779                         # the insertion mode to "in cell" and abort these steps.
780                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
781                                 ins_mode = ins_mode_in_cell
782                                 return
783                         # 6. If node is a tr element, then switch the insertion mode to "in
784                         # row" and abort these steps.
785                         if node.name is 'tr' and node.namespace is NS_HTML
786                                 ins_mode = ins_mode_in_row
787                                 return
788                         # 7. If node is a tbody, thead, or tfoot element, then switch the
789                         # insertion mode to "in table body" and abort these steps.
790                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
791                                 ins_mode = ins_mode_in_table_body
792                                 return
793                         # 8. If node is a caption element, then switch the insertion mode
794                         # to "in caption" and abort these steps.
795                         if node.name is 'caption' and node.namespace is NS_HTML
796                                 ins_mode = ins_mode_in_caption
797                                 return
798                         # 9. If node is a colgroup element, then switch the insertion mode
799                         # to "in column group" and abort these steps.
800                         if node.name is 'colgroup' and node.namespace is NS_HTML
801                                 ins_mode = ins_mode_in_column_group
802                                 return
803                         # 10. If node is a table element, then switch the insertion mode to
804                         # "in table" and abort these steps.
805                         if node.name is 'table' and node.namespace is NS_HTML
806                                 ins_mode = ins_mode_in_table
807                                 return
808                         # 11. If node is a template element, then switch the insertion mode
809                         # to the current template insertion mode and abort these steps.
810                         if node.name is 'template' and node.namespace is NS_HTML
811                                 ins_mode = template_ins_modes[0]
812                                 return
813                         # 12. If node is a head element and last is true, then switch the
814                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
815                         # these steps. (fragment case)
816                         if node.name is 'head' and node.namespace is NS_HTML and last
817                                 ins_mode = ins_mode_in_body
818                                 return
819                         # 13. If node is a head element and last is false, then switch the
820                         # insertion mode to "in head" and abort these steps.
821                         if node.name is 'head' and node.namespace is NS_HTML and last is false
822                                 ins_mode = ins_mode_in_head
823                                 return
824                         # 14. If node is a body element, then switch the insertion mode to
825                         # "in body" and abort these steps.
826                         if node.name is 'body' and node.namespace is NS_HTML
827                                 ins_mode = ins_mode_in_body
828                                 return
829                         # 15. If node is a frameset element, then switch the insertion mode
830                         # to "in frameset" and abort these steps. (fragment case)
831                         if node.name is 'frameset' and node.namespace is NS_HTML
832                                 ins_mode = ins_mode_in_frameset
833                                 return
834                         # 16. If node is an html element, run these substeps:
835                         if node.name is 'html' and node.namespace is NS_HTML
836                                 # 1. If the head element pointer is null, switch the insertion
837                                 # mode to "before head" and abort these steps. (fragment case)
838                                 if head_element_pointer is null
839                                         ins_mode = ins_mode_before_head
840                                 else
841                                         # 2. Otherwise, the head element pointer is not null,
842                                         # switch the insertion mode to "after head" and abort these
843                                         # steps.
844                                         ins_mode = ins_mode_after_head
845                                 return
846                         # 17. If last is true, then switch the insertion mode to "in body"
847                         # and abort these steps. (fragment case)
848                         if last
849                                 ins_mode = ins_mode_in_body
850                                 return
851                         # 18. Let node now be the node before node in the stack of open
852                         # elements.
853                         node_i += 1
854                         node = open_els[node_i]
855                         # 19. Return to the step labeled loop.
856
857         # 8.2.3.2
858
859         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
860         adjusted_current_node = ->
861                 if open_els.length is 1 and flag_fragment_parsing
862                         return context_element
863                 return open_els[0]
864
865         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
866         # this implementation is structured (mostly) as described at the link above.
867         # capitalized comments are the "labels" described at the link above.
868         reconstruct_afe = ->
869                 return if afe.length is 0
870                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
871                         return
872                 # Rewind
873                 i = 0
874                 loop
875                         if i is afe.length - 1
876                                 break
877                         i += 1
878                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
879                                 i -= 1 # Advance
880                                 break
881                 # Create
882                 loop
883                         el = insert_html_element afe[i].token
884                         afe[i] = el
885                         break if i is 0
886                         i -= 1 # Advance
887
888         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
889         # adoption agency algorithm
890         # overview here:
891         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
892         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
893         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
        # Runs the adoption agency algorithm for an end tag.
        #
        # subject: the tag name; it is compared against the .name of elements in
        # open_els and afe.
        #
        # Reads and mutates open_els, afe and the parent/children links of the
        # elements involved. Both lists have their newest entry at index 0, so the
        # index-0-first scans below visit the newest entries first.
        #
        # The outer loop only exits through a bare return (at most 8 iterations),
        # so the final debug_log after the loop is not reached.
        #
        # The numbered comments below quote the steps of the algorithm linked above.
        adoption_agency = (subject) ->
                debug_log "adoption_agency()"
                debug_log "tree: #{serialize_els doc.children, false, true}"
                debug_log "open_els: #{serialize_els open_els, true, true}"
                debug_log "afe: #{serialize_els afe, true, true}"
                # shortcut: the current node is the HTML element being closed
                if open_els[0].name is subject and open_els[0].namespace is NS_HTML
                        el = open_els[0]
                        open_els.shift()
                        # remove it from the list of active formatting elements (if found)
                        for t, i in afe
                                if t is el
                                        afe.splice i, 1
                                        break
                        debug_log "aaa: starting off with subject on top of stack, exiting"
                        return
                outer = 0
                loop
                        if outer >= 8
                                return
                        outer += 1
                        # 5. Let formatting element be the last element in the list of
                        # active formatting elements that: is between the end of the list
                        # and the last scope marker in the list, if any, or the start of
                        # the list otherwise, and  has the tag name subject.
                        fe = null
                        # fe_of_afe is left holding the index of fe within afe; it is used
                        # by the splices in steps 6 and 10
                        for t, fe_of_afe in afe
                                if t.type is TYPE_AFE_MARKER
                                        break
                                if t.name is subject
                                        fe = t
                                        break
                        # If there is no such element, then abort these steps and instead
                        # act as described in the "any other end tag" entry above.
                        if fe is null
                                debug_log "aaa: fe not found in afe"
                                in_body_any_other_end_tag subject
                                return
                        # 6. If formatting element is not in the stack of open elements,
                        # then this is a parse error; remove the element from the list, and
                        # abort these steps.
                        in_open_els = false
                        # fe_of_open_els is left holding the index of fe within open_els
                        # (used in step 11)
                        for t, fe_of_open_els in open_els
                                if t is fe
                                        in_open_els = true
                                        break
                        unless in_open_els
                                debug_log "aaa: fe not found in open_els"
                                parse_error()
                                # "remove it from the list" must mean afe, since it's not in open_els
                                afe.splice fe_of_afe, 1
                                return
                        # 7. If formatting element is in the stack of open elements, but
                        # the element is not in scope, then this is a parse error; abort
                        # these steps.
                        unless el_is_in_scope fe
                                debug_log "aaa: fe not in scope"
                                parse_error()
                                return
                        # 8. If formatting element is not the current node, this is a parse
                        # error. (But do not abort these steps.)
                        unless open_els[0] is fe
                                parse_error()
                                # continue
                        # 9. Let furthest block be the topmost node in the stack of open
                        # elements that is lower in the stack than formatting element, and
                        # is an element in the special category. There might not be one.
                        fb = null
                        fb_of_open_els = null
                        for t, i in open_els
                                if t is fe
                                        break
                                if el_is_special t
                                        fb = t
                                        fb_of_open_els = i
                                        # and continue, to see if there's one that's more "topmost"
                        # 10. If there is no furthest block, then the UA must first pop all
                        # the nodes from the bottom of the stack of open elements, from the
                        # current node up to and including formatting element, then remove
                        # formatting element from the list of active formatting elements,
                        # and finally abort these steps.
                        if fb is null
                                debug_log "aaa: no fb"
                                loop
                                        t = open_els.shift()
                                        if t is fe
                                                afe.splice fe_of_afe, 1
                                                return
                        # 11. Let common ancestor be the element immediately above
                        # formatting element in the stack of open elements.
                        ca = open_els[fe_of_open_els + 1] # common ancestor

                        node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
                        # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
                        bookmark = new_aaa_bookmark()
                        # the bookmark takes fe's index in afe, pushing fe one index higher
                        for t, i in afe
                                if t is fe
                                        afe.splice i, 0, bookmark
                                        break
                        node = last_node = fb
                        inner = 0
                        loop
                                inner += 1
                                # 3. Let node be the element immediately above node in the
                                # stack of open elements, or if node is no longer in the stack
                                # of open elements (e.g. because it got removed by this
                                # algorithm), the element that was immediately above node in
                                # the stack of open elements before node was removed.
                                node_next = null
                                for t, i in open_els
                                        if t is node
                                                node_next = open_els[i + 1]
                                                break
                                # node_next stays null when node was not found in open_els; in
                                # that case use the neighbour remembered in node_above
                                node = node_next ? node_above
                                debug_log "inner loop #{inner}"
                                debug_log "tree: #{serialize_els doc.children, false, true}"
                                debug_log "open_els: #{serialize_els open_els, true, true}"
                                debug_log "afe: #{serialize_els afe, true, true}"
                                debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
                                debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
                                debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
                                debug_log "node: #{node.serialize true, true}"
                                # TODO make sure node_above gets re-set if/when node is removed from open_els

                                # 4. If node is formatting element, then go to the next step in
                                # the overall algorithm.
                                if node is fe
                                        break
                                debug_log "the meat"
                                # 5. If inner loop counter is greater than three and node is in
                                # the list of active formatting elements, then remove node from
                                # the list of active formatting elements.
                                node_in_afe = false
                                for t, i in afe
                                        if t is node
                                                if inner > 3
                                                        afe.splice i, 1
                                                        debug_log "max out inner"
                                                else
                                                        node_in_afe = true
                                                        debug_log "in afe"
                                                break
                                # 6. If node is not in the list of active formatting elements,
                                # then remove node from the stack of open elements and then go
                                # back to the step labeled inner loop.
                                unless node_in_afe
                                        debug_log "not in afe"
                                        for t, i in open_els
                                                if t is node
                                                        # remember the neighbour before node disappears
                                                        node_above = open_els[i + 1]
                                                        open_els.splice i, 1
                                                        break
                                        continue
                                debug_log "the bones"
                                # 7. create an element for the token for which the element node
                                # was created, in the HTML namespace, with common ancestor as
                                # the intended parent; replace the entry for node in the list
                                # of active formatting elements with an entry for the new
                                # element, replace the entry for node in the stack of open
                                # elements with an entry for the new element, and let node be
                                # the new element.
                                new_node = token_to_element node.token, NS_HTML, ca
                                for t, i in afe
                                        if t is node
                                                afe[i] = new_node
                                                debug_log "replaced in afe"
                                                break
                                for t, i in open_els
                                        if t is node
                                                node_above = open_els[i + 1]
                                                open_els[i] = new_node
                                                debug_log "replaced in open_els"
                                                break
                                node = new_node
                                # 8. If last node is furthest block, then move the
                                # aforementioned bookmark to be immediately after the new node
                                # in the list of active formatting elements.
                                if last_node is fb
                                        for t, i in afe
                                                if t is bookmark
                                                        afe.splice i, 1
                                                        debug_log "removed bookmark"
                                                        break
                                        for t, i in afe
                                                if t is node
                                                        # "after" means lower
                                                        afe.splice i, 0, bookmark # "after as <-
                                                        debug_log "placed bookmark after node"
                                                        debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
                                                        break
                                # 9. Insert last node into node, first removing it from its
                                # previous parent node if any.
                                if last_node.parent?
                                        debug_log "last_node has parent"
                                        for c, i in last_node.parent.children
                                                if c is last_node
                                                        debug_log "removing last_node from parent"
                                                        last_node.parent.children.splice i, 1
                                                        break
                                node.children.push last_node
                                last_node.parent = node
                                # 10. Let last node be node.
                                last_node = node
                                debug_log "at last"
                                # 11. Return to the step labeled inner loop.
                        # 14. Insert whatever last node ended up being in the previous step
                        # at the appropriate place for inserting a node, but using common
                        # ancestor as the override target.

                        # In the case where fe is immediately followed by fb:
                        #   * inner loop exits out early (node==fe)
                        #   * last_node is fb
                        #   * last_node is still in the tree (not a duplicate)
                        if last_node.parent?
                                debug_log "FEFIRST? last_node has parent"
                                for c, i in last_node.parent.children
                                        if c is last_node
                                                debug_log "removing last_node from parent"
                                                last_node.parent.children.splice i, 1
                                                break

                        debug_log "after aaa inner loop"
                        debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
                        debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
                        debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
                        debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
                        debug_log "tree: #{serialize_els doc.children, false, true}"

                        debug_log "insert"


                        # can't use standard insert token thing, because it's already in
                        # open_els and must stay at its current position in open_els
                        # dest is used as [parent element, index within parent.children]
                        dest = adjusted_insertion_location ca
                        dest[0].children.splice dest[1], 0, last_node
                        last_node.parent = dest[0]


                        debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
                        debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
                        debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
                        debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
                        debug_log "tree: #{serialize_els doc.children, false, true}"

                        # 15. Create an element for the token for which formatting element
                        # was created, in the HTML namespace, with furthest block as the
                        # intended parent.
                        new_element = token_to_element fe.token, NS_HTML, fb
                        # 16. Take all of the child nodes of furthest block and append them
                        # to the element created in the last step.
                        while fb.children.length
                                t = fb.children.shift()
                                t.parent = new_element
                                new_element.children.push t
                        # 17. Append that new element to furthest block.
                        new_element.parent = fb
                        fb.children.push new_element
                        # 18. Remove formatting element from the list of active formatting
                        # elements, and insert the new element into the list of active
                        # formatting elements at the position of the aforementioned
                        # bookmark.
                        for t, i in afe
                                if t is fe
                                        afe.splice i, 1
                                        break
                        # the bookmark entry itself is overwritten, so it does not stay in afe
                        for t, i in afe
                                if t is bookmark
                                        afe[i] = new_element
                                        break
                        # 19. Remove formatting element from the stack of open elements,
                        # and insert the new element into the stack of open elements
                        # immediately below the position of furthest block in that stack.
                        for t, i in open_els
                                if t is fe
                                        open_els.splice i, 1
                                        break
                        # inserting at fb's index puts new_element one index lower than fb
                        for t, i in open_els
                                if t is fb
                                        open_els.splice i, 0, new_element
                                        break
                        # 20. Jump back to the step labeled outer loop.
                        debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
                        debug_log "tree: #{serialize_els doc.children, false, true}"
                        debug_log "open_els: #{serialize_els open_els, true, true}"
                        debug_log "afe: #{serialize_els afe, true, true}"
                debug_log "AAA DONE"
1179
1180         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
close_p_element = ->
        # Close the nearest open HTML <p> element: generate implied end tags
        # (for everything except p), report a parse error if the current node
        # is then not a p, and pop open_els up to and including that p.
        generate_implied_end_tags 'p' # 'p' is the tag that must stay open
        cur_el = open_els[0]
        parse_error() unless cur_el.name is 'p' and cur_el.namespace is NS_HTML
        # The length guard is a safety net: the bottom-most entry is never
        # popped, even if no p is found.
        while open_els.length > 1
                popped_el = open_els.shift()
                return if popped_el.name is 'p' and popped_el.namespace is NS_HTML
        return
close_p_if_in_button_scope = ->
        # Close the open <p> only when an HTML p element is in button scope;
        # otherwise do nothing.
        close_p_element() if is_in_button_scope 'p', NS_HTML
1192
1193         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1194         # aka insert_a_character = (t) ->
insert_character = (t) ->
        # Insert the text token t at the adjusted insertion location. When the
        # node just before that location is already a text node, t's text is
        # appended to it instead of adding a new child.
        [ins_parent, ins_index] = adjusted_insertion_location()
        # fixfull check for Document node
        if ins_index > 0
                before = ins_parent.children[ins_index - 1]
                if before.type is TYPE_TEXT
                        before.text += t.text
                        return
        ins_parent.children.splice ins_index, 0, t
1204
1205
1206         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
process_token = (t) ->
        # Tree construction dispatcher: decide whether token t is handled by
        # the current insertion mode (HTML rules) or by the foreign content
        # rules, based on the adjusted current node and the token type.
        acn = adjusted_current_node()
        tok_is_start = t.type is TYPE_START_TAG
        tok_is_text = t.type is TYPE_TEXT
        use_html_rules =
                if not acn?
                        true # no adjusted current node
                else if acn.namespace is NS_HTML
                        true
                else if is_mathml_text_integration_point(acn) and (tok_is_text or (tok_is_start and t.name isnt 'mglyph' and t.name isnt 'malignmark'))
                        true
                else if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and tok_is_start and t.name is 'svg'
                        true
                else if is_html_integration(acn) and (tok_is_start or tok_is_text)
                        true
                else
                        t.type is TYPE_EOF
        if use_html_rules
                ins_mode t
        else
                in_foreign_content t
        return
1234
1235         # 8.2.5.1
1236         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1237         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
adjusted_insertion_location = (override_target = null) ->
        # Find the "appropriate place for inserting a node".
        #
        # override_target: optional node to insert into instead of the current
        #   node (open_els[0]).
        #
        # Returns a two-element array [node, index]: the new child belongs in
        # node.children at position index.

        # 1. If there was an override target specified, then let target be the
        # override target. Otherwise, let target be the current node.
        target = override_target ? open_els[0]
        # 2. Determine the adjusted insertion location using the first matching
        # steps from the following list:
        #
        # If foster parenting is enabled and target is a table, tbody, tfoot,
        # thead, or tr element Foster parenting happens when content is
        # misnested in tables.
        if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
                loop # once. this is here so we can ``break`` to "abort these substeps"
                        # 1. Let last template be the last template element in the
                        # stack of open elements, if any.
                        # (index 0 is the most recently added element, so the
                        # first match is the "last" one)
                        last_template = null
                        last_template_i = null
                        for el, i in open_els
                                if el.name is 'template' and el.namespace is NS_HTML
                                        last_template = el
                                        last_template_i = i
                                        break
                        # 2. Let last table be the last table element in the stack of
                        # open elements, if any.
                        last_table = null
                        last_table_i = null
                        for el, i in open_els
                                if el.name is 'table' and el.namespace is NS_HTML
                                        last_table = el
                                        last_table_i = i
                                        break
                        # 3. If there is a last template and either there is no last
                        # table, or there is one, but last template is lower (more
                        # recently added) than last table in the stack of open
                        # elements, then: let adjusted insertion location be inside
                        # last template's template contents, after its last child (if
                        # any), and abort these substeps.
                        if last_template and (last_table is null or last_template_i < last_table_i)
                                target = last_template # fixfull should be its contents
                                target_i = target.children.length
                                break
                        # 4. If there is no last table, then let adjusted insertion
                        # location be inside the first element in the stack of open
                        # elements (the html element), after its last child (if any),
                        # and abort these substeps. (fragment case)
                        if last_table is null
                                # the html element sits at the far end of open_els
                                target = open_els[open_els.length - 1]
                                target_i = target.children.length
                                break
                        # 5. If last table has a parent element, then let adjusted
                        # insertion location be inside last table's parent element,
                        # immediately before last table, and abort these substeps.
                        if last_table.parent?
                                for c, i in last_table.parent.children
                                        if c is last_table
                                                target = last_table.parent
                                                target_i = i
                                                break
                                break
                        # 6. Let previous element be the element immediately above last
                        # table in the stack of open elements.
                        # ("above" means a higher index in open_els)
                        previous_element = open_els[last_table_i + 1]
                        # 7. Let adjusted insertion location be inside previous
                        # element, after its last child (if any).
                        target = previous_element
                        target_i = target.children.length
                        # Note: These steps are involved in part because it's possible
                        # for elements, the table element in this case in particular,
                        # to have been moved by a script around in the DOM, or indeed
                        # removed from the DOM entirely, after the element was inserted
                        # by the parser.
                        break # don't really loop
        else
                # Otherwise Let adjusted insertion location be inside target, after
                # its last child (if any).
                target_i = target.children.length

        # 3. If the adjusted insertion location is inside a template element,
        # let it instead be inside the template element's template contents,
        # after its last child (if any).
        # fixfull (template)

        # 4. Return the adjusted insertion location.
        return [target, target_i]
1327
1328         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1329         # aka create_an_element_for_token
token_to_element = (t, namespace, intended_parent) ->
        # Build a new element Node from tag token t in the given namespace.
        # The token's attribute pairs (t.attrs_a, [name, value] arrays) are
        # folded into a hash; when a name repeats, the later pair wins.
        # intended_parent is accepted but not used here.
        attr_map = {}
        # TODO check what to do with duplicate attrs
        attr_map[attr_name] = attr_value for [attr_name, attr_value] in t.attrs_a

        # TODO 2. If the newly created element has an xmlns attribute in the
        # XMLNS namespace whose value is not exactly the same as the element's
        # namespace, that is a parse error. Similarly, if the newly created
        # element has an xmlns:xlink attribute in the XMLNS namespace whose
        # value is not the XLink Namespace, that is a parse error.

        # fixfull: the spec says stuff about form pointers and ownerDocument

        return new Node TYPE_TAG, {name: t.name, namespace: namespace, attrs: attr_map, token: t}
1346
1347         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
insert_foreign_element = (token, namespace) ->
        # Create an element for token in the given namespace, splice it into
        # the tree at the adjusted insertion location, push it onto open_els
        # (making it the current node) and return it.
        [ins_parent, ins_index] = adjusted_insertion_location()
        el = token_to_element token, namespace, ins_parent
        # TODO skip this next step if it's broken (eg ins_parent is document with child already)
        el.parent = ins_parent
        ins_parent.children.splice ins_index, 0, el
        open_els.unshift el
        el
1358         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
insert_html_element = (token) ->
        # Insert an element for token in the HTML namespace; returns the new
        # element.
        return insert_foreign_element token, NS_HTML
1361
1362         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1363         # position should be [node, index_within_children]
insert_comment = (t, position = null) ->
        # Insert comment token t into the tree. position, when given, is
        # [node, index_within_children]; otherwise the adjusted insertion
        # location is used.
        position = adjusted_insertion_location() unless position?
        [comment_parent, comment_index] = position
        comment_parent.children.splice comment_index, 0, t
1367
1368         # 8.2.5.2
1369         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
parse_generic_raw_text = (t) ->
        # Generic raw text element parsing: insert an HTML element for t,
        # remember the current insertion mode, then switch the tokenizer to
        # the RAWTEXT state and the tree builder to the "text" mode.
        insert_html_element t
        original_ins_mode = ins_mode # saved before ins_mode is replaced below
        tok_state = tok_state_rawtext
        ins_mode = ins_mode_text
parse_generic_rcdata_text = (t) ->
        # Generic RCDATA element parsing: insert an HTML element for t,
        # remember the current insertion mode, then switch the tokenizer to
        # the RCDATA state and the tree builder to the "text" mode.
        insert_html_element t
        original_ins_mode = ins_mode # saved before ins_mode is replaced below
        tok_state = tok_state_rcdata
        ins_mode = ins_mode_text
1380
1381         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1382         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
generate_implied_end_tags = (except = null) ->
        # Pop the current node for as long as it is listed in end_tag_implied
        # (name mapped to its namespace). Stops early at an element whose name
        # equals `except`, leaving that element on the stack.
        until end_tag_implied[open_els[0].name] isnt open_els[0].namespace or open_els[0].name is except
                open_els.shift()
1386
1387         # 8.2.5.4 The rules for parsing tokens in HTML content
1388         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1389
1390         # 8.2.5.4.1 The "initial" insertion mode
1391         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
ins_mode_initial = (t) ->
        # "initial" insertion mode: whitespace is dropped, comments and the
        # doctype are appended to the document, and anything else is handed
        # on to the "before html" mode.
        return if is_space_tok t
        switch t.type
                when TYPE_COMMENT
                        # ?fixfull
                        doc.children.push t
                when TYPE_DOCTYPE
                        # FIXME check identifiers, set quirks, etc
                        # fixfull
                        doc.children.push t
                        ins_mode = ins_mode_before_html
                else
                        # Anything else
                        #fixfull (iframe, quirks)
                        ins_mode = ins_mode_before_html
                        process_token t
        return
1410
1411         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
ins_mode_before_html = (t) ->
        # "before html" insertion mode: creates the root html element, either
        # from an explicit <html> start tag or implicitly, in which case the
        # token is reprocessed in the "before head" mode.
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_COMMENT
                doc.children.push t
                return
        return if is_space_tok t
        explicit_html = t.type is TYPE_START_TAG and t.name is 'html'
        unless explicit_html
                # only these end tags fall through to "anything else"
                if t.type is TYPE_END_TAG and t.name not in ['head', 'body', 'html', 'br']
                        parse_error()
                        return
        # Root element comes from the token itself, or from a made-up <html>
        html_tok = if explicit_html then t else new_open_tag 'html'
        el = token_to_element html_tok, NS_HTML, doc
        doc.children.push el
        open_els.unshift el
        # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
        # ?fixfull browsing context
        ins_mode = ins_mode_before_head
        process_token t unless explicit_html
        return
1443
1444         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
ins_mode_before_head = (t) ->
        # "before head" insertion mode: inserts the head element, either from
        # an explicit <head> start tag or implicitly, in which case the token
        # is reprocessed in the "in head" mode.
        return if is_space_tok t
        switch t.type
                when TYPE_COMMENT
                        insert_comment t
                        return
                when TYPE_DOCTYPE
                        parse_error()
                        return
                when TYPE_START_TAG
                        if t.name is 'html'
                                ins_mode_in_body t
                                return
                        if t.name is 'head'
                                head_element_pointer = insert_html_element t
                                ins_mode = ins_mode_in_head
                                return
                when TYPE_END_TAG
                        # only these end tags fall through to "anything else"
                        unless t.name in ['head', 'body', 'html', 'br']
                                parse_error()
                                return
        # Anything else: act as if a <head> start tag had been seen
        head_element_pointer = insert_html_element new_open_tag 'head'
        ins_mode = ins_mode_in_head
        process_token t
1474
1475         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
ins_mode_in_head_else = (t) ->
        # "Anything else" branch of the "in head" mode, factored out so the
        # flow control can follow the spec: pop the current node (per the
        # spec, the head element), switch to "after head", reprocess t.
        open_els.shift()
        ins_mode = ins_mode_after_head
        return process_token t
ins_mode_in_head = (t) ->
        # "in head" insertion mode. Handles token t while the head element is
        # open. Other insertion modes also call this directly for tokens
        # that are processed "using the rules for in head".
        if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
                insert_character t
                return
        if t.type is TYPE_COMMENT
                insert_comment t
                return
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_START_TAG and t.name is 'html'
                ins_mode_in_body t
                return
        if t.type is TYPE_START_TAG and t.name in ['base', 'basefont', 'bgsound', 'link']
                # void element: insert it, then pop it right away
                insert_html_element t
                open_els.shift()
                t.acknowledge_self_closing()
                return
        if t.type is TYPE_START_TAG and t.name is 'meta'
                insert_html_element t
                open_els.shift()
                t.acknowledge_self_closing()
                # fixfull encoding stuff
                return
        if t.type is TYPE_START_TAG and t.name is 'title'
                parse_generic_rcdata_text t
                return
        if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
                parse_generic_raw_text t
                return
        if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
                insert_html_element t
                ins_mode = ins_mode_in_head_noscript
                return
        if t.type is TYPE_START_TAG and t.name is 'script'
                ail = adjusted_insertion_location()
                el = token_to_element t, NS_HTML, ail[0]
                el.flag 'parser-inserted', true
                # fixfull fragment case
                # link the element to its parent, the same way
                # insert_foreign_element does for every other inserted element
                el.parent = ail[0]
                ail[0].children.splice ail[1], 0, el
                open_els.unshift el
                tok_state = tok_state_script_data
                original_ins_mode = ins_mode # make sure orig... is defined
                ins_mode = ins_mode_text
                return
        if t.type is TYPE_END_TAG and t.name is 'head'
                open_els.shift() # will be a head element... spec says so
                ins_mode = ins_mode_after_head
                return
        if t.type is TYPE_END_TAG and t.name in ['body', 'html', 'br']
                ins_mode_in_head_else t
                return
        if t.type is TYPE_START_TAG and t.name is 'template'
                insert_html_element t
                afe_push_marker()
                flag_frameset_ok = false
                ins_mode = ins_mode_in_template
                template_ins_modes.unshift ins_mode_in_template
                return
        if t.type is TYPE_END_TAG and t.name is 'template'
                if template_tag_is_open()
                        # the parentheses matter: without them this is only a
                        # reference to the function and nothing gets popped
                        generate_implied_end_tags()
                        if open_els[0].name isnt 'template'
                                parse_error()
                        # pop up to and including the template element
                        loop
                                el = open_els.shift()
                                if el.name is 'template' and el.namespace is NS_HTML
                                        break
                        clear_afe_to_marker()
                        template_ins_modes.shift()
                        reset_ins_mode()
                else
                        parse_error()
                return
        if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
                parse_error()
                return
        # Anything else
        ins_mode_in_head_else t
1558
1559         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
ins_mode_in_head_noscript_else = (t) ->
        # "Anything else" branch of the "in head noscript" mode: parse error,
        # pop the current node, switch back to "in head" and reprocess t.
        parse_error()
        open_els.shift()
        ins_mode = ins_mode_in_head
        return process_token t
ins_mode_in_head_noscript = (t) ->
        # "in head noscript" insertion mode: handles token t while a noscript
        # element is open inside head.
        if t.type is TYPE_DOCTYPE
                parse_error()
        else if t.type is TYPE_START_TAG and t.name is 'html'
                ins_mode_in_body t
        else if t.type is TYPE_END_TAG and t.name is 'noscript'
                open_els.shift()
                ins_mode = ins_mode_in_head
        else if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and t.name in ['basefont', 'bgsound', 'link', 'meta', 'noframes', 'style'])
                # handled exactly as in the "in head" mode
                ins_mode_in_head t
        else if t.type is TYPE_END_TAG and t.name is 'br'
                ins_mode_in_head_noscript_else t
        else if (t.type is TYPE_START_TAG and t.name in ['head', 'noscript']) or t.type is TYPE_END_TAG
                parse_error()
        else
                # Anything else
                ins_mode_in_head_noscript_else t
        return
1588
1589
1590
1591         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
ins_mode_after_head_else = (t) ->
        # "Anything else" branch of the "after head" mode: insert an implied
        # body element, switch to "in body" and reprocess t.
        insert_html_element new_open_tag 'body'
        ins_mode = ins_mode_in_body
        process_token t
        return
ins_mode_after_head = (t) ->
        # "after head" insertion mode: handles token t between the end of the
        # head element and the start of body/frameset.
        if is_space_tok t
                insert_character t
                return
        if t.type is TYPE_COMMENT
                insert_comment t
                return
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_START_TAG and t.name is 'html'
                ins_mode_in_body t
                return
        if t.type is TYPE_START_TAG and t.name is 'body'
                insert_html_element t
                flag_frameset_ok = false
                ins_mode = ins_mode_in_body
                return
        if t.type is TYPE_START_TAG and t.name is 'frameset'
                insert_html_element t
                ins_mode = ins_mode_in_frameset
                return
        if t.type is TYPE_START_TAG and t.name in ['base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title']
                parse_error()
                # temporarily re-open head so the token lands inside it
                open_els.unshift head_element_pointer
                ins_mode_in_head t
                # Remove head again. It might not be the current node any more
                # (eg a script/title element was pushed on top of it), so search
                # for it. This must be "in" (array elements), not "of" (keys).
                for el, i in open_els
                        if el is head_element_pointer
                                open_els.splice i, 1
                                return
                console.log "warning: 23904 couldn't find head element in open_els"
                return
        if t.type is TYPE_END_TAG and t.name is 'template'
                ins_mode_in_head t
                return
        if t.type is TYPE_END_TAG and t.name in ['body', 'html', 'br']
                ins_mode_after_head_else t
                return
        if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
                parse_error()
                return
        # Anything else
        ins_mode_after_head_else t
1641
1642         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
        # "Any other end tag" steps of the "in body" mode.
        #
        # Walks open_els from the current node. At the first HTML element
        # called `name`: generate implied end tags (except for `name`), report
        # a parse error if that element is still not the current node, then
        # pop up to and including it. If an element in special_elements is
        # reached first: parse error, and the end tag is ignored.
        for el in open_els
                if el.name is name and el.namespace is NS_HTML
                        generate_implied_end_tags name # arg is exception
                        # generate_implied_end_tags may have popped entries, so
                        # positions found before the call are stale; compare
                        # and pop by node identity instead of by index
                        parse_error() unless open_els[0] is el
                        while open_els.length > 0
                                break if open_els.shift() is el
                        return
                if special_elements[el.name] is el.namespace
                        parse_error()
                        return
        return
1656         ins_mode_in_body = (t) ->
1657                 if t.type is TYPE_TEXT and t.text is "\u0000"
1658                         parse_error()
1659                         return
1660                 if is_space_tok t
1661                         reconstruct_afe()
1662                         insert_character t
1663                         return
1664                 if t.type is TYPE_TEXT
1665                         reconstruct_afe()
1666                         insert_character t
1667                         flag_frameset_ok = false
1668                         return
1669                 if t.type is TYPE_COMMENT
1670                         insert_comment t
1671                         return
1672                 if t.type is TYPE_DOCTYPE
1673                         parse_error()
1674                         return
1675                 if t.type is TYPE_START_TAG and t.name is 'html'
1676                         parse_error()
1677                         return if template_tag_is_open()
1678                         root_attrs = open_els[open_els.length - 1].attrs
1679                         for a of t.attrs_a
1680                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1681                         return
1682
1683                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1684                         ins_mode_in_head t
1685                         return
1686                 if t.type is TYPE_START_TAG and t.name is 'body'
1687                         parse_error()
1688                         return if open_els.length < 2
1689                         second = open_els[open_els.length - 2]
1690                         return unless second.namespace is NS_HTML
1691                         return unless second.name is 'body'
1692                         return if template_tag_is_open()
1693                         flag_frameset_ok = false
1694                         for a of t.attrs_a
1695                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1696                         return
1697                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1698                         parse_error()
1699                         return if open_els.length < 2
1700                         second_i = open_els.length - 2
1701                         second = open_els[second_i]
1702                         return unless second.namespace is NS_HTML
1703                         return unless second.name is 'body'
1704                         if flag_frameset_ok is false
1705                                 return
1706                         if second.parent?
1707                                 for el, i in second.parent.children
1708                                         if el is second
1709                                                 second.parent.children.splice i, 1
1710                                                 break
1711                         open_els.splice second_i, 1
1712                         # pop everything except the "root html element"
1713                         while open_els.length > 1
1714                                 open_els.shift()
1715                         insert_html_element t
1716                         ins_mode = ins_mode_in_frameset
1717                         return
1718                 if t.type is TYPE_EOF
1719                         ok_tags = {
1720                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1721                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1722                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1723                         }
1724                         for el in open_els
1725                                 unless ok_tags[t.name] is el.namespace
1726                                         parse_error()
1727                                         break
1728                         if template_ins_modes.length > 0
1729                                 ins_mode_in_template t
1730                         else
1731                                 stop_parsing()
1732                         return
1733                 if t.type is TYPE_END_TAG and t.name is 'body'
1734                         unless is_in_scope 'body', NS_HTML
1735                                 parse_error()
1736                                 return
1737                         ok_tags = {
1738                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1739                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1740                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1741                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1742                                 html:NS_HTML
1743                         }
1744                         for el in open_els # one parse error is enough, so stop at the first unexpected open element
1745                                 unless ok_tags[el.name] is el.namespace # look up the open element's name (t.name is always 'body' here)
1746                                         parse_error()
1747                                         break
1748                         ins_mode = ins_mode_after_body
1749                         return
1750                 if t.type is TYPE_END_TAG and t.name is 'html'
1751                         unless is_in_scope 'body', NS_HTML
1752                                 parse_error()
1753                                 return
1754                         ok_tags = {
1755                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1756                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1757                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1758                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1759                                 html:NS_HTML
1760                         }
1761                         for el in open_els # one parse error is enough, so stop at the first unexpected open element
1762                                 unless ok_tags[el.name] is el.namespace # look up the open element's name (t.name is always 'html' here)
1763                                         parse_error()
1764                                         break
1765                         ins_mode = ins_mode_after_body
1766                         process_token t # reprocess the same token under the mode just selected
1767                         return
1768                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1769                         close_p_if_in_button_scope()
1770                         insert_html_element t
1771                         return
1772                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1773                         close_p_if_in_button_scope()
1774                         if h_tags[open_els[0].name] is open_els[0].namespace
1775                                 parse_error()
1776                                 open_els.shift()
1777                         insert_html_element t
1778                         return
1779                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1780                         close_p_if_in_button_scope()
1781                         insert_html_element t
1782                         # spec: If the next token is a "LF" (U+000A) character token, then
1783                         # ignore that token and move on to the next one. (Newlines at the
1784                         # start of pre blocks are ignored as an authoring convenience.)
1785                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1786                                 cur += 1
1787                         flag_frameset_ok = false
1788                         return
1789                 if t.type is TYPE_START_TAG and t.name is 'form'
1790                         unless form_element_pointer is null or template_tag_is_open()
1791                                 parse_error()
1792                                 return
1793                         close_p_if_in_button_scope()
1794                         el = insert_html_element t
1795                         unless template_tag_is_open()
1796                                 form_element_pointer = el
1797                         return
1798                 if t.type is TYPE_START_TAG and t.name is 'li'
1799                         flag_frameset_ok = false
1800                         for node in open_els
1801                                 if node.name is 'li' and node.namespace is NS_HTML
1802                                         generate_implied_end_tags 'li' # arg is exception
1803                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1804                                                 parse_error()
1805                                         loop
1806                                                 el = open_els.shift()
1807                                                 if el.name is 'li' and el.namespace is NS_HTML
1808                                                         break
1809                                         break
1810                                 if el_is_special_not_adp node
1811                                                 break
1812                         close_p_if_in_button_scope()
1813                         insert_html_element t
1814                         return
1815                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1816                         flag_frameset_ok = false
1817                         for node in open_els
1818                                 if node.name is 'dd' and node.namespace is NS_HTML
1819                                         generate_implied_end_tags 'dd' # arg is exception
1820                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1821                                                 parse_error()
1822                                         loop
1823                                                 el = open_els.shift()
1824                                                 if el.name is 'dd' and el.namespace is NS_HTML
1825                                                         break
1826                                         break
1827                                 if node.name is 'dt' and node.namespace is NS_HTML
1828                                         generate_implied_end_tags 'dt' # arg is exception
1829                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1830                                                 parse_error()
1831                                         loop
1832                                                 el = open_els.shift()
1833                                                 if el.name is 'dt' and el.namespace is NS_HTML
1834                                                         break
1835                                         break
1836                                 if el_is_special_not_adp node
1837                                         break
1838                         close_p_if_in_button_scope()
1839                         insert_html_element t
1840                         return
1841                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1842                         close_p_if_in_button_scope()
1843                         insert_html_element t
1844                         tok_state = tok_state_plaintext
1845                         return
1846                 if t.type is TYPE_START_TAG and t.name is 'button'
1847                         if is_in_scope 'button', NS_HTML
1848                                 parse_error()
1849                                 generate_implied_end_tags()
1850                                 loop
1851                                         el = open_els.shift()
1852                                         if el.name is 'button' and el.namespace is NS_HTML
1853                                                 break
1854                         reconstruct_afe()
1855                         insert_html_element t
1856                         flag_frameset_ok = false
1857                         return
1858                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1859                         unless is_in_scope t.name, NS_HTML
1860                                 parse_error()
1861                                 return
1862                         generate_implied_end_tags()
1863                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1864                                 parse_error()
1865                         loop
1866                                 el = open_els.shift()
1867                                 if el.name is t.name and el.namespace is NS_HTML
1868                                         return
1869                         return
1870                 if t.type is TYPE_END_TAG and t.name is 'form'
1871                         unless template_tag_is_open()
1872                                 node = form_element_pointer
1873                                 form_element_pointer = null
1874                                 if node is null or not el_is_in_scope node
1875                                         parse_error()
1876                                         return
1877                                 generate_implied_end_tags()
1878                                 if open_els[0] isnt node
1879                                         parse_error()
1880                                 for el, i in open_els
1881                                         if el is node
1882                                                 open_els.splice i, 1
1883                                                 break
1884                         else
1885                                 unless is_in_scope 'form', NS_HTML
1886                                         parse_error()
1887                                         return
1888                                 generate_implied_end_tags()
1889                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1890                                         parse_error()
1891                                 loop
1892                                         el = open_els.shift()
1893                                         if el.name is 'form' and el.namespace is NS_HTML
1894                                                 break
1895                         return
1896                 if t.type is TYPE_END_TAG and t.name is 'p'
1897                         unless is_in_button_scope 'p', NS_HTML
1898                                 parse_error()
1899                                 insert_html_element new_open_tag 'p'
1900                         close_p_element()
1901                         return
1902                 if t.type is TYPE_END_TAG and t.name is 'li'
1903                         unless is_in_li_scope 'li', NS_HTML
1904                                 parse_error()
1905                                 return
1906                         generate_implied_end_tags 'li' # arg is exception
1907                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1908                                 parse_error()
1909                         loop
1910                                 el = open_els.shift()
1911                                 if el.name is 'li' and el.namespace is NS_HTML
1912                                         break
1913                         return
1914                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1915                         unless is_in_scope t.name, NS_HTML
1916                                 parse_error()
1917                                 return
1918                         generate_implied_end_tags t.name # arg is exception
1919                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1920                                 parse_error()
1921                         loop
1922                                 el = open_els.shift()
1923                                 if el.name is t.name and el.namespace is NS_HTML
1924                                         break
1925                         return
1926                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1927                         h_in_scope = false
1928                         for el in open_els
1929                                 if h_tags[el.name] is el.namespace
1930                                         h_in_scope = true
1931                                         break
1932                                 if standard_scopers[el.name] is el.namespace
1933                                         break
1934                         unless h_in_scope
1935                                 parse_error()
1936                                 return
1937                         generate_implied_end_tags()
1938                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1939                                 parse_error()
1940                         loop
1941                                 el = open_els.shift()
1942                                 if h_tags[el.name] is el.namespace
1943                                         break
1944                         return
1945                 # deep breath!
1946                 if t.type is TYPE_START_TAG and t.name is 'a'
1947                         # If the list of active formatting elements contains an a element
1948                         # between the end of the list and the last marker on the list (or
1949                         # the start of the list if there is no marker on the list), then
1950                         # this is a parse error; run the adoption agency algorithm for the
1951                         # tag name "a", then remove that element from the list of active
1952                         # formatting elements and the stack of open elements if the
1953                         # adoption agency algorithm didn't already remove it (it might not
1954                         # have if the element is not in table scope).
1955                         found = null # must start as null: the `found?` test below is true for any non-null value, including false
1956                         for el in afe
1957                                 if el.type is TYPE_AFE_MARKER
1958                                         break
1959                                 if el.name is 'a' and el.namespace is NS_HTML
1960                                         found = el
1961                         if found? # only when an open <a> formatting element was actually located
1962                                 parse_error()
1963                                 adoption_agency 'a'
1964                                 for el, i in afe # remove it here if the adoption agency left it in place
1965                                         if el is found
1966                                                 afe.splice i, 1
1967                                 for el, i in open_els
1968                                         if el is found
1969                                                 open_els.splice i, 1
1970                         reconstruct_afe()
1971                         el = insert_html_element t
1972                         afe_push el
1973                         return
1974                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1975                         reconstruct_afe()
1976                         el = insert_html_element t
1977                         afe_push el
1978                         return
1979                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1980                         reconstruct_afe()
1981                         el = insert_html_element t
1982                         afe_push el
1983                         return
1984                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1985                         adoption_agency t.name
1986                         return
1987                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1988                         reconstruct_afe()
1989                         insert_html_element t
1990                         afe_push_marker()
1991                         flag_frameset_ok = false
1992                         return
1993                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1994                         unless is_in_scope t.name, NS_HTML
1995                                 parse_error()
1996                                 return
1997                         generate_implied_end_tags()
1998                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1999                                 parse_error()
2000                         loop
2001                                 el = open_els.shift()
2002                                 if el.name is t.name and el.namespace is NS_HTML
2003                                         break
2004                         clear_afe_to_marker()
2005                         return
2006                 if t.type is TYPE_START_TAG and t.name is 'table'
2007                         close_p_if_in_button_scope() # fixfull quirksmode thing
2008                         insert_html_element t
2009                         flag_frameset_ok = false
2010                         ins_mode = ins_mode_in_table
2011                         return
2012                 if t.type is TYPE_END_TAG and t.name is 'br'
2013                         parse_error()
2014                         t.type = TYPE_START_TAG # assignment, not comparison: the spec treats </br> as a <br> start tag
2015                         # fall through to the 'br' start tag handler below. NOTE(review): the spec also says "with no attributes" -- confirm end tag tokens carry none
2016                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2017                         reconstruct_afe()
2018                         insert_html_element t
2019                         open_els.shift()
2020                         t.acknowledge_self_closing()
2021                         flag_frameset_ok = false
2022                         return
2023                 if t.type is TYPE_START_TAG and t.name is 'input'
2024                         reconstruct_afe()
2025                         insert_html_element t
2026                         open_els.shift()
2027                         t.acknowledge_self_closing()
2028                         unless is_input_hidden_tok t
2029                                 flag_frameset_ok = false
2030                         return
2031                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2032                         insert_html_element t
2033                         open_els.shift()
2034                         t.acknowledge_self_closing()
2035                         return
2036                 if t.type is TYPE_START_TAG and t.name is 'hr'
2037                         close_p_if_in_button_scope()
2038                         insert_html_element t
2039                         open_els.shift()
2040                         t.acknowledge_self_closing()
2041                         flag_frameset_ok = false
2042                         return
2043                 if t.type is TYPE_START_TAG and t.name is 'image'
2044                         parse_error()
2045                         t.name = 'img'
2046                         process_token t
2047                         return
2048                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2049                         parse_error()
2050                         if template_tag_is_open() is false and form_element_pointer isnt null
2051                                 return
2052                         t.acknowledge_self_closing()
2053                         flag_frameset_ok = false
2054                         close_p_if_in_button_scope()
2055                         el = insert_html_element new_open_tag 'form'
2056                         unless template_tag_is_open()
2057                                 form_element_pointer = el
2058                         for a in t.attrs_a
2059                                 if a[0] is 'action'
2060                                         el.attrs['action'] = a[1]
2061                                         break
2062                         insert_html_element new_open_tag 'hr'
2063                         open_els.shift()
2064                         reconstruct_afe()
2065                         insert_html_element new_open_tag 'label'
2066                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2067                         input_el = new_open_tag 'input'
2068                         prompt = null
2069                         for a in t.attrs_a
2070                                 if a[0] is 'prompt'
2071                                         prompt = a[1]
2072                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2073                                         input_el.attrs_a.push [a[0], a[1]]
2074                         input_el.attrs_a.push ['name', 'isindex']
2075                         # fixfull this next bit is in english... internationalize?
2076                         prompt ?= "This is a searchable index. Enter search keywords: "
2077                         insert_character new_character_token prompt # fixfull split
2078                         # TODO submit typo "balue" in spec
2079                         insert_html_element input_el
2080                         open_els.shift()
2081                         # insert_character '' # you can put chars here if promt attr missing
2082                         open_els.shift()
2083                         insert_html_element new_open_tag 'hr'
2084                         open_els.shift()
2085                         open_els.shift()
2086                         unless template_tag_is_open()
2087                                 form_element_pointer = null
2088                         return
2089                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2090                         insert_html_element t
2091                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2092                                 cur += 1
2093                         tok_state = tok_state_rcdata
2094                         original_ins_mode = ins_mode
2095                         flag_frameset_ok = false
2096                         ins_mode = ins_mode_text
2097                         return
2098                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2099                         close_p_if_in_button_scope()
2100                         reconstruct_afe()
2101                         flag_frameset_ok = false
2102                         parse_generic_raw_text t
2103                         return
2104                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2105                         flag_frameset_ok = false
2106                         parse_generic_raw_text t
2107                         return
2108                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2109                         parse_generic_raw_text t
2110                         return
2111                 if t.type is TYPE_START_TAG and t.name is 'select'
2112                         reconstruct_afe()
2113                         insert_html_element t
2114                         flag_frameset_ok = false
2115                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2116                                 ins_mode = ins_mode_in_select_in_table
2117                         else
2118                                 ins_mode = ins_mode_in_select
2119                         return
2120                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2121                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2122                                 open_els.shift()
2123                         reconstruct_afe()
2124                         insert_html_element t
2125                         return
2126 # this comment block implements the W3C spec
2127 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2128 #                       if is_in_scope 'ruby', NS_HTML
2129 #                               generate_implied_end_tags()
2130 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2131 #                                       parse_error()
2132 #                       insert_html_element t
2133 #                       return
2134 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2135 #                       if is_in_scope 'ruby', NS_HTML
2136 #                               generate_implied_end_tags 'rtc' # arg is exception
2137 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2138 #                                       parse_error()
2139 #                       insert_html_element t
2140 #                       return
2141 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2142                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2143                         if is_in_scope 'ruby', NS_HTML
2144                                 generate_implied_end_tags()
2145                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2146                                         parse_error()
2147                         insert_html_element t
2148                         return
2149                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2150                         if is_in_scope 'ruby', NS_HTML
2151                                 generate_implied_end_tags 'rtc'
2152                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2153                                         parse_error()
2154                         insert_html_element t
2155                         return
2156 # end WHATWG chunk
2157                 if t.type is TYPE_START_TAG and t.name is 'math'
2158                         reconstruct_afe()
2159                         adjust_mathml_attributes t
2160                         adjust_foreign_attributes t
2161                         insert_foreign_element t, NS_MATHML
2162                         if t.flag 'self-closing'
2163                                 open_els.shift()
2164                                 t.acknowledge_self_closing()
2165                         return
2166                 if t.type is TYPE_START_TAG and t.name is 'svg'
2167                         reconstruct_afe()
2168                         adjust_svg_attributes t
2169                         adjust_foreign_attributes t
2170                         insert_foreign_element t, NS_SVG
2171                         if t.flag 'self-closing'
2172                                 open_els.shift()
2173                                 t.acknowledge_self_closing()
2174                         return
2175                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2176                         parse_error()
2177                         return
2178                 if t.type is TYPE_START_TAG # any other start tag
2179                         reconstruct_afe()
2180                         insert_html_element t
2181                         return
2182                 if t.type is TYPE_END_TAG # any other end tag
2183                         in_body_any_other_end_tag t.name
2184                         return
2185                 return
2186
2187         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2188         ins_mode_text = (t) ->
2189                 # Handles one token while in the "text" insertion mode (entered, for
2190                 # example, by the textarea start tag handler, which saves the mode to
2191                 # return to in original_ins_mode).
2192                 switch t.type
2193                         when TYPE_TEXT
2194                                 insert_character t
2195                         when TYPE_EOF
2196                                 parse_error()
2197                                 if open_els[0].namespace is NS_HTML and open_els[0].name is 'script'
2198                                         open_els[0].flag 'already started', true
2199                                 open_els.shift()
2200                                 ins_mode = original_ins_mode
2201                                 process_token t # reprocess EOF in the restored mode
2202                         when TYPE_END_TAG
2203                                 # </script> and all other end tags are handled identically here.
2204                                 # fixfull the spec seems to assume that I'm going to run the script
2205                                 # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2206                                 open_els.shift()
2207                                 ins_mode = original_ins_mode
2208                         else
2209                                 console.log 'warning: end of ins_mode_text reached'
2210                 return
2211
2212         # the functions below implement the tokenizer states described here:
2213         # http://www.w3.org/TR/html5/syntax.html#tokenization
2214
2215         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2216         ins_mode_in_table_else = (t) -> # fallback used by ins_mode_in_table: parse error, then in-body rules with foster parenting on
2217                 parse_error()
2218                 flag_foster_parenting = yes # set only for the duration of the in-body call below
2219                 ins_mode_in_body t
2220                 flag_foster_parenting = no
2221                 return
2222         ins_mode_in_table = (t) ->
2223                 switch t.type
2224                         when TYPE_TEXT
2225                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2226                                         pending_table_character_tokens = []
2227                                         original_ins_mode = ins_mode
2228                                         ins_mode = ins_mode_in_table_text
2229                                         process_token t
2230                                 else
2231                                         ins_mode_in_table_else t
2232                         when TYPE_COMMENT
2233                                 insert_comment t
2234                         when TYPE_DOCTYPE
2235                                 parse_error()
2236                         when TYPE_START_TAG
2237                                 switch t.name
2238                                         when 'caption'
2239                                                 clear_stack_to_table_context()
2240                                                 afe_push_marker()
2241                                                 insert_html_element t
2242                                                 ins_mode = ins_mode_in_caption
2243                                         when 'colgroup'
2244                                                 clear_stack_to_table_context()
2245                                                 insert_html_element t
2246                                                 ins_mode = ins_mode_in_column_group
2247                                         when 'col'
2248                                                 clear_stack_to_table_context()
2249                                                 insert_html_element new_open_tag 'colgroup'
2250                                                 ins_mode = ins_mode_in_column_group
2251                                                 process_token t
2252                                         when 'tbody', 'tfoot', 'thead'
2253                                                 clear_stack_to_table_context()
2254                                                 insert_html_element t
2255                                                 ins_mode = ins_mode_in_table_body
2256                                         when 'td', 'th', 'tr'
2257                                                 clear_stack_to_table_context()
2258                                                 insert_html_element new_open_tag 'tbody'
2259                                                 ins_mode = ins_mode_in_table_body
2260                                                 process_token t
2261                                         when 'table'
2262                                                 parse_error()
2263                                                 if is_in_table_scope 'table', NS_HTML
2264                                                         loop
2265                                                                 el = open_els.shift()
2266                                                                 if el.name is 'table' and el.namespace is NS_HTML
2267                                                                         break
2268                                                         reset_ins_mode()
2269                                                         process_token t
2270                                         when 'style', 'script', 'template'
2271                                                 ins_mode_in_head t
2272                                         when 'input'
2273                                                 unless is_input_hidden_tok t
2274                                                         ins_mode_in_table_else t
2275                                                 else
2276                                                         parse_error()
2277                                                         el = insert_html_element t
2278                                                         open_els.shift()
2279                                                         t.acknowledge_self_closing()
2280                                         when 'form'
2281                                                 parse_error()
2282                                                 if form_element_pointer?
2283                                                         return
2284                                                 if template_tag_is_open()
2285                                                         return
2286                                                 form_element_pointer = insert_html_element t
2287                                                 open_els.shift()
2288                                         else
2289                                                 ins_mode_in_table_else t
2290                         when TYPE_END_TAG
2291                                 switch t.name
2292                                         when 'table'
2293                                                 if is_in_table_scope 'table', NS_HTML
2294                                                         loop
2295                                                                 el = open_els.shift()
2296                                                                 if el.name is 'table' and el.namespace is NS_HTML
2297                                                                         break
2298                                                         reset_ins_mode()
2299                                                 else
2300                                                         parse_error()
2301                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2302                                                 parse_error()
2303                                         when 'template'
2304                                                 ins_mode_in_head t
2305                                         else
2306                                                 ins_mode_in_table_else t
2307                         when TYPE_EOF
2308                                 ins_mode_in_body t
2309                         else
2310                                 ins_mode_in_table_else t
2311
2312
2313         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2314         ins_mode_in_table_text = (t) ->
2315                 if t.type is TYPE_TEXT and t.text is "\u0000"
2316                         # from javascript?
2317                         parse_error()
2318                         return
2319                 if t.type is TYPE_TEXT
2320                         pending_table_character_tokens.push t
2321                         return
2322                 # Anything else
2323                 all_space = true
2324                 for old in pending_table_character_tokens
2325                         unless is_space_tok old
2326                                 all_space = false
2327                                 break
2328                 if all_space
2329                         for old in pending_table_character_tokens
2330                                 insert_character old
2331                 else
2332                         for old in pending_table_character_tokens
2333                                 ins_mode_in_table_else old
2334                 pending_table_character_tokens = []
2335                 ins_mode = original_ins_mode
2336                 process_token t
2337
2338         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2339         ins_mode_in_caption = (t) ->
2340                 if t.type is TYPE_END_TAG and t.name is 'caption'
2341                         if is_in_table_scope 'caption', NS_HTML
2342                                 generate_implied_end_tags()
2343                                 if open_els[0].name isnt 'caption'
2344                                         parse_error()
2345                                 loop
2346                                         el = open_els.shift()
2347                                         if el.name is 'caption' and el.namespace is NS_HTML
2348                                                 break
2349                                 clear_afe_to_marker()
2350                                 ins_mode = ins_mode_in_table
2351                         else
2352                                 parse_error()
2353                                 # fragment case
2354                         return
2355                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2356                         parse_error()
2357                         if is_in_table_scope 'caption', NS_HTML
2358                                 loop
2359                                         el = open_els.shift()
2360                                         if el.name is 'caption' and el.namespace is NS_HTML
2361                                                 break
2362                                 clear_afe_to_marker()
2363                                 ins_mode = ins_mode_in_table
2364                                 process_token t
2365                         # else fragment case
2366                         return
2367                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2368                         parse_error()
2369                         return
2370                 # Anything else
2371                 ins_mode_in_body t
2372
2373         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2374         ins_mode_in_column_group = (t) ->
2375                 if is_space_tok t
2376                         insert_character t
2377                         return
2378                 if t.type is TYPE_COMMENT
2379                         insert_comment t
2380                         return
2381                 if t.type is TYPE_DOCTYPE
2382                         parse_error()
2383                         return
2384                 if t.type is TYPE_START_TAG and t.name is 'html'
2385                         ins_mode_in_body t
2386                         return
2387                 if t.type is TYPE_START_TAG and t.name is 'col'
2388                         el = insert_html_element t
2389                         open_els.shift()
2390                         t.acknowledge_self_closing()
2391                         return
2392                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2393                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2394                                 open_els.shift()
2395                                 ins_mode = ins_mode_in_table
2396                         else
2397                                 parse_error()
2398                         return
2399                 if t.type is TYPE_END_TAG and t.name is 'col'
2400                         parse_error()
2401                         return
2402                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2403                         ins_mode_in_head t
2404                         return
2405                 if t.type is TYPE_EOF
2406                         ins_mode_in_body t
2407                         return
2408                 # Anything else
2409                 if open_els[0].name isnt 'colgroup'
2410                         parse_error()
2411                         return
2412                 open_els.shift()
2413                 ins_mode = ins_mode_in_table
2414                 process_token t
2415                 return
2416
2417         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2418         ins_mode_in_table_body = (t) ->
2419                 if t.type is TYPE_START_TAG and t.name is 'tr'
2420                         clear_stack_to_table_body_context()
2421                         insert_html_element t
2422                         ins_mode = ins_mode_in_row
2423                         return
2424                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2425                         parse_error()
2426                         clear_stack_to_table_body_context()
2427                         insert_html_element new_open_tag 'tr'
2428                         ins_mode = ins_mode_in_row
2429                         process_token t
2430                         return
2431                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2432                         unless is_in_table_scope t.name, NS_HTML
2433                                 parse_error()
2434                                 return
2435                         clear_stack_to_table_body_context()
2436                         open_els.shift()
2437                         ins_mode = ins_mode_in_table
2438                         return
2439                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2440                         has = false
2441                         for el in open_els
2442                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2443                                         has = true
2444                                         break
2445                                 if table_scopers[el.name] is el.namespace
2446                                         break
2447                         if !has
2448                                 parse_error()
2449                                 return
2450                         clear_stack_to_table_body_context()
2451                         open_els.shift()
2452                         ins_mode = ins_mode_in_table
2453                         process_token t
2454                         return
2455                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2456                         parse_error()
2457                         return
2458                 # Anything else
2459                 ins_mode_in_table t
2460
2461         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2462         ins_mode_in_row = (t) ->
2463                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2464                         clear_stack_to_table_row_context()
2465                         insert_html_element t
2466                         ins_mode = ins_mode_in_cell
2467                         afe_push_marker()
2468                         return
2469                 if t.type is TYPE_END_TAG and t.name is 'tr'
2470                         if is_in_table_scope 'tr', NS_HTML
2471                                 clear_stack_to_table_row_context()
2472                                 open_els.shift()
2473                                 ins_mode = ins_mode_in_table_body
2474                         else
2475                                 parse_error()
2476                         return
2477                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2478                         if is_in_table_scope 'tr', NS_HTML
2479                                 clear_stack_to_table_row_context()
2480                                 open_els.shift()
2481                                 ins_mode = ins_mode_in_table_body
2482                                 process_token t
2483                         else
2484                                 parse_error()
2485                         return
2486                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2487                         if is_in_table_scope t.name, NS_HTML
2488                                 if is_in_table_scope 'tr', NS_HTML
2489                                         clear_stack_to_table_row_context()
2490                                         open_els.shift()
2491                                         ins_mode = ins_mode_in_table_body
2492                                         process_token t
2493                         else
2494                                 parse_error()
2495                         return
2496                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2497                         parse_error()
2498                         return
2499                 # Anything else
2500                 ins_mode_in_table t
2501
2502         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2503         close_the_cell = ->
2504                 generate_implied_end_tags()
2505                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2506                         parse_error()
2507                 loop
2508                         el = open_els.shift()
2509                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2510                                 break
2511                 clear_afe_to_marker()
2512                 ins_mode = ins_mode_in_row
2513
2514         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2515         ins_mode_in_cell = (t) ->
2516                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2517                         if is_in_table_scope t.name, NS_HTML
2518                                 generate_implied_end_tags()
2519                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2520                                         parse_error()
2521                                 loop
2522                                         el = open_els.shift()
2523                                         if el.name is t.name and el.namespace is NS_HTML
2524                                                 break
2525                                 clear_afe_to_marker()
2526                                 ins_mode = ins_mode_in_row
2527                         else
2528                                 parse_error()
2529                         return
2530                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2531                         has = false
2532                         for el in open_els
2533                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2534                                         has = true
2535                                         break
2536                                 if table_scopers[el.name] is el.namespace
2537                                         break
2538                         if !has
2539                                 parse_error()
2540                                 return
2541                         close_the_cell()
2542                         process_token t
2543                         return
2544                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2545                         parse_error()
2546                         return
2547                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2548                         if is_in_table_scope t.name, NS_HTML
2549                                 close_the_cell()
2550                                 process_token t
2551                         else
2552                                 parse_error()
2553                         return
2554                 # Anything Else
2555                 ins_mode_in_body t
2556
2557         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2558         ins_mode_in_select = (t) ->
2559                 if t.type is TYPE_TEXT and t.text is "\u0000"
2560                         parse_error()
2561                         return
2562                 if t.type is TYPE_TEXT
2563                         insert_character t
2564                         return
2565                 if t.type is TYPE_COMMENT
2566                         insert_comment t
2567                         return
2568                 if t.type is TYPE_DOCTYPE
2569                         parse_error()
2570                         return
2571                 if t.type is TYPE_START_TAG and t.name is 'html'
2572                         ins_mode_in_body t
2573                         return
2574                 if t.type is TYPE_START_TAG and t.name is 'option'
2575                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2576                                 open_els.shift()
2577                         insert_html_element t
2578                         return
2579                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2580                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2581                                 open_els.shift()
2582                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2583                                 open_els.shift()
2584                         insert_html_element t
2585                         return
2586                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2587                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2588                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2589                                         open_els.shift()
2590                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2591                                 open_els.shift()
2592                         else
2593                                 parse_error()
2594                         return
2595                 if t.type is TYPE_END_TAG and t.name is 'option'
2596                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2597                                 open_els.shift()
2598                         else
2599                                 parse_error()
2600                         return
2601                 if t.type is TYPE_END_TAG and t.name is 'select'
2602                         if is_in_select_scope 'select', NS_HTML
2603                                 loop
2604                                         el = open_els.shift()
2605                                         if el.name is 'select' and el.namespace is NS_HTML
2606                                                 break
2607                                 reset_ins_mode()
2608                         else
2609                                 parse_error()
2610                         return
2611                 if t.type is TYPE_START_TAG and t.name is 'select'
2612                         parse_error()
2613                         loop
2614                                 el = open_els.shift()
2615                                 if el.name is 'select' and el.namespace is NS_HTML
2616                                         break
2617                         reset_ins_mode()
2618                         # spec says that this is the same as </select> but it doesn't say
2619                         # to check scope first
2620                         return
2621                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2622                         parse_error()
2623                         if is_in_select_scope 'select', NS_HTML
2624                                 return
2625                         loop
2626                                 el = open_els.shift()
2627                                 if el.name is 'select' and el.namespace is NS_HTML
2628                                         break
2629                         reset_ins_mode()
2630                         process_token t
2631                         return
2632                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2633                         ins_mode_in_head t
2634                         return
2635                 if t.type is TYPE_EOF
2636                         ins_mode_in_body t
2637                         return
2638                 # Anything else
2639                 parse_error()
2640                 return
2641
2642         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2643         ins_mode_in_select_in_table = (t) ->
2644                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2645                         parse_error()
2646                         loop
2647                                 el = open_els.shift()
2648                                 if el.name is 'select' and el.namespace is NS_HTML
2649                                         break
2650                         reset_ins_mode()
2651                         process_token t
2652                         return
2653                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2654                         parse_error()
2655                         unless is_in_table_scope t.name, NS_HTML
2656                                 return
2657                         loop
2658                                 el = open_els.shift()
2659                                 if el.name is 'select' and el.namespace is NS_HTML
2660                                         break
2661                         reset_ins_mode()
2662                         process_token t
2663                         return
2664                 # Anything else
2665                 ins_mode_in_select t
2666                 return
2667
2668         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2669         ins_mode_in_template = (t) ->
2670                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2671                         ins_mode_in_body t
2672                         return
2673                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2674                         ins_mode_in_head t
2675                         return
2676                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2677                         template_ins_modes.shift()
2678                         template_ins_modes.unshift ins_mode_in_table
2679                         ins_mode = ins_mode_in_table
2680                         process_token t
2681                         return
2682                 if t.type is TYPE_START_TAG and t.name is 'col'
2683                         template_ins_modes.shift()
2684                         template_ins_modes.unshift ins_mode_in_column_group
2685                         ins_mode = ins_mode_in_column_group
2686                         process_token t
2687                         return
2688                 if t.type is TYPE_START_TAG and t.name is 'tr'
2689                         template_ins_modes.shift()
2690                         template_ins_modes.unshift ins_mode_in_table_body
2691                         ins_mode = ins_mode_in_table_body
2692                         process_token t
2693                         return
2694                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2695                         template_ins_modes.shift()
2696                         template_ins_modes.unshift ins_mode_in_row
2697                         ins_mode = ins_mode_in_row
2698                         process_token t
2699                         return
2700                 if t.type is TYPE_START_TAG
2701                         template_ins_modes.shift()
2702                         template_ins_modes.unshift ins_mode_in_body
2703                         ins_mode = ins_mode_in_body
2704                         process_token t
2705                         return
2706                 if t.type is TYPE_END_TAG
2707                         parse_error()
2708                         return
2709                 if t.type is TYPE_EOF
2710                         unless template_tag_is_open()
2711                                 stop_parsing()
2712                                 return
2713                         parse_error()
2714                         loop
2715                                 el = open_els.shift()
2716                                 if el.name is 'template' and el.namespace is NS_HTML
2717                                         break
2718                         clear_afe_to_marker()
2719                         template_ins_modes.shift()
2720                         reset_ins_mode()
2721                         process_token t
2722
2723         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2724         ins_mode_after_body = (t) ->
2725                 if is_space_tok t
2726                         ins_mode_in_body t
2727                         return
2728                 if t.type is TYPE_COMMENT
2729                         insert_comment t, [open_els[0], open_els[0].children.length]
2730                         return
2731                 if t.type is TYPE_DOCTYPE
2732                         parse_error()
2733                         return
2734                 if t.type is TYPE_START_TAG and t.name is 'html'
2735                         ins_mode_in_body t
2736                         return
2737                 if t.type is TYPE_END_TAG and t.name is 'html'
2738                         if flag_fragment_parsing
2739                                 parse_error()
2740                                 return
2741                         ins_mode = ins_mode_after_after_body
2742                         return
2743                 if t.type is TYPE_EOF
2744                         stop_parsing()
2745                         return
2746                 # Anything ELse
2747                 parse_error()
2748                 ins_mode = ins_mode_in_body
2749                 process_token t
2750
2751         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2752         ins_mode_in_frameset = (t) ->
2753                 # Tree-construction handler for the "in frameset" insertion mode.
2754                 # The cases below are tested top to bottom and only the first
2755                 # one that matches token t is carried out.
2756                 switch
2757                         when is_space_tok t
2758                                 insert_character t
2759                         when t.type is TYPE_COMMENT
2760                                 insert_comment t
2761                         when t.type is TYPE_DOCTYPE
2762                                 parse_error()
2763                         when t.type is TYPE_START_TAG and t.name is 'html'
2764                                 ins_mode_in_body t
2765                         when t.type is TYPE_START_TAG and t.name is 'frameset'
2766                                 insert_html_element t
2767                         when t.type is TYPE_END_TAG and t.name is 'frameset'
2768                                 if open_els.length is 1
2769                                         parse_error() # fragment case
2770                                 else
2771                                         open_els.shift()
2772                                         # leave this mode once the current node is no longer a frameset (never when parsing a fragment)
2773                                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2774                                                 ins_mode = ins_mode_after_frameset
2775                         when t.type is TYPE_START_TAG and t.name is 'frame'
2776                                 insert_html_element t
2777                                 # the frame element is popped again right after being inserted
2778                                 open_els.shift()
2779                                 t.acknowledge_self_closing()
2780                         when t.type is TYPE_START_TAG and t.name is 'noframes'
2781                                 ins_mode_in_head t
2782                         when t.type is TYPE_EOF
2783                                 # more than one open element at EOF is reported, parsing stops either way
2784                                 if open_els.length isnt 1
2785                                         parse_error()
2786                                 stop_parsing()
2787                         else
2788                                 # Anything else
2789                                 parse_error()
2790                 # every case ends up here; nothing is returned
2791                 return
2792
2793         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2794         ins_mode_after_frameset = (t) -> # tree-construction handler for the "after frameset" insertion mode; returns nothing
2795                 if is_space_tok t
2796                         insert_character t
2797                         return
2798                 if t.type is TYPE_COMMENT
2799                         insert_comment t
2800                         return
2801                 if t.type is TYPE_DOCTYPE
2802                         parse_error()
2803                         return
2804                 if t.type is TYPE_START_TAG and t.name is 'html'
2805                         ins_mode_in_body t # handled with the "in body" rules
2806                         return
2807                 if t.type is TYPE_END_TAG and t.name is 'html'
2808                         ins_mode = ins_mode_after_after_frameset # must assign ins_mode (the variable the other handlers switch modes with), not a stray insert_mode
2809                         return
2810                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2811                         ins_mode_in_head t # handled with the "in head" rules
2812                         return
2813                 if t.type is TYPE_EOF
2814                         stop_parsing()
2815                         return
2816                 # Anything else: report the error and ignore the token
2817                 parse_error()
2818                 return
2819
2820         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2821         ins_mode_after_after_body = (t) ->
2822                 # "after after body" insertion mode; comments are inserted at position [doc, doc.children.length]
2823                 if t.type is TYPE_COMMENT
2824                         insert_comment t, [doc, doc.children.length]
2825                 else if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2826                         ins_mode_in_body t
2827                 else if t.type is TYPE_EOF
2828                         stop_parsing()
2829                 else
2830                         # Anything else: go back to "in body" and reprocess the token there
2831                         parse_error()
2832                         ins_mode = ins_mode_in_body
2833                         process_token t
2834                 # no branch produces a return value
2835                 return
2836
2837         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2838         ins_mode_after_after_frameset = (t) ->
2839                 # "after after frameset" insertion mode; the first matching case wins
2840                 switch
2841                         when t.type is TYPE_COMMENT
2842                                 insert_comment t, [doc, doc.children.length]
2843                         when t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2844                                 ins_mode_in_body t
2845                         when t.type is TYPE_EOF
2846                                 stop_parsing()
2847                         when t.type is TYPE_START_TAG and t.name is 'noframes'
2848                                 ins_mode_in_head t
2849                         else
2850                                 # Anything else
2851                                 parse_error()
2852                 # nothing is returned, whichever case ran
2853                 return
2854
2855         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2856         has_color_face_or_size = (t) ->
2857                 # true as soon as an entry of t.attrs_a has 'color', 'face' or 'size' as its first element
2858                 for a in t.attrs_a
2859                         return true if a[0] in ['color', 'face', 'size']
2860                 return false
2861         in_foreign_content_end_script = -> # steps run for a script end tag in foreign content (also used for self-closing script start tags)
2862                 open_els.shift() # pop the current node (index 0 of open_els)
2863                 # fixfull: nothing beyond the pop is implemented here yet
2864                 return
2865         in_foreign_content_other_start = (t) ->
2866                 # "Any other start tag" in foreign content: adjust the token for the namespace of the
2867                 # adjusted current node, insert it in that namespace, and pop it again if it is self-closing.
2868                 acn = adjusted_current_node()
2869                 if acn.namespace is NS_MATHML
2870                         adjust_mathml_attributes t
2871                 if acn.namespace is NS_SVG
2872                         t.name = svg_name_fixes[t.name] if svg_name_fixes[t.name]?
2873                         adjust_svg_attributes t
2874                 adjust_foreign_attributes t
2875                 insert_foreign_element t, acn.namespace
2876                 return unless t.flag 'self-closing'
2877                 if t.name is 'script'
2878                         t.acknowledge_self_closing()
2879                         in_foreign_content_end_script() # fixfull
2880                 else
2881                         open_els.shift()
2882                         t.acknowledge_self_closing()
2883                 return
2884         in_foreign_content = (t) -> # tree-construction rules applied to token t while in foreign content
2885                 if t.type is TYPE_TEXT and t.text is "\u0000" # a NUL character is replaced by U+FFFD
2886                         parse_error()
2887                         insert_character new_character_token "\ufffd"
2888                         return
2889                 if is_space_tok t
2890                         insert_character t
2891                         return
2892                 if t.type is TYPE_TEXT
2893                         flag_frameset_ok = false # any other text clears the frameset-ok flag
2894                         insert_character t
2895                         return
2896                 if t.type is TYPE_COMMENT
2897                         insert_comment t
2898                         return
2899                 if t.type is TYPE_DOCTYPE
2900                         parse_error()
2901                         return
2902                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t))) # start tags that break out of foreign content
2903                         parse_error()
2904                         if flag_fragment_parsing # fragment case: treat it like any other start tag
2905                                 in_foreign_content_other_start t
2906                                 return
2907                         loop # is this safe?
2908                                 open_els.shift() # pops at least once, then until the break condition below holds for the current node
2909                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
2910                                         break
2911                         process_token t # reprocess the same token now that the stack was unwound
2912                         return
2913                 if t.type is TYPE_START_TAG
2914                         in_foreign_content_other_start t
2915                         return
2916                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2917                         in_foreign_content_end_script()
2918                         return
2919                 if t.type is TYPE_END_TAG # any other end tag: search open_els for a matching element
2920                         i = 0
2921                         node = open_els[i] # start at the current node (index 0)
2922                         if node.name.toLowerCase() isnt t.name
2923                                 parse_error()
2924                         loop
2925                                 if node is open_els[open_els.length - 1] # reached the last entry of open_els: give up
2926                                         return
2927                                 if node.name.toLowerCase() is t.name
2928                                         loop # pop entries until node itself has been popped
2929                                                 el = open_els.shift()
2930                                                 if el is node
2931                                                         return
2932                                 i += 1 # move on to the entry at the next higher index
2933                                 node = open_els[i]
2934                                 if node.namespace is NS_HTML
2935                                         break
2936                         ins_mode t # explicitly call HTML insertion mode (last expression, so its result is the implicit return value)
2937
2938
2939         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2940         tok_state_data = ->
2941                 # Data state: consume one character and return the token for it,
2942                 # or null when a "<" merely switches the tokenizer to the tag open state.
2943                 c = txt.charAt(cur++)
2944                 if c is '&'
2945                         return new_text_node parse_character_reference()
2946                 if c is '<'
2947                         tok_state = tok_state_tag_open
2948                         return null
2949                 if c is "\u0000"
2950                         parse_error()
2951                         return new_text_node "\ufffd"
2952                 return new_eof_token() if c is '' # EOF
2953                 return new_text_node c
2954
2955         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2956         # not needed: tok_state_character_reference_in_data = ->
2957         # just call parse_character_reference()
2958
2959         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2960         tok_state_rcdata = ->
2961                 # RCDATA state: character references are still parsed here; a "<" only
2962                 # moves to the RCDATA less-than sign state and yields null.
2963                 c = txt.charAt(cur++)
2964                 if c is '&'
2965                         return new_text_node parse_character_reference()
2966                 if c is '<'
2967                         tok_state = tok_state_rcdata_less_than_sign
2968                         return null
2969                 if c is "\u0000"
2970                         parse_error()
2971                         return new_character_token "\ufffd"
2972                 return new_eof_token() if c is '' # EOF
2973                 return new_character_token c
2974
2975         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2976         # not needed: tok_state_character_reference_in_rcdata = ->
2977         # just call parse_character_reference()
2978
2979         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2980         tok_state_rawtext = ->
2981                 # RAWTEXT state: every character becomes a character token, except that
2982                 # "<" switches to the RAWTEXT less-than sign state (null is returned).
2983                 c = txt.charAt(cur++)
2984                 if c is '<'
2985                         tok_state = tok_state_rawtext_less_than_sign
2986                         return null
2987                 if c is "\u0000"
2988                         parse_error()
2989                         return new_character_token "\ufffd"
2990                 return new_eof_token() if c is '' # EOF
2991                 return new_character_token c
2992
2993         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2994         tok_state_script_data = ->
2995                 # Script data state: same shape as RAWTEXT, but "<" leads to the
2996                 # script data less-than sign state (null is returned for it).
2997                 c = txt.charAt(cur++)
2998                 if c is '<'
2999                         tok_state = tok_state_script_data_less_than_sign
3000                         return null
3001                 if c is "\u0000"
3002                         parse_error()
3003                         return new_character_token "\ufffd"
3004                 return new_eof_token() if c is '' # EOF
3005                 return new_character_token c
3006
3007         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3008         tok_state_plaintext = ->
3009                 # PLAINTEXT state: never changes tok_state; each call yields a
3010                 # character token (U+FFFD for NUL) or the EOF token.
3011                 c = txt.charAt(cur++)
3012                 if c is "\u0000"
3013                         parse_error()
3014                         return new_character_token "\ufffd"
3015                 return new_eof_token() if c is '' # EOF
3016                 # Anything else
3017                 return new_character_token c
3018
3019
3020         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3021         tok_state_tag_open = ->
3022                 # Tag open state (just after "<"): returns null unless the "<" turns out to be plain text
3023                 c = txt.charAt(cur++)
3024                 if c is '!'
3025                         tok_state = tok_state_markup_declaration_open
3026                 else if c is '/'
3027                         tok_state = tok_state_end_tag_open
3028                 else if c is '?'
3029                         parse_error()
3030                         tok_cur_tag = new_comment_token '?'
3031                         tok_state = tok_state_bogus_comment
3032                 else if is_lc_alpha(c)
3033                         tok_cur_tag = new_open_tag c
3034                         tok_state = tok_state_tag_name
3035                 else if is_uc_alpha(c)
3036                         tok_cur_tag = new_open_tag c.toLowerCase()
3037                         tok_state = tok_state_tag_name
3038                 else
3039                         parse_error()
3040                         tok_state = tok_state_data
3041                         cur -= 1 # we didn't parse/handle the char after <
3042                         return new_text_node '<'
3043                 return null
3044
3045         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3046         tok_state_end_tag_open = ->
3047                 # End tag open state (just after "</"): only the EOF case returns a token, everything else returns null
3048                 c = txt.charAt(cur++)
3049                 if c is '>'
3050                         parse_error()
3051                         tok_state = tok_state_data
3052                 else if c is '' # EOF
3053                         parse_error()
3054                         tok_state = tok_state_data
3055                         return new_text_node '</'
3056                 else if is_uc_alpha(c)
3057                         tok_cur_tag = new_end_tag c.toLowerCase()
3058                         tok_state = tok_state_tag_name
3059                 else if is_lc_alpha(c)
3060                         tok_cur_tag = new_end_tag c
3061                         tok_state = tok_state_tag_name
3062                 else
3063                         parse_error()
3064                         tok_cur_tag = new_comment_token '/'
3065                         tok_state = tok_state_bogus_comment
3066                 return null
3067
3068         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3069         tok_state_tag_name = ->
3070                 # Tag name state: grows tok_cur_tag.name one character per call; only ">" returns the finished tag
3071                 c = txt.charAt(cur++)
3072                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3073                         tok_state = tok_state_before_attribute_name
3074                 else if c is '/'
3075                         tok_state = tok_state_self_closing_start_tag
3076                 else if c is '>'
3077                         tok_state = tok_state_data
3078                         tmp = tok_cur_tag
3079                         tok_cur_tag = null
3080                         return tmp
3081                 else if c is "\u0000"
3082                         parse_error()
3083                         tok_cur_tag.name += "\ufffd"
3084                 else if c is '' # EOF
3085                         parse_error()
3086                         tok_state = tok_state_data
3087                 else if is_uc_alpha(c)
3088                         tok_cur_tag.name += c.toLowerCase()
3089                 else
3090                         tok_cur_tag.name += c
3091                 return null
3092
3093         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3094         tok_state_rcdata_less_than_sign = ->
3095                 c = txt.charAt(cur++) # the character right after a "<" seen in RCDATA
3096                 unless c is '/'
3097                         # Anything else: the "<" was ordinary text
3098                         tok_state = tok_state_rcdata
3099                         cur -= 1 # reconsume the input character
3100                         return new_character_token '<'
3101                 temporary_buffer = ''
3102                 tok_state = tok_state_rcdata_end_tag_open
3103                 return null
3104
3105         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3106         tok_state_rcdata_end_tag_open = ->
3107                 # After "</" in RCDATA: a letter starts a candidate end tag (name lowercased,
3108                 # original character kept in temporary_buffer); anything else is text again.
3109                 c = txt.charAt(cur++)
3110                 if is_uc_alpha(c)
3111                         tok_cur_tag = new_end_tag c.toLowerCase()
3112                 else if is_lc_alpha(c)
3113                         tok_cur_tag = new_end_tag c
3114                 else
3115                         # Anything else
3116                         tok_state = tok_state_rcdata
3117                         cur -= 1 # reconsume the input character
3118                         return new_character_token "</" # fixfull separate these
3119                 temporary_buffer += c
3120                 tok_state = tok_state_rcdata_end_tag_name
3121                 return null
3122
3123         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3124         is_appropriate_end_tag = (t) ->
3125                 # The spec compares against "the tag name of the last start tag to have
3126                 # been emitted from this tokenizer". This is only called from the various
3127                 # "raw" states, so open_els[0].name is hopefully an ok stand-in.
3128                 # TODO: verify this after the script data states are implemented
3129                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3130                 return false unless t.type is TYPE_END_TAG
3131                 return t.name is open_els[0].name
3132
3133         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3134         tok_state_rcdata_end_tag_name = ->
3135                 # RCDATA end tag name state: whitespace, "/" and ">" finish the tag only when it is an appropriate end tag
3136                 c = txt.charAt(cur++)
3137                 switch c
3138                         when "\t", "\n", "\u000c", ' '
3139                                 if is_appropriate_end_tag tok_cur_tag
3140                                         tok_state = tok_state_before_attribute_name
3141                                         return
3142                         when '/'
3143                                 if is_appropriate_end_tag tok_cur_tag
3144                                         tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3145                                         return
3146                         when '>'
3147                                 if is_appropriate_end_tag tok_cur_tag
3148                                         tok_state = tok_state_data
3149                                         return tok_cur_tag
3150                 # reached for letters, for any other character, and for the cases above when the tag is not appropriate
3151                 if is_uc_alpha(c)
3152                         tok_cur_tag.name += c.toLowerCase()
3153                         temporary_buffer += c
3154                         return null
3155                 if is_lc_alpha(c)
3156                         tok_cur_tag.name += c
3157                         temporary_buffer += c
3158                         return null
3159                 # Anything else
3160                 tok_state = tok_state_rcdata
3161                 cur -= 1 # reconsume the input character
3162                 return new_character_token '</' + temporary_buffer # fixfull separate these
3163
3164         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3165         tok_state_rawtext_less_than_sign = ->
3166                 c = txt.charAt(cur++) # the character right after a "<" seen in RAWTEXT
3167                 unless c is '/'
3168                         # Anything else: the "<" was ordinary text
3169                         tok_state = tok_state_rawtext
3170                         cur -= 1 # reconsume the input character
3171                         return new_character_token '<'
3172                 temporary_buffer = ''
3173                 tok_state = tok_state_rawtext_end_tag_open
3174                 return null
3175
3176         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3177         tok_state_rawtext_end_tag_open = ->
3178                 # After "</" in RAWTEXT: a letter starts a candidate end tag (name lowercased,
3179                 # original character kept in temporary_buffer); anything else is text again.
3180                 c = txt.charAt(cur++)
3181                 if is_uc_alpha(c)
3182                         tok_cur_tag = new_end_tag c.toLowerCase()
3183                 else if is_lc_alpha(c)
3184                         tok_cur_tag = new_end_tag c
3185                 else
3186                         # Anything else
3187                         tok_state = tok_state_rawtext
3188                         cur -= 1 # reconsume the input character
3189                         return new_character_token "</" # fixfull separate these
3190                 temporary_buffer += c
3191                 tok_state = tok_state_rawtext_end_tag_name
3192                 return null
3193
3194         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3195         tok_state_rawtext_end_tag_name = ->
3196                 # RAWTEXT end tag name state: whitespace, "/" and ">" finish the tag only when it is an appropriate end tag
3197                 c = txt.charAt(cur++)
3198                 switch c
3199                         when "\t", "\n", "\u000c", ' '
3200                                 if is_appropriate_end_tag tok_cur_tag
3201                                         tok_state = tok_state_before_attribute_name
3202                                         return
3203                         when '/'
3204                                 if is_appropriate_end_tag tok_cur_tag
3205                                         tok_state = tok_state_self_closing_start_tag
3206                                         return
3207                         when '>'
3208                                 if is_appropriate_end_tag tok_cur_tag
3209                                         tok_state = tok_state_data
3210                                         return tok_cur_tag
3211                 # reached for letters, for any other character, and for the cases above when the tag is not appropriate
3212                 if is_uc_alpha(c)
3213                         tok_cur_tag.name += c.toLowerCase()
3214                         temporary_buffer += c
3215                         return null
3216                 if is_lc_alpha(c)
3217                         tok_cur_tag.name += c
3218                         temporary_buffer += c
3219                         return null
3220                 # Anything else
3221                 tok_state = tok_state_rawtext
3222                 cur -= 1 # reconsume the input character
3223                 return new_character_token '</' + temporary_buffer # fixfull separate these
3224
3225         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3226         tok_state_script_data_less_than_sign = ->
3227                 # After "<" in script data: "/" may start an end tag, "!" may start an escaped section
3228                 switch c = txt.charAt(cur++)
3229                         when '/'
3230                                 temporary_buffer = ''
3231                                 tok_state = tok_state_script_data_end_tag_open
3232                                 return
3233                         when '!'
3234                                 tok_state = tok_state_script_data_escape_start
3235                                 return new_character_token '<!' # fixfull split
3236                 tok_state = tok_state_script_data # Anything else
3237                 cur -= 1 # Reconsume
3238                 return new_character_token '<'
3239
3240         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3241         tok_state_script_data_end_tag_open = ->
3242                 # After "</" in script data: a letter starts a candidate end tag (name lowercased,
3243                 # original character kept in temporary_buffer); anything else is text again.
3244                 c = txt.charAt(cur++)
3245                 if is_uc_alpha(c)
3246                         tok_cur_tag = new_end_tag c.toLowerCase()
3247                 else if is_lc_alpha(c)
3248                         tok_cur_tag = new_end_tag c
3249                 else
3250                         # Anything else
3251                         tok_state = tok_state_script_data
3252                         cur -= 1 # Reconsume
3253                         return new_character_token '</'
3254                 temporary_buffer += c
3255                 tok_state = tok_state_script_data_end_tag_name
3256                 return
3257
3258         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
3259         tok_state_script_data_end_tag_name = ->
3260                 # Script data end tag name state: whitespace, "/" and ">" finish the tag only when it is an appropriate end tag
3261                 c = txt.charAt(cur++)
3262                 switch c
3263                         when "\t", "\n", "\u000c", ' '
3264                                 if is_appropriate_end_tag tok_cur_tag
3265                                         tok_state = tok_state_before_attribute_name
3266                                         return
3267                         when '/'
3268                                 if is_appropriate_end_tag tok_cur_tag
3269                                         tok_state = tok_state_self_closing_start_tag
3270                                         return
3271                         when '>'
3272                                 if is_appropriate_end_tag tok_cur_tag
3273                                         tok_state = tok_state_data
3274                                         return tok_cur_tag
3275                 # reached for letters, for any other character, and for the cases above when the tag is not appropriate
3276                 if is_uc_alpha(c)
3277                         tok_cur_tag.name += c.toLowerCase()
3278                         temporary_buffer += c
3279                         return
3280                 if is_lc_alpha(c)
3281                         tok_cur_tag.name += c
3282                         temporary_buffer += c
3283                         return
3284                 # Anything else
3285                 tok_state = tok_state_script_data
3286                 cur -= 1 # Reconsume
3287                 return new_character_token "</#{temporary_buffer}" # fixfull split
3288
3289         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3290         tok_state_script_data_escape_start = ->
3291                 c = txt.charAt(cur++) # the character right after "<!" in script data
3292                 unless c is '-'
3293                         # Anything else: back to plain script data, nothing emitted
3294                         tok_state = tok_state_script_data
3295                         cur -= 1 # Reconsume
3296                         return
3297                 tok_state = tok_state_script_data_escape_start_dash
3298                 return new_character_token '-'
3299
3300         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3301         tok_state_script_data_escape_start_dash = ->
3302                 c = txt.charAt(cur++) # the character right after "<!-" in script data
3303                 unless c is '-'
3304                         # Anything else: back to plain script data, nothing emitted
3305                         tok_state = tok_state_script_data
3306                         cur -= 1 # Reconsume
3307                         return
3308                 tok_state = tok_state_script_data_escaped_dash_dash
3309                 return new_character_token '-'
3310
3311         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3312         tok_state_script_data_escaped = ->
3313                 # Script data escaped state: "-" and "<" change state, EOF falls back to the data state
3314                 switch c = txt.charAt(cur++)
3315                         when '-'
3316                                 tok_state = tok_state_script_data_escaped_dash
3317                                 return new_character_token '-'
3318                         when '<'
3319                                 tok_state = tok_state_script_data_escaped_less_than_sign
3320                                 return
3321                         when "\u0000"
3322                                 parse_error()
3323                                 return new_character_token "\ufffd"
3324                         when '' # EOF
3325                                 tok_state = tok_state_data
3326                                 parse_error()
3327                                 cur -= 1 # Reconsume
3328                                 return
3329                 return new_character_token c # Anything else
3330
3331         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3332         tok_state_script_data_escaped_dash = ->
3333                 # Script data escaped dash state (one "-" seen): a second "-" moves on, most other input returns to the escaped state
3334                 switch c = txt.charAt(cur++)
3335                         when '-'
3336                                 tok_state = tok_state_script_data_escaped_dash_dash
3337                                 return new_character_token '-'
3338                         when '<'
3339                                 tok_state = tok_state_script_data_escaped_less_than_sign
3340                                 return
3341                         when "\u0000"
3342                                 parse_error()
3343                                 tok_state = tok_state_script_data_escaped
3344                                 return new_character_token "\ufffd"
3345                         when '' # EOF
3346                                 tok_state = tok_state_data
3347                                 parse_error()
3348                                 cur -= 1 # Reconsume
3349                                 return
3350                 tok_state = tok_state_script_data_escaped # Anything else
3351                 return new_character_token c
3352
3353         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3354         tok_state_script_data_escaped_dash_dash = ->
3355                 # Script data escaped dash dash state ("--" seen): ">" ends the escaped section, further "-" stay here
3356                 switch c = txt.charAt(cur++)
3357                         when '-'
3358                                 return new_character_token '-'
3359                         when '<'
3360                                 tok_state = tok_state_script_data_escaped_less_than_sign
3361                                 return
3362                         when '>'
3363                                 tok_state = tok_state_script_data
3364                                 return new_character_token '>'
3365                         when "\u0000"
3366                                 parse_error()
3367                                 tok_state = tok_state_script_data_escaped
3368                                 return new_character_token "\ufffd"
3369                         when '' # EOF
3370                                 parse_error()
3371                                 tok_state = tok_state_data
3372                                 cur -= 1 # Reconsume
3373                                 return
3374                 tok_state = tok_state_script_data_escaped # Anything else
3375                 return new_character_token c
3376
3377         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3378         tok_state_script_data_escaped_less_than_sign = ->
3379                 c = txt.charAt(cur++)
3380                 if c is '/'
3381                         temporary_buffer = ''
3382                         tok_state = tok_state_script_data_escaped_end_tag_open
3383                         return
3384                 if is_uc_alpha(c)
3385                         temporary_buffer = c.toLowerCase() # yes, really
3386                         tok_state = tok_state_script_data_double_escape_start
3387                         return new_character_token "<#{c}" # fixfull split
3388                 if is_lc_alpha(c)
3389                         temporary_buffer = c
3390                         tok_state = tok_state_script_data_double_escape_start
3391                         return new_character_token "<#{c}" # fixfull split
3392                 # Anything else
3393                 tok_state = tok_state_script_data_escaped
3394                 cur -= 1 # Reconsume
3395                 return new_character_token c
3396
3397         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3398         tok_state_script_data_escaped_end_tag_open = ->
3399                 c = txt.charAt(cur++)
3400                 if is_uc_alpha(c)
3401                         tok_cur_tag = new_end_tag c.toLowerCase()
3402                         temporary_buffer += c
3403                         tok_state = tok_state_script_data_escaped_end_tag_name
3404                         return
3405                 if is_lc_alpha(c)
3406                         tok_cur_tag = new_end_tag c
3407                         temporary_buffer += c
3408                         tok_state = tok_state_script_data_escaped_end_tag_name
3409                         return
3410                 # Anything else
3411                 tok_state = tok_state_script_data_escaped
3412                 cur -= 1 # Reconsume
3413                 return new_character_token '</' # fixfull split
3414
3415         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3416         tok_state_script_data_escaped_end_tag_name = ->
3417                 c = txt.charAt(cur++)
3418                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3419                         if is_appropriate_end_tag tok_cur_tag
3420                                 tok_state = tok_state_before_attribute_name
3421                                 return
3422                         # fall through
3423                 if c is '/'
3424                         if is_appropriate_end_tag tok_cur_tag
3425                                 tok_state = tok_state_self_closing_start_tag
3426                                 return
3427                         # fall through
3428                 if c is '>'
3429                         if is_appropriate_end_tag tok_cur_tag
3430                                 tok_state = tok_state_data
3431                                 return tok_cur_tag
3432                         # fall through
3433                 if is_uc_alpha(c)
3434                         tok_cur_tag.name += c.toLowerCase()
3435                         temporary_buffer += c.toLowerCase()
3436                         return
3437                 if is_lc_alpha(c)
3438                         tok_cur_tag.name += c
3439                         temporary_buffer += c.toLowerCase()
3440                         return
3441                 # Anything else
3442                 tok_state = tok_state_script_data_escaped
3443                 cur -= 1 # Reconsume
3444                 return new_character_token "</#{temporary_buffer}" # fixfull split
3445
3446         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3447         tok_state_script_data_double_escape_start = ->
3448                 c = txt.charAt(cur++)
3449                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3450                         if temporary_buffer is 'script'
3451                                 tok_state = tok_state_script_data_double_escaped
3452                         else
3453                                 tok_state = tok_state_script_data_escaped
3454                         return new_character_token c
3455                 if is_uc_alpha(c)
3456                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3457                         return new_character_token c
3458                 if is_lc_alpha(c)
3459                         temporary_buffer += c
3460                         return new_character_token c
3461                 # Anything else
3462                 tok_state = tok_state_script_data_escaped
3463                 cur -= 1 # Reconsume
3464                 return
3465
3466         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3467         tok_state_script_data_double_escaped = ->
3468                 c = txt.charAt(cur++)
3469                 if c is '-'
3470                         tok_state = tok_state_script_data_double_escaped_dash
3471                         return new_character_token '-'
3472                 if c is '<'
3473                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3474                         return new_character_token '<'
3475                 if c is "\u0000"
3476                         parse_error()
3477                         return new_character_token "\ufffd"
3478                 if c is '' # EOF
3479                         parse_error()
3480                         tok_state = tok_state_data
3481                         cur -= 1 # Reconsume
3482                         return
3483                 # Anything else
3484                 return new_character_token c
3485
3486         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3487         tok_state_script_data_double_escaped_dash = ->
3488                 c = txt.charAt(cur++)
3489                 if c is '-'
3490                         tok_state = tok_state_script_data_double_escaped_dash_dash
3491                         return new_character_token '-'
3492                 if c is '<'
3493                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3494                         return new_character_token '<'
3495                 if c is "\u0000"
3496                         parse_error()
3497                         tok_state = tok_state_script_data_double_escaped
3498                         return new_character_token "\ufffd"
3499                 if c is '' # EOF
3500                         parse_error()
3501                         tok_state = tok_state_data
3502                         cur -= 1 # Reconsume
3503                         return
3504                 # Anything else
3505                 tok_state = tok_state_script_data_double_escaped
3506                 return new_character_token c
3507
3508         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3509         tok_state_script_data_double_escaped_dash_dash = ->
3510                 c = txt.charAt(cur++)
3511                 if c is '-'
3512                         return new_character_token '-'
3513                 if c is '<'
3514                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3515                         return new_character_token '<'
3516                 if c is '>'
3517                         tok_state = tok_state_script_data
3518                         return new_character_token '>'
3519                 if c is "\u0000"
3520                         parse_error()
3521                         tok_state = tok_state_script_data_double_escaped
3522                         return new_character_token "\ufffd"
3523                 if c is '' # EOF
3524                         parse_error()
3525                         tok_state = tok_state_data
3526                         cur -= 1 # Reconsume
3527                         return
3528                 # Anything else
3529                 tok_state = tok_state_script_data_double_escaped
3530                 return new_character_token c
3531
3532         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3533         tok_state_script_data_double_escaped_less_than_sign = ->
3534                 c = txt.charAt(cur++)
3535                 if c is '/'
3536                         temporary_buffer = ''
3537                         tok_state = tok_state_script_data_double_escape_end
3538                         return new_character_token '/'
3539                 # Anything else
3540                 tok_state = tok_state_script_data_double_escaped
3541                 cur -= 1 # Reconsume
3542                 return
3543
3544         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3545         tok_state_script_data_double_escape_end = ->
3546                 c = txt.charAt(cur++)
3547                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3548                         if temporary_buffer is 'script'
3549                                 tok_state = tok_state_script_data_escaped
3550                         else
3551                                 tok_state = tok_state_script_data_double_escaped
3552                         return new_character_token c
3553                 if is_uc_alpha(c)
3554                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3555                         return new_character_token c
3556                 if is_lc_alpha(c)
3557                         temporary_buffer += c
3558                         return new_character_token c
3559                 # Anything else
3560                 tok_state = tok_state_script_data_double_escaped
3561                 cur -= 1 # Reconsume
3562                 return
3563
3564         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3565         tok_state_before_attribute_name = ->
3566                 attr_name = null
3567                 switch c = txt.charAt(cur++)
3568                         when "\t", "\n", "\u000c", ' '
3569                                 return null
3570                         when '/'
3571                                 tok_state = tok_state_self_closing_start_tag
3572                                 return null
3573                         when '>'
3574                                 tok_state = tok_state_data
3575                                 tmp = tok_cur_tag
3576                                 tok_cur_tag = null
3577                                 return tmp
3578                         when "\u0000"
3579                                 parse_error()
3580                                 attr_name = "\ufffd"
3581                         when '"', "'", '<', '='
3582                                 parse_error()
3583                                 attr_name = c
3584                         when '' # EOF
3585                                 parse_error()
3586                                 tok_state = tok_state_data
3587                         else
3588                                 if is_uc_alpha(c)
3589                                         attr_name = c.toLowerCase()
3590                                 else
3591                                         attr_name = c
3592                 if attr_name?
3593                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3594                         tok_state = tok_state_attribute_name
3595                 return null
3596
3597         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3598         tok_state_attribute_name = ->
3599                 switch c = txt.charAt(cur++)
3600                         when "\t", "\n", "\u000c", ' '
3601                                 tok_state = tok_state_after_attribute_name
3602                         when '/'
3603                                 tok_state = tok_state_self_closing_start_tag
3604                         when '='
3605                                 tok_state = tok_state_before_attribute_value
3606                         when '>'
3607                                 tok_state = tok_state_data
3608                                 tmp = tok_cur_tag
3609                                 tok_cur_tag = null
3610                                 return tmp
3611                         when "\u0000"
3612                                 parse_error()
3613                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3614                         when '"', "'", '<'
3615                                 parse_error()
3616                                 tok_cur_tag.attrs_a[0][0] += c
3617                         when '' # EOF
3618                                 parse_error()
3619                                 tok_state = tok_state_data
3620                         else
3621                                 if is_uc_alpha(c)
3622                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3623                                 else
3624                                         tok_cur_tag.attrs_a[0][0] += c
3625                 return null
3626
3627         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3628         tok_state_after_attribute_name = ->
3629                 c = txt.charAt(cur++)
3630                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3631                         return
3632                 if c is '/'
3633                         tok_state = tok_state_self_closing_start_tag
3634                         return
3635                 if c is '='
3636                         tok_state = tok_state_before_attribute_value
3637                         return
3638                 if c is '>'
3639                         tok_state = tok_state_data
3640                         return
3641                 if is_uc_alpha(c)
3642                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3643                         tok_state = tok_state_attribute_name
3644                         return
3645                 if c is "\u0000"
3646                         parse_error()
3647                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3648                         tok_state = tok_state_attribute_name
3649                         return
3650                 if c is '' # EOF
3651                         parse_error()
3652                         tok_state = tok_state_data
3653                         cur -= 1 # reconsume
3654                         return
3655                 if c is '"' or c is "'" or c is '<'
3656                         parse_error()
3657                         # fall through to Anything else
3658                 # Anything else
3659                 tok_cur_tag.attrs_a.unshift [c, '']
3660                 tok_state = tok_state_attribute_name
3661
3662         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3663         tok_state_before_attribute_value = ->
3664                 switch c = txt.charAt(cur++)
3665                         when "\t", "\n", "\u000c", ' '
3666                                 return null
3667                         when '"'
3668                                 tok_state = tok_state_attribute_value_double_quoted
3669                         when '&'
3670                                 tok_state = tok_state_attribute_value_unquoted
3671                                 cur -= 1
3672                         when "'"
3673                                 tok_state = tok_state_attribute_value_single_quoted
3674                         when "\u0000"
3675                                 # Parse error
3676                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3677                                 tok_state = tok_state_attribute_value_unquoted
3678                         when '>'
3679                                 # Parse error
3680                                 tok_state = tok_state_data
3681                                 tmp = tok_cur_tag
3682                                 tok_cur_tag = null
3683                                 return tmp
3684                         when '' # EOF
3685                                 parse_error()
3686                                 tok_state = tok_state_data
3687                         else
3688                                 tok_cur_tag.attrs_a[0][1] += c
3689                                 tok_state = tok_state_attribute_value_unquoted
3690                 return null
3691
3692         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3693         tok_state_attribute_value_double_quoted = ->
3694                 switch c = txt.charAt(cur++)
3695                         when '"'
3696                                 tok_state = tok_state_after_attribute_value_quoted
3697                         when '&'
3698                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3699                         when "\u0000"
3700                                 # Parse error
3701                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3702                         when '' # EOF
3703                                 parse_error()
3704                                 tok_state = tok_state_data
3705                         else
3706                                 tok_cur_tag.attrs_a[0][1] += c
3707                 return null
3708
3709         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3710         tok_state_attribute_value_single_quoted = ->
3711                 switch c = txt.charAt(cur++)
3712                         when "'"
3713                                 tok_state = tok_state_after_attribute_value_quoted
3714                         when '&'
3715                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3716                         when "\u0000"
3717                                 # Parse error
3718                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3719                         when '' # EOF
3720                                 parse_error()
3721                                 tok_state = tok_state_data
3722                         else
3723                                 tok_cur_tag.attrs_a[0][1] += c
3724                 return null
3725
3726         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3727         tok_state_attribute_value_unquoted = ->
3728                 switch c = txt.charAt(cur++)
3729                         when "\t", "\n", "\u000c", ' '
3730                                 tok_state = tok_state_before_attribute_name
3731                         when '&'
3732                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3733                         when '>'
3734                                 tok_state = tok_state_data
3735                                 tmp = tok_cur_tag
3736                                 tok_cur_tag = null
3737                                 return tmp
3738                         when "\u0000"
3739                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3740                         when '' # EOF
3741                                 parse_error()
3742                                 tok_state = tok_state_data
3743                         else
3744                                 # Parse Error if ', <, = or ` (backtick)
3745                                 tok_cur_tag.attrs_a[0][1] += c
3746                 return null
3747
3748         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3749         tok_state_after_attribute_value_quoted = ->
3750                 switch c = txt.charAt(cur++)
3751                         when "\t", "\n", "\u000c", ' '
3752                                 tok_state = tok_state_before_attribute_name
3753                         when '/'
3754                                 tok_state = tok_state_self_closing_start_tag
3755                         when '>'
3756                                 tok_state = tok_state_data
3757                                 tmp = tok_cur_tag
3758                                 tok_cur_tag = null
3759                                 return tmp
3760                         when '' # EOF
3761                                 parse_error()
3762                                 tok_state = tok_state_data
3763                         else
3764                                 # Parse Error
3765                                 tok_state = tok_state_before_attribute_name
3766                                 cur -= 1 # we didn't handle that char
3767                 return null
3768
3769         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3770         tok_state_self_closing_start_tag = ->
3771                 c = txt.charAt(cur++)
3772                 if c is '>'
3773                         tok_cur_tag.flag 'self-closing', true
3774                         tok_state = tok_state_data
3775                         return tok_cur_tag
3776                 if c is ''
3777                         parse_error()
3778                         tok_state = tok_state_data
3779                         cur -= 1 # Reconsume
3780                         return
3781                 # Anything else
3782                 parse_error()
3783                 tok_state = tok_state_before_attribute_name
3784                 cur -= 1 # Reconsume
3785                 return
3786
3787         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3788         # WARNING: put a comment token in tok_cur_tag before setting this state
3789         tok_state_bogus_comment = ->
3790                 next_gt = txt.indexOf '>', cur
3791                 if next_gt is -1
3792                         val = txt.substr cur
3793                         cur = txt.length
3794                 else
3795                         val = txt.substr cur, (next_gt - cur)
3796                         cur = next_gt + 1
3797                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3798                 tok_cur_tag.text += val
3799                 tok_state = tok_state_data
3800                 return tok_cur_tag
3801
3802         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3803         tok_state_markup_declaration_open = ->
3804                 if txt.substr(cur, 2) is '--'
3805                         cur += 2
3806                         tok_cur_tag = new_comment_token ''
3807                         tok_state = tok_state_comment_start
3808                         return
3809                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3810                         cur += 7
3811                         tok_state = tok_state_doctype
3812                         return
3813                 acn = adjusted_current_node()
3814                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3815                         cur += 7
3816                         tok_state = tok_state_cdata_section
3817                         return
3818                 # Otherwise
3819                 parse_error()
3820                 tok_cur_tag = new_comment_token ''
3821                 tok_state = tok_state_bogus_comment
3822                 return
3823
3824         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3825         tok_state_comment_start = ->
3826                 switch c = txt.charAt(cur++)
3827                         when '-'
3828                                 tok_state = tok_state_comment_start_dash
3829                         when "\u0000"
3830                                 parse_error()
3831                                 tok_state = tok_state_comment
3832                                 return new_character_token "\ufffd"
3833                         when '>'
3834                                 parse_error()
3835                                 tok_state = tok_state_data
3836                                 return tok_cur_tag
3837                         when '' # EOF
3838                                 parse_error()
3839                                 tok_state = tok_state_data
3840                                 cur -= 1 # Reconsume
3841                                 return tok_cur_tag
3842                         else
3843                                 tok_cur_tag.text += c
3844                                 tok_state = tok_state_comment
3845                 return null
3846
3847         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3848         tok_state_comment_start_dash = ->
3849                 switch c = txt.charAt(cur++)
3850                         when '-'
3851                                 tok_state = tok_state_comment_end
3852                         when "\u0000"
3853                                 parse_error()
3854                                 tok_cur_tag.text += "-\ufffd"
3855                                 tok_state = tok_state_comment
3856                         when '>'
3857                                 parse_error()
3858                                 tok_state = tok_state_data
3859                                 return tok_cur_tag
3860                         when '' # EOF
3861                                 parse_error()
3862                                 tok_state = tok_state_data
3863                                 cur -= 1 # Reconsume
3864                                 return tok_cur_tag
3865                         else
3866                                 tok_cur_tag.text += "-#{c}"
3867                                 tok_state = tok_state_comment
3868                 return null
3869
3870         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3871         tok_state_comment = ->
3872                 switch c = txt.charAt(cur++)
3873                         when '-'
3874                                 tok_state = tok_state_comment_end_dash
3875                         when "\u0000"
3876                                 parse_error()
3877                                 tok_cur_tag.text += "\ufffd"
3878                         when '' # EOF
3879                                 parse_error()
3880                                 tok_state = tok_state_data
3881                                 cur -= 1 # Reconsume
3882                                 return tok_cur_tag
3883                         else
3884                                 tok_cur_tag.text += c
3885                 return null
3886
3887         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3888         tok_state_comment_end_dash = ->
3889                 switch c = txt.charAt(cur++)
3890                         when '-'
3891                                 tok_state = tok_state_comment_end
3892                         when "\u0000"
3893                                 parse_error()
3894                                 tok_cur_tag.text += "-\ufffd"
3895                                 tok_state = tok_state_comment
3896                         when '' # EOF
3897                                 parse_error()
3898                                 tok_state = tok_state_data
3899                                 cur -= 1 # Reconsume
3900                                 return tok_cur_tag
3901                         else
3902                                 tok_cur_tag.text += "-#{c}"
3903                                 tok_state = tok_state_comment
3904                 return null
3905
3906         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3907         tok_state_comment_end = ->
3908                 switch c = txt.charAt(cur++)
3909                         when '>'
3910                                 tok_state = tok_state_data
3911                                 return tok_cur_tag
3912                         when "\u0000"
3913                                 parse_error()
3914                                 tok_cur_tag.text += "--\ufffd"
3915                                 tok_state = tok_state_comment
3916                         when '!'
3917                                 parse_error()
3918                                 tok_state = tok_state_comment_end_bang
3919                         when '-'
3920                                 parse_error()
3921                                 tok_cur_tag.text += '-'
3922                         when '' # EOF
3923                                 parse_error()
3924                                 tok_state = tok_state_data
3925                                 cur -= 1 # Reconsume
3926                                 return tok_cur_tag
3927                         else
3928                                 parse_error()
3929                                 tok_cur_tag.text += "--#{c}"
3930                                 tok_state = tok_state_comment
3931                 return null
3932
3933         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3934         tok_state_comment_end_bang = ->
3935                 switch c = txt.charAt(cur++)
3936                         when '-'
3937                                 tok_cur_tag.text += "--!#{c}"
3938                                 tok_state = tok_state_comment_end_dash
3939                         when '>'
3940                                 tok_state = tok_state_data
3941                                 return tok_cur_tag
3942                         when "\u0000"
3943                                 parse_error()
3944                                 tok_cur_tag.text += "--!\ufffd"
3945                                 tok_state = tok_state_comment
3946                         when '' # EOF
3947                                 parse_error()
3948                                 tok_state = tok_state_data
3949                                 cur -= 1 # Reconsume
3950                                 return tok_cur_tag
3951                         else
3952                                 tok_cur_tag.text += "--!#{c}"
3953                                 tok_state = tok_state_comment
3954                 return null
3955
3956         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3957         tok_state_doctype = ->
3958                 switch c = txt.charAt(cur++)
3959                         when "\t", "\u000a", "\u000c", ' '
3960                                 tok_state = tok_state_before_doctype_name
3961                         when '' # EOF
3962                                 parse_error()
3963                                 tok_state = tok_state_data
3964                                 el = new_doctype_token ''
3965                                 el.flag 'force-quirks', true
3966                                 cur -= 1 # Reconsume
3967                                 return el
3968                         else
3969                                 parse_error()
3970                                 tok_state = tok_state_before_doctype_name
3971                                 cur -= 1 # Reconsume
3972                 return null
3973
        # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
3975         tok_state_before_doctype_name = ->
3976                 c = txt.charAt(cur++)
3977                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3978                         return
3979                 if is_uc_alpha(c)
3980                         tok_cur_tag = new_doctype_token c.toLowerCase()
3981                         tok_state = tok_state_doctype_name
3982                         return
3983                 if c is "\u0000"
3984                         parse_error()
3985                         tok_cur_tag = new_doctype_token "\ufffd"
3986                         tok_state = tok_state_doctype_name
3987                         return
3988                 if c is '>'
3989                         parse_error()
3990                         el = new_doctype_token ''
3991                         el.flag 'force-quirks', true
3992                         tok_state = tok_state_data
3993                         return el
3994                 if c is '' # EOF
3995                         parse_error()
3996                         tok_state = tok_state_data
3997                         el = new_doctype_token ''
3998                         el.flag 'force-quirks', true
3999                         cur -= 1 # Reconsume
4000                         return el
4001                 # Anything else
4002                 tok_cur_tag = new_doctype_token c
4003                 tok_state = tok_state_doctype_name
4004                 return null
4005
4006         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4007         tok_state_doctype_name = ->
4008                 c = txt.charAt(cur++)
4009                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4010                         tok_state = tok_state_after_doctype_name
4011                         return
4012                 if c is '>'
4013                         tok_state = tok_state_data
4014                         return tok_cur_tag
4015                 if is_uc_alpha(c)
4016                         tok_cur_tag.name += c.toLowerCase()
4017                         return
4018                 if c is "\u0000"
4019                         parse_error()
4020                         tok_cur_tag.name += "\ufffd"
4021                         return
4022                 if c is '' # EOF
4023                         parse_error()
4024                         tok_state = tok_state_data
4025                         tok_cur_tag.flag 'force-quirks', true
4026                         cur -= 1 # Reconsume
4027                         return tok_cur_tag
4028                 # Anything else
4029                 tok_cur_tag.name += c
4030                 return null
4031
4032         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4033         tok_state_after_doctype_name = ->
4034                 c = txt.charAt(cur++)
4035                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4036                         return
4037                 if c is '>'
4038                         tok_state = tok_state_data
4039                         return tok_cur_tag
4040                 if c is '' # EOF
4041                         parse_error()
4042                         tok_state = tok_state_data
4043                         tok_cur_tag.flag 'force-quirks', true
4044                         cur -= 1 # Reconsume
4045                         return tok_cur_tag
4046                 # Anything else
4047                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4048                         cur += 5
4049                         tok_state = tok_state_after_doctype_public_keyword
4050                         return
4051                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4052                         cur += 5
4053                         tok_state = tok_state_after_doctype_system_keyword
4054                         return
4055                 parse_error()
4056                 tok_cur_tag.flag 'force-quirks', true
4057                 tok_state = tok_state_bogus_doctype
4058                 return null
4059
4060         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4061         tok_state_after_doctype_public_keyword = ->
4062                 c = txt.charAt(cur++)
4063                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4064                         tok_state = tok_state_before_doctype_public_identifier
4065                         return
4066                 if c is '"'
4067                         parse_error()
4068                         tok_cur_tag.public_identifier = ''
4069                         tok_state = tok_state_doctype_public_identifier_double_quoted
4070                         return
4071                 if c is "'"
4072                         parse_error()
4073                         tok_cur_tag.public_identifier = ''
4074                         tok_state = tok_state_doctype_public_identifier_single_quoted
4075                         return
4076                 if c is '>'
4077                         parse_error()
4078                         tok_cur_tag.flag 'force-quirks', true
4079                         tok_state = tok_state_data
4080                         return tok_cur_tag
4081                 if c is '' # EOF
4082                         parse_error()
4083                         tok_state = tok_state_data
4084                         tok_cur_tag.flag 'force-quirks', true
4085                         cur -= 1 # Reconsume
4086                         return tok_cur_tag
4087                 # Anything else
4088                 parse_error()
4089                 tok_cur_tag.flag 'force-quirks', true
4090                 tok_state = tok_state_bogus_doctype
4091                 return null
4092
4093         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4094         tok_state_before_doctype_public_identifier = ->
4095                 c = txt.charAt(cur++)
4096                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4097                         return
4098                 if c is '"'
4099                         parse_error()
4100                         tok_cur_tag.public_identifier = ''
4101                         tok_state = tok_state_doctype_public_identifier_double_quoted
4102                         return
4103                 if c is "'"
4104                         parse_error()
4105                         tok_cur_tag.public_identifier = ''
4106                         tok_state = tok_state_doctype_public_identifier_single_quoted
4107                         return
4108                 if c is '>'
4109                         parse_error()
4110                         tok_cur_tag.flag 'force-quirks', true
4111                         tok_state = tok_state_data
4112                         return tok_cur_tag
4113                 if c is '' # EOF
4114                         parse_error()
4115                         tok_state = tok_state_data
4116                         tok_cur_tag.flag 'force-quirks', true
4117                         cur -= 1 # Reconsume
4118                         return tok_cur_tag
4119                 # Anything else
4120                 parse_error()
4121                 tok_cur_tag.flag 'force-quirks', true
4122                 tok_state = tok_state_bogus_doctype
4123                 return null
4124
4125
4126         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4127         tok_state_doctype_public_identifier_double_quoted = ->
4128                 c = txt.charAt(cur++)
4129                 if c is '"'
4130                         tok_state = tok_state_after_doctype_public_identifier
4131                         return
4132                 if c is "\u0000"
4133                         parse_error()
4134                         tok_cur_tag.public_identifier += "\ufffd"
4135                         return
4136                 if c is '>'
4137                         parse_error()
4138                         tok_cur_tag.flag 'force-quirks', true
4139                         tok_state = tok_state_data
4140                         return tok_cur_tag
4141                 if c is '' # EOF
4142                         parse_error()
4143                         tok_state = tok_state_data
4144                         tok_cur_tag.flag 'force-quirks', true
4145                         cur -= 1 # Reconsume
4146                         return tok_cur_tag
4147                 # Anything else
4148                 tok_cur_tag.public_identifier += c
4149                 return null
4150
4151         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4152         tok_state_doctype_public_identifier_single_quoted = ->
4153                 c = txt.charAt(cur++)
4154                 if c is "'"
4155                         tok_state = tok_state_after_doctype_public_identifier
4156                         return
4157                 if c is "\u0000"
4158                         parse_error()
4159                         tok_cur_tag.public_identifier += "\ufffd"
4160                         return
4161                 if c is '>'
4162                         parse_error()
4163                         tok_cur_tag.flag 'force-quirks', true
4164                         tok_state = tok_state_data
4165                         return tok_cur_tag
4166                 if c is '' # EOF
4167                         parse_error()
4168                         tok_state = tok_state_data
4169                         tok_cur_tag.flag 'force-quirks', true
4170                         cur -= 1 # Reconsume
4171                         return tok_cur_tag
4172                 # Anything else
4173                 tok_cur_tag.public_identifier += c
4174                 return null
4175
4176         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4177         tok_state_after_doctype_public_identifier = ->
4178                 c = txt.charAt(cur++)
4179                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4180                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4181                         return
4182                 if c is '>'
4183                         tok_state = tok_state_data
4184                         return tok_cur_tag
4185                 if c is '"'
4186                         parse_error()
4187                         tok_cur_tag.system_identifier = ''
4188                         tok_state = tok_state_doctype_system_identifier_double_quoted
4189                         return
4190                 if c is "'"
4191                         parse_error()
4192                         tok_cur_tag.system_identifier = ''
4193                         tok_state = tok_state_doctype_system_identifier_single_quoted
4194                         return
4195                 if c is '' # EOF
4196                         parse_error()
4197                         tok_state = tok_state_data
4198                         tok_cur_tag.flag 'force-quirks', true
4199                         cur -= 1 # Reconsume
4200                         return tok_cur_tag
4201                 # Anything else
4202                 parse_error()
4203                 tok_cur_tag.flag 'force-quirks', true
4204                 tok_state = tok_state_bogus_doctype
4205                 return null
4206
4207         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4208         tok_state_between_doctype_public_and_system_identifiers = ->
4209                 c = txt.charAt(cur++)
4210                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4211                         return
4212                 if c is '>'
4213                         tok_state = tok_state_data
4214                         return tok_cur_tag
4215                 if c is '"'
4216                         parse_error()
4217                         tok_cur_tag.system_identifier = ''
4218                         tok_state = tok_state_doctype_system_identifier_double_quoted
4219                         return
4220                 if c is "'"
4221                         parse_error()
4222                         tok_cur_tag.system_identifier = ''
4223                         tok_state = tok_state_doctype_system_identifier_single_quoted
4224                         return
4225                 if c is '' # EOF
4226                         parse_error()
4227                         tok_state = tok_state_data
4228                         tok_cur_tag.flag 'force-quirks', true
4229                         cur -= 1 # Reconsume
4230                         return tok_cur_tag
4231                 # Anything else
4232                 parse_error()
4233                 tok_cur_tag.flag 'force-quirks', true
4234                 tok_state = tok_state_bogus_doctype
4235                 return null
4236
4237         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4238         tok_state_after_doctype_system_keyword = ->
4239                 c = txt.charAt(cur++)
4240                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4241                         tok_state = tok_state_before_doctype_system_identifier
4242                         return
4243                 if c is '"'
4244                         parse_error()
4245                         tok_cur_tag.system_identifier = ''
4246                         tok_state = tok_state_doctype_system_identifier_double_quoted
4247                         return
4248                 if c is "'"
4249                         parse_error()
4250                         tok_cur_tag.system_identifier = ''
4251                         tok_state = tok_state_doctype_system_identifier_single_quoted
4252                         return
4253                 if c is '>'
4254                         parse_error()
4255                         tok_cur_tag.flag 'force-quirks', true
4256                         tok_state = tok_state_data
4257                         return tok_cur_tag
4258                 if c is '' # EOF
4259                         parse_error()
4260                         tok_state = tok_state_data
4261                         tok_cur_tag.flag 'force-quirks', true
4262                         cur -= 1 # Reconsume
4263                         return tok_cur_tag
4264                 # Anything else
4265                 parse_error()
4266                 tok_cur_tag.flag 'force-quirks', true
4267                 tok_state = tok_state_bogus_doctype
4268                 return null
4269
4270         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4271         tok_state_before_doctype_system_identifier = ->
4272                 c = txt.charAt(cur++)
4273                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4274                         return
4275                 if c is '"'
4276                         tok_cur_tag.system_identifier = ''
4277                         tok_state = tok_state_doctype_system_identifier_double_quoted
4278                         return
4279                 if c is "'"
4280                         tok_cur_tag.system_identifier = ''
4281                         tok_state = tok_state_doctype_system_identifier_single_quoted
4282                         return
4283                 if c is '>'
4284                         parse_error()
4285                         tok_cur_tag.flag 'force-quirks', true
4286                         tok_state = tok_state_data
4287                         return tok_cur_tag
4288                 if c is '' # EOF
4289                         parse_error()
4290                         tok_state = tok_state_data
4291                         tok_cur_tag.flag 'force-quirks', true
4292                         cur -= 1 # Reconsume
4293                         return tok_cur_tag
4294                 # Anything else
4295                 parse_error()
4296                 tok_cur_tag.flag 'force-quirks', true
4297                 tok_state = tok_state_bogus_doctype
4298                 return null
4299
4300         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4301         tok_state_doctype_system_identifier_double_quoted = ->
4302                 c = txt.charAt(cur++)
4303                 if c is '"'
4304                         tok_state = tok_state_after_doctype_system_identifier
4305                         return
4306                 if c is "\u0000"
4307                         parse_error()
4308                         tok_cur_tag.system_identifier += "\ufffd"
4309                         return
4310                 if c is '>'
4311                         parse_error()
4312                         tok_cur_tag.flag 'force-quirks', true
4313                         tok_state = tok_state_data
4314                         return tok_cur_tag
4315                 if c is '' # EOF
4316                         parse_error()
4317                         tok_state = tok_state_data
4318                         tok_cur_tag.flag 'force-quirks', true
4319                         cur -= 1 # Reconsume
4320                         return tok_cur_tag
4321                 # Anything else
4322                 tok_cur_tag.system_identifier += c
4323                 return null
4324
4325         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4326         tok_state_doctype_system_identifier_single_quoted = ->
4327                 c = txt.charAt(cur++)
4328                 if c is "'"
4329                         tok_state = tok_state_after_doctype_system_identifier
4330                         return
4331                 if c is "\u0000"
4332                         parse_error()
4333                         tok_cur_tag.system_identifier += "\ufffd"
4334                         return
4335                 if c is '>'
4336                         parse_error()
4337                         tok_cur_tag.flag 'force-quirks', true
4338                         tok_state = tok_state_data
4339                         return tok_cur_tag
4340                 if c is '' # EOF
4341                         parse_error()
4342                         tok_state = tok_state_data
4343                         tok_cur_tag.flag 'force-quirks', true
4344                         cur -= 1 # Reconsume
4345                         return tok_cur_tag
4346                 # Anything else
4347                 tok_cur_tag.system_identifier += c
4348                 return null
4349
4350         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4351         tok_state_after_doctype_system_identifier = ->
4352                 c = txt.charAt(cur++)
4353                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4354                         return
4355                 if c is '>'
4356                         tok_state = tok_state_data
4357                         return tok_cur_tag
4358                 if c is '' # EOF
4359                         parse_error()
4360                         tok_state = tok_state_data
4361                         tok_cur_tag.flag 'force-quirks', true
4362                         cur -= 1 # Reconsume
4363                         return tok_cur_tag
4364                 # Anything else
4365                 parse_error()
4366                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4367                 tok_state = tok_state_bogus_doctype
4368                 return null
4369
4370         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4371         tok_state_bogus_doctype = ->
4372                 c = txt.charAt(cur++)
4373                 if c is '>'
4374                         tok_state = tok_state_data
4375                         return tok_cur_tag
4376                 if c is '' # EOF
4377                         tok_state = tok_state_data
4378                         cur -= 1 # Reconsume
4379                         return tok_cur_tag
4380                 # Anything else
4381                 return null
4382
4383         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4384         tok_state_cdata_section = ->
4385                 tok_state = tok_state_data
4386                 next_gt = txt.indexOf ']]>', cur
4387                 if next_gt is -1
4388                         val = txt.substr cur
4389                         cur = txt.length
4390                 else
4391                         val = txt.substr cur, (next_gt - cur)
4392                         cur = next_gt + 3
4393                 return new_character_token val # fixfull split
4394
4395         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4396         # Don't set this as a state, just call it
4397         # returns a string (NOT a text node)
4398         parse_character_reference = (allowed_char = null, in_attr = false) ->
4399                 if cur >= txt.length
4400                         return '&'
4401                 switch c = txt.charAt(cur)
4402                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4403                                 # explicitly not a parse error
4404                                 return '&'
4405                         when ';'
4406                                 # there has to be "one or more" alnums between & and ; to be a parse error
4407                                 return '&'
4408                         when '#'
4409                                 if cur + 1 >= txt.length
4410                                         return '&'
4411                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4412                                         base = 16
4413                                         charset = hex_chars
4414                                         start = cur + 2
4415                                 else
4416                                         charset = digits
4417                                         start = cur + 1
4418                                         base = 10
4419                                 i = 0
4420                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4421                                         i += 1
4422                                 if i is 0
4423                                         return '&'
4424                                 cur = start + i
4425                                 if txt.charAt(start + i) is ';'
4426                                         cur += 1
4427                                 else
4428                                         parse_error()
4429                                 code_point = txt.substr(start, i)
4430                                 while code_point.charAt(0) is '0' and code_point.length > 1
4431                                         code_point = code_point.substr 1
4432                                 code_point = parseInt(code_point, base)
4433                                 if unicode_fixes[code_point]?
4434                                         parse_error()
4435                                         return unicode_fixes[code_point]
4436                                 else
4437                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4438                                                 parse_error()
4439                                                 return "\ufffd"
4440                                         else
4441                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4442                                                         parse_error()
4443                                                 return from_code_point code_point
4444                                 return
4445                         else
4446                                 for i in [0...31]
4447                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4448                                                 break
4449                                 if i is 0
4450                                         # exit early, because parse_error() below needs at least one alnum
4451                                         return '&'
4452                                 if txt.charAt(cur + i) is ';'
4453                                         i += 1 # include ';' terminator in value
4454                                         decoded = decode_named_char_ref txt.substr(cur, i)
4455                                         if decoded?
4456                                                 cur += i
4457                                                 return decoded
4458                                         parse_error()
4459                                         return '&'
4460                                 else
4461                                         # no ';' terminator (only legacy char refs)
4462                                         max = i
4463                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4464                                                 c = legacy_char_refs[txt.substr(cur, i)]
4465                                                 if c?
4466                                                         if in_attr
4467                                                                 if txt.charAt(cur + i) is '='
4468                                                                         # "because some legacy user agents will
4469                                                                         # misinterpret the markup in those cases"
4470                                                                         parse_error()
4471                                                                         return '&'
4472                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4473                                                                         # this makes attributes forgiving about url args
4474                                                                         return '&'
4475                                                         # ok, and besides the weird exceptions for attributes...
4476                                                         # return the matching char
4477                                                         cur += i # consume entity chars
4478                                                         parse_error() # because no terminating ";"
4479                                                         return c
4480                                         parse_error()
4481                                         return '&'
4482                 return # never reached
4483
4484         # tree constructor initialization
4485         # see comments on TYPE_TAG/etc for the structure of this data
4486         txt = args.html
4487         cur = 0
4488         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4489         open_els = []
4490         afe = [] # active formatting elements
4491         template_ins_modes = []
4492         ins_mode = ins_mode_initial
4493         original_ins_mode = ins_mode # TODO check spec
4494         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4495         flag_frameset_ok = true
4496         flag_parsing = true
4497         flag_foster_parenting = false
4498         form_element_pointer = null
4499         temporary_buffer = null
4500         pending_table_character_tokens = []
4501         head_element_pointer = null
4502         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4503         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4504
4505         # tokenizer initialization
4506         tok_state = tok_state_data
4507
4508         # text pre-processing
4509         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4510         txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4511         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4512         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4513
4514         if args.name is "plain-text-unsafe.dat #4"
4515                 console.log "hi"
4516         # proccess input
4517         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4518         while flag_parsing
4519                 t = tok_state()
4520                 if t?
4521                         process_token t
4522                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4523         return doc.children
4524
# Serializes a list of nodes into one comma-separated string.
#
# els:               list of objects that have a serialize(shallow, show_ids) method
# shallow, show_ids: passed through unchanged to each element's serialize()
#
# Returns '' for an empty list.
serialize_els = (els, shallow, show_ids) ->
        serialized = ''
        sep = '' # no separator in front of the first element
        for t in els
                serialized += sep + t.serialize(shallow, show_ids)
                sep = ','
        return serialized
4533
# Public API: each entry is attached to module.exports under its own name.
public_api = {
        parse_html
        debug_log_reset
        debug_log_each
        TYPE_TAG
        TYPE_TEXT
        TYPE_COMMENT
        TYPE_DOCTYPE
        NS_HTML
        NS_MATHML
        NS_SVG
}
for own export_name, export_value of public_api
        module.exports[export_name] = export_value