JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
0e79a1d5499b24c8b79834148cdcb9bfa74d4821
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
24 #
25 # Deviations from that spec:
26 #
27 #   Purposeful: search this file for "WTAG"
28 #
29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
30
31
32 # stacks/lists
33 #
34 # the spec uses a many different words do indicate which ends of lists/stacks
35 # they are talking about (and relative movement within the lists/stacks). This
36 # section splains. I'm implementing "lists" (afe and open_els) the same way
37 # (both as stacks)
38 #
39 # stacks grow downward (current element is index=0)
40 #
41 # example: open_els = [a, b, c, d, e, f, g]
42 #
43 # "grows downwards" means it's visualized like this: (index: el, names)
44 #
45 #   6: g "start of the list", "topmost", "first"
46 #   5: f
47 #   4: e "previous" (to d), "above", "before"
48 #   3: d   (previous/next are relative to this element)
49 #   2: c "next", "after", "lower", "below"
50 #   1: b
51 #   0: a "end of the list", "current node", "bottommost", "last"
52
53
54 # browser
55 # note: to get this to run outside a browser, you'll have to write a native
56 # implementation of decode_named_char_ref()
57 unless module?.exports?
58         window.wheic = {}
59         module = exports: window.wheic
60
61 from_code_point = (x) ->
62         if String.fromCodePoint?
63                 return String.fromCodePoint x
64         else
65                 if x <= 0xffff
66                         return String.fromCharCode x
67                 x -= 0x10000
68                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
69
70 # Each node is an obect of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
73 TYPE_COMMENT = 2
74 TYPE_DOCTYPE = 3
75 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
78 TYPE_EOF = 6
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
81
82 # namespace constants
83 NS_HTML = 1
84 NS_MATHML = 2
85 NS_SVG = 3
86
87 g_debug_log = []
88 debug_log_reset = ->
89         g_debug_log = []
90 debug_log = (str) ->
91         g_debug_log.push str
92 debug_log_each = (cb) ->
93         for str in g_debug_log
94                 cb str
95
96 prev_node_id = 0
97 class Node
98         constructor: (type, args = {}) ->
99                 @type = type # one of the TYPE_* constants above
100                 @name = args.name ? '' # tag name
101                 @text = args.text ? '' # contents for text/comment nodes
102                 @attrs = args.attrs ? {}
103                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
104                 @children = args.children ? []
105                 @namespace = args.namespace ? NS_HTML
106                 @parent = args.parent ? null
107                 @token = args.token ? null
108                 @flags = args.flags ? {}
109                 if args.id?
110                         @id = "#{args.id}+"
111                 else
112                         @id = "#{++prev_node_id}"
113         acknowledge_self_closing: ->
114                 if @token?
115                         @token.flag 'did_self_close'
116                 else
117                         @flag 'did_self_close', true
118         flag: (key, value = null) ->
119                 if value?
120                         @flags[key] = value
121                 else
122                         return @flags[key]
123         serialize: (shallow = false, show_ids = false) -> # for unit tests
124                 ret = ''
125                 switch @type
126                         when TYPE_TAG
127                                 ret += 'tag:'
128                                 ret += JSON.stringify @name
129                                 ret += ','
130                                 if show_ids
131                                         ret += "##{@id},"
132                                 if shallow
133                                         break
134                                 attr_keys = []
135                                 for k of @attrs
136                                         attr_keys.push k
137                                 attr_keys.sort()
138                                 ret += '{'
139                                 sep = ''
140                                 for k in attr_keys
141                                         ret += sep
142                                         sep = ','
143                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
144                                 ret += '},['
145                                 sep = ''
146                                 for c in @children
147                                         ret += sep
148                                         sep = ','
149                                         ret += c.serialize shallow, show_ids
150                                 ret += ']'
151                         when TYPE_TEXT
152                                 ret += 'text:'
153                                 ret += JSON.stringify @text
154                         when TYPE_COMMENT
155                                 ret += 'comment:'
156                                 ret += JSON.stringify @text
157                         when TYPE_DOCTYPE
158                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
159                         when TYPE_AFE_MARKER
160                                 ret += 'marker'
161                         when TYPE_AAA_BOOKMARK
162                                 ret += 'aaa_bookmark'
163                         else
164                                 ret += 'unknown:'
165                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
166                 return ret
167
168 # helpers: (only take args that are normally known when parser creates nodes)
169 new_open_tag = (name) ->
170         return new Node TYPE_START_TAG, name: name
171 new_end_tag = (name) ->
172         return new Node TYPE_END_TAG, name: name
173 new_element = (name) ->
174         return new Node TYPE_TAG, name: name
175 new_text_node = (txt) ->
176         return new Node TYPE_TEXT, text: txt
177 new_character_token = new_text_node
178 new_comment_token = (txt) ->
179         return new Node TYPE_COMMENT, text: txt
180 new_doctype_token = (name) ->
181         return new Node TYPE_DOCTYPE, name: name
182 new_eof_token = ->
183         return new Node TYPE_EOF
184 new_afe_marker = ->
185         return new Node TYPE_AFE_MARKER
186 new_aaa_bookmark = ->
187         return new Node TYPE_AAA_BOOKMARK
188
189 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
190 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
191 digits = "0123456789"
192 alnum = lc_alpha + uc_alpha + digits
193 hex_chars = digits + "abcdefABCDEF"
194
195 is_uc_alpha = (str) ->
196         return str.length is 1 and uc_alpha.indexOf(str) > -1
197 is_lc_alpha = (str) ->
198         return str.length is 1 and lc_alpha.indexOf(str) > -1
199
200 # some SVG elements have dashes in them
201 tag_name_chars = alnum + "-"
202
203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
204 space_chars = "\u0009\u000a\u000c\u000d\u0020"
205 is_space = (txt) ->
206         return txt.length is 1 and space_chars.indexOf(txt) > -1
207 is_space_tok = (t) ->
208         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
209
210 is_input_hidden_tok = (t) ->
211         return false unless t.type is TYPE_START_TAG
212         for a in t.attrs_a
213                 if a[0] is 'type'
214                         if a[1].toLowerCase() is 'hidden'
215                                 return true
216                         return false
217         return false
218
219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
221
222 unicode_fixes = {}
223 unicode_fixes[0x00] = "\uFFFD"
224 unicode_fixes[0x80] = "\u20AC"
225 unicode_fixes[0x82] = "\u201A"
226 unicode_fixes[0x83] = "\u0192"
227 unicode_fixes[0x84] = "\u201E"
228 unicode_fixes[0x85] = "\u2026"
229 unicode_fixes[0x86] = "\u2020"
230 unicode_fixes[0x87] = "\u2021"
231 unicode_fixes[0x88] = "\u02C6"
232 unicode_fixes[0x89] = "\u2030"
233 unicode_fixes[0x8A] = "\u0160"
234 unicode_fixes[0x8B] = "\u2039"
235 unicode_fixes[0x8C] = "\u0152"
236 unicode_fixes[0x8E] = "\u017D"
237 unicode_fixes[0x91] = "\u2018"
238 unicode_fixes[0x92] = "\u2019"
239 unicode_fixes[0x93] = "\u201C"
240 unicode_fixes[0x94] = "\u201D"
241 unicode_fixes[0x95] = "\u2022"
242 unicode_fixes[0x96] = "\u2013"
243 unicode_fixes[0x97] = "\u2014"
244 unicode_fixes[0x98] = "\u02DC"
245 unicode_fixes[0x99] = "\u2122"
246 unicode_fixes[0x9A] = "\u0161"
247 unicode_fixes[0x9B] = "\u203A"
248 unicode_fixes[0x9C] = "\u0153"
249 unicode_fixes[0x9E] = "\u017E"
250 unicode_fixes[0x9F] = "\u0178"
251
252 # These are the character references that don't need a terminating semicolon
253 # min length: 2, max: 6, none are a prefix of any other.
254 legacy_char_refs = {
255         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
256         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
257         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
258         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
259         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
260         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
261         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
262         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
263         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
264         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
265         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
266         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
267         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
268         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
269         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
270         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
271         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
272         yen: '¥', yuml: 'ÿ'
273 }
274
275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
276 raw_text_elements = ['script', 'style']
277 escapable_raw_text_elements = ['textarea', 'title']
278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
279 svg_elements = [
280         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
281         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
282         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
283         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
284         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
285         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
286         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
287         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
288         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
289         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
290         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
291         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
292         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
293         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
294         'view', 'vkern'
295 ]
296
297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
298 mathml_elements = [
299         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
300         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
301         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
302         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
303         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
304         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
305         'determinant', 'diff', 'divergence', 'divide', 'domain',
306         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
307         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
308         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
309         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
310         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
311         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
312         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
313         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
314         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
315         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
316         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
317         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
318         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
319         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
320         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
321         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
322         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
323         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
324         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
325         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
326         'vectorproduct', 'xor'
327 ]
328 # foreign_elements = [svg_elements..., mathml_elements...]
329 #normal_elements = All other allowed HTML elements are normal elements.
330
331 special_elements = {
332         # HTML:
333         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
334         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
335         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
336         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
337         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
338         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
339         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
340         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
341         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
342         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
343         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
344
345         menu:NS_HTML,menuitem:NS_HTML, # WATWG adds these
346
347         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
348         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
349         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
350         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
351         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
352         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
353         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
354
355         # MathML:
356         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
357         'annotation-xml':NS_MATHML,
358
359         # SVG:
360         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
361 }
362
363 formatting_elements = {
364          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
365          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
366          u: true
367 }
368
369 mathml_text_integration = {
370         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
371 }
372 is_mathml_text_integration_point = (el) ->
373         return mathml_text_integration[el.name] is el.namespace
374 is_html_integration = (el) -> # DON'T PASS A TOKEN
375         if el.namespace is NS_MATHML
376                 if el.name is 'annotation-xml'
377                         if el.attrs.encoding?
378                                 if el.attrs.encoding.toLowerCase() is 'text/html'
379                                         return true
380                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
381                                         return true
382                 return false
383         if el.namespace is NS_SVG
384                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
385                         return true
386         return false
387
388 h_tags = {
389         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
390 }
391
392 foster_parenting_targets = {
393         table: NS_HTML
394         tbody: NS_HTML
395         tfoot: NS_HTML
396         thead: NS_HTML
397         tr: NS_HTML
398 }
399
400 end_tag_implied = {
401         dd: NS_HTML
402         dt: NS_HTML
403         li: NS_HTML
404         option: NS_HTML
405         optgroup: NS_HTML
406         p: NS_HTML
407         rb: NS_HTML
408         rp: NS_HTML
409         rt: NS_HTML
410         rtc: NS_HTML
411 }
412
413 el_is_special = (e) ->
414         return special_elements[e.name] is e.namespace
415
416 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
417 el_is_special_not_adp = (el) ->
418         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
419
420 svg_name_fixes = {
421         altglyph: 'altGlyph'
422         altglyphdef: 'altGlyphDef'
423         altglyphitem: 'altGlyphItem'
424         animatecolor: 'animateColor'
425         animatemotion: 'animateMotion'
426         animatetransform: 'animateTransform'
427         clippath: 'clipPath'
428         feblend: 'feBlend'
429         fecolormatrix: 'feColorMatrix'
430         fecomponenttransfer: 'feComponentTransfer'
431         fecomposite: 'feComposite'
432         feconvolvematrix: 'feConvolveMatrix'
433         fediffuselighting: 'feDiffuseLighting'
434         fedisplacementmap: 'feDisplacementMap'
435         fedistantlight: 'feDistantLight'
436         fedropshadow: 'feDropShadow'
437         feflood: 'feFlood'
438         fefunca: 'feFuncA'
439         fefuncb: 'feFuncB'
440         fefuncg: 'feFuncG'
441         fefuncr: 'feFuncR'
442         fegaussianblur: 'feGaussianBlur'
443         feimage: 'feImage'
444         femerge: 'feMerge'
445         femergenode: 'feMergeNode'
446         femorphology: 'feMorphology'
447         feoffset: 'feOffset'
448         fepointlight: 'fePointLight'
449         fespecularlighting: 'feSpecularLighting'
450         fespotlight: 'feSpotLight'
451         fetile: 'feTile'
452         feturbulence: 'feTurbulence'
453         foreignobject: 'foreignObject'
454         glyphref: 'glyphRef'
455         lineargradient: 'linearGradient'
456         radialgradient: 'radialGradient'
457         textpath: 'textPath'
458 }
459 svg_attribute_fixes = {
460         attributename: 'attributeName'
461         attributetype: 'attributeType'
462         basefrequency: 'baseFrequency'
463         baseprofile: 'baseProfile'
464         calcmode: 'calcMode'
465         clippathunits: 'clipPathUnits'
466         contentscripttype: 'contentScriptType'
467         contentstyletype: 'contentStyleType'
468         diffuseconstant: 'diffuseConstant'
469         edgemode: 'edgeMode'
470         externalresourcesrequired: 'externalResourcesRequired'
471         filterres: 'filterRes'
472         filterunits: 'filterUnits'
473         glyphref: 'glyphRef'
474         gradienttransform: 'gradientTransform'
475         gradientunits: 'gradientUnits'
476         kernelmatrix: 'kernelMatrix'
477         kernelunitlength: 'kernelUnitLength'
478         keypoints: 'keyPoints'
479         keysplines: 'keySplines'
480         keytimes: 'keyTimes'
481         lengthadjust: 'lengthAdjust'
482         limitingconeangle: 'limitingConeAngle'
483         markerheight: 'markerHeight'
484         markerunits: 'markerUnits'
485         markerwidth: 'markerWidth'
486         maskcontentunits: 'maskContentUnits'
487         maskunits: 'maskUnits'
488         numoctaves: 'numOctaves'
489         pathlength: 'pathLength'
490         patterncontentunits: 'patternContentUnits'
491         patterntransform: 'patternTransform'
492         patternunits: 'patternUnits'
493         pointsatx: 'pointsAtX'
494         pointsaty: 'pointsAtY'
495         pointsatz: 'pointsAtZ'
496         preservealpha: 'preserveAlpha'
497         preserveaspectratio: 'preserveAspectRatio'
498         primitiveunits: 'primitiveUnits'
499         refx: 'refX'
500         refy: 'refY'
501         repeatcount: 'repeatCount'
502         repeatdur: 'repeatDur'
503         requiredextensions: 'requiredExtensions'
504         requiredfeatures: 'requiredFeatures'
505         specularconstant: 'specularConstant'
506         specularexponent: 'specularExponent'
507         spreadmethod: 'spreadMethod'
508         startoffset: 'startOffset'
509         stddeviation: 'stdDeviation'
510         stitchtiles: 'stitchTiles'
511         surfacescale: 'surfaceScale'
512         systemlanguage: 'systemLanguage'
513         tablevalues: 'tableValues'
514         targetx: 'targetX'
515         targety: 'targetY'
516         textlength: 'textLength'
517         viewbox: 'viewBox'
518         viewtarget: 'viewTarget'
519         xchannelselector: 'xChannelSelector'
520         ychannelselector: 'yChannelSelector'
521         zoomandpan: 'zoomAndPan'
522 }
523 foreign_attr_fixes = {
524         'xlink:actuate': 'xlink actuate'
525         'xlink:arcrole': 'xlink arcrole'
526         'xlink:href': 'xlink href'
527         'xlink:role': 'xlink role'
528         'xlink:show': 'xlink show'
529         'xlink:title': 'xlink title'
530         'xlink:type': 'xlink type'
531         'xml:base': 'xml base'
532         'xml:lang': 'xml lang'
533         'xml:space': 'xml space'
534         'xmlns': 'xmlns'
535         'xmlns:xlink': 'xmlns xlink'
536 }
537 adjust_mathml_attributes = (t) ->
538         for a in t.attrs_a
539                 if a[0] is 'definitionurl'
540                         a[0] = 'definitionURL'
541         return
542 adjust_svg_attributes = (t) ->
543         for a in t.attrs_a
544                 if svg_attribute_fixes[a[0]]?
545                         a[0] = svg_attribute_fixes[a[0]]
546         return
547 adjust_foreign_attributes = (t) ->
548         # fixfull
549         for a in t.attrs_a
550                 if foreign_attr_fixes[a[0]]?
551                         a[0] = foreign_attr_fixes[a[0]]
552         return
553
554 # decode_named_char_ref()
555 #
556 # The list of named character references is _huge_ so ask the browser to decode
557 # for us instead of wasting bandwidth/space on including the table here.
558 #
559 # Pass without the "&" but with the ";" examples:
560 #    for "&amp" pass "amp;"
561 #    for "&#x2032" pass "x2032;"
562 g_dncr = {
563         cache: {}
564         textarea: document.createElement('textarea')
565 }
566 # TODO test this in IE8
567 decode_named_char_ref = (txt) ->
568         txt = "&#{txt}"
569         decoded = g_dncr.cache[txt]
570         return decoded if decoded?
571         g_dncr.textarea.innerHTML = txt
572         decoded = g_dncr.textarea.value
573         return null if decoded is txt
574         return g_dncr.cache[txt] = decoded
575
576 parse_html = (args) ->
577         txt = null
578         cur = null # index of next char in txt to be parsed
579         # declare doc and tokenizer variables so they're in scope below
580         doc = null
581         open_els = null # stack of open elements
582         afe = null # active formatting elements
583         template_ins_modes = null
584         ins_mode = null
585         original_ins_mode = null
586         tok_state = null
587         tok_cur_tag = null # partially parsed tag
588         flag_scripting = null
589         flag_frameset_ok = null
590         flag_parsing = null
591         flag_foster_parenting = null
592         form_element_pointer = null
593         temporary_buffer = null
594         pending_table_character_tokens = null
595         head_element_pointer = null
596         flag_fragment_parsing = null
597         context_element = null
598
599         stop_parsing = ->
600                 flag_parsing = false
601
602         parse_error = ->
603                 if args.error_cb?
604                         args.error_cb cur
605                 else
606                         console.log "Parse error at character #{cur} of #{txt.length}"
607
608         afe_push = (new_el) ->
609                 matches = 0
610                 for el, i in afe
611                         if el.name is new_el.name and el.namespace is new_el.namespace
612                                 for k, v of el.attrs
613                                         continue unless new_el.attrs[k] is v
614                                 for k, v of new_el.attrs
615                                         continue unless el.attrs[k] is v
616                                 matches += 1
617                                 if matches is 3
618                                         afe.splice i, 1
619                                         break
620                 afe.unshift new_el
621         afe_push_marker = ->
622                 afe.unshift new_afe_marker()
623
624         # the functions below impliment the Tree Contstruction algorithm
625         # http://www.w3.org/TR/html5/syntax.html#tree-construction
626
627         # But first... the helpers
628         template_tag_is_open = ->
629                 for t in open_els
630                         if t.name is 'template' and t.namespace is NS_HTML
631                                 return true
632                 return false
633         is_in_scope_x = (tag_name, scope, namespace) ->
634                 for t in open_els
635                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
636                                 return true
637                         if scope[t.name] is t.namespace
638                                 return false
639                 return false
640         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
641                 for t in open_els
642                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
643                                 return true
644                         if scope[t.name] is t.namespace
645                                 return false
646                         if scope2[t.name] is t.namespace
647                                 return false
648                 return false
649         standard_scopers = {
650                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
651                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
652                 template: NS_HTML, mi: NS_MATHML,
653
654                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
655                 'annotation-xml': NS_MATHML,
656
657                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
658         }
659         button_scopers = button: NS_HTML
660         li_scopers = ol: NS_HTML, ul: NS_HTML
661         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
662         is_in_scope = (tag_name, namespace = null) ->
663                 return is_in_scope_x tag_name, standard_scopers, namespace
664         is_in_button_scope = (tag_name, namespace = null) ->
665                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
666         is_in_table_scope = (tag_name, namespace = null) ->
667                 return is_in_scope_x tag_name, table_scopers, namespace
668         # aka is_in_list_item_scope
669         is_in_li_scope = (tag_name, namespace = null) ->
670                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
671         is_in_select_scope = (tag_name, namespace = null) ->
672                 for t in open_els
673                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
674                                 return true
675                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
676                                 return false
677                 return false
678         # this checks for a particular element, not by name
679         # this requires a namespace match
680         el_is_in_scope = (needle) ->
681                 for el in open_els
682                         if el is needle
683                                 return true
684                         if standard_scopers[el.name] is el.namespace
685                                 return false
686                 return false
687
688         clear_to_table_stopers = {
689                 'table': true
690                 'template': true
691                 'html': true
692         }
693         clear_stack_to_table_context = ->
694                 loop
695                         if clear_to_table_stopers[open_els[0].name]?
696                                 break
697                         open_els.shift()
698                 return
699         clear_to_table_body_stopers = {
700                 tbody: NS_HTML
701                 tfoot: NS_HTML
702                 thead: NS_HTML
703                 template: NS_HTML
704                 html: NS_HTML
705         }
706         clear_stack_to_table_body_context = ->
707                 loop
708                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
709                                 break
710                         open_els.shift()
711                 return
712         clear_to_table_row_stopers = {
713                 'tr': true
714                 'template': true
715                 'html': true
716         }
717         clear_stack_to_table_row_context = ->
718                 loop
719                         if clear_to_table_row_stopers[open_els[0].name]?
720                                 break
721                         open_els.shift()
722                 return
723         clear_afe_to_marker = ->
724                 loop
725                         return unless afe.length > 0 # this happens in fragment case, ?spec error
726                         el = afe.shift()
727                         if el.type is TYPE_AFE_MARKER
728                                 return
729                 return
730
731         # 8.2.3.1 ...
732         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
733         reset_ins_mode = ->
734                 # 1. Let last be false.
735                 last = false
736                 # 2. Let node be the last node in the stack of open elements.
737                 node_i = 0
738                 node = open_els[node_i]
739                 # 3. Loop: If node is the first node in the stack of open elements,
740                 # then set last to true, and, if the parser was originally created as
741                 # part of the HTML fragment parsing algorithm (fragment case) set node
742                 # to the context element.
743                 loop
744                         if node_i is open_els.length - 1
745                                 last = true
746                                 # fixfull (fragment case)
747
748                         # 4. If node is a select element, run these substeps:
749                         if node.name is 'select' and node.namespace is NS_HTML
750                                 # 1. If last is true, jump to the step below labeled done.
751                                 unless last
752                                         # 2. Let ancestor be node.
753                                         ancestor_i = node_i
754                                         ancestor = node
755                                         # 3. Loop: If ancestor is the first node in the stack of
756                                         # open elements, jump to the step below labeled done.
757                                         loop
758                                                 if ancestor_i is open_els.length - 1
759                                                         break
760                                                 # 4. Let ancestor be the node before ancestor in the stack
761                                                 # of open elements.
762                                                 ancestor_i += 1
763                                                 ancestor = open_els[ancestor_i]
764                                                 # 5. If ancestor is a template node, jump to the step below
765                                                 # labeled done.
766                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
767                                                         break
768                                                 # 6. If ancestor is a table node, switch the insertion mode
769                                                 # to "in select in table" and abort these steps.
770                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
771                                                         ins_mode = ins_mode_in_select_in_table
772                                                         return
773                                                 # 7. Jump back to the step labeled loop.
774                                 # 8. Done: Switch the insertion mode to "in select" and abort
775                                 # these steps.
776                                 ins_mode = ins_mode_in_select
777                                 return
778                         # 5. If node is a td or th element and last is false, then switch
779                         # the insertion mode to "in cell" and abort these steps.
780                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
781                                 ins_mode = ins_mode_in_cell
782                                 return
783                         # 6. If node is a tr element, then switch the insertion mode to "in
784                         # row" and abort these steps.
785                         if node.name is 'tr' and node.namespace is NS_HTML
786                                 ins_mode = ins_mode_in_row
787                                 return
788                         # 7. If node is a tbody, thead, or tfoot element, then switch the
789                         # insertion mode to "in table body" and abort these steps.
790                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
791                                 ins_mode = ins_mode_in_table_body
792                                 return
793                         # 8. If node is a caption element, then switch the insertion mode
794                         # to "in caption" and abort these steps.
795                         if node.name is 'caption' and node.namespace is NS_HTML
796                                 ins_mode = ins_mode_in_caption
797                                 return
798                         # 9. If node is a colgroup element, then switch the insertion mode
799                         # to "in column group" and abort these steps.
800                         if node.name is 'colgroup' and node.namespace is NS_HTML
801                                 ins_mode = ins_mode_in_column_group
802                                 return
803                         # 10. If node is a table element, then switch the insertion mode to
804                         # "in table" and abort these steps.
805                         if node.name is 'table' and node.namespace is NS_HTML
806                                 ins_mode = ins_mode_in_table
807                                 return
808                         # 11. If node is a template element, then switch the insertion mode
809                         # to the current template insertion mode and abort these steps.
810                         if node.name is 'template' and node.namespace is NS_HTML
811                                 ins_mode = template_ins_modes[0]
812                                 return
813                         # 12. If node is a head element and last is true, then switch the
814                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
815                         # these steps. (fragment case)
816                         if node.name is 'head' and node.namespace is NS_HTML and last
817                                 ins_mode = ins_mode_in_body
818                                 return
819                         # 13. If node is a head element and last is false, then switch the
820                         # insertion mode to "in head" and abort these steps.
821                         if node.name is 'head' and node.namespace is NS_HTML and last is false
822                                 ins_mode = ins_mode_in_head
823                                 return
824                         # 14. If node is a body element, then switch the insertion mode to
825                         # "in body" and abort these steps.
826                         if node.name is 'body' and node.namespace is NS_HTML
827                                 ins_mode = ins_mode_in_body
828                                 return
829                         # 15. If node is a frameset element, then switch the insertion mode
830                         # to "in frameset" and abort these steps. (fragment case)
831                         if node.name is 'frameset' and node.namespace is NS_HTML
832                                 ins_mode = ins_mode_in_frameset
833                                 return
834                         # 16. If node is an html element, run these substeps:
835                         if node.name is 'html' and node.namespace is NS_HTML
836                                 # 1. If the head element pointer is null, switch the insertion
837                                 # mode to "before head" and abort these steps. (fragment case)
838                                 if head_element_pointer is null
839                                         ins_mode = ins_mode_before_head
840                                 else
841                                         # 2. Otherwise, the head element pointer is not null,
842                                         # switch the insertion mode to "after head" and abort these
843                                         # steps.
844                                         ins_mode = ins_mode_after_head
845                                 return
846                         # 17. If last is true, then switch the insertion mode to "in body"
847                         # and abort these steps. (fragment case)
848                         if last
849                                 ins_mode = ins_mode_in_body
850                                 return
851                         # 18. Let node now be the node before node in the stack of open
852                         # elements.
853                         node_i += 1
854                         node = open_els[node_i]
855                         # 19. Return to the step labeled loop.
856
857         # 8.2.3.2
858
859         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
860         adjusted_current_node = ->
861                 if open_els.length is 1 and flag_fragment_parsing
862                         return context_element
863                 return open_els[0]
864
865         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
866         # this implementation is structured (mostly) as described at the link above.
867         # capitalized comments are the "labels" described at the link above.
868         reconstruct_afe = ->
869                 return if afe.length is 0
870                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
871                         return
872                 # Rewind
873                 i = 0
874                 loop
875                         if i is afe.length - 1
876                                 break
877                         i += 1
878                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
879                                 i -= 1 # Advance
880                                 break
881                 # Create
882                 loop
883                         el = insert_html_element afe[i].token
884                         afe[i] = el
885                         break if i is 0
886                         i -= 1 # Advance
887
888         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
889         # adoption agency algorithm
890         # overview here:
891         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
892         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
893         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
894         adoption_agency = (subject) ->
895                 debug_log "adoption_agency()"
896                 debug_log "tree: #{serialize_els doc.children, false, true}"
897                 debug_log "open_els: #{serialize_els open_els, true, true}"
898                 debug_log "afe: #{serialize_els afe, true, true}"
899                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
900                         el = open_els[0]
901                         open_els.shift()
902                         # remove it from the list of active formatting elements (if found)
903                         for t, i in afe
904                                 if t is el
905                                         afe.splice i, 1
906                                         break
907                         debug_log "aaa: starting off with subject on top of stack, exiting"
908                         return
909                 outer = 0
910                 loop
911                         if outer >= 8
912                                 return
913                         outer += 1
914                         # 5. Let formatting element be the last element in the list of
915                         # active formatting elements that: is between the end of the list
916                         # and the last scope marker in the list, if any, or the start of
917                         # the list otherwise, and  has the tag name subject.
918                         fe = null
919                         for t, fe_of_afe in afe
920                                 if t.type is TYPE_AFE_MARKER
921                                         break
922                                 if t.name is subject
923                                         fe = t
924                                         break
925                         # If there is no such element, then abort these steps and instead
926                         # act as described in the "any other end tag" entry above.
927                         if fe is null
928                                 debug_log "aaa: fe not found in afe"
929                                 in_body_any_other_end_tag subject
930                                 return
931                         # 6. If formatting element is not in the stack of open elements,
932                         # then this is a parse error; remove the element from the list, and
933                         # abort these steps.
934                         in_open_els = false
935                         for t, fe_of_open_els in open_els
936                                 if t is fe
937                                         in_open_els = true
938                                         break
939                         unless in_open_els
940                                 debug_log "aaa: fe not found in open_els"
941                                 parse_error()
942                                 # "remove it from the list" must mean afe, since it's not in open_els
943                                 afe.splice fe_of_afe, 1
944                                 return
945                         # 7. If formatting element is in the stack of open elements, but
946                         # the element is not in scope, then this is a parse error; abort
947                         # these steps.
948                         unless el_is_in_scope fe
949                                 debug_log "aaa: fe not in scope"
950                                 parse_error()
951                                 return
952                         # 8. If formatting element is not the current node, this is a parse
953                         # error. (But do not abort these steps.)
954                         unless open_els[0] is fe
955                                 parse_error()
956                                 # continue
957                         # 9. Let furthest block be the topmost node in the stack of open
958                         # elements that is lower in the stack than formatting element, and
959                         # is an element in the special category. There might not be one.
960                         fb = null
961                         fb_of_open_els = null
962                         for t, i in open_els
963                                 if t is fe
964                                         break
965                                 if el_is_special t
966                                         fb = t
967                                         fb_of_open_els = i
968                                         # and continue, to see if there's one that's more "topmost"
969                         # 10. If there is no furthest block, then the UA must first pop all
970                         # the nodes from the bottom of the stack of open elements, from the
971                         # current node up to and including formatting element, then remove
972                         # formatting element from the list of active formatting elements,
973                         # and finally abort these steps.
974                         if fb is null
975                                 debug_log "aaa: no fb"
976                                 loop
977                                         t = open_els.shift()
978                                         if t is fe
979                                                 afe.splice fe_of_afe, 1
980                                                 return
981                         # 11. Let common ancestor be the element immediately above
982                         # formatting element in the stack of open elements.
983                         ca = open_els[fe_of_open_els + 1] # common ancestor
984
985                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
986                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
987                         bookmark = new_aaa_bookmark()
988                         for t, i in afe
989                                 if t is fe
990                                         afe.splice i, 0, bookmark
991                                         break
992                         node = last_node = fb
993                         inner = 0
994                         loop
995                                 inner += 1
996                                 # 3. Let node be the element immediately above node in the
997                                 # stack of open elements, or if node is no longer in the stack
998                                 # of open elements (e.g. because it got removed by this
999                                 # algorithm), the element that was immediately above node in
1000                                 # the stack of open elements before node was removed.
1001                                 node_next = null
1002                                 for t, i in open_els
1003                                         if t is node
1004                                                 node_next = open_els[i + 1]
1005                                                 break
1006                                 node = node_next ? node_above
1007                                 debug_log "inner loop #{inner}"
1008                                 debug_log "tree: #{serialize_els doc.children, false, true}"
1009                                 debug_log "open_els: #{serialize_els open_els, true, true}"
1010                                 debug_log "afe: #{serialize_els afe, true, true}"
1011                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1012                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1013                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1014                                 debug_log "node: #{node.serialize true, true}"
1015                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1016
1017                                 # 4. If node is formatting element, then go to the next step in
1018                                 # the overall algorithm.
1019                                 if node is fe
1020                                         break
1021                                 debug_log "the meat"
1022                                 # 5. If inner loop counter is greater than three and node is in
1023                                 # the list of active formatting elements, then remove node from
1024                                 # the list of active formatting elements.
1025                                 node_in_afe = false
1026                                 for t, i in afe
1027                                         if t is node
1028                                                 if inner > 3
1029                                                         afe.splice i, 1
1030                                                         debug_log "max out inner"
1031                                                 else
1032                                                         node_in_afe = true
1033                                                         debug_log "in afe"
1034                                                 break
1035                                 # 6. If node is not in the list of active formatting elements,
1036                                 # then remove node from the stack of open elements and then go
1037                                 # back to the step labeled inner loop.
1038                                 unless node_in_afe
1039                                         debug_log "not in afe"
1040                                         for t, i in open_els
1041                                                 if t is node
1042                                                         node_above = open_els[i + 1]
1043                                                         open_els.splice i, 1
1044                                                         break
1045                                         continue
1046                                 debug_log "the bones"
1047                                 # 7. create an element for the token for which the element node
1048                                 # was created, in the HTML namespace, with common ancestor as
1049                                 # the intended parent; replace the entry for node in the list
1050                                 # of active formatting elements with an entry for the new
1051                                 # element, replace the entry for node in the stack of open
1052                                 # elements with an entry for the new element, and let node be
1053                                 # the new element.
1054                                 new_node = token_to_element node.token, NS_HTML, ca
1055                                 for t, i in afe
1056                                         if t is node
1057                                                 afe[i] = new_node
1058                                                 debug_log "replaced in afe"
1059                                                 break
1060                                 for t, i in open_els
1061                                         if t is node
1062                                                 node_above = open_els[i + 1]
1063                                                 open_els[i] = new_node
1064                                                 debug_log "replaced in open_els"
1065                                                 break
1066                                 node = new_node
1067                                 # 8. If last node is furthest block, then move the
1068                                 # aforementioned bookmark to be immediately after the new node
1069                                 # in the list of active formatting elements.
1070                                 if last_node is fb
1071                                         for t, i in afe
1072                                                 if t is bookmark
1073                                                         afe.splice i, 1
1074                                                         debug_log "removed bookmark"
1075                                                         break
1076                                         for t, i in afe
1077                                                 if t is node
1078                                                         # "after" means lower
1079                                                         afe.splice i, 0, bookmark # "after as <-
1080                                                         debug_log "placed bookmark after node"
1081                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1082                                                         break
1083                                 # 9. Insert last node into node, first removing it from its
1084                                 # previous parent node if any.
1085                                 if last_node.parent?
1086                                         debug_log "last_node has parent"
1087                                         for c, i in last_node.parent.children
1088                                                 if c is last_node
1089                                                         debug_log "removing last_node from parent"
1090                                                         last_node.parent.children.splice i, 1
1091                                                         break
1092                                 node.children.push last_node
1093                                 last_node.parent = node
1094                                 # 10. Let last node be node.
1095                                 last_node = node
1096                                 debug_log "at last"
1097                                 # 11. Return to the step labeled inner loop.
1098                         # 14. Insert whatever last node ended up being in the previous step
1099                         # at the appropriate place for inserting a node, but using common
1100                         # ancestor as the override target.
1101
1102                         # In the case where fe is immediately followed by fb:
1103                         #   * inner loop exits out early (node==fe)
1104                         #   * last_node is fb
1105                         #   * last_node is still in the tree (not a duplicate)
1106                         if last_node.parent?
1107                                 debug_log "FEFIRST? last_node has parent"
1108                                 for c, i in last_node.parent.children
1109                                         if c is last_node
1110                                                 debug_log "removing last_node from parent"
1111                                                 last_node.parent.children.splice i, 1
1112                                                 break
1113
1114                         debug_log "after aaa inner loop"
1115                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1116                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1117                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1118                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1119                         debug_log "tree: #{serialize_els doc.children, false, true}"
1120
1121                         debug_log "insert"
1122
1123
1124                         # can't use standard insert token thing, because it's already in
1125                         # open_els and must stay at it's current position in open_els
1126                         dest = adjusted_insertion_location ca
1127                         dest[0].children.splice dest[1], 0, last_node
1128                         last_node.parent = dest[0]
1129
1130
1131                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1132                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1133                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1134                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1135                         debug_log "tree: #{serialize_els doc.children, false, true}"
1136
1137                         # 15. Create an element for the token for which formatting element
1138                         # was created, in the HTML namespace, with furthest block as the
1139                         # intended parent.
1140                         new_element = token_to_element fe.token, NS_HTML, fb
1141                         # 16. Take all of the child nodes of furthest block and append them
1142                         # to the element created in the last step.
1143                         while fb.children.length
1144                                 t = fb.children.shift()
1145                                 t.parent = new_element
1146                                 new_element.children.push t
1147                         # 17. Append that new element to furthest block.
1148                         new_element.parent = fb
1149                         fb.children.push new_element
1150                         # 18. Remove formatting element from the list of active formatting
1151                         # elements, and insert the new element into the list of active
1152                         # formatting elements at the position of the aforementioned
1153                         # bookmark.
1154                         for t, i in afe
1155                                 if t is fe
1156                                         afe.splice i, 1
1157                                         break
1158                         for t, i in afe
1159                                 if t is bookmark
1160                                         afe[i] = new_element
1161                                         break
1162                         # 19. Remove formatting element from the stack of open elements,
1163                         # and insert the new element into the stack of open elements
1164                         # immediately below the position of furthest block in that stack.
1165                         for t, i in open_els
1166                                 if t is fe
1167                                         open_els.splice i, 1
1168                                         break
1169                         for t, i in open_els
1170                                 if t is fb
1171                                         open_els.splice i, 0, new_element
1172                                         break
1173                         # 20. Jump back to the step labeled outer loop.
1174                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1175                         debug_log "tree: #{serialize_els doc.children, false, true}"
1176                         debug_log "open_els: #{serialize_els open_els, true, true}"
1177                         debug_log "afe: #{serialize_els afe, true, true}"
1178                 debug_log "AAA DONE"
1179
1180         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1181         close_p_element = ->
1182                 generate_implied_end_tags 'p' # arg is exception
1183                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1184                         parse_error()
1185                 while open_els.length > 1 # just in case
1186                         el = open_els.shift()
1187                         if el.name is 'p' and el.namespace is NS_HTML
1188                                 return
1189         close_p_if_in_button_scope = ->
1190                 if is_in_button_scope 'p', NS_HTML
1191                         close_p_element()
1192
1193         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1194         # aka insert_a_character = (t) ->
1195         insert_character = (t) ->
1196                 dest = adjusted_insertion_location()
1197                 # fixfull check for Document node
1198                 if dest[1] > 0
1199                         prev = dest[0].children[dest[1] - 1]
1200                         if prev.type is TYPE_TEXT
1201                                 prev.text += t.text
1202                                 return
1203                 dest[0].children.splice dest[1], 0, t
1204
1205
1206         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1207         process_token = (t) ->
1208                 acn = adjusted_current_node()
1209                 unless acn?
1210                         ins_mode t
1211                         return
1212                 if acn.namespace is NS_HTML
1213                         ins_mode t
1214                         return
1215                 if is_mathml_text_integration_point(acn)
1216                         if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
1217                                 ins_mode t
1218                                 return
1219                         if t.type is TYPE_TEXT
1220                                 ins_mode t
1221                                 return
1222                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1223                         ins_mode t
1224                         return
1225                 if is_html_integration acn
1226                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1227                                 ins_mode t
1228                                 return
1229                 if t.type is TYPE_EOF
1230                         ins_mode t
1231                         return
1232                 in_foreign_content t
1233                 return
1234
1235         # 8.2.5.1
1236         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1237         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1238         adjusted_insertion_location = (override_target = null) ->
1239                 # 1. If there was an override target specified, then let target be the
1240                 # override target.
1241                 if override_target?
1242                         target = override_target
1243                 else # Otherwise, let target be the current node.
1244                         target = open_els[0]
1245                 # 2. Determine the adjusted insertion location using the first matching
1246                 # steps from the following list:
1247                 #
1248                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1249                 # thead, or tr element Foster parenting happens when content is
1250                 # misnested in tables.
1251                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1252                         loop # once. this is here so we can ``break`` to "abort these substeps"
1253                                 # 1. Let last template be the last template element in the
1254                                 # stack of open elements, if any.
1255                                 last_template = null
1256                                 last_template_i = null
1257                                 for el, i in open_els
1258                                         if el.name is 'template' and el.namespace is NS_HTML
1259                                                 last_template = el
1260                                                 last_template_i = i
1261                                                 break
1262                                 # 2. Let last table be the last table element in the stack of
1263                                 # open elements, if any.
1264                                 last_table = null
1265                                 last_table_i
1266                                 for el, i in open_els
1267                                         if el.name is 'table' and el.namespace is NS_HTML
1268                                                 last_table = el
1269                                                 last_table_i = i
1270                                                 break
1271                                 # 3. If there is a last template and either there is no last
1272                                 # table, or there is one, but last template is lower (more
1273                                 # recently added) than last table in the stack of open
1274                                 # elements, then: let adjusted insertion location be inside
1275                                 # last template's template contents, after its last child (if
1276                                 # any), and abort these substeps.
1277                                 if last_template and (last_table is null or last_template_i < last_table_i)
1278                                         target = last_template # fixfull should be it's contents
1279                                         target_i = target.children.length
1280                                         break
1281                                 # 4. If there is no last table, then let adjusted insertion
1282                                 # location be inside the first element in the stack of open
1283                                 # elements (the html element), after its last child (if any),
1284                                 # and abort these substeps. (fragment case)
1285                                 if last_table is null
1286                                         # this is odd
1287                                         target = open_els[open_els.length - 1]
1288                                         target_i = target.children.length
1289                                         break
1290                                 # 5. If last table has a parent element, then let adjusted
1291                                 # insertion location be inside last table's parent element,
1292                                 # immediately before last table, and abort these substeps.
1293                                 if last_table.parent?
1294                                         for c, i in last_table.parent.children
1295                                                 if c is last_table
1296                                                         target = last_table.parent
1297                                                         target_i = i
1298                                                         break
1299                                         break
1300                                 # 6. Let previous element be the element immediately above last
1301                                 # table in the stack of open elements.
1302                                 #
1303                                 # huh? how could it not have a parent?
1304                                 previous_element = open_els[last_table_i + 1]
1305                                 # 7. Let adjusted insertion location be inside previous
1306                                 # element, after its last child (if any).
1307                                 target = previous_element
1308                                 target_i = target.children.length
1309                                 # Note: These steps are involved in part because it's possible
1310                                 # for elements, the table element in this case in particular,
1311                                 # to have been moved by a script around in the DOM, or indeed
1312                                 # removed from the DOM entirely, after the element was inserted
1313                                 # by the parser.
1314                                 break # don't really loop
1315                 else
1316                         # Otherwise Let adjusted insertion location be inside target, after
1317                         # its last child (if any).
1318                         target_i = target.children.length
1319
1320                 # 3. If the adjusted insertion location is inside a template element,
1321                 # let it instead be inside the template element's template contents,
1322                 # after its last child (if any).
1323                 # fixfull (template)
1324
1325                 # 4. Return the adjusted insertion location.
1326                 return [target, target_i]
1327
1328         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1329         # aka create_an_element_for_token
1330         token_to_element = (t, namespace, intended_parent) ->
1331                 # convert attributes into a hash
1332                 attrs = {}
1333                 for a in t.attrs_a
1334                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1335                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1336
1337                 # TODO 2. If the newly created element has an xmlns attribute in the
1338                 # XMLNS namespace whose value is not exactly the same as the element's
1339                 # namespace, that is a parse error. Similarly, if the newly created
1340                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1341                 # value is not the XLink Namespace, that is a parse error.
1342
1343                 # fixfull: the spec says stuff about form pointers and ownerDocument
1344
1345                 return el
1346
1347         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1348         insert_foreign_element = (token, namespace) ->
1349                 ail = adjusted_insertion_location()
1350                 ail_el = ail[0]
1351                 ail_i = ail[1]
1352                 el = token_to_element token, namespace, ail_el
1353                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1354                 el.parent = ail_el
1355                 ail_el.children.splice ail_i, 0, el
1356                 open_els.unshift el
1357                 return el
1358         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1359         insert_html_element = (token) ->
1360                 insert_foreign_element token, NS_HTML
1361
1362         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1363         # position should be [node, index_within_children]
1364         insert_comment = (t, position = null) ->
1365                 position ?= adjusted_insertion_location()
1366                 position[0].children.splice position[1], 0, t
1367
1368         # 8.2.5.2
1369         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1370         parse_generic_raw_text = (t) ->
1371                 insert_html_element t
1372                 tok_state = tok_state_rawtext
1373                 original_ins_mode = ins_mode
1374                 ins_mode = ins_mode_text
1375         parse_generic_rcdata_text = (t) ->
1376                 insert_html_element t
1377                 tok_state = tok_state_rcdata
1378                 original_ins_mode = ins_mode
1379                 ins_mode = ins_mode_text
1380
1381         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1382         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1383         generate_implied_end_tags = (except = null) ->
1384                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1385                         open_els.shift()
1386
1387         # 8.2.5.4 The rules for parsing tokens in HTML content
1388         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1389
1390         # 8.2.5.4.1 The "initial" insertion mode
1391         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1392         ins_mode_initial = (t) ->
1393                 if is_space_tok t
1394                         return
1395                 if t.type is TYPE_COMMENT
1396                         # ?fixfull
1397                         doc.children.push t
1398                         return
1399                 if t.type is TYPE_DOCTYPE
1400                         # FIXME check identifiers, set quirks, etc
1401                         # fixfull
1402                         doc.children.push t
1403                         ins_mode = ins_mode_before_html
1404                         return
1405                 # Anything else
1406                 #fixfull (iframe, quirks)
1407                 ins_mode = ins_mode_before_html
1408                 process_token t
1409                 return
1410
1411         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1412         ins_mode_before_html = (t) ->
1413                 if t.type is TYPE_DOCTYPE
1414                         parse_error()
1415                         return
1416                 if t.type is TYPE_COMMENT
1417                         doc.children.push t
1418                         return
1419                 if is_space_tok t
1420                         return
1421                 if t.type is TYPE_START_TAG and t.name is 'html'
1422                         el = token_to_element t, NS_HTML, doc
1423                         doc.children.push el
1424                         open_els.unshift(el)
1425                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1426                         ins_mode = ins_mode_before_head
1427                         return
1428                 if t.type is TYPE_END_TAG
1429                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1430                                 # fall through to "anything else"
1431                         else
1432                                 parse_error()
1433                                 return
1434                 # Anything else
1435                 html_tok = new_open_tag 'html'
1436                 el = token_to_element html_tok, NS_HTML, doc
1437                 doc.children.push el
1438                 open_els.unshift el
1439                 # ?fixfull browsing context
1440                 ins_mode = ins_mode_before_head
1441                 process_token t
1442                 return
1443
1444         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1445         ins_mode_before_head = (t) ->
1446                 if is_space_tok t
1447                         return
1448                 if t.type is TYPE_COMMENT
1449                         insert_comment t
1450                         return
1451                 if t.type is TYPE_DOCTYPE
1452                         parse_error()
1453                         return
1454                 if t.type is TYPE_START_TAG and t.name is 'html'
1455                         ins_mode_in_body t
1456                         return
1457                 if t.type is TYPE_START_TAG and t.name is 'head'
1458                         el = insert_html_element t
1459                         head_element_pointer = el
1460                         ins_mode = ins_mode_in_head
1461                         return
1462                 if t.type is TYPE_END_TAG
1463                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1464                                 # fall through to Anything else below
1465                         else
1466                                 parse_error()
1467                                 return
1468                 # Anything else
1469                 head_tok = new_open_tag 'head'
1470                 el = insert_html_element head_tok
1471                 head_element_pointer = el
1472                 ins_mode = ins_mode_in_head
1473                 process_token t
1474
1475         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1476         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1477                 open_els.shift() # spec says this will be a 'head' node
1478                 ins_mode = ins_mode_after_head
1479                 process_token t
1480         ins_mode_in_head = (t) ->
1481                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1482                         insert_character t
1483                         return
1484                 if t.type is TYPE_COMMENT
1485                         insert_comment t
1486                         return
1487                 if t.type is TYPE_DOCTYPE
1488                         parse_error()
1489                         return
1490                 if t.type is TYPE_START_TAG and t.name is 'html'
1491                         ins_mode_in_body t
1492                         return
1493                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1494                         el = insert_html_element t
1495                         open_els.shift()
1496                         t.acknowledge_self_closing()
1497                         return
1498                 if t.type is TYPE_START_TAG and t.name is 'meta'
1499                         el = insert_html_element t
1500                         open_els.shift()
1501                         t.acknowledge_self_closing()
1502                         # fixfull encoding stuff
1503                         return
1504                 if t.type is TYPE_START_TAG and t.name is 'title'
1505                         parse_generic_rcdata_text t
1506                         return
1507                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1508                         parse_generic_raw_text t
1509                         return
1510                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1511                         insert_html_element t
1512                         ins_mode = ins_mode_in_head_noscript
1513                         return
1514                 if t.type is TYPE_START_TAG and t.name is 'script'
1515                         ail = adjusted_insertion_location()
1516                         el = token_to_element t, NS_HTML, ail
1517                         el.flag 'parser-inserted', true
1518                         # fixfull frament case
1519                         ail[0].children.splice ail[1], 0, el
1520                         open_els.unshift el
1521                         tok_state = tok_state_script_data
1522                         original_ins_mode = ins_mode # make sure orig... is defined
1523                         ins_mode = ins_mode_text
1524                         return
1525                 if t.type is TYPE_END_TAG and t.name is 'head'
1526                         open_els.shift() # will be a head element... spec says so
1527                         ins_mode = ins_mode_after_head
1528                         return
1529                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1530                         ins_mode_in_head_else t
1531                         return
1532                 if t.type is TYPE_START_TAG and t.name is 'template'
1533                         insert_html_element t
1534                         afe_push_marker()
1535                         flag_frameset_ok = false
1536                         ins_mode = ins_mode_in_template
1537                         template_ins_modes.unshift ins_mode_in_template
1538                         return
1539                 if t.type is TYPE_END_TAG and t.name is 'template'
1540                         if template_tag_is_open()
1541                                 generate_implied_end_tags
1542                                 if open_els[0].name isnt 'template'
1543                                         parse_error()
1544                                 loop
1545                                         el = open_els.shift()
1546                                         if el.name is 'template' and el.namespace is NS_HTML
1547                                                 break
1548                                 clear_afe_to_marker()
1549                                 template_ins_modes.shift()
1550                                 reset_ins_mode()
1551                         else
1552                                 parse_error()
1553                         return
1554                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1555                         parse_error()
1556                         return
1557                 ins_mode_in_head_else t
1558
1559         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1560         ins_mode_in_head_noscript_else = (t) ->
1561                 parse_error()
1562                 open_els.shift()
1563                 ins_mode = ins_mode_in_head
1564                 process_token t
1565         ins_mode_in_head_noscript = (t) ->
1566                 if t.type is TYPE_DOCTYPE
1567                         parse_error()
1568                         return
1569                 if t.type is TYPE_START_TAG and t.name is 'html'
1570                         ins_mode_in_body t
1571                         return
1572                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1573                         open_els.shift()
1574                         ins_mode = ins_mode_in_head
1575                         return
1576                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1577                         ins_mode_in_head t
1578                         return
1579                 if t.type is TYPE_END_TAG and t.name is 'br'
1580                         ins_mode_in_head_noscript_else t
1581                         return
1582                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1583                         parse_error()
1584                         return
1585                 # Anything else
1586                 ins_mode_in_head_noscript_else t
1587                 return
1588
1589
1590
1591         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1592         ins_mode_after_head_else = (t) ->
1593                 body_tok = new_open_tag 'body'
1594                 insert_html_element body_tok
1595                 ins_mode = ins_mode_in_body
1596                 process_token t
1597                 return
1598         ins_mode_after_head = (t) ->
1599                 if is_space_tok t
1600                         insert_character t
1601                         return
1602                 if t.type is TYPE_COMMENT
1603                         insert_comment t
1604                         return
1605                 if t.type is TYPE_DOCTYPE
1606                         parse_error()
1607                         return
1608                 if t.type is TYPE_START_TAG and t.name is 'html'
1609                         ins_mode_in_body t
1610                         return
1611                 if t.type is TYPE_START_TAG and t.name is 'body'
1612                         insert_html_element t
1613                         flag_frameset_ok = false
1614                         ins_mode = ins_mode_in_body
1615                         return
1616                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1617                         insert_html_element t
1618                         ins_mode = ins_mode_in_frameset
1619                         return
1620                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1621                         parse_error()
1622                         open_els.unshift head_element_pointer
1623                         ins_mode_in_head t
1624                         for el, i of open_els
1625                                 if el is head_element_pointer
1626                                         open_els.splice i, 1
1627                                         return
1628                         console.log "warning: 23904 couldn't find head element in open_els"
1629                         return
1630                 if t.type is TYPE_END_TAG and t.name is 'template'
1631                         ins_mode_in_head t
1632                         return
1633                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1634                         ins_mode_after_head_else t
1635                         return
1636                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1637                         parse_error()
1638                         return
1639                 # Anything else
1640                 ins_mode_after_head_else t
1641
1642         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1643         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1644                 for el, i in open_els
1645                         if el.name is name and el.namespace is NS_HTML
1646                                 generate_implied_end_tags name # arg is exception
1647                                 parse_error() unless i is 0
1648                                 while i >= 0
1649                                         open_els.shift()
1650                                         i -= 1
1651                                 return
1652                         if special_elements[el.name] is el.namespace
1653                                 parse_error()
1654                                 return
1655                 return
1656         ins_mode_in_body = (t) ->
1657                 if t.type is TYPE_TEXT and t.text is "\u0000"
1658                         parse_error()
1659                         return
1660                 if is_space_tok t
1661                         reconstruct_afe()
1662                         insert_character t
1663                         return
1664                 if t.type is TYPE_TEXT
1665                         reconstruct_afe()
1666                         insert_character t
1667                         flag_frameset_ok = false
1668                         return
1669                 if t.type is TYPE_COMMENT
1670                         insert_comment t
1671                         return
1672                 if t.type is TYPE_DOCTYPE
1673                         parse_error()
1674                         return
1675                 if t.type is TYPE_START_TAG and t.name is 'html'
1676                         parse_error()
1677                         return if template_tag_is_open()
1678                         root_attrs = open_els[open_els.length - 1].attrs
1679                         for a of t.attrs_a
1680                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1681                         return
1682
1683                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1684                         ins_mode_in_head t
1685                         return
1686                 if t.type is TYPE_START_TAG and t.name is 'body'
1687                         parse_error()
1688                         return if open_els.length < 2
1689                         second = open_els[open_els.length - 2]
1690                         return unless second.namespace is NS_HTML
1691                         return unless second.name is 'body'
1692                         return if template_tag_is_open()
1693                         flag_frameset_ok = false
1694                         for a of t.attrs_a
1695                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1696                         return
1697                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1698                         parse_error()
1699                         return if open_els.length < 2
1700                         second_i = open_els.length - 2
1701                         second = open_els[second_i]
1702                         return unless second.namespace is NS_HTML
1703                         return unless second.name is 'body'
1704                         if flag_frameset_ok is false
1705                                 return
1706                         if second.parent?
1707                                 for el, i in second.parent.children
1708                                         if el is second
1709                                                 second.parent.children.splice i, 1
1710                                                 break
1711                         open_els.splice second_i, 1
1712                         # pop everything except the "root html element"
1713                         while open_els.length > 1
1714                                 open_els.shift()
1715                         insert_html_element t
1716                         ins_mode = ins_mode_in_frameset
1717                         return
1718                 if t.type is TYPE_EOF
1719                         ok_tags = {
1720                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1721                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1722                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1723                         }
1724                         for el in open_els
1725                                 unless ok_tags[t.name] is el.namespace
1726                                         parse_error()
1727                                         break
1728                         if template_ins_modes.length > 0
1729                                 ins_mode_in_template t
1730                         else
1731                                 stop_parsing()
1732                         return
1733                 if t.type is TYPE_END_TAG and t.name is 'body'
1734                         unless is_in_scope 'body', NS_HTML
1735                                 parse_error()
1736                                 return
1737                         ok_tags = {
1738                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1739                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1740                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1741                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1742                                 html:NS_HTML
1743                         }
1744                         for el in open_els
1745                                 unless ok_tags[t.name] is el.namespace
1746                                         parse_error()
1747                                         break
1748                         ins_mode = ins_mode_after_body
1749                         return
1750                 if t.type is TYPE_END_TAG and t.name is 'html'
1751                         unless is_in_scope 'body', NS_HTML
1752                                 parse_error()
1753                                 return
1754                         ok_tags = {
1755                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1756                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1757                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1758                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1759                                 html:NS_HTML
1760                         }
1761                         for el in open_els
1762                                 unless ok_tags[t.name] is el.namespace
1763                                         parse_error()
1764                                         break
1765                         ins_mode = ins_mode_after_body
1766                         process_token t
1767                         return
1768                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1769                         close_p_if_in_button_scope()
1770                         insert_html_element t
1771                         return
1772                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1773                         close_p_if_in_button_scope()
1774                         if h_tags[open_els[0].name] is open_els[0].namespace
1775                                 parse_error()
1776                                 open_els.shift()
1777                         insert_html_element t
1778                         return
1779                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1780                         close_p_if_in_button_scope()
1781                         insert_html_element t
1782                         # spec: If the next token is a "LF" (U+000A) character token, then
1783                         # ignore that token and move on to the next one. (Newlines at the
1784                         # start of pre blocks are ignored as an authoring convenience.)
1785                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1786                                 cur += 1
1787                         flag_frameset_ok = false
1788                         return
1789                 if t.type is TYPE_START_TAG and t.name is 'form'
1790                         unless form_element_pointer is null or template_tag_is_open()
1791                                 parse_error()
1792                                 return
1793                         close_p_if_in_button_scope()
1794                         el = insert_html_element t
1795                         unless template_tag_is_open()
1796                                 form_element_pointer = el
1797                         return
1798                 if t.type is TYPE_START_TAG and t.name is 'li'
1799                         flag_frameset_ok = false
1800                         for node in open_els
1801                                 if node.name is 'li' and node.namespace is NS_HTML
1802                                         generate_implied_end_tags 'li' # arg is exception
1803                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1804                                                 parse_error()
1805                                         loop
1806                                                 el = open_els.shift()
1807                                                 if el.name is 'li' and el.namespace is NS_HTML
1808                                                         break
1809                                         break
1810                                 if el_is_special_not_adp node
1811                                                 break
1812                         close_p_if_in_button_scope()
1813                         insert_html_element t
1814                         return
1815                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1816                         flag_frameset_ok = false
1817                         for node in open_els
1818                                 if node.name is 'dd' and node.namespace is NS_HTML
1819                                         generate_implied_end_tags 'dd' # arg is exception
1820                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1821                                                 parse_error()
1822                                         loop
1823                                                 el = open_els.shift()
1824                                                 if el.name is 'dd' and el.namespace is NS_HTML
1825                                                         break
1826                                         break
1827                                 if node.name is 'dt' and node.namespace is NS_HTML
1828                                         generate_implied_end_tags 'dt' # arg is exception
1829                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1830                                                 parse_error()
1831                                         loop
1832                                                 el = open_els.shift()
1833                                                 if el.name is 'dt' and el.namespace is NS_HTML
1834                                                         break
1835                                         break
1836                                 if el_is_special_not_adp node
1837                                         break
1838                         close_p_if_in_button_scope()
1839                         insert_html_element t
1840                         return
1841                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1842                         close_p_if_in_button_scope()
1843                         insert_html_element t
1844                         tok_state = tok_state_plaintext
1845                         return
1846                 if t.type is TYPE_START_TAG and t.name is 'button'
1847                         if is_in_scope 'button', NS_HTML
1848                                 parse_error()
1849                                 generate_implied_end_tags()
1850                                 loop
1851                                         el = open_els.shift()
1852                                         if el.name is 'button' and el.namespace is NS_HTML
1853                                                 break
1854                         reconstruct_afe()
1855                         insert_html_element t
1856                         flag_frameset_ok = false
1857                         return
1858                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1859                         unless is_in_scope t.name, NS_HTML
1860                                 parse_error()
1861                                 return
1862                         generate_implied_end_tags()
1863                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1864                                 parse_error()
1865                         loop
1866                                 el = open_els.shift()
1867                                 if el.name is t.name and el.namespace is NS_HTML
1868                                         return
1869                         return
1870                 if t.type is TYPE_END_TAG and t.name is 'form'
1871                         unless template_tag_is_open()
1872                                 node = form_element_pointer
1873                                 form_element_pointer = null
1874                                 if node is null or not el_is_in_scope node
1875                                         parse_error()
1876                                         return
1877                                 generate_implied_end_tags()
1878                                 if open_els[0] isnt node
1879                                         parse_error()
1880                                 for el, i in open_els
1881                                         if el is node
1882                                                 open_els.splice i, 1
1883                                                 break
1884                         else
1885                                 unless is_in_scope 'form', NS_HTML
1886                                         parse_error()
1887                                         return
1888                                 generate_implied_end_tags()
1889                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1890                                         parse_error()
1891                                 loop
1892                                         el = open_els.shift()
1893                                         if el.name is 'form' and el.namespace is NS_HTML
1894                                                 break
1895                         return
1896                 if t.type is TYPE_END_TAG and t.name is 'p'
1897                         unless is_in_button_scope 'p', NS_HTML
1898                                 parse_error()
1899                                 insert_html_element new_open_tag 'p'
1900                         close_p_element()
1901                         return
1902                 if t.type is TYPE_END_TAG and t.name is 'li'
1903                         unless is_in_li_scope 'li', NS_HTML
1904                                 parse_error()
1905                                 return
1906                         generate_implied_end_tags 'li' # arg is exception
1907                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1908                                 parse_error()
1909                         loop
1910                                 el = open_els.shift()
1911                                 if el.name is 'li' and el.namespace is NS_HTML
1912                                         break
1913                         return
1914                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1915                         unless is_in_scope t.name, NS_HTML
1916                                 parse_error()
1917                                 return
1918                         generate_implied_end_tags t.name # arg is exception
1919                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1920                                 parse_error()
1921                         loop
1922                                 el = open_els.shift()
1923                                 if el.name is t.name and el.namespace is NS_HTML
1924                                         break
1925                         return
1926                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1927                         h_in_scope = false
1928                         for el in open_els
1929                                 if h_tags[el.name] is el.namespace
1930                                         h_in_scope = true
1931                                         break
1932                                 if standard_scopers[el.name] is el.namespace
1933                                         break
1934                         unless h_in_scope
1935                                 parse_error()
1936                                 return
1937                         generate_implied_end_tags()
1938                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1939                                 parse_error()
1940                         loop
1941                                 el = open_els.shift()
1942                                 if h_tags[el.name] is el.namespace
1943                                         break
1944                         return
1945                 # deep breath!
1946                 if t.type is TYPE_START_TAG and t.name is 'a'
1947                         # If the list of active formatting elements contains an a element
1948                         # between the end of the list and the last marker on the list (or
1949                         # the start of the list if there is no marker on the list), then
1950                         # this is a parse error; run the adoption agency algorithm for the
1951                         # tag name "a", then remove that element from the list of active
1952                         # formatting elements and the stack of open elements if the
1953                         # adoption agency algorithm didn't already remove it (it might not
1954                         # have if the element is not in table scope).
1955                         found = false
1956                         for el in afe
1957                                 if el.type is TYPE_AFE_MARKER
1958                                         break
1959                                 if el.name is 'a' and el.namespace is NS_HTML
1960                                         found = el
1961                         if found?
1962                                 parse_error()
1963                                 adoption_agency 'a'
1964                                 for el, i in afe
1965                                         if el is found
1966                                                 afe.splice i, 1
1967                                 for el, i in open_els
1968                                         if el is found
1969                                                 open_els.splice i, 1
1970                         reconstruct_afe()
1971                         el = insert_html_element t
1972                         afe_push el
1973                         return
1974                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1975                         reconstruct_afe()
1976                         el = insert_html_element t
1977                         afe_push el
1978                         return
1979                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1980                         reconstruct_afe()
1981                         el = insert_html_element t
1982                         afe_push el
1983                         return
1984                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1985                         adoption_agency t.name
1986                         return
1987                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1988                         reconstruct_afe()
1989                         insert_html_element t
1990                         afe_push_marker()
1991                         flag_frameset_ok = false
1992                         return
1993                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1994                         unless is_in_scope t.name, NS_HTML
1995                                 parse_error()
1996                                 return
1997                         generate_implied_end_tags()
1998                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1999                                 parse_error()
2000                         loop
2001                                 el = open_els.shift()
2002                                 if el.name is t.name and el.namespace is NS_HTML
2003                                         break
2004                         clear_afe_to_marker()
2005                         return
2006                 if t.type is TYPE_START_TAG and t.name is 'table'
2007                         close_p_if_in_button_scope() # fixfull quirksmode thing
2008                         insert_html_element t
2009                         flag_frameset_ok = false
2010                         ins_mode = ins_mode_in_table
2011                         return
2012                 if t.type is TYPE_END_TAG and t.name is 'br'
2013                         parse_error()
2014                         t.type is TYPE_START_TAG
2015                         # fall through
2016                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2017                         reconstruct_afe()
2018                         insert_html_element t
2019                         open_els.shift()
2020                         t.acknowledge_self_closing()
2021                         flag_frameset_ok = false
2022                         return
2023                 if t.type is TYPE_START_TAG and t.name is 'input'
2024                         reconstruct_afe()
2025                         insert_html_element t
2026                         open_els.shift()
2027                         t.acknowledge_self_closing()
2028                         unless is_input_hidden_tok t
2029                                 flag_frameset_ok = false
2030                         return
2031                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2032                         insert_html_element t
2033                         open_els.shift()
2034                         t.acknowledge_self_closing()
2035                         return
2036                 if t.type is TYPE_START_TAG and t.name is 'hr'
2037                         close_p_if_in_button_scope()
2038                         insert_html_element t
2039                         open_els.shift()
2040                         t.acknowledge_self_closing()
2041                         flag_frameset_ok = false
2042                         return
2043                 if t.type is TYPE_START_TAG and t.name is 'image'
2044                         parse_error()
2045                         t.name = 'img'
2046                         process_token t
2047                         return
2048                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2049                         parse_error()
2050                         if template_tag_is_open() is false and form_element_pointer isnt null
2051                                 return
2052                         t.acknowledge_self_closing()
2053                         flag_frameset_ok = false
2054                         close_p_if_in_button_scope()
2055                         el = insert_html_element new_open_tag 'form'
2056                         unless template_tag_is_open()
2057                                 form_element_pointer = el
2058                         for a in t.attrs_a
2059                                 if a[0] is 'action'
2060                                         el.attrs['action'] = a[1]
2061                                         break
2062                         insert_html_element new_open_tag 'hr'
2063                         open_els.shift()
2064                         reconstruct_afe()
2065                         insert_html_element new_open_tag 'label'
2066                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2067                         input_el = new_open_tag 'input'
2068                         prompt = null
2069                         for a in t.attrs_a
2070                                 if a[0] is 'prompt'
2071                                         prompt = a[1]
2072                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2073                                         input_el.attrs_a.push [a[0], a[1]]
2074                         input_el.attrs_a.push ['name', 'isindex']
2075                         # fixfull this next bit is in english... internationalize?
2076                         prompt ?= "This is a searchable index. Enter search keywords: "
2077                         insert_character new_character_token prompt # fixfull split
2078                         # TODO submit typo "balue" in spec
2079                         insert_html_element input_el
2080                         open_els.shift()
2081                         # insert_character '' # you can put chars here if promt attr missing
2082                         open_els.shift()
2083                         insert_html_element new_open_tag 'hr'
2084                         open_els.shift()
2085                         open_els.shift()
2086                         unless template_tag_is_open()
2087                                 form_element_pointer = null
2088                         return
2089                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2090                         insert_html_element t
2091                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2092                                 cur += 1
2093                         tok_state = tok_state_rcdata
2094                         original_ins_mode = ins_mode
2095                         flag_frameset_ok = false
2096                         ins_mode = ins_mode_text
2097                         return
2098                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2099                         close_p_if_in_button_scope()
2100                         reconstruct_afe()
2101                         flag_frameset_ok = false
2102                         parse_generic_raw_text t
2103                         return
2104                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2105                         flag_frameset_ok = false
2106                         parse_generic_raw_text t
2107                         return
2108                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2109                         parse_generic_raw_text t
2110                         return
2111                 if t.type is TYPE_START_TAG and t.name is 'select'
2112                         reconstruct_afe()
2113                         insert_html_element t
2114                         flag_frameset_ok = false
2115                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2116                                 ins_mode = ins_mode_in_select_in_table
2117                         else
2118                                 ins_mode = ins_mode_in_select
2119                         return
2120                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2121                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2122                                 open_els.shift()
2123                         reconstruct_afe()
2124                         insert_html_element t
2125                         return
2126 # this comment block implements the W3C spec
2127 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2128 #                       if is_in_scope 'ruby', NS_HTML
2129 #                               generate_implied_end_tags()
2130 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2131 #                                       parse_error()
2132 #                       insert_html_element t
2133 #                       return
2134 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2135 #                       if is_in_scope 'ruby', NS_HTML
2136 #                               generate_implied_end_tags 'rtc' # arg is exception
2137 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2138 #                                       parse_error()
2139 #                       insert_html_element t
2140 #                       return
2141 # below implements the WATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2142                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2143                         if is_in_scope 'ruby', NS_HTML
2144                                 generate_implied_end_tags()
2145                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2146                                         parse_error()
2147                         insert_html_element t
2148                         return
2149                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2150                         if is_in_scope 'ruby', NS_HTML
2151                                 generate_implied_end_tags 'rtc'
2152                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2153                                         parse_error()
2154                         insert_html_element t
2155                         return
2156 # end WATWG chunk
2157                 if t.type is TYPE_START_TAG and t.name is 'math'
2158                         reconstruct_afe()
2159                         adjust_mathml_attributes t
2160                         adjust_foreign_attributes t
2161                         insert_foreign_element t, NS_MATHML
2162                         if t.flag 'self-closing'
2163                                 open_els.shift()
2164                                 t.acknowledge_self_closing()
2165                         return
2166                 if t.type is TYPE_START_TAG and t.name is 'svg'
2167                         reconstruct_afe()
2168                         adjust_svg_attributes t
2169                         adjust_foreign_attributes t
2170                         insert_foreign_element t, NS_SVG
2171                         if t.flag 'self-closing'
2172                                 open_els.shift()
2173                                 t.acknowledge_self_closing()
2174                         return
2175                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2176                         parse_error()
2177                         return
2178                 if t.type is TYPE_START_TAG # any other start tag
2179                         reconstruct_afe()
2180                         insert_html_element t
2181                         return
2182                 if t.type is TYPE_END_TAG # any other end tag
2183                         in_body_any_other_end_tag t.name
2184                         return
2185                 return
2186
2187         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2188         ins_mode_text = (t) ->
2189                 if t.type is TYPE_TEXT
2190                         insert_character t
2191                         return
2192                 if t.type is TYPE_EOF
2193                         parse_error()
2194                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2195                                 open_els[0].flag 'already started', true
2196                         open_els.shift()
2197                         ins_mode = original_ins_mode
2198                         process_token t
2199                         return
2200                 if t.type is TYPE_END_TAG and t.name is 'script'
2201                         open_els.shift()
2202                         ins_mode = original_ins_mode
2203                         # fixfull the spec seems to assume that I'm going to run the script
2204                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2205                         return
2206                 if t.type is TYPE_END_TAG
2207                         open_els.shift()
2208                         ins_mode = original_ins_mode
2209                         return
2210                 console.log 'warning: end of ins_mode_text reached'
2211
2212         # the functions below implement the tokenizer stats described here:
2213         # http://www.w3.org/TR/html5/syntax.html#tokenization
2214
2215         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2216         ins_mode_in_table_else = (t) ->
2217                 parse_error()
2218                 flag_foster_parenting = true
2219                 ins_mode_in_body t
2220                 flag_foster_parenting = false
2221                 return
2222         ins_mode_in_table = (t) ->
2223                 switch t.type
2224                         when TYPE_TEXT
2225                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2226                                         pending_table_character_tokens = []
2227                                         original_ins_mode = ins_mode
2228                                         ins_mode = ins_mode_in_table_text
2229                                         process_token t
2230                                 else
2231                                         ins_mode_in_table_else t
2232                         when TYPE_COMMENT
2233                                 insert_comment t
2234                         when TYPE_DOCTYPE
2235                                 parse_error()
2236                         when TYPE_START_TAG
2237                                 switch t.name
2238                                         when 'caption'
2239                                                 clear_stack_to_table_context()
2240                                                 afe_push_marker()
2241                                                 insert_html_element t
2242                                                 ins_mode = ins_mode_in_caption
2243                                         when 'colgroup'
2244                                                 clear_stack_to_table_context()
2245                                                 insert_html_element t
2246                                                 ins_mode = ins_mode_in_column_group
2247                                         when 'col'
2248                                                 clear_stack_to_table_context()
2249                                                 insert_html_element new_open_tag 'colgroup'
2250                                                 ins_mode = ins_mode_in_column_group
2251                                                 process_token t
2252                                         when 'tbody', 'tfoot', 'thead'
2253                                                 clear_stack_to_table_context()
2254                                                 insert_html_element t
2255                                                 ins_mode = ins_mode_in_table_body
2256                                         when 'td', 'th', 'tr'
2257                                                 clear_stack_to_table_context()
2258                                                 insert_html_element new_open_tag 'tbody'
2259                                                 ins_mode = ins_mode_in_table_body
2260                                                 process_token t
2261                                         when 'table'
2262                                                 parse_error()
2263                                                 if is_in_table_scope 'table', NS_HTML
2264                                                         loop
2265                                                                 el = open_els.shift()
2266                                                                 if el.name is 'table' and el.namespace is NS_HTML
2267                                                                         break
2268                                                         reset_ins_mode()
2269                                                         process_token t
2270                                         when 'style', 'script', 'template'
2271                                                 ins_mode_in_head t
2272                                         when 'input'
2273                                                 unless is_input_hidden_tok t
2274                                                         ins_mode_in_table_else t
2275                                                 else
2276                                                         parse_error()
2277                                                         el = insert_html_element t
2278                                                         open_els.shift()
2279                                                         t.acknowledge_self_closing()
2280                                         when 'form'
2281                                                 parse_error()
2282                                                 if form_element_pointer?
2283                                                         return
2284                                                 if template_tag_is_open()
2285                                                         return
2286                                                 form_element_pointer = insert_html_element t
2287                                                 open_els.shift()
2288                                         else
2289                                                 ins_mode_in_table_else t
2290                         when TYPE_END_TAG
2291                                 switch t.name
2292                                         when 'table'
2293                                                 if is_in_table_scope 'table', NS_HTML
2294                                                         loop
2295                                                                 el = open_els.shift()
2296                                                                 if el.name is 'table' and el.namespace is NS_HTML
2297                                                                         break
2298                                                         reset_ins_mode()
2299                                                 else
2300                                                         parse_error()
2301                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2302                                                 parse_error()
2303                                         when 'template'
2304                                                 ins_mode_in_head t
2305                                         else
2306                                                 ins_mode_in_table_else t
2307                         when TYPE_EOF
2308                                 ins_mode_in_body t
2309                         else
2310                                 ins_mode_in_table_else t
2311
2312
2313         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2314         ins_mode_in_table_text = (t) ->
2315                 if t.type is TYPE_TEXT and t.text is "\u0000"
2316                         # from javascript?
2317                         parse_error()
2318                         return
2319                 if t.type is TYPE_TEXT
2320                         pending_table_character_tokens.push t
2321                         return
2322                 # Anything else
2323                 all_space = true
2324                 for old in pending_table_character_tokens
2325                         unless is_space_tok old
2326                                 all_space = false
2327                                 break
2328                 if all_space
2329                         for old in pending_table_character_tokens
2330                                 insert_character old
2331                 else
2332                         for old in pending_table_character_tokens
2333                                 ins_mode_in_table_else old
2334                 pending_table_character_tokens = []
2335                 ins_mode = original_ins_mode
2336                 process_token t
2337
2338         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2339         ins_mode_in_caption = (t) ->
2340                 if t.type is TYPE_END_TAG and t.name is 'caption'
2341                         if is_in_table_scope 'caption', NS_HTML
2342                                 generate_implied_end_tags()
2343                                 if open_els[0].name isnt 'caption'
2344                                         parse_error()
2345                                 loop
2346                                         el = open_els.shift()
2347                                         if el.name is 'caption' and el.namespace is NS_HTML
2348                                                 break
2349                                 clear_afe_to_marker()
2350                                 ins_mode = ins_mode_in_table
2351                         else
2352                                 parse_error()
2353                                 # fragment case
2354                         return
2355                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2356                         parse_error()
2357                         if is_in_table_scope 'caption', NS_HTML
2358                                 loop
2359                                         el = open_els.shift()
2360                                         if el.name is 'caption' and el.namespace is NS_HTML
2361                                                 break
2362                                 clear_afe_to_marker()
2363                                 ins_mode = ins_mode_in_table
2364                                 process_token t
2365                         # else fragment case
2366                         return
2367                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2368                         parse_error()
2369                         return
2370                 # Anything else
2371                 ins_mode_in_body t
2372
2373         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2374         ins_mode_in_column_group = (t) ->
2375                 if is_space_tok t
2376                         insert_character t
2377                         return
2378                 if t.type is TYPE_COMMENT
2379                         insert_comment t
2380                         return
2381                 if t.type is TYPE_DOCTYPE
2382                         parse_error()
2383                         return
2384                 if t.type is TYPE_START_TAG and t.name is 'html'
2385                         ins_mode_in_body t
2386                         return
2387                 if t.type is TYPE_START_TAG and t.name is 'col'
2388                         el = insert_html_element t
2389                         open_els.shift()
2390                         t.acknowledge_self_closing()
2391                         return
2392                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2393                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2394                                 open_els.shift()
2395                                 ins_mode = ins_mode_in_table
2396                         else
2397                                 parse_error()
2398                         return
2399                 if t.type is TYPE_END_TAG and t.name is 'col'
2400                         parse_error()
2401                         return
2402                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2403                         ins_mode_in_head t
2404                         return
2405                 if t.type is TYPE_EOF
2406                         ins_mode_in_body t
2407                         return
2408                 # Anything else
2409                 if open_els[0].name isnt 'colgroup'
2410                         parse_error()
2411                         return
2412                 open_els.shift()
2413                 ins_mode = ins_mode_in_table
2414                 process_token t
2415                 return
2416
2417         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2418         ins_mode_in_table_body = (t) ->
2419                 if t.type is TYPE_START_TAG and t.name is 'tr'
2420                         clear_stack_to_table_body_context()
2421                         insert_html_element t
2422                         ins_mode = ins_mode_in_row
2423                         return
2424                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2425                         parse_error()
2426                         clear_stack_to_table_body_context()
2427                         insert_html_element new_open_tag 'tr'
2428                         ins_mode = ins_mode_in_row
2429                         process_token t
2430                         return
2431                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2432                         unless is_in_table_scope t.name, NS_HTML
2433                                 parse_error()
2434                                 return
2435                         clear_stack_to_table_body_context()
2436                         open_els.shift()
2437                         ins_mode = ins_mode_in_table
2438                         return
2439                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2440                         has = false
2441                         for el in open_els
2442                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2443                                         has = true
2444                                         break
2445                                 if table_scopers[el.name] is el.namespace
2446                                         break
2447                         if !has
2448                                 parse_error()
2449                                 return
2450                         clear_stack_to_table_body_context()
2451                         open_els.shift()
2452                         ins_mode = ins_mode_in_table
2453                         process_token t
2454                         return
2455                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2456                         parse_error()
2457                         return
2458                 # Anything else
2459                 ins_mode_in_table t
2460
2461         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2462         ins_mode_in_row = (t) ->
2463                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2464                         clear_stack_to_table_row_context()
2465                         insert_html_element t
2466                         ins_mode = ins_mode_in_cell
2467                         afe_push_marker()
2468                         return
2469                 if t.type is TYPE_END_TAG and t.name is 'tr'
2470                         if is_in_table_scope 'tr', NS_HTML
2471                                 clear_stack_to_table_row_context()
2472                                 open_els.shift()
2473                                 ins_mode = ins_mode_in_table_body
2474                         else
2475                                 parse_error()
2476                         return
2477                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2478                         if is_in_table_scope 'tr', NS_HTML
2479                                 clear_stack_to_table_row_context()
2480                                 open_els.shift()
2481                                 ins_mode = ins_mode_in_table_body
2482                                 process_token t
2483                         else
2484                                 parse_error()
2485                         return
2486                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2487                         if is_in_table_scope t.name, NS_HTML
2488                                 if is_in_table_scope 'tr', NS_HTML
2489                                         clear_stack_to_table_row_context()
2490                                         open_els.shift()
2491                                         ins_mode = ins_mode_in_table_body
2492                                         process_token t
2493                         else
2494                                 parse_error()
2495                         return
2496                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2497                         parse_error()
2498                         return
2499                 # Anything else
2500                 ins_mode_in_table t
2501
2502         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2503         close_the_cell = ->
2504                 generate_implied_end_tags()
2505                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2506                         parse_error()
2507                 loop
2508                         el = open_els.shift()
2509                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2510                                 break
2511                 clear_afe_to_marker()
2512                 ins_mode = ins_mode_in_row
2513
2514         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2515         ins_mode_in_cell = (t) ->
2516                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2517                         if is_in_table_scope t.name, NS_HTML
2518                                 generate_implied_end_tags()
2519                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2520                                         parse_error()
2521                                 loop
2522                                         el = open_els.shift()
2523                                         if el.name is t.name and el.namespace is NS_HTML
2524                                                 break
2525                                 clear_afe_to_marker()
2526                                 ins_mode = ins_mode_in_row
2527                         else
2528                                 parse_error()
2529                         return
2530                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2531                         has = false
2532                         for el in open_els
2533                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2534                                         has = true
2535                                         break
2536                                 if table_scopers[el.name] is el.namespace
2537                                         break
2538                         if !has
2539                                 parse_error()
2540                                 return
2541                         close_the_cell()
2542                         process_token t
2543                         return
2544                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2545                         parse_error()
2546                         return
2547                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2548                         if is_in_table_scope t.name, NS_HTML
2549                                 close_the_cell()
2550                                 process_token t
2551                         else
2552                                 parse_error()
2553                         return
2554                 # Anything Else
2555                 ins_mode_in_body t
2556
2557         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2558         ins_mode_in_select = (t) ->
2559                 if t.type is TYPE_TEXT and t.text is "\u0000"
2560                         parse_error()
2561                         return
2562                 if t.type is TYPE_TEXT
2563                         insert_character t
2564                         return
2565                 if t.type is TYPE_COMMENT
2566                         insert_comment t
2567                         return
2568                 if t.type is TYPE_DOCTYPE
2569                         parse_error()
2570                         return
2571                 if t.type is TYPE_START_TAG and t.name is 'html'
2572                         ins_mode_in_body t
2573                         return
2574                 if t.type is TYPE_START_TAG and t.name is 'option'
2575                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2576                                 open_els.shift()
2577                         insert_html_element t
2578                         return
2579                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2580                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2581                                 open_els.shift()
2582                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2583                                 open_els.shift()
2584                         insert_html_element t
2585                         return
2586                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2587                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2588                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2589                                         open_els.shift()
2590                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2591                                 open_els.shift()
2592                         else
2593                                 parse_error()
2594                         return
2595                 if t.type is TYPE_END_TAG and t.name is 'option'
2596                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2597                                 open_els.shift()
2598                         else
2599                                 parse_error()
2600                         return
2601                 if t.type is TYPE_END_TAG and t.name is 'select'
2602                         if is_in_select_scope 'select', NS_HTML
2603                                 loop
2604                                         el = open_els.shift()
2605                                         if el.name is 'select' and el.namespace is NS_HTML
2606                                                 break
2607                                 reset_ins_mode()
2608                         else
2609                                 parse_error()
2610                         return
2611                 if t.type is TYPE_START_TAG and t.name is 'select'
2612                         parse_error()
2613                         loop
2614                                 el = open_els.shift()
2615                                 if el.name is 'select' and el.namespace is NS_HTML
2616                                         break
2617                         reset_ins_mode()
2618                         # spec says that this is the same as </select> but it doesn't say
2619                         # to check scope first
2620                         return
2621                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2622                         parse_error()
2623                         if is_in_select_scope 'select', NS_HTML
2624                                 return
2625                         loop
2626                                 el = open_els.shift()
2627                                 if el.name is 'select' and el.namespace is NS_HTML
2628                                         break
2629                         reset_ins_mode()
2630                         process_token t
2631                         return
2632                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2633                         ins_mode_in_head t
2634                         return
2635                 if t.type is TYPE_EOF
2636                         ins_mode_in_body t
2637                         return
2638                 # Anything else
2639                 parse_error()
2640                 return
2641
2642         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2643         ins_mode_in_select_in_table = (t) ->
2644                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2645                         parse_error()
2646                         loop
2647                                 el = open_els.shift()
2648                                 if el.name is 'select' and el.namespace is NS_HTML
2649                                         break
2650                         reset_ins_mode()
2651                         process_token t
2652                         return
2653                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2654                         parse_error()
2655                         unless is_in_table_scope t.name, NS_HTML
2656                                 return
2657                         loop
2658                                 el = open_els.shift()
2659                                 if el.name is 'select' and el.namespace is NS_HTML
2660                                         break
2661                         reset_ins_mode()
2662                         process_token t
2663                         return
2664                 # Anything else
2665                 ins_mode_in_select t
2666                 return
2667
2668         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2669         ins_mode_in_template = (t) ->
2670                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2671                         ins_mode_in_body t
2672                         return
2673                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2674                         ins_mode_in_head t
2675                         return
2676                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2677                         template_ins_modes.shift()
2678                         template_ins_modes.unshift ins_mode_in_table
2679                         ins_mode = ins_mode_in_table
2680                         process_token t
2681                         return
2682                 if t.type is TYPE_START_TAG and t.name is 'col'
2683                         template_ins_modes.shift()
2684                         template_ins_modes.unshift ins_mode_in_column_group
2685                         ins_mode = ins_mode_in_column_group
2686                         process_token t
2687                         return
2688                 if t.type is TYPE_START_TAG and t.name is 'tr'
2689                         template_ins_modes.shift()
2690                         template_ins_modes.unshift ins_mode_in_table_body
2691                         ins_mode = ins_mode_in_table_body
2692                         process_token t
2693                         return
2694                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2695                         template_ins_modes.shift()
2696                         template_ins_modes.unshift ins_mode_in_row
2697                         ins_mode = ins_mode_in_row
2698                         process_token t
2699                         return
2700                 if t.type is TYPE_START_TAG
2701                         template_ins_modes.shift()
2702                         template_ins_modes.unshift ins_mode_in_body
2703                         ins_mode = ins_mode_in_body
2704                         process_token t
2705                         return
2706                 if t.type is TYPE_END_TAG
2707                         parse_error()
2708                         return
2709                 if t.type is TYPE_EOF
2710                         unless template_tag_is_open()
2711                                 stop_parsing()
2712                                 return
2713                         parse_error()
2714                         loop
2715                                 el = open_els.shift()
2716                                 if el.name is 'template' and el.namespace is NS_HTML
2717                                         break
2718                         clear_afe_to_marker()
2719                         template_ins_modes.shift()
2720                         reset_ins_mode()
2721                         process_token t
2722
2723         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2724         ins_mode_after_body = (t) ->
2725                 if is_space_tok t
2726                         ins_mode_in_body t
2727                         return
2728                 if t.type is TYPE_COMMENT
2729                         insert_comment t, [open_els[0], open_els[0].children.length]
2730                         return
2731                 if t.type is TYPE_DOCTYPE
2732                         parse_error()
2733                         return
2734                 if t.type is TYPE_START_TAG and t.name is 'html'
2735                         ins_mode_in_body t
2736                         return
2737                 if t.type is TYPE_END_TAG and t.name is 'html'
2738                         if flag_fragment_parsing
2739                                 parse_error()
2740                                 return
2741                         ins_mode = ins_mode_after_after_body
2742                         return
2743                 if t.type is TYPE_EOF
2744                         stop_parsing()
2745                         return
2746                 # Anything ELse
2747                 parse_error()
2748                 ins_mode = ins_mode_in_body
2749                 process_token t
2750
2751         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2752         ins_mode_in_frameset = (t) ->
2753                 if is_space_tok t
2754                         insert_character t
2755                         return
2756                 if t.type is TYPE_COMMENT
2757                         insert_comment t
2758                         return
2759                 if t.type is TYPE_DOCTYPE
2760                         parse_error()
2761                         return
2762                 if t.type is TYPE_START_TAG and t.name is 'html'
2763                         ins_mode_in_body t
2764                         return
2765                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2766                         insert_html_element t
2767                         return
2768                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2769                         if open_els.length is 1
2770                                 parse_error()
2771                                 return # fragment case
2772                         open_els.shift()
2773                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2774                                 ins_mode = ins_mode_after_frameset
2775                         return
2776                 if t.type is TYPE_START_TAG and t.name is 'frame'
2777                         insert_html_element t
2778                         open_els.shift()
2779                         t.acknowledge_self_closing()
2780                         return
2781                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2782                         ins_mode_in_head t
2783                         return
2784                 if t.type is TYPE_EOF
2785                         if open_els.length isnt 1
2786                                 parse_error()
2787                         stop_parsing()
2788                         return
2789                 # Anything else
2790                 parse_error()
2791                 return
2792
2793         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2794         ins_mode_after_frameset = (t) ->
2795                 if is_space_tok t
2796                         insert_character t
2797                         return
2798                 if t.type is TYPE_COMMENT
2799                         insert_comment t
2800                         return
2801                 if t.type is TYPE_DOCTYPE
2802                         parse_error()
2803                         return
2804                 if t.type is TYPE_START_TAG and t.name is 'html'
2805                         ins_mode_in_body t
2806                         return
2807                 if t.type is TYPE_END_TAG and t.name is 'html'
2808                         insert_mode = ins_mode_after_after_frameset
2809                         return
2810                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2811                         ins_mode_in_head t
2812                         return
2813                 if t.type is TYPE_EOF
2814                         stop_parsing()
2815                         return
2816                 # Anything else
2817                 parse_error()
2818                 return
2819
2820         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2821         ins_mode_after_after_body = (t) ->
2822                 if t.type is TYPE_COMMENT
2823                         insert_comment t, [doc, doc.children.length]
2824                         return
2825                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2826                         ins_mode_in_body t
2827                         return
2828                 if t.type is TYPE_EOF
2829                         stop_parsing()
2830                         return
2831                 # Anything else
2832                 parse_error()
2833                 ins_mode = ins_mode_in_body
2834                 process_token t
2835                 return
2836
2837         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2838         ins_mode_after_after_frameset = (t) ->
2839                 if t.type is TYPE_COMMENT
2840                         insert_comment t, [doc, doc.children.length]
2841                         return
2842                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2843                         ins_mode_in_body t
2844                         return
2845                 if t.type is TYPE_EOF
2846                         stop_parsing()
2847                         return
2848                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2849                         ins_mode_in_head t
2850                         return
2851                 # Anything else
2852                 parse_error()
2853                 return
2854
2855         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2856         has_color_face_or_size = (t) ->
2857                 for a in t.attrs_a
2858                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2859                                 return true
2860                 return false
2861         in_foreign_content_end_script = ->
2862                 open_els.shift()
2863                 # fixfull
2864                 return
2865         in_foreign_content_other_start = (t) ->
2866                 acn = adjusted_current_node()
2867                 if acn.namespace is NS_MATHML
2868                         adjust_mathml_attributes t
2869                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2870                         t.name = svg_name_fixes[t.name]
2871                 if acn.namespace is NS_SVG
2872                         adjust_svg_attributes t
2873                 adjust_foreign_attributes t
2874                 insert_foreign_element t, acn.namespace
2875                 if t.flag 'self-closing' # FIXME CONTINUE this isn't getting set
2876                         if t.name is 'script'
2877                                 t.acknowledge_self_closing()
2878                                 in_foreign_content_end_script()
2879                                 # fixfull
2880                         else
2881                                 open_els.shift()
2882                                 t.acknowledge_self_closing()
2883                 return
2884         in_foreign_content = (t) ->
2885                 if t.type is TYPE_TEXT and t.text is "\u0000"
2886                         parse_error()
2887                         insert_character new_character_token "\ufffd"
2888                         return
2889                 if is_space_tok t
2890                         insert_character t
2891                         return
2892                 if t.type is TYPE_TEXT
2893                         flag_frameset_ok = false
2894                         insert_character t
2895                         return
2896                 if t.type is TYPE_COMMENT
2897                         insert_comment t
2898                         return
2899                 if t.type is TYPE_DOCTYPE
2900                         parse_error()
2901                         return
2902                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2903                         parse_error()
2904                         if flag_fragment_parsing
2905                                 in_foreign_content_other_start t
2906                                 return
2907                         loop # is this safe?
2908                                 open_els.shift()
2909                                 cn = open_els[0]
2910                                 if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
2911                                         break
2912                         process_token t
2913                         return
2914                 if t.type is TYPE_START_TAG
2915                         in_foreign_content_other_start t
2916                         return
2917                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2918                         in_foreign_content_end_script()
2919                         return
2920                 if t.type is TYPE_END_TAG
2921                         if open_els[0].name.toLowerCase() isnt t.name
2922                                 parse_error()
2923                         for node in open_els
2924                                 if node is open_els[open_els.length - 1]
2925                                         return
2926                                 if node.name.toLowerCase() is t.name
2927                                         loop
2928                                                 el = open_els.shift()
2929                                                 if el is node
2930                                                         return
2931                                 if node.namespace is NS_HTML
2932                                         break
2933                         ins_mode t # explicitly call HTML insertion mode
2934
2935
2936         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2937         tok_state_data = ->
2938                 switch c = txt.charAt(cur++)
2939                         when '&'
2940                                 return new_text_node parse_character_reference()
2941                         when '<'
2942                                 tok_state = tok_state_tag_open
2943                         when "\u0000"
2944                                 parse_error()
2945                                 return new_text_node "\ufffd"
2946                         when '' # EOF
2947                                 return new_eof_token()
2948                         else
2949                                 return new_text_node c
2950                 return null
2951
2952         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2953         # not needed: tok_state_character_reference_in_data = ->
2954         # just call parse_character_reference()
2955
2956         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2957         tok_state_rcdata = ->
2958                 switch c = txt.charAt(cur++)
2959                         when '&'
2960                                 return new_text_node parse_character_reference()
2961                         when '<'
2962                                 tok_state = tok_state_rcdata_less_than_sign
2963                         when "\u0000"
2964                                 parse_error()
2965                                 return new_character_token "\ufffd"
2966                         when '' # EOF
2967                                 return new_eof_token()
2968                         else
2969                                 return new_character_token c
2970                 return null
2971
2972         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2973         # not needed: tok_state_character_reference_in_rcdata = ->
2974         # just call parse_character_reference()
2975
2976         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2977         tok_state_rawtext = ->
2978                 switch c = txt.charAt(cur++)
2979                         when '<'
2980                                 tok_state = tok_state_rawtext_less_than_sign
2981                         when "\u0000"
2982                                 parse_error()
2983                                 return new_character_token "\ufffd"
2984                         when '' # EOF
2985                                 return new_eof_token()
2986                         else
2987                                 return new_character_token c
2988                 return null
2989
2990         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2991         tok_state_script_data = ->
2992                 switch c = txt.charAt(cur++)
2993                         when '<'
2994                                 tok_state = tok_state_script_data_less_than_sign
2995                         when "\u0000"
2996                                 parse_error()
2997                                 return new_character_token "\ufffd"
2998                         when '' # EOF
2999                                 return new_eof_token()
3000                         else
3001                                 return new_character_token c
3002                 return null
3003
3004         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3005         tok_state_plaintext = ->
3006                 switch c = txt.charAt(cur++)
3007                         when "\u0000"
3008                                 parse_error()
3009                                 return new_character_token "\ufffd"
3010                         when '' # EOF
3011                                 return new_eof_token()
3012                         else
3013                                 return new_character_token c
3014                 return null
3015
3016
3017         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3018         tok_state_tag_open = ->
3019                 switch c = txt.charAt(cur++)
3020                         when '!'
3021                                 tok_state = tok_state_markup_declaration_open
3022                         when '/'
3023                                 tok_state = tok_state_end_tag_open
3024                         when '?'
3025                                 parse_error()
3026                                 tok_cur_tag = new_comment_token '?'
3027                                 tok_state = tok_state_bogus_comment
3028                         else
3029                                 if is_lc_alpha(c)
3030                                         tok_cur_tag = new_open_tag c
3031                                         tok_state = tok_state_tag_name
3032                                 else if is_uc_alpha(c)
3033                                         tok_cur_tag = new_open_tag c.toLowerCase()
3034                                         tok_state = tok_state_tag_name
3035                                 else
3036                                         parse_error()
3037                                         tok_state = tok_state_data
3038                                         cur -= 1 # we didn't parse/handle the char after <
3039                                         return new_text_node '<'
3040                 return null
3041
3042         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3043         tok_state_end_tag_open = ->
3044                 switch c = txt.charAt(cur++)
3045                         when '>'
3046                                 parse_error()
3047                                 tok_state = tok_state_data
3048                         when '' # EOF
3049                                 parse_error()
3050                                 tok_state = tok_state_data
3051                                 return new_text_node '</'
3052                         else
3053                                 if is_uc_alpha(c)
3054                                         tok_cur_tag = new_end_tag c.toLowerCase()
3055                                         tok_state = tok_state_tag_name
3056                                 else if is_lc_alpha(c)
3057                                         tok_cur_tag = new_end_tag c
3058                                         tok_state = tok_state_tag_name
3059                                 else
3060                                         parse_error()
3061                                         tok_cur_tag = new_comment_token '/'
3062                                         tok_state = tok_state_bogus_comment
3063                 return null
3064
3065         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3066         tok_state_tag_name = ->
3067                 switch c = txt.charAt(cur++)
3068                         when "\t", "\n", "\u000c", ' '
3069                                 tok_state = tok_state_before_attribute_name
3070                         when '/'
3071                                 tok_state = tok_state_self_closing_start_tag
3072                         when '>'
3073                                 tok_state = tok_state_data
3074                                 tmp = tok_cur_tag
3075                                 tok_cur_tag = null
3076                                 return tmp
3077                         when "\u0000"
3078                                 parse_error()
3079                                 tok_cur_tag.name += "\ufffd"
3080                         when '' # EOF
3081                                 parse_error()
3082                                 tok_state = tok_state_data
3083                         else
3084                                 if is_uc_alpha(c)
3085                                         tok_cur_tag.name += c.toLowerCase()
3086                                 else
3087                                         tok_cur_tag.name += c
3088                 return null
3089
3090         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3091         tok_state_rcdata_less_than_sign = ->
3092                 c = txt.charAt(cur++)
3093                 if c is '/'
3094                         temporary_buffer = ''
3095                         tok_state = tok_state_rcdata_end_tag_open
3096                         return null
3097                 # Anything else
3098                 tok_state = tok_state_rcdata
3099                 cur -= 1 # reconsume the input character
3100                 return new_character_token '<'
3101
3102         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3103         tok_state_rcdata_end_tag_open = ->
3104                 c = txt.charAt(cur++)
3105                 if is_uc_alpha(c)
3106                         tok_cur_tag = new_end_tag c.toLowerCase()
3107                         temporary_buffer += c
3108                         tok_state = tok_state_rcdata_end_tag_name
3109                         return null
3110                 if is_lc_alpha(c)
3111                         tok_cur_tag = new_end_tag c
3112                         temporary_buffer += c
3113                         tok_state = tok_state_rcdata_end_tag_name
3114                         return null
3115                 # Anything else
3116                 tok_state = tok_state_rcdata
3117                 cur -= 1 # reconsume the input character
3118                 return new_character_token "</" # fixfull separate these
3119
3120         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3121         is_appropriate_end_tag = (t) ->
3122                 # spec says to check against "the tag name of the last start tag to
3123                 # have been emitted from this tokenizer", but this is only called from
3124                 # the various "raw" states, so it's hopefully ok to assume that
3125                 # open_els[0].name will work instead TODO: verify this after the script
3126                 # data states are implemented
3127                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3128                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3129
3130         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3131         tok_state_rcdata_end_tag_name = ->
3132                 c = txt.charAt(cur++)
3133                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3134                         if is_appropriate_end_tag tok_cur_tag
3135                                 tok_state = tok_state_before_attribute_name
3136                                 return
3137                         # else fall through to "Anything else"
3138                 if c is '/'
3139                         if is_appropriate_end_tag tok_cur_tag
3140                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3141                                 return
3142                         # else fall through to "Anything else"
3143                 if c is '>'
3144                         if is_appropriate_end_tag tok_cur_tag
3145                                 tok_state = tok_state_data
3146                                 return tok_cur_tag
3147                         # else fall through to "Anything else"
3148                 if is_uc_alpha(c)
3149                         tok_cur_tag.name += c.toLowerCase()
3150                         temporary_buffer += c
3151                         return null
3152                 if is_lc_alpha(c)
3153                         tok_cur_tag.name += c
3154                         temporary_buffer += c
3155                         return null
3156                 # Anything else
3157                 tok_state = tok_state_rcdata
3158                 cur -= 1 # reconsume the input character
3159                 return new_character_token '</' + temporary_buffer # fixfull separate these
3160
3161         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3162         tok_state_rawtext_less_than_sign = ->
3163                 c = txt.charAt(cur++)
3164                 if c is '/'
3165                         temporary_buffer = ''
3166                         tok_state = tok_state_rawtext_end_tag_open
3167                         return null
3168                 # Anything else
3169                 tok_state = tok_state_rawtext
3170                 cur -= 1 # reconsume the input character
3171                 return new_character_token '<'
3172
3173         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3174         tok_state_rawtext_end_tag_open = ->
3175                 c = txt.charAt(cur++)
3176                 if is_uc_alpha(c)
3177                         tok_cur_tag = new_end_tag c.toLowerCase()
3178                         temporary_buffer += c
3179                         tok_state = tok_state_rawtext_end_tag_name
3180                         return null
3181                 if is_lc_alpha(c)
3182                         tok_cur_tag = new_end_tag c
3183                         temporary_buffer += c
3184                         tok_state = tok_state_rawtext_end_tag_name
3185                         return null
3186                 # Anything else
3187                 tok_state = tok_state_rawtext
3188                 cur -= 1 # reconsume the input character
3189                 return new_character_token "</" # fixfull separate these
3190
3191         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3192         tok_state_rawtext_end_tag_name = ->
3193                 c = txt.charAt(cur++)
3194                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3195                         if is_appropriate_end_tag tok_cur_tag
3196                                 tok_state = tok_state_before_attribute_name
3197                                 return
3198                         # else fall through to "Anything else"
3199                 if c is '/'
3200                         if is_appropriate_end_tag tok_cur_tag
3201                                 tok_state = tok_state_self_closing_start_tag
3202                                 return
3203                         # else fall through to "Anything else"
3204                 if c is '>'
3205                         if is_appropriate_end_tag tok_cur_tag
3206                                 tok_state = tok_state_data
3207                                 return tok_cur_tag
3208                         # else fall through to "Anything else"
3209                 if is_uc_alpha(c)
3210                         tok_cur_tag.name += c.toLowerCase()
3211                         temporary_buffer += c
3212                         return null
3213                 if is_lc_alpha(c)
3214                         tok_cur_tag.name += c
3215                         temporary_buffer += c
3216                         return null
3217                 # Anything else
3218                 tok_state = tok_state_rawtext
3219                 cur -= 1 # reconsume the input character
3220                 return new_character_token '</' + temporary_buffer # fixfull separate these
3221
3222         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3223         tok_state_script_data_less_than_sign = ->
3224                 c = txt.charAt(cur++)
3225                 if c is '/'
3226                         temporary_buffer = ''
3227                         tok_state = tok_state_script_data_end_tag_open
3228                         return
3229                 if c is '!'
3230                         tok_state = tok_state_script_data_escape_start
3231                         return new_character_token '<!' # fixfull split
3232                 # Anything else
3233                 tok_state = tok_state_script_data
3234                 cur -= 1 # Reconsume
3235                 return new_character_token '<'
3236
3237         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3238         tok_state_script_data_end_tag_open = ->
3239                 c = txt.charAt(cur++)
3240                 if is_uc_alpha(c)
3241                         tok_cur_tag = new_end_tag c.toLowerCase()
3242                         temporary_buffer += c
3243                         tok_state = tok_state_script_data_end_tag_name
3244                         return
3245                 if is_lc_alpha(c)
3246                         tok_cur_tag = new_end_tag c
3247                         temporary_buffer += c
3248                         tok_state = tok_state_script_data_end_tag_name
3249                         return
3250                 # Anything else
3251                 tok_state = tok_state_script_data
3252                 cur -= 1 # Reconsume
3253                 return new_character_token '</'
3254
3255         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3256         tok_state_script_data_end_tag_name = ->
3257                 c = txt.charAt(cur++)
3258                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3259                         if is_appropriate_end_tag tok_cur_tag
3260                                 tok_state = tok_state_before_attribute_name
3261                                 return
3262                         # fall through
3263                 if c is '/'
3264                         if is_appropriate_end_tag tok_cur_tag
3265                                 tok_state = tok_state_self_closing_start_tag
3266                                 return
3267                         # fall through
3268                 if c is '>'
3269                         if is_appropriate_end_tag tok_cur_tag
3270                                 tok_state = tok_state_data
3271                                 return tok_cur_tag
3272                         # fall through
3273                 if is_uc_alpha(c)
3274                         tok_cur_tag.name += c.toLowerCase()
3275                         temporary_buffer += c
3276                         return
3277                 if is_lc_alpha(c)
3278                         tok_cur_tag.name += c
3279                         temporary_buffer += c
3280                         return
3281                 # Anything else
3282                 tok_state = tok_state_script_data
3283                 cur -= 1 # Reconsume
3284                 return new_character_token "</#{temporary_buffer}" # fixfull split
3285
3286         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3287         tok_state_script_data_escape_start = ->
3288                 c = txt.charAt(cur++)
3289                 if c is '-'
3290                         tok_state = tok_state_script_data_escape_start_dash
3291                         return new_character_token '-'
3292                 # Anything else
3293                 tok_state = tok_state_script_data
3294                 cur -= 1 # Reconsume
3295                 return
3296
3297         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3298         tok_state_script_data_escape_start_dash = ->
3299                 c = txt.charAt(cur++)
3300                 if c is '-'
3301                         tok_state = tok_state_script_data_escaped_dash_dash
3302                         return new_character_token '-'
3303                 # Anything else
3304                 tok_state = tok_state_script_data
3305                 cur -= 1 # Reconsume
3306                 return
3307
3308         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3309         tok_state_script_data_escaped = ->
3310                 c = txt.charAt(cur++)
3311                 if c is '-'
3312                         tok_state = tok_state_script_data_escaped_dash
3313                         return new_character_token '-'
3314                 if c is '<'
3315                         tok_state = tok_state_script_data_escaped_less_than_sign
3316                         return
3317                 if c is "\u0000"
3318                         parse_error()
3319                         return new_character_token "\ufffd"
3320                 if c is '' # EOF
3321                         tok_state = tok_state_data
3322                         parse_error()
3323                         cur -= 1 # Reconsume
3324                         return
3325                 # Anything else
3326                 return new_character_token c
3327
3328         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3329         tok_state_script_data_escaped_dash = ->
3330                 c = txt.charAt(cur++)
3331                 if c is '-'
3332                         tok_state = tok_state_script_data_escaped_dash_dash
3333                         return new_character_token '-'
3334                 if c is '<'
3335                         tok_state = tok_state_script_data_escaped_less_than_sign
3336                         return
3337                 if c is "\u0000"
3338                         parse_error()
3339                         tok_state = tok_state_script_data_escaped
3340                         return new_character_token "\ufffd"
3341                 if c is '' # EOF
3342                         tok_state = tok_state_data
3343                         parse_error()
3344                         cur -= 1 # Reconsume
3345                         return
3346                 # Anything else
3347                 tok_state = tok_state_script_data_escaped
3348                 return new_character_token c
3349
3350         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3351         tok_state_script_data_escaped_dash_dash = ->
3352                 c = txt.charAt(cur++)
3353                 if c is '-'
3354                         return new_character_token '-'
3355                 if c is '<'
3356                         tok_state = tok_state_script_data_escaped_less_than_sign
3357                         return
3358                 if c is '>'
3359                         tok_state = tok_state_script_data
3360                         return new_character_token '>'
3361                 if c is "\u0000"
3362                         parse_error()
3363                         tok_state = tok_state_script_data_escaped
3364                         return new_character_token "\ufffd"
3365                 if c is '' # EOF
3366                         parse_error()
3367                         tok_state = tok_state_data
3368                         cur -= 1 # Reconsume
3369                         return
3370                 # Anything else
3371                 tok_state = tok_state_script_data_escaped
3372                 return new_character_token c
3373
3374         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3375         tok_state_script_data_escaped_less_than_sign = ->
3376                 c = txt.charAt(cur++)
3377                 if c is '/'
3378                         temporary_buffer = ''
3379                         tok_state = tok_state_script_data_escaped_end_tag_open
3380                         return
3381                 if is_uc_alpha(c)
3382                         temporary_buffer = c.toLowerCase() # yes, really
3383                         tok_state = tok_state_script_data_double_escape_start
3384                         return new_character_token "<#{c}" # fixfull split
3385                 if is_lc_alpha(c)
3386                         temporary_buffer = c
3387                         tok_state = tok_state_script_data_double_escape_start
3388                         return new_character_token "<#{c}" # fixfull split
3389                 # Anything else
3390                 tok_state = tok_state_script_data_escaped
3391                 cur -= 1 # Reconsume
3392                 return new_character_token c
3393
3394         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3395         tok_state_script_data_escaped_end_tag_open = ->
3396                 c = txt.charAt(cur++)
3397                 if is_uc_alpha(c)
3398                         tok_cur_tag = new_end_tag c.toLowerCase()
3399                         temporary_buffer += c
3400                         tok_state = tok_state_script_data_escaped_end_tag_name
3401                         return
3402                 if is_lc_alpha(c)
3403                         tok_cur_tag = new_end_tag c
3404                         temporary_buffer += c
3405                         tok_state = tok_state_script_data_escaped_end_tag_name
3406                         return
3407                 # Anything else
3408                 tok_state = tok_state_script_data_escaped
3409                 cur -= 1 # Reconsume
3410                 return new_character_token '</' # fixfull split
3411
3412         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3413         tok_state_script_data_escaped_end_tag_name = ->
3414                 c = txt.charAt(cur++)
3415                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3416                         if is_appropriate_end_tag tok_cur_tag
3417                                 tok_state = tok_state_before_attribute_name
3418                                 return
3419                         # fall through
3420                 if c is '/'
3421                         if is_appropriate_end_tag tok_cur_tag
3422                                 tok_state = tok_state_self_closing_start_tag
3423                                 return
3424                         # fall through
3425                 if c is '>'
3426                         if is_appropriate_end_tag tok_cur_tag
3427                                 tok_state = tok_state_data
3428                                 return tok_cur_tag
3429                         # fall through
3430                 if is_uc_alpha(c)
3431                         tok_cur_tag.name += c.toLowerCase()
3432                         temporary_buffer += c.toLowerCase()
3433                         return
3434                 if is_lc_alpha(c)
3435                         tok_cur_tag.name += c
3436                         temporary_buffer += c.toLowerCase()
3437                         return
3438                 # Anything else
3439                 tok_state = tok_state_script_data_escaped
3440                 cur -= 1 # Reconsume
3441                 return new_character_token "</#{temporary_buffer}" # fixfull split
3442
3443         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3444         tok_state_script_data_double_escape_start = ->
3445                 c = txt.charAt(cur++)
3446                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3447                         if temporary_buffer is 'script'
3448                                 tok_state = tok_state_script_data_double_escaped
3449                         else
3450                                 tok_state = tok_state_script_data_escaped
3451                         return new_character_token c
3452                 if is_uc_alpha(c)
3453                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3454                         return new_character_token c
3455                 if is_lc_alpha(c)
3456                         temporary_buffer += c
3457                         return new_character_token c
3458                 # Anything else
3459                 tok_state = tok_state_script_data_escaped
3460                 cur -= 1 # Reconsume
3461                 return
3462
3463         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3464         tok_state_script_data_double_escaped = ->
3465                 c = txt.charAt(cur++)
3466                 if c is '-'
3467                         tok_state = tok_state_script_data_double_escaped_dash
3468                         return new_character_token '-'
3469                 if c is '<'
3470                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3471                         return new_character_token '<'
3472                 if c is "\u0000"
3473                         parse_error()
3474                         return new_character_token "\ufffd"
3475                 if c is '' # EOF
3476                         parse_error()
3477                         tok_state = tok_state_data
3478                         cur -= 1 # Reconsume
3479                         return
3480                 # Anything else
3481                 return new_character_token c
3482
3483         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3484         tok_state_script_data_double_escaped_dash = ->
3485                 c = txt.charAt(cur++)
3486                 if c is '-'
3487                         tok_state = tok_state_script_data_double_escaped_dash_dash
3488                         return new_character_token '-'
3489                 if c is '<'
3490                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3491                         return new_character_token '<'
3492                 if c is "\u0000"
3493                         parse_error()
3494                         tok_state = tok_state_script_data_double_escaped
3495                         return new_character_token "\ufffd"
3496                 if c is '' # EOF
3497                         parse_error()
3498                         tok_state = tok_state_data
3499                         cur -= 1 # Reconsume
3500                         return
3501                 # Anything else
3502                 tok_state = tok_state_script_data_double_escaped
3503                 return new_character_token c
3504
3505         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3506         tok_state_script_data_double_escaped_dash_dash = ->
3507                 c = txt.charAt(cur++)
3508                 if c is '-'
3509                         return new_character_token '-'
3510                 if c is '<'
3511                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3512                         return new_character_token '<'
3513                 if c is '>'
3514                         tok_state = tok_state_script_data
3515                         return new_character_token '>'
3516                 if c is "\u0000"
3517                         parse_error()
3518                         tok_state = tok_state_script_data_double_escaped
3519                         return new_character_token "\ufffd"
3520                 if c is '' # EOF
3521                         parse_error()
3522                         tok_state = tok_state_data
3523                         cur -= 1 # Reconsume
3524                         return
3525                 # Anything else
3526                 tok_state = tok_state_script_data_double_escaped
3527                 return new_character_token c
3528
3529         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3530         tok_state_script_data_double_escaped_less_than_sign = ->
3531                 c = txt.charAt(cur++)
3532                 if c is '/'
3533                         temporary_buffer = ''
3534                         tok_state = tok_state_script_data_double_escape_end
3535                         return new_character_token '/'
3536                 # Anything else
3537                 tok_state = tok_state_script_data_double_escaped
3538                 cur -= 1 # Reconsume
3539                 return
3540
3541         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3542         tok_state_script_data_double_escape_end = ->
3543                 c = txt.charAt(cur++)
3544                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3545                         if temporary_buffer is 'script'
3546                                 tok_state = tok_state_script_data_escaped
3547                         else
3548                                 tok_state = tok_state_script_data_double_escaped
3549                         return new_character_token c
3550                 if is_uc_alpha(c)
3551                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3552                         return new_character_token c
3553                 if is_lc_alpha(c)
3554                         temporary_buffer += c
3555                         return new_character_token c
3556                 # Anything else
3557                 tok_state = tok_state_script_data_double_escaped
3558                 cur -= 1 # Reconsume
3559                 return
3560
3561         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3562         tok_state_before_attribute_name = ->
3563                 attr_name = null
3564                 switch c = txt.charAt(cur++)
3565                         when "\t", "\n", "\u000c", ' '
3566                                 return null
3567                         when '/'
3568                                 tok_state = tok_state_self_closing_start_tag
3569                                 return null
3570                         when '>'
3571                                 tok_state = tok_state_data
3572                                 tmp = tok_cur_tag
3573                                 tok_cur_tag = null
3574                                 return tmp
3575                         when "\u0000"
3576                                 parse_error()
3577                                 attr_name = "\ufffd"
3578                         when '"', "'", '<', '='
3579                                 parse_error()
3580                                 attr_name = c
3581                         when '' # EOF
3582                                 parse_error()
3583                                 tok_state = tok_state_data
3584                         else
3585                                 if is_uc_alpha(c)
3586                                         attr_name = c.toLowerCase()
3587                                 else
3588                                         attr_name = c
3589                 if attr_name?
3590                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3591                         tok_state = tok_state_attribute_name
3592                 return null
3593
3594         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3595         tok_state_attribute_name = ->
3596                 switch c = txt.charAt(cur++)
3597                         when "\t", "\n", "\u000c", ' '
3598                                 tok_state = tok_state_after_attribute_name
3599                         when '/'
3600                                 tok_state = tok_state_self_closing_start_tag
3601                         when '='
3602                                 tok_state = tok_state_before_attribute_value
3603                         when '>'
3604                                 tok_state = tok_state_data
3605                                 tmp = tok_cur_tag
3606                                 tok_cur_tag = null
3607                                 return tmp
3608                         when "\u0000"
3609                                 parse_error()
3610                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3611                         when '"', "'", '<'
3612                                 parse_error()
3613                                 tok_cur_tag.attrs_a[0][0] += c
3614                         when '' # EOF
3615                                 parse_error()
3616                                 tok_state = tok_state_data
3617                         else
3618                                 if is_uc_alpha(c)
3619                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3620                                 else
3621                                         tok_cur_tag.attrs_a[0][0] += c
3622                 return null
3623
3624         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3625         tok_state_after_attribute_name = ->
3626                 c = txt.charAt(cur++)
3627                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3628                         return
3629                 if c is '/'
3630                         tok_state = tok_state_self_closing_start_tag
3631                         return
3632                 if c is '='
3633                         tok_state = tok_state_before_attribute_value
3634                         return
3635                 if c is '>'
3636                         tok_state = tok_state_data
3637                         return
3638                 if is_uc_alpha(c)
3639                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3640                         tok_state = tok_state_attribute_name
3641                         return
3642                 if c is "\u0000"
3643                         parse_error()
3644                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3645                         tok_state = tok_state_attribute_name
3646                         return
3647                 if c is '' # EOF
3648                         parse_error()
3649                         tok_state = tok_state_data
3650                         cur -= 1 # reconsume
3651                         return
3652                 if c is '"' or c is "'" or c is '<'
3653                         parse_error()
3654                         # fall through to Anything else
3655                 # Anything else
3656                 tok_cur_tag.attrs_a.unshift [c, '']
3657                 tok_state = tok_state_attribute_name
3658
3659         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3660         tok_state_before_attribute_value = ->
3661                 switch c = txt.charAt(cur++)
3662                         when "\t", "\n", "\u000c", ' '
3663                                 return null
3664                         when '"'
3665                                 tok_state = tok_state_attribute_value_double_quoted
3666                         when '&'
3667                                 tok_state = tok_state_attribute_value_unquoted
3668                                 cur -= 1
3669                         when "'"
3670                                 tok_state = tok_state_attribute_value_single_quoted
3671                         when "\u0000"
3672                                 # Parse error
3673                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3674                                 tok_state = tok_state_attribute_value_unquoted
3675                         when '>'
3676                                 # Parse error
3677                                 tok_state = tok_state_data
3678                                 tmp = tok_cur_tag
3679                                 tok_cur_tag = null
3680                                 return tmp
3681                         when '' # EOF
3682                                 parse_error()
3683                                 tok_state = tok_state_data
3684                         else
3685                                 tok_cur_tag.attrs_a[0][1] += c
3686                                 tok_state = tok_state_attribute_value_unquoted
3687                 return null
3688
3689         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3690         tok_state_attribute_value_double_quoted = ->
3691                 switch c = txt.charAt(cur++)
3692                         when '"'
3693                                 tok_state = tok_state_after_attribute_value_quoted
3694                         when '&'
3695                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3696                         when "\u0000"
3697                                 # Parse error
3698                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3699                         when '' # EOF
3700                                 parse_error()
3701                                 tok_state = tok_state_data
3702                         else
3703                                 tok_cur_tag.attrs_a[0][1] += c
3704                 return null
3705
3706         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3707         tok_state_attribute_value_single_quoted = ->
3708                 switch c = txt.charAt(cur++)
3709                         when "'"
3710                                 tok_state = tok_state_after_attribute_value_quoted
3711                         when '&'
3712                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3713                         when "\u0000"
3714                                 # Parse error
3715                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3716                         when '' # EOF
3717                                 parse_error()
3718                                 tok_state = tok_state_data
3719                         else
3720                                 tok_cur_tag.attrs_a[0][1] += c
3721                 return null
3722
3723         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3724         tok_state_attribute_value_unquoted = ->
3725                 switch c = txt.charAt(cur++)
3726                         when "\t", "\n", "\u000c", ' '
3727                                 tok_state = tok_state_before_attribute_name
3728                         when '&'
3729                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3730                         when '>'
3731                                 tok_state = tok_state_data
3732                                 tmp = tok_cur_tag
3733                                 tok_cur_tag = null
3734                                 return tmp
3735                         when "\u0000"
3736                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3737                         when '' # EOF
3738                                 parse_error()
3739                                 tok_state = tok_state_data
3740                         else
3741                                 # Parse Error if ', <, = or ` (backtick)
3742                                 tok_cur_tag.attrs_a[0][1] += c
3743                 return null
3744
3745         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3746         tok_state_after_attribute_value_quoted = ->
3747                 switch c = txt.charAt(cur++)
3748                         when "\t", "\n", "\u000c", ' '
3749                                 tok_state = tok_state_before_attribute_name
3750                         when '/'
3751                                 tok_state = tok_state_self_closing_start_tag
3752                         when '>'
3753                                 tok_state = tok_state_data
3754                                 tmp = tok_cur_tag
3755                                 tok_cur_tag = null
3756                                 return tmp
3757                         when '' # EOF
3758                                 parse_error()
3759                                 tok_state = tok_state_data
3760                         else
3761                                 # Parse Error
3762                                 tok_state = tok_state_before_attribute_name
3763                                 cur -= 1 # we didn't handle that char
3764                 return null
3765
3766         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3767         tok_state_self_closing_start_tag = ->
3768                 c = txt.charAt(cur++)
3769                 if c is '>'
3770                         tok_cur_tag.flag 'self-closing'
3771                         tok_state = tok_state_data
3772                         return tok_cur_tag
3773                 if c is ''
3774                         parse_error()
3775                         tok_state = tok_state_data
3776                         cur -= 1 # Reconsume
3777                         return
3778                 # Anything else
3779                 parse_error()
3780                 tok_state = tok_state_before_attribute_name
3781                 cur -= 1 # Reconsume
3782                 return
3783
3784         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3785         # WARNING: put a comment token in tok_cur_tag before setting this state
3786         tok_state_bogus_comment = ->
3787                 next_gt = txt.indexOf '>', cur
3788                 if next_gt is -1
3789                         val = txt.substr cur
3790                         cur = txt.length
3791                 else
3792                         val = txt.substr cur, (next_gt - cur)
3793                         cur = next_gt + 1
3794                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3795                 tok_cur_tag.text += val
3796                 tok_state = tok_state_data
3797                 return tok_cur_tag
3798
3799         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3800         tok_state_markup_declaration_open = ->
3801                 if txt.substr(cur, 2) is '--'
3802                         cur += 2
3803                         tok_cur_tag = new_comment_token ''
3804                         tok_state = tok_state_comment_start
3805                         return
3806                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3807                         cur += 7
3808                         tok_state = tok_state_doctype
3809                         return
3810                 acn = adjusted_current_node()
3811                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3812                         cur += 7
3813                         tok_state = tok_state_cdata_section
3814                         return
3815                 # Otherwise
3816                 parse_error()
3817                 tok_cur_tag = new_comment_token ''
3818                 tok_state = tok_state_bogus_comment
3819                 return
3820
3821         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3822         tok_state_comment_start = ->
3823                 switch c = txt.charAt(cur++)
3824                         when '-'
3825                                 tok_state = tok_state_comment_start_dash
3826                         when "\u0000"
3827                                 parse_error()
3828                                 tok_state = tok_state_comment
3829                                 return new_character_token "\ufffd"
3830                         when '>'
3831                                 parse_error()
3832                                 tok_state = tok_state_data
3833                                 return tok_cur_tag
3834                         when '' # EOF
3835                                 parse_error()
3836                                 tok_state = tok_state_data
3837                                 cur -= 1 # Reconsume
3838                                 return tok_cur_tag
3839                         else
3840                                 tok_cur_tag.text += c
3841                                 tok_state = tok_state_comment
3842                 return null
3843
3844         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3845         tok_state_comment_start_dash = ->
3846                 switch c = txt.charAt(cur++)
3847                         when '-'
3848                                 tok_state = tok_state_comment_end
3849                         when "\u0000"
3850                                 parse_error()
3851                                 tok_cur_tag.text += "-\ufffd"
3852                                 tok_state = tok_state_comment
3853                         when '>'
3854                                 parse_error()
3855                                 tok_state = tok_state_data
3856                                 return tok_cur_tag
3857                         when '' # EOF
3858                                 parse_error()
3859                                 tok_state = tok_state_data
3860                                 cur -= 1 # Reconsume
3861                                 return tok_cur_tag
3862                         else
3863                                 tok_cur_tag.text += "-#{c}"
3864                                 tok_state = tok_state_comment
3865                 return null
3866
3867         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3868         tok_state_comment = ->
3869                 switch c = txt.charAt(cur++)
3870                         when '-'
3871                                 tok_state = tok_state_comment_end_dash
3872                         when "\u0000"
3873                                 parse_error()
3874                                 tok_cur_tag.text += "\ufffd"
3875                         when '' # EOF
3876                                 parse_error()
3877                                 tok_state = tok_state_data
3878                                 cur -= 1 # Reconsume
3879                                 return tok_cur_tag
3880                         else
3881                                 tok_cur_tag.text += c
3882                 return null
3883
3884         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3885         tok_state_comment_end_dash = ->
3886                 switch c = txt.charAt(cur++)
3887                         when '-'
3888                                 tok_state = tok_state_comment_end
3889                         when "\u0000"
3890                                 parse_error()
3891                                 tok_cur_tag.text += "-\ufffd"
3892                                 tok_state = tok_state_comment
3893                         when '' # EOF
3894                                 parse_error()
3895                                 tok_state = tok_state_data
3896                                 cur -= 1 # Reconsume
3897                                 return tok_cur_tag
3898                         else
3899                                 tok_cur_tag.text += "-#{c}"
3900                                 tok_state = tok_state_comment
3901                 return null
3902
3903         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3904         tok_state_comment_end = ->
3905                 switch c = txt.charAt(cur++)
3906                         when '>'
3907                                 tok_state = tok_state_data
3908                                 return tok_cur_tag
3909                         when "\u0000"
3910                                 parse_error()
3911                                 tok_cur_tag.text += "--\ufffd"
3912                                 tok_state = tok_state_comment
3913                         when '!'
3914                                 parse_error()
3915                                 tok_state = tok_state_comment_end_bang
3916                         when '-'
3917                                 parse_error()
3918                                 tok_cur_tag.text += '-'
3919                         when '' # EOF
3920                                 parse_error()
3921                                 tok_state = tok_state_data
3922                                 cur -= 1 # Reconsume
3923                                 return tok_cur_tag
3924                         else
3925                                 parse_error()
3926                                 tok_cur_tag.text += "--#{c}"
3927                                 tok_state = tok_state_comment
3928                 return null
3929
3930         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3931         tok_state_comment_end_bang = ->
3932                 switch c = txt.charAt(cur++)
3933                         when '-'
3934                                 tok_cur_tag.text += "--!#{c}"
3935                                 tok_state = tok_state_comment_end_dash
3936                         when '>'
3937                                 tok_state = tok_state_data
3938                                 return tok_cur_tag
3939                         when "\u0000"
3940                                 parse_error()
3941                                 tok_cur_tag.text += "--!\ufffd"
3942                                 tok_state = tok_state_comment
3943                         when '' # EOF
3944                                 parse_error()
3945                                 tok_state = tok_state_data
3946                                 cur -= 1 # Reconsume
3947                                 return tok_cur_tag
3948                         else
3949                                 tok_cur_tag.text += "--!#{c}"
3950                                 tok_state = tok_state_comment
3951                 return null
3952
3953         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3954         tok_state_doctype = ->
3955                 switch c = txt.charAt(cur++)
3956                         when "\t", "\u000a", "\u000c", ' '
3957                                 tok_state = tok_state_before_doctype_name
3958                         when '' # EOF
3959                                 parse_error()
3960                                 tok_state = tok_state_data
3961                                 el = new_doctype_token ''
3962                                 el.flag 'force-quirks', true
3963                                 cur -= 1 # Reconsume
3964                                 return el
3965                         else
3966                                 parse_error()
3967                                 tok_state = tok_state_before_doctype_name
3968                                 cur -= 1 # Reconsume
3969                 return null
3970
3971         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3972         tok_state_before_doctype_name = ->
3973                 c = txt.charAt(cur++)
3974                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3975                         return
3976                 if is_uc_alpha(c)
3977                         tok_cur_tag = new_doctype_token c.toLowerCase()
3978                         tok_state = tok_state_doctype_name
3979                         return
3980                 if c is "\u0000"
3981                         parse_error()
3982                         tok_cur_tag = new_doctype_token "\ufffd"
3983                         tok_state = tok_state_doctype_name
3984                         return
3985                 if c is '>'
3986                         parse_error()
3987                         el = new_doctype_token ''
3988                         el.flag 'force-quirks', true
3989                         tok_state = tok_state_data
3990                         return el
3991                 if c is '' # EOF
3992                         parse_error()
3993                         tok_state = tok_state_data
3994                         el = new_doctype_token ''
3995                         el.flag 'force-quirks', true
3996                         cur -= 1 # Reconsume
3997                         return el
3998                 # Anything else
3999                 tok_cur_tag = new_doctype_token c
4000                 tok_state = tok_state_doctype_name
4001                 return null
4002
4003         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4004         tok_state_doctype_name = ->
4005                 c = txt.charAt(cur++)
4006                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4007                         tok_state = tok_state_after_doctype_name
4008                         return
4009                 if c is '>'
4010                         tok_state = tok_state_data
4011                         return tok_cur_tag
4012                 if is_uc_alpha(c)
4013                         tok_cur_tag.name += c.toLowerCase()
4014                         return
4015                 if c is "\u0000"
4016                         parse_error()
4017                         tok_cur_tag.name += "\ufffd"
4018                         return
4019                 if c is '' # EOF
4020                         parse_error()
4021                         tok_state = tok_state_data
4022                         tok_cur_tag.flag 'force-quirks', true
4023                         cur -= 1 # Reconsume
4024                         return tok_cur_tag
4025                 # Anything else
4026                 tok_cur_tag.name += c
4027                 return null
4028
4029         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4030         tok_state_after_doctype_name = ->
4031                 c = txt.charAt(cur++)
4032                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4033                         return
4034                 if c is '>'
4035                         tok_state = tok_state_data
4036                         return tok_cur_tag
4037                 if c is '' # EOF
4038                         parse_error()
4039                         tok_state = tok_state_data
4040                         tok_cur_tag.flag 'force-quirks', true
4041                         cur -= 1 # Reconsume
4042                         return tok_cur_tag
4043                 # Anything else
4044                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4045                         cur += 5
4046                         tok_state = tok_state_after_doctype_public_keyword
4047                         return
4048                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4049                         cur += 5
4050                         tok_state = tok_state_after_doctype_system_keyword
4051                         return
4052                 parse_error()
4053                 tok_cur_tag.flag 'force-quirks', true
4054                 tok_state = tok_state_bogus_doctype
4055                 return null
4056
4057         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4058         tok_state_after_doctype_public_keyword = ->
4059                 c = txt.charAt(cur++)
4060                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4061                         tok_state = tok_state_before_doctype_public_identifier
4062                         return
4063                 if c is '"'
4064                         parse_error()
4065                         tok_cur_tag.public_identifier = ''
4066                         tok_state = tok_state_doctype_public_identifier_double_quoted
4067                         return
4068                 if c is "'"
4069                         parse_error()
4070                         tok_cur_tag.public_identifier = ''
4071                         tok_state = tok_state_doctype_public_identifier_single_quoted
4072                         return
4073                 if c is '>'
4074                         parse_error()
4075                         tok_cur_tag.flag 'force-quirks', true
4076                         tok_state = tok_state_data
4077                         return tok_cur_tag
4078                 if c is '' # EOF
4079                         parse_error()
4080                         tok_state = tok_state_data
4081                         tok_cur_tag.flag 'force-quirks', true
4082                         cur -= 1 # Reconsume
4083                         return tok_cur_tag
4084                 # Anything else
4085                 parse_error()
4086                 tok_cur_tag.flag 'force-quirks', true
4087                 tok_state = tok_state_bogus_doctype
4088                 return null
4089
4090         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4091         tok_state_before_doctype_public_identifier = ->
4092                 c = txt.charAt(cur++)
4093                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4094                         return
4095                 if c is '"'
4096                         parse_error()
4097                         tok_cur_tag.public_identifier = ''
4098                         tok_state = tok_state_doctype_public_identifier_double_quoted
4099                         return
4100                 if c is "'"
4101                         parse_error()
4102                         tok_cur_tag.public_identifier = ''
4103                         tok_state = tok_state_doctype_public_identifier_single_quoted
4104                         return
4105                 if c is '>'
4106                         parse_error()
4107                         tok_cur_tag.flag 'force-quirks', true
4108                         tok_state = tok_state_data
4109                         return tok_cur_tag
4110                 if c is '' # EOF
4111                         parse_error()
4112                         tok_state = tok_state_data
4113                         tok_cur_tag.flag 'force-quirks', true
4114                         cur -= 1 # Reconsume
4115                         return tok_cur_tag
4116                 # Anything else
4117                 parse_error()
4118                 tok_cur_tag.flag 'force-quirks', true
4119                 tok_state = tok_state_bogus_doctype
4120                 return null
4121
4122
4123         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4124         tok_state_doctype_public_identifier_double_quoted = ->
4125                 c = txt.charAt(cur++)
4126                 if c is '"'
4127                         tok_state = tok_state_after_doctype_public_identifier
4128                         return
4129                 if c is "\u0000"
4130                         parse_error()
4131                         tok_cur_tag.public_identifier += "\ufffd"
4132                         return
4133                 if c is '>'
4134                         parse_error()
4135                         tok_cur_tag.flag 'force-quirks', true
4136                         tok_state = tok_state_data
4137                         return tok_cur_tag
4138                 if c is '' # EOF
4139                         parse_error()
4140                         tok_state = tok_state_data
4141                         tok_cur_tag.flag 'force-quirks', true
4142                         cur -= 1 # Reconsume
4143                         return tok_cur_tag
4144                 # Anything else
4145                 tok_cur_tag.public_identifier += c
4146                 return null
4147
4148         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4149         tok_state_doctype_public_identifier_single_quoted = ->
4150                 c = txt.charAt(cur++)
4151                 if c is "'"
4152                         tok_state = tok_state_after_doctype_public_identifier
4153                         return
4154                 if c is "\u0000"
4155                         parse_error()
4156                         tok_cur_tag.public_identifier += "\ufffd"
4157                         return
4158                 if c is '>'
4159                         parse_error()
4160                         tok_cur_tag.flag 'force-quirks', true
4161                         tok_state = tok_state_data
4162                         return tok_cur_tag
4163                 if c is '' # EOF
4164                         parse_error()
4165                         tok_state = tok_state_data
4166                         tok_cur_tag.flag 'force-quirks', true
4167                         cur -= 1 # Reconsume
4168                         return tok_cur_tag
4169                 # Anything else
4170                 tok_cur_tag.public_identifier += c
4171                 return null
4172
4173         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4174         tok_state_after_doctype_public_identifier = ->
4175                 c = txt.charAt(cur++)
4176                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4177                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4178                         return
4179                 if c is '>'
4180                         tok_state = tok_state_data
4181                         return tok_cur_tag
4182                 if c is '"'
4183                         parse_error()
4184                         tok_cur_tag.system_identifier = ''
4185                         tok_state = tok_state_doctype_system_identifier_double_quoted
4186                         return
4187                 if c is "'"
4188                         parse_error()
4189                         tok_cur_tag.system_identifier = ''
4190                         tok_state = tok_state_doctype_system_identifier_single_quoted
4191                         return
4192                 if c is '' # EOF
4193                         parse_error()
4194                         tok_state = tok_state_data
4195                         tok_cur_tag.flag 'force-quirks', true
4196                         cur -= 1 # Reconsume
4197                         return tok_cur_tag
4198                 # Anything else
4199                 parse_error()
4200                 tok_cur_tag.flag 'force-quirks', true
4201                 tok_state = tok_state_bogus_doctype
4202                 return null
4203
4204         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4205         tok_state_between_doctype_public_and_system_identifiers = ->
4206                 c = txt.charAt(cur++)
4207                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4208                         return
4209                 if c is '>'
4210                         tok_state = tok_state_data
4211                         return tok_cur_tag
4212                 if c is '"'
4213                         parse_error()
4214                         tok_cur_tag.system_identifier = ''
4215                         tok_state = tok_state_doctype_system_identifier_double_quoted
4216                         return
4217                 if c is "'"
4218                         parse_error()
4219                         tok_cur_tag.system_identifier = ''
4220                         tok_state = tok_state_doctype_system_identifier_single_quoted
4221                         return
4222                 if c is '' # EOF
4223                         parse_error()
4224                         tok_state = tok_state_data
4225                         tok_cur_tag.flag 'force-quirks', true
4226                         cur -= 1 # Reconsume
4227                         return tok_cur_tag
4228                 # Anything else
4229                 parse_error()
4230                 tok_cur_tag.flag 'force-quirks', true
4231                 tok_state = tok_state_bogus_doctype
4232                 return null
4233
4234         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4235         tok_state_after_doctype_system_keyword = ->
4236                 c = txt.charAt(cur++)
4237                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4238                         tok_state = tok_state_before_doctype_system_identifier
4239                         return
4240                 if c is '"'
4241                         parse_error()
4242                         tok_cur_tag.system_identifier = ''
4243                         tok_state = tok_state_doctype_system_identifier_double_quoted
4244                         return
4245                 if c is "'"
4246                         parse_error()
4247                         tok_cur_tag.system_identifier = ''
4248                         tok_state = tok_state_doctype_system_identifier_single_quoted
4249                         return
4250                 if c is '>'
4251                         parse_error()
4252                         tok_cur_tag.flag 'force-quirks', true
4253                         tok_state = tok_state_data
4254                         return tok_cur_tag
4255                 if c is '' # EOF
4256                         parse_error()
4257                         tok_state = tok_state_data
4258                         tok_cur_tag.flag 'force-quirks', true
4259                         cur -= 1 # Reconsume
4260                         return tok_cur_tag
4261                 # Anything else
4262                 parse_error()
4263                 tok_cur_tag.flag 'force-quirks', true
4264                 tok_state = tok_state_bogus_doctype
4265                 return null
4266
4267         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4268         tok_state_before_doctype_system_identifier = ->
4269                 c = txt.charAt(cur++)
4270                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4271                         return
4272                 if c is '"'
4273                         tok_cur_tag.system_identifier = ''
4274                         tok_state = tok_state_doctype_system_identifier_double_quoted
4275                         return
4276                 if c is "'"
4277                         tok_cur_tag.system_identifier = ''
4278                         tok_state = tok_state_doctype_system_identifier_single_quoted
4279                         return
4280                 if c is '>'
4281                         parse_error()
4282                         tok_cur_tag.flag 'force-quirks', true
4283                         tok_state = tok_state_data
4284                         return tok_cur_tag
4285                 if c is '' # EOF
4286                         parse_error()
4287                         tok_state = tok_state_data
4288                         tok_cur_tag.flag 'force-quirks', true
4289                         cur -= 1 # Reconsume
4290                         return tok_cur_tag
4291                 # Anything else
4292                 parse_error()
4293                 tok_cur_tag.flag 'force-quirks', true
4294                 tok_state = tok_state_bogus_doctype
4295                 return null
4296
4297         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4298         tok_state_doctype_system_identifier_double_quoted = ->
4299                 c = txt.charAt(cur++)
4300                 if c is '"'
4301                         tok_state = tok_state_after_doctype_system_identifier
4302                         return
4303                 if c is "\u0000"
4304                         parse_error()
4305                         tok_cur_tag.system_identifier += "\ufffd"
4306                         return
4307                 if c is '>'
4308                         parse_error()
4309                         tok_cur_tag.flag 'force-quirks', true
4310                         tok_state = tok_state_data
4311                         return tok_cur_tag
4312                 if c is '' # EOF
4313                         parse_error()
4314                         tok_state = tok_state_data
4315                         tok_cur_tag.flag 'force-quirks', true
4316                         cur -= 1 # Reconsume
4317                         return tok_cur_tag
4318                 # Anything else
4319                 tok_cur_tag.system_identifier += c
4320                 return null
4321
4322         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4323         tok_state_doctype_system_identifier_single_quoted = ->
4324                 c = txt.charAt(cur++)
4325                 if c is "'"
4326                         tok_state = tok_state_after_doctype_system_identifier
4327                         return
4328                 if c is "\u0000"
4329                         parse_error()
4330                         tok_cur_tag.system_identifier += "\ufffd"
4331                         return
4332                 if c is '>'
4333                         parse_error()
4334                         tok_cur_tag.flag 'force-quirks', true
4335                         tok_state = tok_state_data
4336                         return tok_cur_tag
4337                 if c is '' # EOF
4338                         parse_error()
4339                         tok_state = tok_state_data
4340                         tok_cur_tag.flag 'force-quirks', true
4341                         cur -= 1 # Reconsume
4342                         return tok_cur_tag
4343                 # Anything else
4344                 tok_cur_tag.system_identifier += c
4345                 return null
4346
4347         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4348         tok_state_after_doctype_system_identifier = ->
4349                 c = txt.charAt(cur++)
4350                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4351                         return
4352                 if c is '>'
4353                         tok_state = tok_state_data
4354                         return tok_cur_tag
4355                 if c is '' # EOF
4356                         parse_error()
4357                         tok_state = tok_state_data
4358                         tok_cur_tag.flag 'force-quirks', true
4359                         cur -= 1 # Reconsume
4360                         return tok_cur_tag
4361                 # Anything else
4362                 parse_error()
4363                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4364                 tok_state = tok_state_bogus_doctype
4365                 return null
4366
4367         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4368         tok_state_bogus_doctype = ->
4369                 c = txt.charAt(cur++)
4370                 if c is '>'
4371                         tok_state = tok_state_data
4372                         return tok_cur_tag
4373                 if c is '' # EOF
4374                         tok_state = tok_state_data
4375                         cur -= 1 # Reconsume
4376                         return tok_cur_tag
4377                 # Anything else
4378                 return null
4379
4380         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4381         tok_state_cdata_section = ->
4382                 tok_state = tok_state_data
4383                 next_gt = txt.indexOf ']]>', cur
4384                 if next_gt is -1
4385                         val = txt.substr cur
4386                         cur = txt.length
4387                 else
4388                         val = txt.substr cur, (next_gt - cur)
4389                         cur = next_gt + 3
4390                 return new_character_token val # fixfull split
4391
4392         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4393         # Don't set this as a state, just call it
4394         # returns a string (NOT a text node)
4395         parse_character_reference = (allowed_char = null, in_attr = false) ->
4396                 if cur >= txt.length
4397                         return '&'
4398                 switch c = txt.charAt(cur)
4399                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4400                                 # explicitly not a parse error
4401                                 return '&'
4402                         when ';'
4403                                 # there has to be "one or more" alnums between & and ; to be a parse error
4404                                 return '&'
4405                         when '#'
4406                                 if cur + 1 >= txt.length
4407                                         return '&'
4408                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4409                                         base = 16
4410                                         charset = hex_chars
4411                                         start = cur + 2
4412                                 else
4413                                         charset = digits
4414                                         start = cur + 1
4415                                         base = 10
4416                                 i = 0
4417                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4418                                         i += 1
4419                                 if i is 0
4420                                         return '&'
4421                                 cur = start + i
4422                                 if txt.charAt(start + i) is ';'
4423                                         cur += 1
4424                                 else
4425                                         parse_error()
4426                                 code_point = txt.substr(start, i)
4427                                 while code_point.charAt(0) is '0' and code_point.length > 1
4428                                         code_point = code_point.substr 1
4429                                 code_point = parseInt(code_point, base)
4430                                 if unicode_fixes[code_point]?
4431                                         parse_error()
4432                                         return unicode_fixes[code_point]
4433                                 else
4434                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4435                                                 parse_error()
4436                                                 return "\ufffd"
4437                                         else
4438                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4439                                                         parse_error()
4440                                                 return from_code_point code_point
4441                                 return
4442                         else
4443                                 for i in [0...31]
4444                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4445                                                 break
4446                                 if i is 0
4447                                         # exit early, because parse_error() below needs at least one alnum
4448                                         return '&'
4449                                 if txt.charAt(cur + i) is ';'
4450                                         i += 1 # include ';' terminator in value
4451                                         decoded = decode_named_char_ref txt.substr(cur, i)
4452                                         if decoded?
4453                                                 cur += i
4454                                                 return decoded
4455                                         parse_error()
4456                                         return '&'
4457                                 else
4458                                         # no ';' terminator (only legacy char refs)
4459                                         max = i
4460                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4461                                                 c = legacy_char_refs[txt.substr(cur, i)]
4462                                                 if c?
4463                                                         if in_attr
4464                                                                 if txt.charAt(cur + i) is '='
4465                                                                         # "because some legacy user agents will
4466                                                                         # misinterpret the markup in those cases"
4467                                                                         parse_error()
4468                                                                         return '&'
4469                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4470                                                                         # this makes attributes forgiving about url args
4471                                                                         return '&'
4472                                                         # ok, and besides the weird exceptions for attributes...
4473                                                         # return the matching char
4474                                                         cur += i # consume entity chars
4475                                                         parse_error() # because no terminating ";"
4476                                                         return c
4477                                         parse_error()
4478                                         return '&'
4479                 return # never reached
4480
4481         # tree constructor initialization
4482         # see comments on TYPE_TAG/etc for the structure of this data
4483         txt = args.html
4484         cur = 0
4485         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4486         open_els = []
4487         afe = [] # active formatting elements
4488         template_ins_modes = []
4489         ins_mode = ins_mode_initial
4490         original_ins_mode = ins_mode # TODO check spec
4491         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4492         flag_frameset_ok = true
4493         flag_parsing = true
4494         flag_foster_parenting = false
4495         form_element_pointer = null
4496         temporary_buffer = null
4497         pending_table_character_tokens = []
4498         head_element_pointer = null
4499         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4500         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4501
4502         # tokenizer initialization
4503         tok_state = tok_state_data
4504
4505         # text pre-processing
4506         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4507         txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4508         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4509         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4510
4511         if args.name is "plain-text-unsafe.dat #4"
4512                 console.log "hi"
4513         # proccess input
4514         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4515         while flag_parsing
4516                 t = tok_state()
4517                 if t?
4518                         process_token t
4519                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4520         return doc.children
4521
4522 serialize_els = (els, shallow, show_ids) ->
4523         serialized = ''
4524         sep = ''
4525         for t in els
4526                 serialized += sep
4527                 sep = ','
4528                 serialized += t.serialize shallow, show_ids
4529         return serialized
4530
4531 module.exports.parse_html = parse_html
4532 module.exports.debug_log_reset = debug_log_reset
4533 module.exports.debug_log_each = debug_log_each
4534 module.exports.TYPE_TAG = TYPE_TAG
4535 module.exports.TYPE_TEXT = TYPE_TEXT
4536 module.exports.TYPE_COMMENT = TYPE_COMMENT
4537 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4538 module.exports.NS_HTML = NS_HTML
4539 module.exports.NS_MATHML = NS_MATHML
4540 module.exports.NS_SVG = NS_SVG