JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
fix garbage after </
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
24 #
25 # Deviations from that spec:
26 #
27 #   Purposeful: search this file for "WTAG"
28 #
29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
30
31
32 # stacks/lists
33 #
34 # the spec uses a many different words do indicate which ends of lists/stacks
35 # they are talking about (and relative movement within the lists/stacks). This
36 # section splains. I'm implementing "lists" (afe and open_els) the same way
37 # (both as stacks)
38 #
39 # stacks grow downward (current element is index=0)
40 #
41 # example: open_els = [a, b, c, d, e, f, g]
42 #
43 # "grows downwards" means it's visualized like this: (index: el, names)
44 #
45 #   6: g "start of the list", "topmost", "first"
46 #   5: f
47 #   4: e "previous" (to d), "above", "before"
48 #   3: d   (previous/next are relative to this element)
49 #   2: c "next", "after", "lower", "below"
50 #   1: b
51 #   0: a "end of the list", "current node", "bottommost", "last"
52
53
54 # browser
55 # note: to get this to run outside a browser, you'll have to write a native
56 # implementation of decode_named_char_ref()
57 unless module?.exports?
58         window.wheic = {}
59         module = exports: window.wheic
60
61 from_code_point = (x) ->
62         if String.fromCodePoint?
63                 return String.fromCodePoint x
64         else
65                 if x <= 0xffff
66                         return String.fromCharCode x
67                 x -= 0x10000
68                 return String.fromCharCode((x >> 10) + 0xd800, (x % 0x400) + 0xdc00)
69
70 # Each node is an obect of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
73 TYPE_COMMENT = 2
74 TYPE_DOCTYPE = 3
75 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
78 TYPE_EOF = 6
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
81
82 # namespace constants
83 NS_HTML = 1
84 NS_MATHML = 2
85 NS_SVG = 3
86
87 g_debug_log = []
88 debug_log_reset = ->
89         g_debug_log = []
90 debug_log = (str) ->
91         g_debug_log.push str
92 debug_log_each = (cb) ->
93         for str in g_debug_log
94                 cb str
95
96 prev_node_id = 0
97 class Node
98         constructor: (type, args = {}) ->
99                 @type = type # one of the TYPE_* constants above
100                 @name = args.name ? '' # tag name
101                 @text = args.text ? '' # contents for text/comment nodes
102                 @attrs = args.attrs ? {}
103                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
104                 @children = args.children ? []
105                 @namespace = args.namespace ? NS_HTML
106                 @parent = args.parent ? null
107                 @token = args.token ? null
108                 @flags = args.flags ? {}
109                 if args.id?
110                         @id = "#{args.id}+"
111                 else
112                         @id = "#{++prev_node_id}"
113         acknowledge_self_closing: ->
114                 if @token?
115                         @token.flag 'did_self_close', true
116                 else
117                         @flag 'did_self_close', true
118         flag: (key, value = null) ->
119                 if value?
120                         @flags[key] = value
121                 else
122                         return @flags[key]
123         serialize: (shallow = false, show_ids = false) -> # for unit tests
124                 ret = ''
125                 switch @type
126                         when TYPE_TAG
127                                 ret += 'tag:'
128                                 ret += JSON.stringify @name
129                                 ret += ','
130                                 if show_ids
131                                         ret += "##{@id},"
132                                 if shallow
133                                         break
134                                 attr_keys = []
135                                 for k of @attrs
136                                         attr_keys.push k
137                                 attr_keys.sort()
138                                 ret += '{'
139                                 sep = ''
140                                 for k in attr_keys
141                                         ret += sep
142                                         sep = ','
143                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
144                                 ret += '},['
145                                 sep = ''
146                                 for c in @children
147                                         ret += sep
148                                         sep = ','
149                                         ret += c.serialize shallow, show_ids
150                                 ret += ']'
151                         when TYPE_TEXT
152                                 ret += 'text:'
153                                 ret += JSON.stringify @text
154                         when TYPE_COMMENT
155                                 ret += 'comment:'
156                                 ret += JSON.stringify @text
157                         when TYPE_DOCTYPE
158                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
159                         when TYPE_AFE_MARKER
160                                 ret += 'marker'
161                         when TYPE_AAA_BOOKMARK
162                                 ret += 'aaa_bookmark'
163                         else
164                                 ret += 'unknown:'
165                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
166                 return ret
167
168 # helpers: (only take args that are normally known when parser creates nodes)
169 new_open_tag = (name) ->
170         return new Node TYPE_START_TAG, name: name
171 new_end_tag = (name) ->
172         return new Node TYPE_END_TAG, name: name
173 new_element = (name) ->
174         return new Node TYPE_TAG, name: name
175 new_text_node = (txt) ->
176         return new Node TYPE_TEXT, text: txt
177 new_character_token = new_text_node
178 new_comment_token = (txt) ->
179         return new Node TYPE_COMMENT, text: txt
180 new_doctype_token = (name) ->
181         return new Node TYPE_DOCTYPE, name: name
182 new_eof_token = ->
183         return new Node TYPE_EOF
184 new_afe_marker = ->
185         return new Node TYPE_AFE_MARKER
186 new_aaa_bookmark = ->
187         return new Node TYPE_AAA_BOOKMARK
188
189 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
190 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
191 digits = "0123456789"
192 alnum = lc_alpha + uc_alpha + digits
193 hex_chars = digits + "abcdefABCDEF"
194
195 is_uc_alpha = (str) ->
196         return str.length is 1 and uc_alpha.indexOf(str) > -1
197 is_lc_alpha = (str) ->
198         return str.length is 1 and lc_alpha.indexOf(str) > -1
199
200 # some SVG elements have dashes in them
201 tag_name_chars = alnum + "-"
202
203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
204 space_chars = "\u0009\u000a\u000c\u000d\u0020"
205 is_space = (txt) ->
206         return txt.length is 1 and space_chars.indexOf(txt) > -1
207 is_space_tok = (t) ->
208         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
209
210 is_input_hidden_tok = (t) ->
211         return false unless t.type is TYPE_START_TAG
212         for a in t.attrs_a
213                 if a[0] is 'type'
214                         if a[1].toLowerCase() is 'hidden'
215                                 return true
216                         return false
217         return false
218
219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
221
222 unicode_fixes = {}
223 unicode_fixes[0x00] = "\uFFFD"
224 unicode_fixes[0x80] = "\u20AC"
225 unicode_fixes[0x82] = "\u201A"
226 unicode_fixes[0x83] = "\u0192"
227 unicode_fixes[0x84] = "\u201E"
228 unicode_fixes[0x85] = "\u2026"
229 unicode_fixes[0x86] = "\u2020"
230 unicode_fixes[0x87] = "\u2021"
231 unicode_fixes[0x88] = "\u02C6"
232 unicode_fixes[0x89] = "\u2030"
233 unicode_fixes[0x8A] = "\u0160"
234 unicode_fixes[0x8B] = "\u2039"
235 unicode_fixes[0x8C] = "\u0152"
236 unicode_fixes[0x8E] = "\u017D"
237 unicode_fixes[0x91] = "\u2018"
238 unicode_fixes[0x92] = "\u2019"
239 unicode_fixes[0x93] = "\u201C"
240 unicode_fixes[0x94] = "\u201D"
241 unicode_fixes[0x95] = "\u2022"
242 unicode_fixes[0x96] = "\u2013"
243 unicode_fixes[0x97] = "\u2014"
244 unicode_fixes[0x98] = "\u02DC"
245 unicode_fixes[0x99] = "\u2122"
246 unicode_fixes[0x9A] = "\u0161"
247 unicode_fixes[0x9B] = "\u203A"
248 unicode_fixes[0x9C] = "\u0153"
249 unicode_fixes[0x9E] = "\u017E"
250 unicode_fixes[0x9F] = "\u0178"
251
252 # These are the character references that don't need a terminating semicolon
253 # min length: 2, max: 6, none are a prefix of any other.
254 legacy_char_refs = {
255         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
256         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
257         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
258         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
259         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
260         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
261         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
262         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
263         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
264         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
265         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
266         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
267         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
268         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
269         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
270         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
271         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
272         yen: '¥', yuml: 'ÿ'
273 }
274
275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
276 raw_text_elements = ['script', 'style']
277 escapable_raw_text_elements = ['textarea', 'title']
278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
279 svg_elements = [
280         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
281         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
282         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
283         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
284         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
285         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
286         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
287         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
288         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
289         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
290         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
291         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
292         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
293         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
294         'view', 'vkern'
295 ]
296
297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
298 mathml_elements = [
299         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
300         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
301         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
302         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
303         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
304         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
305         'determinant', 'diff', 'divergence', 'divide', 'domain',
306         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
307         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
308         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
309         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
310         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
311         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
312         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
313         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
314         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
315         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
316         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
317         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
318         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
319         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
320         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
321         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
322         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
323         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
324         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
325         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
326         'vectorproduct', 'xor'
327 ]
328 # foreign_elements = [svg_elements..., mathml_elements...]
329 #normal_elements = All other allowed HTML elements are normal elements.
330
331 special_elements = {
332         # HTML:
333         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
334         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
335         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
336         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
337         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
338         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
339         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
340         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
341         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
342         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
343         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
344
345         menu:NS_HTML,menuitem:NS_HTML, # WATWG adds these
346
347         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
348         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
349         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
350         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
351         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
352         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
353         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
354
355         # MathML:
356         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
357         'annotation-xml':NS_MATHML,
358
359         # SVG:
360         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
361 }
362
363 formatting_elements = {
364          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
365          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
366          u: true
367 }
368
369 mathml_text_integration = {
370         mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML
371 }
372 is_mathml_text_integration_point = (el) ->
373         return mathml_text_integration[el.name] is el.namespace
374 is_html_integration = (el) -> # DON'T PASS A TOKEN
375         if el.namespace is NS_MATHML
376                 if el.name is 'annotation-xml'
377                         if el.attrs.encoding?
378                                 if el.attrs.encoding.toLowerCase() is 'text/html'
379                                         return true
380                                 if el.attrs.encoding.toLowerCase() is 'application/xhtml+xml'
381                                         return true
382                 return false
383         if el.namespace is NS_SVG
384                 if el.name is 'foreignObject' or el.name is 'desc' or el.name is 'title'
385                         return true
386         return false
387
388 h_tags = {
389         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
390 }
391
392 foster_parenting_targets = {
393         table: NS_HTML
394         tbody: NS_HTML
395         tfoot: NS_HTML
396         thead: NS_HTML
397         tr: NS_HTML
398 }
399
400 end_tag_implied = {
401         dd: NS_HTML
402         dt: NS_HTML
403         li: NS_HTML
404         option: NS_HTML
405         optgroup: NS_HTML
406         p: NS_HTML
407         rb: NS_HTML
408         rp: NS_HTML
409         rt: NS_HTML
410         rtc: NS_HTML
411 }
412
413 el_is_special = (e) ->
414         return special_elements[e.name] is e.namespace
415
416 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
417 el_is_special_not_adp = (el) ->
418         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
419
420 svg_name_fixes = {
421         altglyph: 'altGlyph'
422         altglyphdef: 'altGlyphDef'
423         altglyphitem: 'altGlyphItem'
424         animatecolor: 'animateColor'
425         animatemotion: 'animateMotion'
426         animatetransform: 'animateTransform'
427         clippath: 'clipPath'
428         feblend: 'feBlend'
429         fecolormatrix: 'feColorMatrix'
430         fecomponenttransfer: 'feComponentTransfer'
431         fecomposite: 'feComposite'
432         feconvolvematrix: 'feConvolveMatrix'
433         fediffuselighting: 'feDiffuseLighting'
434         fedisplacementmap: 'feDisplacementMap'
435         fedistantlight: 'feDistantLight'
436         fedropshadow: 'feDropShadow'
437         feflood: 'feFlood'
438         fefunca: 'feFuncA'
439         fefuncb: 'feFuncB'
440         fefuncg: 'feFuncG'
441         fefuncr: 'feFuncR'
442         fegaussianblur: 'feGaussianBlur'
443         feimage: 'feImage'
444         femerge: 'feMerge'
445         femergenode: 'feMergeNode'
446         femorphology: 'feMorphology'
447         feoffset: 'feOffset'
448         fepointlight: 'fePointLight'
449         fespecularlighting: 'feSpecularLighting'
450         fespotlight: 'feSpotLight'
451         fetile: 'feTile'
452         feturbulence: 'feTurbulence'
453         foreignobject: 'foreignObject'
454         glyphref: 'glyphRef'
455         lineargradient: 'linearGradient'
456         radialgradient: 'radialGradient'
457         textpath: 'textPath'
458 }
459 svg_attribute_fixes = {
460         attributename: 'attributeName'
461         attributetype: 'attributeType'
462         basefrequency: 'baseFrequency'
463         baseprofile: 'baseProfile'
464         calcmode: 'calcMode'
465         clippathunits: 'clipPathUnits'
466         contentscripttype: 'contentScriptType'
467         contentstyletype: 'contentStyleType'
468         diffuseconstant: 'diffuseConstant'
469         edgemode: 'edgeMode'
470         externalresourcesrequired: 'externalResourcesRequired'
471         # WTAG removes this: filterres: 'filterRes'
472         filterunits: 'filterUnits'
473         glyphref: 'glyphRef'
474         gradienttransform: 'gradientTransform'
475         gradientunits: 'gradientUnits'
476         kernelmatrix: 'kernelMatrix'
477         kernelunitlength: 'kernelUnitLength'
478         keypoints: 'keyPoints'
479         keysplines: 'keySplines'
480         keytimes: 'keyTimes'
481         lengthadjust: 'lengthAdjust'
482         limitingconeangle: 'limitingConeAngle'
483         markerheight: 'markerHeight'
484         markerunits: 'markerUnits'
485         markerwidth: 'markerWidth'
486         maskcontentunits: 'maskContentUnits'
487         maskunits: 'maskUnits'
488         numoctaves: 'numOctaves'
489         pathlength: 'pathLength'
490         patterncontentunits: 'patternContentUnits'
491         patterntransform: 'patternTransform'
492         patternunits: 'patternUnits'
493         pointsatx: 'pointsAtX'
494         pointsaty: 'pointsAtY'
495         pointsatz: 'pointsAtZ'
496         preservealpha: 'preserveAlpha'
497         preserveaspectratio: 'preserveAspectRatio'
498         primitiveunits: 'primitiveUnits'
499         refx: 'refX'
500         refy: 'refY'
501         repeatcount: 'repeatCount'
502         repeatdur: 'repeatDur'
503         requiredextensions: 'requiredExtensions'
504         requiredfeatures: 'requiredFeatures'
505         specularconstant: 'specularConstant'
506         specularexponent: 'specularExponent'
507         spreadmethod: 'spreadMethod'
508         startoffset: 'startOffset'
509         stddeviation: 'stdDeviation'
510         stitchtiles: 'stitchTiles'
511         surfacescale: 'surfaceScale'
512         systemlanguage: 'systemLanguage'
513         tablevalues: 'tableValues'
514         targetx: 'targetX'
515         targety: 'targetY'
516         textlength: 'textLength'
517         viewbox: 'viewBox'
518         viewtarget: 'viewTarget'
519         xchannelselector: 'xChannelSelector'
520         ychannelselector: 'yChannelSelector'
521         zoomandpan: 'zoomAndPan'
522 }
523 foreign_attr_fixes = {
524         'xlink:actuate': 'xlink actuate'
525         'xlink:arcrole': 'xlink arcrole'
526         'xlink:href': 'xlink href'
527         'xlink:role': 'xlink role'
528         'xlink:show': 'xlink show'
529         'xlink:title': 'xlink title'
530         'xlink:type': 'xlink type'
531         'xml:base': 'xml base'
532         'xml:lang': 'xml lang'
533         'xml:space': 'xml space'
534         'xmlns': 'xmlns'
535         'xmlns:xlink': 'xmlns xlink'
536 }
537 adjust_mathml_attributes = (t) ->
538         for a in t.attrs_a
539                 if a[0] is 'definitionurl'
540                         a[0] = 'definitionURL'
541         return
542 adjust_svg_attributes = (t) ->
543         for a in t.attrs_a
544                 if svg_attribute_fixes[a[0]]?
545                         a[0] = svg_attribute_fixes[a[0]]
546         return
547 adjust_foreign_attributes = (t) ->
548         # fixfull
549         for a in t.attrs_a
550                 if foreign_attr_fixes[a[0]]?
551                         a[0] = foreign_attr_fixes[a[0]]
552         return
553
554 # decode_named_char_ref()
555 #
556 # The list of named character references is _huge_ so ask the browser to decode
557 # for us instead of wasting bandwidth/space on including the table here.
558 #
559 # Pass without the "&" but with the ";" examples:
560 #    for "&amp" pass "amp;"
561 #    for "&#x2032" pass "x2032;"
562 g_dncr = {
563         cache: {}
564         textarea: document.createElement('textarea')
565 }
566 # TODO test this in IE8
567 decode_named_char_ref = (txt) ->
568         txt = "&#{txt}"
569         decoded = g_dncr.cache[txt]
570         return decoded if decoded?
571         g_dncr.textarea.innerHTML = txt
572         decoded = g_dncr.textarea.value
573         return null if decoded is txt
574         return g_dncr.cache[txt] = decoded
575
576 parse_html = (args) ->
577         txt = null
578         cur = null # index of next char in txt to be parsed
579         # declare doc and tokenizer variables so they're in scope below
580         doc = null
581         open_els = null # stack of open elements
582         afe = null # active formatting elements
583         template_ins_modes = null
584         ins_mode = null
585         original_ins_mode = null
586         tok_state = null
587         tok_cur_tag = null # partially parsed tag
588         flag_scripting = null
589         flag_frameset_ok = null
590         flag_parsing = null
591         flag_foster_parenting = null
592         form_element_pointer = null
593         temporary_buffer = null
594         pending_table_character_tokens = null
595         head_element_pointer = null
596         flag_fragment_parsing = null
597         context_element = null
598
599         stop_parsing = ->
600                 flag_parsing = false
601
602         parse_error = ->
603                 if args.error_cb?
604                         args.error_cb cur
605                 else
606                         console.log "Parse error at character #{cur} of #{txt.length}"
607
608         afe_push = (new_el) ->
609                 matches = 0
610                 for el, i in afe
611                         if el.name is new_el.name and el.namespace is new_el.namespace
612                                 for k, v of el.attrs
613                                         continue unless new_el.attrs[k] is v
614                                 for k, v of new_el.attrs
615                                         continue unless el.attrs[k] is v
616                                 matches += 1
617                                 if matches is 3
618                                         afe.splice i, 1
619                                         break
620                 afe.unshift new_el
621         afe_push_marker = ->
622                 afe.unshift new_afe_marker()
623
624         # the functions below impliment the Tree Contstruction algorithm
625         # http://www.w3.org/TR/html5/syntax.html#tree-construction
626
627         # But first... the helpers
628         template_tag_is_open = ->
629                 for t in open_els
630                         if t.name is 'template' and t.namespace is NS_HTML
631                                 return true
632                 return false
633         is_in_scope_x = (tag_name, scope, namespace) ->
634                 for t in open_els
635                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
636                                 return true
637                         if scope[t.name] is t.namespace
638                                 return false
639                 return false
640         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
641                 for t in open_els
642                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
643                                 return true
644                         if scope[t.name] is t.namespace
645                                 return false
646                         if scope2[t.name] is t.namespace
647                                 return false
648                 return false
649         standard_scopers = {
650                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
651                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
652                 template: NS_HTML,
653
654                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
655                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
656
657                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
658         }
659         button_scopers = button: NS_HTML
660         li_scopers = ol: NS_HTML, ul: NS_HTML
661         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
662         is_in_scope = (tag_name, namespace = null) ->
663                 return is_in_scope_x tag_name, standard_scopers, namespace
664         is_in_button_scope = (tag_name, namespace = null) ->
665                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
666         is_in_table_scope = (tag_name, namespace = null) ->
667                 return is_in_scope_x tag_name, table_scopers, namespace
668         # aka is_in_list_item_scope
669         is_in_li_scope = (tag_name, namespace = null) ->
670                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
671         is_in_select_scope = (tag_name, namespace = null) ->
672                 for t in open_els
673                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
674                                 return true
675                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
676                                 return false
677                 return false
678         # this checks for a particular element, not by name
679         # this requires a namespace match
680         el_is_in_scope = (needle) ->
681                 for el in open_els
682                         if el is needle
683                                 return true
684                         if standard_scopers[el.name] is el.namespace
685                                 return false
686                 return false
687
688         clear_to_table_stopers = {
689                 'table': true
690                 'template': true
691                 'html': true
692         }
693         clear_stack_to_table_context = ->
694                 loop
695                         if clear_to_table_stopers[open_els[0].name]?
696                                 break
697                         open_els.shift()
698                 return
699         clear_to_table_body_stopers = {
700                 tbody: NS_HTML
701                 tfoot: NS_HTML
702                 thead: NS_HTML
703                 template: NS_HTML
704                 html: NS_HTML
705         }
706         clear_stack_to_table_body_context = ->
707                 loop
708                         if clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
709                                 break
710                         open_els.shift()
711                 return
712         clear_to_table_row_stopers = {
713                 'tr': true
714                 'template': true
715                 'html': true
716         }
717         clear_stack_to_table_row_context = ->
718                 loop
719                         if clear_to_table_row_stopers[open_els[0].name]?
720                                 break
721                         open_els.shift()
722                 return
723         clear_afe_to_marker = ->
724                 loop
725                         return unless afe.length > 0 # this happens in fragment case, ?spec error
726                         el = afe.shift()
727                         if el.type is TYPE_AFE_MARKER
728                                 return
729                 return
730
731         # 8.2.3.1 ...
732         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
733         reset_ins_mode = ->
734                 # 1. Let last be false.
735                 last = false
736                 # 2. Let node be the last node in the stack of open elements.
737                 node_i = 0
738                 node = open_els[node_i]
739                 # 3. Loop: If node is the first node in the stack of open elements,
740                 # then set last to true, and, if the parser was originally created as
741                 # part of the HTML fragment parsing algorithm (fragment case) set node
742                 # to the context element.
743                 loop
744                         if node_i is open_els.length - 1
745                                 last = true
746                                 # fixfull (fragment case)
747
748                         # 4. If node is a select element, run these substeps:
749                         if node.name is 'select' and node.namespace is NS_HTML
750                                 # 1. If last is true, jump to the step below labeled done.
751                                 unless last
752                                         # 2. Let ancestor be node.
753                                         ancestor_i = node_i
754                                         ancestor = node
755                                         # 3. Loop: If ancestor is the first node in the stack of
756                                         # open elements, jump to the step below labeled done.
757                                         loop
758                                                 if ancestor_i is open_els.length - 1
759                                                         break
760                                                 # 4. Let ancestor be the node before ancestor in the stack
761                                                 # of open elements.
762                                                 ancestor_i += 1
763                                                 ancestor = open_els[ancestor_i]
764                                                 # 5. If ancestor is a template node, jump to the step below
765                                                 # labeled done.
766                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
767                                                         break
768                                                 # 6. If ancestor is a table node, switch the insertion mode
769                                                 # to "in select in table" and abort these steps.
770                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
771                                                         ins_mode = ins_mode_in_select_in_table
772                                                         return
773                                                 # 7. Jump back to the step labeled loop.
774                                 # 8. Done: Switch the insertion mode to "in select" and abort
775                                 # these steps.
776                                 ins_mode = ins_mode_in_select
777                                 return
778                         # 5. If node is a td or th element and last is false, then switch
779                         # the insertion mode to "in cell" and abort these steps.
780                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
781                                 ins_mode = ins_mode_in_cell
782                                 return
783                         # 6. If node is a tr element, then switch the insertion mode to "in
784                         # row" and abort these steps.
785                         if node.name is 'tr' and node.namespace is NS_HTML
786                                 ins_mode = ins_mode_in_row
787                                 return
788                         # 7. If node is a tbody, thead, or tfoot element, then switch the
789                         # insertion mode to "in table body" and abort these steps.
790                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
791                                 ins_mode = ins_mode_in_table_body
792                                 return
793                         # 8. If node is a caption element, then switch the insertion mode
794                         # to "in caption" and abort these steps.
795                         if node.name is 'caption' and node.namespace is NS_HTML
796                                 ins_mode = ins_mode_in_caption
797                                 return
798                         # 9. If node is a colgroup element, then switch the insertion mode
799                         # to "in column group" and abort these steps.
800                         if node.name is 'colgroup' and node.namespace is NS_HTML
801                                 ins_mode = ins_mode_in_column_group
802                                 return
803                         # 10. If node is a table element, then switch the insertion mode to
804                         # "in table" and abort these steps.
805                         if node.name is 'table' and node.namespace is NS_HTML
806                                 ins_mode = ins_mode_in_table
807                                 return
808                         # 11. If node is a template element, then switch the insertion mode
809                         # to the current template insertion mode and abort these steps.
810                         if node.name is 'template' and node.namespace is NS_HTML
811                                 ins_mode = template_ins_modes[0]
812                                 return
813                         # 12. If node is a head element and last is true, then switch the
814                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
815                         # these steps. (fragment case)
816                         if node.name is 'head' and node.namespace is NS_HTML and last
817                                 ins_mode = ins_mode_in_body
818                                 return
819                         # 13. If node is a head element and last is false, then switch the
820                         # insertion mode to "in head" and abort these steps.
821                         if node.name is 'head' and node.namespace is NS_HTML and last is false
822                                 ins_mode = ins_mode_in_head
823                                 return
824                         # 14. If node is a body element, then switch the insertion mode to
825                         # "in body" and abort these steps.
826                         if node.name is 'body' and node.namespace is NS_HTML
827                                 ins_mode = ins_mode_in_body
828                                 return
829                         # 15. If node is a frameset element, then switch the insertion mode
830                         # to "in frameset" and abort these steps. (fragment case)
831                         if node.name is 'frameset' and node.namespace is NS_HTML
832                                 ins_mode = ins_mode_in_frameset
833                                 return
834                         # 16. If node is an html element, run these substeps:
835                         if node.name is 'html' and node.namespace is NS_HTML
836                                 # 1. If the head element pointer is null, switch the insertion
837                                 # mode to "before head" and abort these steps. (fragment case)
838                                 if head_element_pointer is null
839                                         ins_mode = ins_mode_before_head
840                                 else
841                                         # 2. Otherwise, the head element pointer is not null,
842                                         # switch the insertion mode to "after head" and abort these
843                                         # steps.
844                                         ins_mode = ins_mode_after_head
845                                 return
846                         # 17. If last is true, then switch the insertion mode to "in body"
847                         # and abort these steps. (fragment case)
848                         if last
849                                 ins_mode = ins_mode_in_body
850                                 return
851                         # 18. Let node now be the node before node in the stack of open
852                         # elements.
853                         node_i += 1
854                         node = open_els[node_i]
855                         # 19. Return to the step labeled loop.
856
857         # 8.2.3.2
858
859         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
860         adjusted_current_node = ->
861                 if open_els.length is 1 and flag_fragment_parsing
862                         return context_element
863                 return open_els[0]
864
865         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
866         # this implementation is structured (mostly) as described at the link above.
867         # capitalized comments are the "labels" described at the link above.
868         reconstruct_afe = ->
869                 return if afe.length is 0
870                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
871                         return
872                 # Rewind
873                 i = 0
874                 loop
875                         if i is afe.length - 1
876                                 break
877                         i += 1
878                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
879                                 i -= 1 # Advance
880                                 break
881                 # Create
882                 loop
883                         el = insert_html_element afe[i].token
884                         afe[i] = el
885                         break if i is 0
886                         i -= 1 # Advance
887
888         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
889         # adoption agency algorithm
890         # overview here:
891         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
892         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
893         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
894         adoption_agency = (subject) ->
895                 debug_log "adoption_agency()"
896                 debug_log "tree: #{serialize_els doc.children, false, true}"
897                 debug_log "open_els: #{serialize_els open_els, true, true}"
898                 debug_log "afe: #{serialize_els afe, true, true}"
899                 # FIXME CONTINUE do WATWG thing here
900                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
901                         el = open_els.shift()
902                         # remove it from the list of active formatting elements (if found)
903                         for t, i in afe
904                                 if t is el
905                                         afe.splice i, 1
906                                         break
907                         debug_log "aaa: starting off with subject on top of stack, exiting"
908                         return
909                 outer = 0
910                 loop
911                         if outer >= 8
912                                 return
913                         outer += 1
914                         # 5. Let formatting element be the last element in the list of
915                         # active formatting elements that: is between the end of the list
916                         # and the last scope marker in the list, if any, or the start of
917                         # the list otherwise, and  has the tag name subject.
918                         fe = null
919                         for t, fe_of_afe in afe
920                                 if t.type is TYPE_AFE_MARKER
921                                         break
922                                 if t.name is subject
923                                         fe = t
924                                         break
925                         # If there is no such element, then abort these steps and instead
926                         # act as described in the "any other end tag" entry above.
927                         if fe is null
928                                 debug_log "aaa: fe not found in afe"
929                                 in_body_any_other_end_tag subject
930                                 return
931                         # 6. If formatting element is not in the stack of open elements,
932                         # then this is a parse error; remove the element from the list, and
933                         # abort these steps.
934                         in_open_els = false
935                         for t, fe_of_open_els in open_els
936                                 if t is fe
937                                         in_open_els = true
938                                         break
939                         unless in_open_els
940                                 debug_log "aaa: fe not found in open_els"
941                                 parse_error()
942                                 # "remove it from the list" must mean afe, since it's not in open_els
943                                 afe.splice fe_of_afe, 1
944                                 return
945                         # 7. If formatting element is in the stack of open elements, but
946                         # the element is not in scope, then this is a parse error; abort
947                         # these steps.
948                         unless el_is_in_scope fe
949                                 debug_log "aaa: fe not in scope"
950                                 parse_error()
951                                 return
952                         # 8. If formatting element is not the current node, this is a parse
953                         # error. (But do not abort these steps.)
954                         unless open_els[0] is fe
955                                 parse_error()
956                                 # continue
957                         # 9. Let furthest block be the topmost node in the stack of open
958                         # elements that is lower in the stack than formatting element, and
959                         # is an element in the special category. There might not be one.
960                         fb = null
961                         fb_of_open_els = null
962                         for t, i in open_els
963                                 if t is fe
964                                         break
965                                 if el_is_special t
966                                         fb = t
967                                         fb_of_open_els = i
968                                         # and continue, to see if there's one that's more "topmost"
969                         # 10. If there is no furthest block, then the UA must first pop all
970                         # the nodes from the bottom of the stack of open elements, from the
971                         # current node up to and including formatting element, then remove
972                         # formatting element from the list of active formatting elements,
973                         # and finally abort these steps.
974                         if fb is null
975                                 debug_log "aaa: no fb"
976                                 loop
977                                         t = open_els.shift()
978                                         if t is fe
979                                                 afe.splice fe_of_afe, 1
980                                                 return
981                         # 11. Let common ancestor be the element immediately above
982                         # formatting element in the stack of open elements.
983                         ca = open_els[fe_of_open_els + 1] # common ancestor
984
985                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
986                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
987                         bookmark = new_aaa_bookmark()
988                         for t, i in afe
989                                 if t is fe
990                                         afe.splice i, 0, bookmark
991                                         break
992                         node = last_node = fb
993                         inner = 0
994                         loop
995                                 inner += 1
996                                 # 3. Let node be the element immediately above node in the
997                                 # stack of open elements, or if node is no longer in the stack
998                                 # of open elements (e.g. because it got removed by this
999                                 # algorithm), the element that was immediately above node in
1000                                 # the stack of open elements before node was removed.
1001                                 node_next = null
1002                                 for t, i in open_els
1003                                         if t is node
1004                                                 node_next = open_els[i + 1]
1005                                                 break
1006                                 node = node_next ? node_above
1007                                 debug_log "inner loop #{inner}"
1008                                 debug_log "tree: #{serialize_els doc.children, false, true}"
1009                                 debug_log "open_els: #{serialize_els open_els, true, true}"
1010                                 debug_log "afe: #{serialize_els afe, true, true}"
1011                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1012                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1013                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1014                                 debug_log "node: #{node.serialize true, true}"
1015                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1016
1017                                 # 4. If node is formatting element, then go to the next step in
1018                                 # the overall algorithm.
1019                                 if node is fe
1020                                         break
1021                                 debug_log "the meat"
1022                                 # 5. If inner loop counter is greater than three and node is in
1023                                 # the list of active formatting elements, then remove node from
1024                                 # the list of active formatting elements.
1025                                 node_in_afe = false
1026                                 for t, i in afe
1027                                         if t is node
1028                                                 if inner > 3
1029                                                         afe.splice i, 1
1030                                                         debug_log "max out inner"
1031                                                 else
1032                                                         node_in_afe = true
1033                                                         debug_log "in afe"
1034                                                 break
1035                                 # 6. If node is not in the list of active formatting elements,
1036                                 # then remove node from the stack of open elements and then go
1037                                 # back to the step labeled inner loop.
1038                                 unless node_in_afe
1039                                         debug_log "not in afe"
1040                                         for t, i in open_els
1041                                                 if t is node
1042                                                         node_above = open_els[i + 1]
1043                                                         open_els.splice i, 1
1044                                                         break
1045                                         continue
1046                                 debug_log "the bones"
1047                                 # 7. create an element for the token for which the element node
1048                                 # was created, in the HTML namespace, with common ancestor as
1049                                 # the intended parent; replace the entry for node in the list
1050                                 # of active formatting elements with an entry for the new
1051                                 # element, replace the entry for node in the stack of open
1052                                 # elements with an entry for the new element, and let node be
1053                                 # the new element.
1054                                 new_node = token_to_element node.token, NS_HTML, ca
1055                                 for t, i in afe
1056                                         if t is node
1057                                                 afe[i] = new_node
1058                                                 debug_log "replaced in afe"
1059                                                 break
1060                                 for t, i in open_els
1061                                         if t is node
1062                                                 node_above = open_els[i + 1]
1063                                                 open_els[i] = new_node
1064                                                 debug_log "replaced in open_els"
1065                                                 break
1066                                 node = new_node
1067                                 # 8. If last node is furthest block, then move the
1068                                 # aforementioned bookmark to be immediately after the new node
1069                                 # in the list of active formatting elements.
1070                                 if last_node is fb
1071                                         for t, i in afe
1072                                                 if t is bookmark
1073                                                         afe.splice i, 1
1074                                                         debug_log "removed bookmark"
1075                                                         break
1076                                         for t, i in afe
1077                                                 if t is node
1078                                                         # "after" means lower
1079                                                         afe.splice i, 0, bookmark # "after as <-
1080                                                         debug_log "placed bookmark after node"
1081                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1082                                                         break
1083                                 # 9. Insert last node into node, first removing it from its
1084                                 # previous parent node if any.
1085                                 if last_node.parent?
1086                                         debug_log "last_node has parent"
1087                                         for c, i in last_node.parent.children
1088                                                 if c is last_node
1089                                                         debug_log "removing last_node from parent"
1090                                                         last_node.parent.children.splice i, 1
1091                                                         break
1092                                 node.children.push last_node
1093                                 last_node.parent = node
1094                                 # 10. Let last node be node.
1095                                 last_node = node
1096                                 debug_log "at last"
1097                                 # 11. Return to the step labeled inner loop.
1098                         # 14. Insert whatever last node ended up being in the previous step
1099                         # at the appropriate place for inserting a node, but using common
1100                         # ancestor as the override target.
1101
1102                         # In the case where fe is immediately followed by fb:
1103                         #   * inner loop exits out early (node==fe)
1104                         #   * last_node is fb
1105                         #   * last_node is still in the tree (not a duplicate)
1106                         if last_node.parent?
1107                                 debug_log "FEFIRST? last_node has parent"
1108                                 for c, i in last_node.parent.children
1109                                         if c is last_node
1110                                                 debug_log "removing last_node from parent"
1111                                                 last_node.parent.children.splice i, 1
1112                                                 break
1113
1114                         debug_log "after aaa inner loop"
1115                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1116                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1117                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1118                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1119                         debug_log "tree: #{serialize_els doc.children, false, true}"
1120
1121                         debug_log "insert"
1122
1123
1124                         # can't use standard insert token thing, because it's already in
1125                         # open_els and must stay at it's current position in open_els
1126                         dest = adjusted_insertion_location ca
1127                         dest[0].children.splice dest[1], 0, last_node
1128                         last_node.parent = dest[0]
1129
1130
1131                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1132                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1133                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1134                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1135                         debug_log "tree: #{serialize_els doc.children, false, true}"
1136
1137                         # 15. Create an element for the token for which formatting element
1138                         # was created, in the HTML namespace, with furthest block as the
1139                         # intended parent.
1140                         new_element = token_to_element fe.token, NS_HTML, fb
1141                         # 16. Take all of the child nodes of furthest block and append them
1142                         # to the element created in the last step.
1143                         while fb.children.length
1144                                 t = fb.children.shift()
1145                                 t.parent = new_element
1146                                 new_element.children.push t
1147                         # 17. Append that new element to furthest block.
1148                         new_element.parent = fb
1149                         fb.children.push new_element
1150                         # 18. Remove formatting element from the list of active formatting
1151                         # elements, and insert the new element into the list of active
1152                         # formatting elements at the position of the aforementioned
1153                         # bookmark.
1154                         for t, i in afe
1155                                 if t is fe
1156                                         afe.splice i, 1
1157                                         break
1158                         for t, i in afe
1159                                 if t is bookmark
1160                                         afe[i] = new_element
1161                                         break
1162                         # 19. Remove formatting element from the stack of open elements,
1163                         # and insert the new element into the stack of open elements
1164                         # immediately below the position of furthest block in that stack.
1165                         for t, i in open_els
1166                                 if t is fe
1167                                         open_els.splice i, 1
1168                                         break
1169                         for t, i in open_els
1170                                 if t is fb
1171                                         open_els.splice i, 0, new_element
1172                                         break
1173                         # 20. Jump back to the step labeled outer loop.
1174                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1175                         debug_log "tree: #{serialize_els doc.children, false, true}"
1176                         debug_log "open_els: #{serialize_els open_els, true, true}"
1177                         debug_log "afe: #{serialize_els afe, true, true}"
1178                 debug_log "AAA DONE"
1179
1180         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
1181         close_p_element = ->
1182                 generate_implied_end_tags 'p' # arg is exception
1183                 unless open_els[0].name is 'p' and open_els[0].namespace is NS_HTML
1184                         parse_error()
1185                 while open_els.length > 1 # just in case
1186                         el = open_els.shift()
1187                         if el.name is 'p' and el.namespace is NS_HTML
1188                                 return
1189         close_p_if_in_button_scope = ->
1190                 if is_in_button_scope 'p', NS_HTML
1191                         close_p_element()
1192
1193         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1194         # aka insert_a_character = (t) ->
1195         insert_character = (t) ->
1196                 dest = adjusted_insertion_location()
1197                 # fixfull check for Document node
1198                 if dest[1] > 0
1199                         prev = dest[0].children[dest[1] - 1]
1200                         if prev.type is TYPE_TEXT
1201                                 prev.text += t.text
1202                                 return
1203                 dest[0].children.splice dest[1], 0, t
1204
1205
1206         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
1207         process_token = (t) ->
1208                 acn = adjusted_current_node()
1209                 unless acn?
1210                         ins_mode t
1211                         return
1212                 if acn.namespace is NS_HTML
1213                         ins_mode t
1214                         return
1215                 if is_mathml_text_integration_point(acn)
1216                         if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
1217                                 ins_mode t
1218                                 return
1219                         if t.type is TYPE_TEXT
1220                                 ins_mode t
1221                                 return
1222                 if acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and t.type is TYPE_START_TAG and t.name is 'svg'
1223                         ins_mode t
1224                         return
1225                 if is_html_integration acn
1226                         if t.type is TYPE_START_TAG or t.type is TYPE_TEXT
1227                                 ins_mode t
1228                                 return
1229                 if t.type is TYPE_EOF
1230                         ins_mode t
1231                         return
1232                 in_foreign_content t
1233                 return
1234
1235         # 8.2.5.1
1236         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1237         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1238         adjusted_insertion_location = (override_target = null) ->
1239                 # 1. If there was an override target specified, then let target be the
1240                 # override target.
1241                 if override_target?
1242                         target = override_target
1243                 else # Otherwise, let target be the current node.
1244                         target = open_els[0]
1245                 # 2. Determine the adjusted insertion location using the first matching
1246                 # steps from the following list:
1247                 #
1248                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1249                 # thead, or tr element Foster parenting happens when content is
1250                 # misnested in tables.
1251                 if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
1252                         loop # once. this is here so we can ``break`` to "abort these substeps"
1253                                 # 1. Let last template be the last template element in the
1254                                 # stack of open elements, if any.
1255                                 last_template = null
1256                                 last_template_i = null
1257                                 for el, i in open_els
1258                                         if el.name is 'template' and el.namespace is NS_HTML
1259                                                 last_template = el
1260                                                 last_template_i = i
1261                                                 break
1262                                 # 2. Let last table be the last table element in the stack of
1263                                 # open elements, if any.
1264                                 last_table = null
1265                                 last_table_i
1266                                 for el, i in open_els
1267                                         if el.name is 'table' and el.namespace is NS_HTML
1268                                                 last_table = el
1269                                                 last_table_i = i
1270                                                 break
1271                                 # 3. If there is a last template and either there is no last
1272                                 # table, or there is one, but last template is lower (more
1273                                 # recently added) than last table in the stack of open
1274                                 # elements, then: let adjusted insertion location be inside
1275                                 # last template's template contents, after its last child (if
1276                                 # any), and abort these substeps.
1277                                 if last_template and (last_table is null or last_template_i < last_table_i)
1278                                         target = last_template # fixfull should be it's contents
1279                                         target_i = target.children.length
1280                                         break
1281                                 # 4. If there is no last table, then let adjusted insertion
1282                                 # location be inside the first element in the stack of open
1283                                 # elements (the html element), after its last child (if any),
1284                                 # and abort these substeps. (fragment case)
1285                                 if last_table is null
1286                                         # this is odd
1287                                         target = open_els[open_els.length - 1]
1288                                         target_i = target.children.length
1289                                         break
1290                                 # 5. If last table has a parent element, then let adjusted
1291                                 # insertion location be inside last table's parent element,
1292                                 # immediately before last table, and abort these substeps.
1293                                 if last_table.parent?
1294                                         for c, i in last_table.parent.children
1295                                                 if c is last_table
1296                                                         target = last_table.parent
1297                                                         target_i = i
1298                                                         break
1299                                         break
1300                                 # 6. Let previous element be the element immediately above last
1301                                 # table in the stack of open elements.
1302                                 #
1303                                 # huh? how could it not have a parent?
1304                                 previous_element = open_els[last_table_i + 1]
1305                                 # 7. Let adjusted insertion location be inside previous
1306                                 # element, after its last child (if any).
1307                                 target = previous_element
1308                                 target_i = target.children.length
1309                                 # Note: These steps are involved in part because it's possible
1310                                 # for elements, the table element in this case in particular,
1311                                 # to have been moved by a script around in the DOM, or indeed
1312                                 # removed from the DOM entirely, after the element was inserted
1313                                 # by the parser.
1314                                 break # don't really loop
1315                 else
1316                         # Otherwise Let adjusted insertion location be inside target, after
1317                         # its last child (if any).
1318                         target_i = target.children.length
1319
1320                 # 3. If the adjusted insertion location is inside a template element,
1321                 # let it instead be inside the template element's template contents,
1322                 # after its last child (if any).
1323                 # fixfull (template)
1324
1325                 # 4. Return the adjusted insertion location.
1326                 return [target, target_i]
1327
1328         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1329         # aka create_an_element_for_token
1330         token_to_element = (t, namespace, intended_parent) ->
1331                 # convert attributes into a hash
1332                 attrs = {}
1333                 for a in t.attrs_a
1334                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1335                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1336
1337                 # TODO 2. If the newly created element has an xmlns attribute in the
1338                 # XMLNS namespace whose value is not exactly the same as the element's
1339                 # namespace, that is a parse error. Similarly, if the newly created
1340                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1341                 # value is not the XLink Namespace, that is a parse error.
1342
1343                 # fixfull: the spec says stuff about form pointers and ownerDocument
1344
1345                 return el
1346
1347         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1348         insert_foreign_element = (token, namespace) ->
1349                 ail = adjusted_insertion_location()
1350                 ail_el = ail[0]
1351                 ail_i = ail[1]
1352                 el = token_to_element token, namespace, ail_el
1353                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1354                 el.parent = ail_el
1355                 ail_el.children.splice ail_i, 0, el
1356                 open_els.unshift el
1357                 return el
1358         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1359         insert_html_element = (token) ->
1360                 insert_foreign_element token, NS_HTML
1361
1362         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1363         # position should be [node, index_within_children]
1364         insert_comment = (t, position = null) ->
1365                 position ?= adjusted_insertion_location()
1366                 position[0].children.splice position[1], 0, t
1367
1368         # 8.2.5.2
1369         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1370         parse_generic_raw_text = (t) ->
1371                 insert_html_element t
1372                 tok_state = tok_state_rawtext
1373                 original_ins_mode = ins_mode
1374                 ins_mode = ins_mode_text
1375         parse_generic_rcdata_text = (t) ->
1376                 insert_html_element t
1377                 tok_state = tok_state_rcdata
1378                 original_ins_mode = ins_mode
1379                 ins_mode = ins_mode_text
1380
1381         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1382         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1383         generate_implied_end_tags = (except = null) ->
1384                 while end_tag_implied[open_els[0].name] is open_els[0].namespace and open_els[0].name isnt except
1385                         open_els.shift()
1386
1387         # 8.2.5.4 The rules for parsing tokens in HTML content
1388         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1389
1390         # 8.2.5.4.1 The "initial" insertion mode
1391         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1392         ins_mode_initial = (t) ->
1393                 if is_space_tok t
1394                         return
1395                 if t.type is TYPE_COMMENT
1396                         # ?fixfull
1397                         doc.children.push t
1398                         return
1399                 if t.type is TYPE_DOCTYPE
1400                         # FIXME check identifiers, set quirks, etc
1401                         # fixfull
1402                         doc.children.push t
1403                         ins_mode = ins_mode_before_html
1404                         return
1405                 # Anything else
1406                 #fixfull (iframe, quirks)
1407                 ins_mode = ins_mode_before_html
1408                 process_token t
1409                 return
1410
1411         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1412         ins_mode_before_html = (t) ->
1413                 if t.type is TYPE_DOCTYPE
1414                         parse_error()
1415                         return
1416                 if t.type is TYPE_COMMENT
1417                         doc.children.push t
1418                         return
1419                 if is_space_tok t
1420                         return
1421                 if t.type is TYPE_START_TAG and t.name is 'html'
1422                         el = token_to_element t, NS_HTML, doc
1423                         doc.children.push el
1424                         open_els.unshift(el)
1425                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1426                         ins_mode = ins_mode_before_head
1427                         return
1428                 if t.type is TYPE_END_TAG
1429                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1430                                 # fall through to "anything else"
1431                         else
1432                                 parse_error()
1433                                 return
1434                 # Anything else
1435                 html_tok = new_open_tag 'html'
1436                 el = token_to_element html_tok, NS_HTML, doc
1437                 doc.children.push el
1438                 open_els.unshift el
1439                 # ?fixfull browsing context
1440                 ins_mode = ins_mode_before_head
1441                 process_token t
1442                 return
1443
1444         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1445         ins_mode_before_head = (t) ->
1446                 if is_space_tok t
1447                         return
1448                 if t.type is TYPE_COMMENT
1449                         insert_comment t
1450                         return
1451                 if t.type is TYPE_DOCTYPE
1452                         parse_error()
1453                         return
1454                 if t.type is TYPE_START_TAG and t.name is 'html'
1455                         ins_mode_in_body t
1456                         return
1457                 if t.type is TYPE_START_TAG and t.name is 'head'
1458                         el = insert_html_element t
1459                         head_element_pointer = el
1460                         ins_mode = ins_mode_in_head
1461                         return
1462                 if t.type is TYPE_END_TAG
1463                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1464                                 # fall through to Anything else below
1465                         else
1466                                 parse_error()
1467                                 return
1468                 # Anything else
1469                 head_tok = new_open_tag 'head'
1470                 el = insert_html_element head_tok
1471                 head_element_pointer = el
1472                 ins_mode = ins_mode_in_head
1473                 process_token t
1474
1475         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1476         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1477                 open_els.shift() # spec says this will be a 'head' node
1478                 ins_mode = ins_mode_after_head
1479                 process_token t
1480         ins_mode_in_head = (t) ->
1481                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1482                         insert_character t
1483                         return
1484                 if t.type is TYPE_COMMENT
1485                         insert_comment t
1486                         return
1487                 if t.type is TYPE_DOCTYPE
1488                         parse_error()
1489                         return
1490                 if t.type is TYPE_START_TAG and t.name is 'html'
1491                         ins_mode_in_body t
1492                         return
1493                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1494                         el = insert_html_element t
1495                         open_els.shift()
1496                         t.acknowledge_self_closing()
1497                         return
1498                 if t.type is TYPE_START_TAG and t.name is 'meta'
1499                         el = insert_html_element t
1500                         open_els.shift()
1501                         t.acknowledge_self_closing()
1502                         # fixfull encoding stuff
1503                         return
1504                 if t.type is TYPE_START_TAG and t.name is 'title'
1505                         parse_generic_rcdata_text t
1506                         return
1507                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
1508                         parse_generic_raw_text t
1509                         return
1510                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1511                         insert_html_element t
1512                         ins_mode = ins_mode_in_head_noscript
1513                         return
1514                 if t.type is TYPE_START_TAG and t.name is 'script'
1515                         ail = adjusted_insertion_location()
1516                         el = token_to_element t, NS_HTML, ail
1517                         el.flag 'parser-inserted', true
1518                         # fixfull frament case
1519                         ail[0].children.splice ail[1], 0, el
1520                         open_els.unshift el
1521                         tok_state = tok_state_script_data
1522                         original_ins_mode = ins_mode # make sure orig... is defined
1523                         ins_mode = ins_mode_text
1524                         return
1525                 if t.type is TYPE_END_TAG and t.name is 'head'
1526                         open_els.shift() # will be a head element... spec says so
1527                         ins_mode = ins_mode_after_head
1528                         return
1529                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1530                         ins_mode_in_head_else t
1531                         return
1532                 if t.type is TYPE_START_TAG and t.name is 'template'
1533                         insert_html_element t
1534                         afe_push_marker()
1535                         flag_frameset_ok = false
1536                         ins_mode = ins_mode_in_template
1537                         template_ins_modes.unshift ins_mode_in_template
1538                         return
1539                 if t.type is TYPE_END_TAG and t.name is 'template'
1540                         if template_tag_is_open()
1541                                 generate_implied_end_tags
1542                                 if open_els[0].name isnt 'template'
1543                                         parse_error()
1544                                 loop
1545                                         el = open_els.shift()
1546                                         if el.name is 'template' and el.namespace is NS_HTML
1547                                                 break
1548                                 clear_afe_to_marker()
1549                                 template_ins_modes.shift()
1550                                 reset_ins_mode()
1551                         else
1552                                 parse_error()
1553                         return
1554                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1555                         parse_error()
1556                         return
1557                 ins_mode_in_head_else t
1558
1559         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1560         ins_mode_in_head_noscript_else = (t) ->
1561                 parse_error()
1562                 open_els.shift()
1563                 ins_mode = ins_mode_in_head
1564                 process_token t
1565         ins_mode_in_head_noscript = (t) ->
1566                 if t.type is TYPE_DOCTYPE
1567                         parse_error()
1568                         return
1569                 if t.type is TYPE_START_TAG and t.name is 'html'
1570                         ins_mode_in_body t
1571                         return
1572                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1573                         open_els.shift()
1574                         ins_mode = ins_mode_in_head
1575                         return
1576                 if is_space_tok(t) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1577                         ins_mode_in_head t
1578                         return
1579                 if t.type is TYPE_END_TAG and t.name is 'br'
1580                         ins_mode_in_head_noscript_else t
1581                         return
1582                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1583                         parse_error()
1584                         return
1585                 # Anything else
1586                 ins_mode_in_head_noscript_else t
1587                 return
1588
1589
1590
1591         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1592         ins_mode_after_head_else = (t) ->
1593                 body_tok = new_open_tag 'body'
1594                 insert_html_element body_tok
1595                 ins_mode = ins_mode_in_body
1596                 process_token t
1597                 return
1598         ins_mode_after_head = (t) ->
1599                 if is_space_tok t
1600                         insert_character t
1601                         return
1602                 if t.type is TYPE_COMMENT
1603                         insert_comment t
1604                         return
1605                 if t.type is TYPE_DOCTYPE
1606                         parse_error()
1607                         return
1608                 if t.type is TYPE_START_TAG and t.name is 'html'
1609                         ins_mode_in_body t
1610                         return
1611                 if t.type is TYPE_START_TAG and t.name is 'body'
1612                         insert_html_element t
1613                         flag_frameset_ok = false
1614                         ins_mode = ins_mode_in_body
1615                         return
1616                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1617                         insert_html_element t
1618                         ins_mode = ins_mode_in_frameset
1619                         return
1620                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1621                         parse_error()
1622                         open_els.unshift head_element_pointer
1623                         ins_mode_in_head t
1624                         for el, i of open_els
1625                                 if el is head_element_pointer
1626                                         open_els.splice i, 1
1627                                         return
1628                         console.log "warning: 23904 couldn't find head element in open_els"
1629                         return
1630                 if t.type is TYPE_END_TAG and t.name is 'template'
1631                         ins_mode_in_head t
1632                         return
1633                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1634                         ins_mode_after_head_else t
1635                         return
1636                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1637                         parse_error()
1638                         return
1639                 # Anything else
1640                 ins_mode_after_head_else t
1641
1642         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1643         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1644                 for el, i in open_els
1645                         if el.name is name and el.namespace is NS_HTML
1646                                 generate_implied_end_tags name # arg is exception
1647                                 parse_error() unless i is 0
1648                                 while i >= 0
1649                                         open_els.shift()
1650                                         i -= 1
1651                                 return
1652                         if special_elements[el.name] is el.namespace
1653                                 parse_error()
1654                                 return
1655                 return
1656         ins_mode_in_body = (t) ->
1657                 if t.type is TYPE_TEXT and t.text is "\u0000"
1658                         parse_error()
1659                         return
1660                 if is_space_tok t
1661                         reconstruct_afe()
1662                         insert_character t
1663                         return
1664                 if t.type is TYPE_TEXT
1665                         reconstruct_afe()
1666                         insert_character t
1667                         flag_frameset_ok = false
1668                         return
1669                 if t.type is TYPE_COMMENT
1670                         insert_comment t
1671                         return
1672                 if t.type is TYPE_DOCTYPE
1673                         parse_error()
1674                         return
1675                 if t.type is TYPE_START_TAG and t.name is 'html'
1676                         parse_error()
1677                         return if template_tag_is_open()
1678                         root_attrs = open_els[open_els.length - 1].attrs
1679                         for a in t.attrs_a
1680                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1681                         return
1682
1683                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1684                         ins_mode_in_head t
1685                         return
1686                 if t.type is TYPE_START_TAG and t.name is 'body'
1687                         parse_error()
1688                         return if open_els.length < 2
1689                         second = open_els[open_els.length - 2]
1690                         return unless second.namespace is NS_HTML
1691                         return unless second.name is 'body'
1692                         return if template_tag_is_open()
1693                         flag_frameset_ok = false
1694                         for a of t.attrs_a
1695                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1696                         return
1697                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1698                         parse_error()
1699                         return if open_els.length < 2
1700                         second_i = open_els.length - 2
1701                         second = open_els[second_i]
1702                         return unless second.namespace is NS_HTML
1703                         return unless second.name is 'body'
1704                         if flag_frameset_ok is false
1705                                 return
1706                         if second.parent?
1707                                 for el, i in second.parent.children
1708                                         if el is second
1709                                                 second.parent.children.splice i, 1
1710                                                 break
1711                         open_els.splice second_i, 1
1712                         # pop everything except the "root html element"
1713                         while open_els.length > 1
1714                                 open_els.shift()
1715                         insert_html_element t
1716                         ins_mode = ins_mode_in_frameset
1717                         return
1718                 if t.type is TYPE_EOF
1719                         ok_tags = {
1720                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1721                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1722                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1723                         }
1724                         for el in open_els
1725                                 unless ok_tags[t.name] is el.namespace
1726                                         parse_error()
1727                                         break
1728                         if template_ins_modes.length > 0
1729                                 ins_mode_in_template t
1730                         else
1731                                 stop_parsing()
1732                         return
1733                 if t.type is TYPE_END_TAG and t.name is 'body'
1734                         unless is_in_scope 'body', NS_HTML
1735                                 parse_error()
1736                                 return
1737                         ok_tags = {
1738                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1739                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1740                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1741                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1742                                 html:NS_HTML
1743                         }
1744                         for el in open_els
1745                                 unless ok_tags[t.name] is el.namespace
1746                                         parse_error()
1747                                         break
1748                         ins_mode = ins_mode_after_body
1749                         return
1750                 if t.type is TYPE_END_TAG and t.name is 'html'
1751                         unless is_in_scope 'body', NS_HTML
1752                                 parse_error()
1753                                 return
1754                         ok_tags = {
1755                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1756                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1757                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1758                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1759                                 html:NS_HTML
1760                         }
1761                         for el in open_els
1762                                 unless ok_tags[t.name] is el.namespace
1763                                         parse_error()
1764                                         break
1765                         ins_mode = ins_mode_after_body
1766                         process_token t
1767                         return
1768                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1769                         close_p_if_in_button_scope()
1770                         insert_html_element t
1771                         return
1772                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1773                         close_p_if_in_button_scope()
1774                         if h_tags[open_els[0].name] is open_els[0].namespace
1775                                 parse_error()
1776                                 open_els.shift()
1777                         insert_html_element t
1778                         return
1779                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1780                         close_p_if_in_button_scope()
1781                         insert_html_element t
1782                         # spec: If the next token is a "LF" (U+000A) character token, then
1783                         # ignore that token and move on to the next one. (Newlines at the
1784                         # start of pre blocks are ignored as an authoring convenience.)
1785                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1786                                 cur += 1
1787                         flag_frameset_ok = false
1788                         return
1789                 if t.type is TYPE_START_TAG and t.name is 'form'
1790                         unless form_element_pointer is null or template_tag_is_open()
1791                                 parse_error()
1792                                 return
1793                         close_p_if_in_button_scope()
1794                         el = insert_html_element t
1795                         unless template_tag_is_open()
1796                                 form_element_pointer = el
1797                         return
1798                 if t.type is TYPE_START_TAG and t.name is 'li'
1799                         flag_frameset_ok = false
1800                         for node in open_els
1801                                 if node.name is 'li' and node.namespace is NS_HTML
1802                                         generate_implied_end_tags 'li' # arg is exception
1803                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1804                                                 parse_error()
1805                                         loop
1806                                                 el = open_els.shift()
1807                                                 if el.name is 'li' and el.namespace is NS_HTML
1808                                                         break
1809                                         break
1810                                 if el_is_special_not_adp node
1811                                                 break
1812                         close_p_if_in_button_scope()
1813                         insert_html_element t
1814                         return
1815                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1816                         flag_frameset_ok = false
1817                         for node in open_els
1818                                 if node.name is 'dd' and node.namespace is NS_HTML
1819                                         generate_implied_end_tags 'dd' # arg is exception
1820                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1821                                                 parse_error()
1822                                         loop
1823                                                 el = open_els.shift()
1824                                                 if el.name is 'dd' and el.namespace is NS_HTML
1825                                                         break
1826                                         break
1827                                 if node.name is 'dt' and node.namespace is NS_HTML
1828                                         generate_implied_end_tags 'dt' # arg is exception
1829                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1830                                                 parse_error()
1831                                         loop
1832                                                 el = open_els.shift()
1833                                                 if el.name is 'dt' and el.namespace is NS_HTML
1834                                                         break
1835                                         break
1836                                 if el_is_special_not_adp node
1837                                         break
1838                         close_p_if_in_button_scope()
1839                         insert_html_element t
1840                         return
1841                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1842                         close_p_if_in_button_scope()
1843                         insert_html_element t
1844                         tok_state = tok_state_plaintext
1845                         return
1846                 if t.type is TYPE_START_TAG and t.name is 'button'
1847                         if is_in_scope 'button', NS_HTML
1848                                 parse_error()
1849                                 generate_implied_end_tags()
1850                                 loop
1851                                         el = open_els.shift()
1852                                         if el.name is 'button' and el.namespace is NS_HTML
1853                                                 break
1854                         reconstruct_afe()
1855                         insert_html_element t
1856                         flag_frameset_ok = false
1857                         return
1858                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1859                         unless is_in_scope t.name, NS_HTML
1860                                 parse_error()
1861                                 return
1862                         generate_implied_end_tags()
1863                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1864                                 parse_error()
1865                         loop
1866                                 el = open_els.shift()
1867                                 if el.name is t.name and el.namespace is NS_HTML
1868                                         return
1869                         return
1870                 if t.type is TYPE_END_TAG and t.name is 'form'
1871                         unless template_tag_is_open()
1872                                 node = form_element_pointer
1873                                 form_element_pointer = null
1874                                 if node is null or not el_is_in_scope node
1875                                         parse_error()
1876                                         return
1877                                 generate_implied_end_tags()
1878                                 if open_els[0] isnt node
1879                                         parse_error()
1880                                 for el, i in open_els
1881                                         if el is node
1882                                                 open_els.splice i, 1
1883                                                 break
1884                         else
1885                                 unless is_in_scope 'form', NS_HTML
1886                                         parse_error()
1887                                         return
1888                                 generate_implied_end_tags()
1889                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1890                                         parse_error()
1891                                 loop
1892                                         el = open_els.shift()
1893                                         if el.name is 'form' and el.namespace is NS_HTML
1894                                                 break
1895                         return
1896                 if t.type is TYPE_END_TAG and t.name is 'p'
1897                         unless is_in_button_scope 'p', NS_HTML
1898                                 parse_error()
1899                                 insert_html_element new_open_tag 'p'
1900                         close_p_element()
1901                         return
1902                 if t.type is TYPE_END_TAG and t.name is 'li'
1903                         unless is_in_li_scope 'li', NS_HTML
1904                                 parse_error()
1905                                 return
1906                         generate_implied_end_tags 'li' # arg is exception
1907                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1908                                 parse_error()
1909                         loop
1910                                 el = open_els.shift()
1911                                 if el.name is 'li' and el.namespace is NS_HTML
1912                                         break
1913                         return
1914                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1915                         unless is_in_scope t.name, NS_HTML
1916                                 parse_error()
1917                                 return
1918                         generate_implied_end_tags t.name # arg is exception
1919                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1920                                 parse_error()
1921                         loop
1922                                 el = open_els.shift()
1923                                 if el.name is t.name and el.namespace is NS_HTML
1924                                         break
1925                         return
1926                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1927                         h_in_scope = false
1928                         for el in open_els
1929                                 if h_tags[el.name] is el.namespace
1930                                         h_in_scope = true
1931                                         break
1932                                 if standard_scopers[el.name] is el.namespace
1933                                         break
1934                         unless h_in_scope
1935                                 parse_error()
1936                                 return
1937                         generate_implied_end_tags()
1938                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1939                                 parse_error()
1940                         loop
1941                                 el = open_els.shift()
1942                                 if h_tags[el.name] is el.namespace
1943                                         break
1944                         return
1945                 # deep breath!
1946                 if t.type is TYPE_START_TAG and t.name is 'a'
1947                         # If the list of active formatting elements contains an a element
1948                         # between the end of the list and the last marker on the list (or
1949                         # the start of the list if there is no marker on the list), then
1950                         # this is a parse error; run the adoption agency algorithm for the
1951                         # tag name "a", then remove that element from the list of active
1952                         # formatting elements and the stack of open elements if the
1953                         # adoption agency algorithm didn't already remove it (it might not
1954                         # have if the element is not in table scope).
1955                         found = false
1956                         for el in afe
1957                                 if el.type is TYPE_AFE_MARKER
1958                                         break
1959                                 if el.name is 'a' and el.namespace is NS_HTML
1960                                         found = el
1961                         if found?
1962                                 parse_error()
1963                                 adoption_agency 'a'
1964                                 for el, i in afe
1965                                         if el is found
1966                                                 afe.splice i, 1
1967                                 for el, i in open_els
1968                                         if el is found
1969                                                 open_els.splice i, 1
1970                         reconstruct_afe()
1971                         el = insert_html_element t
1972                         afe_push el
1973                         return
1974                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1975                         reconstruct_afe()
1976                         el = insert_html_element t
1977                         afe_push el
1978                         return
1979                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1980                         reconstruct_afe()
1981                         el = insert_html_element t
1982                         afe_push el
1983                         return
1984                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1985                         adoption_agency t.name
1986                         return
1987                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1988                         reconstruct_afe()
1989                         insert_html_element t
1990                         afe_push_marker()
1991                         flag_frameset_ok = false
1992                         return
1993                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1994                         unless is_in_scope t.name, NS_HTML
1995                                 parse_error()
1996                                 return
1997                         generate_implied_end_tags()
1998                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1999                                 parse_error()
2000                         loop
2001                                 el = open_els.shift()
2002                                 if el.name is t.name and el.namespace is NS_HTML
2003                                         break
2004                         clear_afe_to_marker()
2005                         return
2006                 if t.type is TYPE_START_TAG and t.name is 'table'
2007                         close_p_if_in_button_scope() # fixfull quirksmode thing
2008                         insert_html_element t
2009                         flag_frameset_ok = false
2010                         ins_mode = ins_mode_in_table
2011                         return
2012                 if t.type is TYPE_END_TAG and t.name is 'br'
2013                         parse_error()
2014                         t.type is TYPE_START_TAG
2015                         # fall through
2016                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2017                         reconstruct_afe()
2018                         insert_html_element t
2019                         open_els.shift()
2020                         t.acknowledge_self_closing()
2021                         flag_frameset_ok = false
2022                         return
2023                 if t.type is TYPE_START_TAG and t.name is 'input'
2024                         reconstruct_afe()
2025                         insert_html_element t
2026                         open_els.shift()
2027                         t.acknowledge_self_closing()
2028                         unless is_input_hidden_tok t
2029                                 flag_frameset_ok = false
2030                         return
2031                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2032                         insert_html_element t
2033                         open_els.shift()
2034                         t.acknowledge_self_closing()
2035                         return
2036                 if t.type is TYPE_START_TAG and t.name is 'hr'
2037                         close_p_if_in_button_scope()
2038                         insert_html_element t
2039                         open_els.shift()
2040                         t.acknowledge_self_closing()
2041                         flag_frameset_ok = false
2042                         return
2043                 if t.type is TYPE_START_TAG and t.name is 'image'
2044                         parse_error()
2045                         t.name = 'img'
2046                         process_token t
2047                         return
2048                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2049                         parse_error()
2050                         if template_tag_is_open() is false and form_element_pointer isnt null
2051                                 return
2052                         t.acknowledge_self_closing()
2053                         flag_frameset_ok = false
2054                         close_p_if_in_button_scope()
2055                         el = insert_html_element new_open_tag 'form'
2056                         unless template_tag_is_open()
2057                                 form_element_pointer = el
2058                         for a in t.attrs_a
2059                                 if a[0] is 'action'
2060                                         el.attrs['action'] = a[1]
2061                                         break
2062                         insert_html_element new_open_tag 'hr'
2063                         open_els.shift()
2064                         reconstruct_afe()
2065                         insert_html_element new_open_tag 'label'
2066                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2067                         input_el = new_open_tag 'input'
2068                         prompt = null
2069                         for a in t.attrs_a
2070                                 if a[0] is 'prompt'
2071                                         prompt = a[1]
2072                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2073                                         input_el.attrs_a.push [a[0], a[1]]
2074                         input_el.attrs_a.push ['name', 'isindex']
2075                         # fixfull this next bit is in english... internationalize?
2076                         prompt ?= "This is a searchable index. Enter search keywords: "
2077                         insert_character new_character_token prompt # fixfull split
2078                         # TODO submit typo "balue" in spec
2079                         insert_html_element input_el
2080                         open_els.shift()
2081                         # insert_character '' # you can put chars here if promt attr missing
2082                         open_els.shift()
2083                         insert_html_element new_open_tag 'hr'
2084                         open_els.shift()
2085                         open_els.shift()
2086                         unless template_tag_is_open()
2087                                 form_element_pointer = null
2088                         return
2089                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2090                         insert_html_element t
2091                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2092                                 cur += 1
2093                         tok_state = tok_state_rcdata
2094                         original_ins_mode = ins_mode
2095                         flag_frameset_ok = false
2096                         ins_mode = ins_mode_text
2097                         return
2098                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2099                         close_p_if_in_button_scope()
2100                         reconstruct_afe()
2101                         flag_frameset_ok = false
2102                         parse_generic_raw_text t
2103                         return
2104                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2105                         flag_frameset_ok = false
2106                         parse_generic_raw_text t
2107                         return
2108                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2109                         parse_generic_raw_text t
2110                         return
2111                 if t.type is TYPE_START_TAG and t.name is 'select'
2112                         reconstruct_afe()
2113                         insert_html_element t
2114                         flag_frameset_ok = false
2115                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2116                                 ins_mode = ins_mode_in_select_in_table
2117                         else
2118                                 ins_mode = ins_mode_in_select
2119                         return
2120                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2121                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2122                                 open_els.shift()
2123                         reconstruct_afe()
2124                         insert_html_element t
2125                         return
2126 # this comment block implements the W3C spec
2127 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2128 #                       if is_in_scope 'ruby', NS_HTML
2129 #                               generate_implied_end_tags()
2130 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2131 #                                       parse_error()
2132 #                       insert_html_element t
2133 #                       return
2134 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2135 #                       if is_in_scope 'ruby', NS_HTML
2136 #                               generate_implied_end_tags 'rtc' # arg is exception
2137 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2138 #                                       parse_error()
2139 #                       insert_html_element t
2140 #                       return
2141 # below implements the WATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2142                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2143                         if is_in_scope 'ruby', NS_HTML
2144                                 generate_implied_end_tags()
2145                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2146                                         parse_error()
2147                         insert_html_element t
2148                         return
2149                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2150                         if is_in_scope 'ruby', NS_HTML
2151                                 generate_implied_end_tags 'rtc'
2152                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2153                                         parse_error()
2154                         insert_html_element t
2155                         return
2156 # end WATWG chunk
2157                 if t.type is TYPE_START_TAG and t.name is 'math'
2158                         reconstruct_afe()
2159                         adjust_mathml_attributes t
2160                         adjust_foreign_attributes t
2161                         insert_foreign_element t, NS_MATHML
2162                         if t.flag 'self-closing'
2163                                 open_els.shift()
2164                                 t.acknowledge_self_closing()
2165                         return
2166                 if t.type is TYPE_START_TAG and t.name is 'svg'
2167                         reconstruct_afe()
2168                         adjust_svg_attributes t
2169                         adjust_foreign_attributes t
2170                         insert_foreign_element t, NS_SVG
2171                         if t.flag 'self-closing'
2172                                 open_els.shift()
2173                                 t.acknowledge_self_closing()
2174                         return
2175                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2176                         parse_error()
2177                         return
2178                 if t.type is TYPE_START_TAG # any other start tag
2179                         reconstruct_afe()
2180                         insert_html_element t
2181                         return
2182                 if t.type is TYPE_END_TAG # any other end tag
2183                         in_body_any_other_end_tag t.name
2184                         return
2185                 return
2186
2187         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2188         ins_mode_text = (t) ->
2189                 if t.type is TYPE_TEXT
2190                         insert_character t
2191                         return
2192                 if t.type is TYPE_EOF
2193                         parse_error()
2194                         if open_els[0].name is 'script' and open_els[0].namespace is NS_HTML
2195                                 open_els[0].flag 'already started', true
2196                         open_els.shift()
2197                         ins_mode = original_ins_mode
2198                         process_token t
2199                         return
2200                 if t.type is TYPE_END_TAG and t.name is 'script'
2201                         open_els.shift()
2202                         ins_mode = original_ins_mode
2203                         # fixfull the spec seems to assume that I'm going to run the script
2204                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2205                         return
2206                 if t.type is TYPE_END_TAG
2207                         open_els.shift()
2208                         ins_mode = original_ins_mode
2209                         return
2210                 console.log 'warning: end of ins_mode_text reached'
2211
2212         # the functions below implement the tokenizer stats described here:
2213         # http://www.w3.org/TR/html5/syntax.html#tokenization
2214
2215         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2216         ins_mode_in_table_else = (t) ->
2217                 parse_error()
2218                 flag_foster_parenting = true
2219                 ins_mode_in_body t
2220                 flag_foster_parenting = false
2221                 return
2222         ins_mode_in_table = (t) ->
2223                 switch t.type
2224                         when TYPE_TEXT
2225                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2226                                         pending_table_character_tokens = []
2227                                         original_ins_mode = ins_mode
2228                                         ins_mode = ins_mode_in_table_text
2229                                         process_token t
2230                                 else
2231                                         ins_mode_in_table_else t
2232                         when TYPE_COMMENT
2233                                 insert_comment t
2234                         when TYPE_DOCTYPE
2235                                 parse_error()
2236                         when TYPE_START_TAG
2237                                 switch t.name
2238                                         when 'caption'
2239                                                 clear_stack_to_table_context()
2240                                                 afe_push_marker()
2241                                                 insert_html_element t
2242                                                 ins_mode = ins_mode_in_caption
2243                                         when 'colgroup'
2244                                                 clear_stack_to_table_context()
2245                                                 insert_html_element t
2246                                                 ins_mode = ins_mode_in_column_group
2247                                         when 'col'
2248                                                 clear_stack_to_table_context()
2249                                                 insert_html_element new_open_tag 'colgroup'
2250                                                 ins_mode = ins_mode_in_column_group
2251                                                 process_token t
2252                                         when 'tbody', 'tfoot', 'thead'
2253                                                 clear_stack_to_table_context()
2254                                                 insert_html_element t
2255                                                 ins_mode = ins_mode_in_table_body
2256                                         when 'td', 'th', 'tr'
2257                                                 clear_stack_to_table_context()
2258                                                 insert_html_element new_open_tag 'tbody'
2259                                                 ins_mode = ins_mode_in_table_body
2260                                                 process_token t
2261                                         when 'table'
2262                                                 parse_error()
2263                                                 if is_in_table_scope 'table', NS_HTML
2264                                                         loop
2265                                                                 el = open_els.shift()
2266                                                                 if el.name is 'table' and el.namespace is NS_HTML
2267                                                                         break
2268                                                         reset_ins_mode()
2269                                                         process_token t
2270                                         when 'style', 'script', 'template'
2271                                                 ins_mode_in_head t
2272                                         when 'input'
2273                                                 unless is_input_hidden_tok t
2274                                                         ins_mode_in_table_else t
2275                                                 else
2276                                                         parse_error()
2277                                                         el = insert_html_element t
2278                                                         open_els.shift()
2279                                                         t.acknowledge_self_closing()
2280                                         when 'form'
2281                                                 parse_error()
2282                                                 if form_element_pointer?
2283                                                         return
2284                                                 if template_tag_is_open()
2285                                                         return
2286                                                 form_element_pointer = insert_html_element t
2287                                                 open_els.shift()
2288                                         else
2289                                                 ins_mode_in_table_else t
2290                         when TYPE_END_TAG
2291                                 switch t.name
2292                                         when 'table'
2293                                                 if is_in_table_scope 'table', NS_HTML
2294                                                         loop
2295                                                                 el = open_els.shift()
2296                                                                 if el.name is 'table' and el.namespace is NS_HTML
2297                                                                         break
2298                                                         reset_ins_mode()
2299                                                 else
2300                                                         parse_error()
2301                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
2302                                                 parse_error()
2303                                         when 'template'
2304                                                 ins_mode_in_head t
2305                                         else
2306                                                 ins_mode_in_table_else t
2307                         when TYPE_EOF
2308                                 ins_mode_in_body t
2309                         else
2310                                 ins_mode_in_table_else t
2311
2312
2313         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2314         ins_mode_in_table_text = (t) ->
2315                 if t.type is TYPE_TEXT and t.text is "\u0000"
2316                         # from javascript?
2317                         parse_error()
2318                         return
2319                 if t.type is TYPE_TEXT
2320                         pending_table_character_tokens.push t
2321                         return
2322                 # Anything else
2323                 all_space = true
2324                 for old in pending_table_character_tokens
2325                         unless is_space_tok old
2326                                 all_space = false
2327                                 break
2328                 if all_space
2329                         for old in pending_table_character_tokens
2330                                 insert_character old
2331                 else
2332                         for old in pending_table_character_tokens
2333                                 ins_mode_in_table_else old
2334                 pending_table_character_tokens = []
2335                 ins_mode = original_ins_mode
2336                 process_token t
2337
2338         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2339         ins_mode_in_caption = (t) ->
2340                 if t.type is TYPE_END_TAG and t.name is 'caption'
2341                         if is_in_table_scope 'caption', NS_HTML
2342                                 generate_implied_end_tags()
2343                                 if open_els[0].name isnt 'caption'
2344                                         parse_error()
2345                                 loop
2346                                         el = open_els.shift()
2347                                         if el.name is 'caption' and el.namespace is NS_HTML
2348                                                 break
2349                                 clear_afe_to_marker()
2350                                 ins_mode = ins_mode_in_table
2351                         else
2352                                 parse_error()
2353                                 # fragment case
2354                         return
2355                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2356                         parse_error()
2357                         if is_in_table_scope 'caption', NS_HTML
2358                                 loop
2359                                         el = open_els.shift()
2360                                         if el.name is 'caption' and el.namespace is NS_HTML
2361                                                 break
2362                                 clear_afe_to_marker()
2363                                 ins_mode = ins_mode_in_table
2364                                 process_token t
2365                         # else fragment case
2366                         return
2367                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2368                         parse_error()
2369                         return
2370                 # Anything else
2371                 ins_mode_in_body t
2372
2373         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2374         ins_mode_in_column_group = (t) ->
2375                 if is_space_tok t
2376                         insert_character t
2377                         return
2378                 if t.type is TYPE_COMMENT
2379                         insert_comment t
2380                         return
2381                 if t.type is TYPE_DOCTYPE
2382                         parse_error()
2383                         return
2384                 if t.type is TYPE_START_TAG and t.name is 'html'
2385                         ins_mode_in_body t
2386                         return
2387                 if t.type is TYPE_START_TAG and t.name is 'col'
2388                         el = insert_html_element t
2389                         open_els.shift()
2390                         t.acknowledge_self_closing()
2391                         return
2392                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2393                         if open_els[0].name is 'colgroup' and open_els.namespace is NS_HTML
2394                                 open_els.shift()
2395                                 ins_mode = ins_mode_in_table
2396                         else
2397                                 parse_error()
2398                         return
2399                 if t.type is TYPE_END_TAG and t.name is 'col'
2400                         parse_error()
2401                         return
2402                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2403                         ins_mode_in_head t
2404                         return
2405                 if t.type is TYPE_EOF
2406                         ins_mode_in_body t
2407                         return
2408                 # Anything else
2409                 if open_els[0].name isnt 'colgroup'
2410                         parse_error()
2411                         return
2412                 open_els.shift()
2413                 ins_mode = ins_mode_in_table
2414                 process_token t
2415                 return
2416
2417         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2418         ins_mode_in_table_body = (t) ->
2419                 if t.type is TYPE_START_TAG and t.name is 'tr'
2420                         clear_stack_to_table_body_context()
2421                         insert_html_element t
2422                         ins_mode = ins_mode_in_row
2423                         return
2424                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2425                         parse_error()
2426                         clear_stack_to_table_body_context()
2427                         insert_html_element new_open_tag 'tr'
2428                         ins_mode = ins_mode_in_row
2429                         process_token t
2430                         return
2431                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2432                         unless is_in_table_scope t.name, NS_HTML
2433                                 parse_error()
2434                                 return
2435                         clear_stack_to_table_body_context()
2436                         open_els.shift()
2437                         ins_mode = ins_mode_in_table
2438                         return
2439                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2440                         has = false
2441                         for el in open_els
2442                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2443                                         has = true
2444                                         break
2445                                 if table_scopers[el.name] is el.namespace
2446                                         break
2447                         if !has
2448                                 parse_error()
2449                                 return
2450                         clear_stack_to_table_body_context()
2451                         open_els.shift()
2452                         ins_mode = ins_mode_in_table
2453                         process_token t
2454                         return
2455                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2456                         parse_error()
2457                         return
2458                 # Anything else
2459                 ins_mode_in_table t
2460
2461         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2462         ins_mode_in_row = (t) ->
2463                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2464                         clear_stack_to_table_row_context()
2465                         insert_html_element t
2466                         ins_mode = ins_mode_in_cell
2467                         afe_push_marker()
2468                         return
2469                 if t.type is TYPE_END_TAG and t.name is 'tr'
2470                         if is_in_table_scope 'tr', NS_HTML
2471                                 clear_stack_to_table_row_context()
2472                                 open_els.shift()
2473                                 ins_mode = ins_mode_in_table_body
2474                         else
2475                                 parse_error()
2476                         return
2477                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2478                         if is_in_table_scope 'tr', NS_HTML
2479                                 clear_stack_to_table_row_context()
2480                                 open_els.shift()
2481                                 ins_mode = ins_mode_in_table_body
2482                                 process_token t
2483                         else
2484                                 parse_error()
2485                         return
2486                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2487                         if is_in_table_scope t.name, NS_HTML
2488                                 if is_in_table_scope 'tr', NS_HTML
2489                                         clear_stack_to_table_row_context()
2490                                         open_els.shift()
2491                                         ins_mode = ins_mode_in_table_body
2492                                         process_token t
2493                         else
2494                                 parse_error()
2495                         return
2496                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2497                         parse_error()
2498                         return
2499                 # Anything else
2500                 ins_mode_in_table t
2501
2502         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2503         close_the_cell = ->
2504                 generate_implied_end_tags()
2505                 unless (open_els[0].name is 'td' or open_els[0] is 'th') and open_els[0].namespace is NS_HTML
2506                         parse_error()
2507                 loop
2508                         el = open_els.shift()
2509                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2510                                 break
2511                 clear_afe_to_marker()
2512                 ins_mode = ins_mode_in_row
2513
2514         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2515         ins_mode_in_cell = (t) ->
2516                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2517                         if is_in_table_scope t.name, NS_HTML
2518                                 generate_implied_end_tags()
2519                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2520                                         parse_error()
2521                                 loop
2522                                         el = open_els.shift()
2523                                         if el.name is t.name and el.namespace is NS_HTML
2524                                                 break
2525                                 clear_afe_to_marker()
2526                                 ins_mode = ins_mode_in_row
2527                         else
2528                                 parse_error()
2529                         return
2530                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2531                         has = false
2532                         for el in open_els
2533                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2534                                         has = true
2535                                         break
2536                                 if table_scopers[el.name] is el.namespace
2537                                         break
2538                         if !has
2539                                 parse_error()
2540                                 return
2541                         close_the_cell()
2542                         process_token t
2543                         return
2544                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2545                         parse_error()
2546                         return
2547                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2548                         if is_in_table_scope t.name, NS_HTML
2549                                 close_the_cell()
2550                                 process_token t
2551                         else
2552                                 parse_error()
2553                         return
2554                 # Anything Else
2555                 ins_mode_in_body t
2556
2557         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2558         ins_mode_in_select = (t) ->
2559                 if t.type is TYPE_TEXT and t.text is "\u0000"
2560                         parse_error()
2561                         return
2562                 if t.type is TYPE_TEXT
2563                         insert_character t
2564                         return
2565                 if t.type is TYPE_COMMENT
2566                         insert_comment t
2567                         return
2568                 if t.type is TYPE_DOCTYPE
2569                         parse_error()
2570                         return
2571                 if t.type is TYPE_START_TAG and t.name is 'html'
2572                         ins_mode_in_body t
2573                         return
2574                 if t.type is TYPE_START_TAG and t.name is 'option'
2575                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2576                                 open_els.shift()
2577                         insert_html_element t
2578                         return
2579                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2580                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2581                                 open_els.shift()
2582                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2583                                 open_els.shift()
2584                         insert_html_element t
2585                         return
2586                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2587                         if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
2588                                 if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
2589                                         open_els.shift()
2590                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2591                                 open_els.shift()
2592                         else
2593                                 parse_error()
2594                         return
2595                 if t.type is TYPE_END_TAG and t.name is 'option'
2596                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2597                                 open_els.shift()
2598                         else
2599                                 parse_error()
2600                         return
2601                 if t.type is TYPE_END_TAG and t.name is 'select'
2602                         if is_in_select_scope 'select', NS_HTML
2603                                 loop
2604                                         el = open_els.shift()
2605                                         if el.name is 'select' and el.namespace is NS_HTML
2606                                                 break
2607                                 reset_ins_mode()
2608                         else
2609                                 parse_error()
2610                         return
2611                 if t.type is TYPE_START_TAG and t.name is 'select'
2612                         parse_error()
2613                         loop
2614                                 el = open_els.shift()
2615                                 if el.name is 'select' and el.namespace is NS_HTML
2616                                         break
2617                         reset_ins_mode()
2618                         # spec says that this is the same as </select> but it doesn't say
2619                         # to check scope first
2620                         return
2621                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2622                         parse_error()
2623                         if is_in_select_scope 'select', NS_HTML
2624                                 return
2625                         loop
2626                                 el = open_els.shift()
2627                                 if el.name is 'select' and el.namespace is NS_HTML
2628                                         break
2629                         reset_ins_mode()
2630                         process_token t
2631                         return
2632                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2633                         ins_mode_in_head t
2634                         return
2635                 if t.type is TYPE_EOF
2636                         ins_mode_in_body t
2637                         return
2638                 # Anything else
2639                 parse_error()
2640                 return
2641
2642         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2643         ins_mode_in_select_in_table = (t) ->
2644                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2645                         parse_error()
2646                         loop
2647                                 el = open_els.shift()
2648                                 if el.name is 'select' and el.namespace is NS_HTML
2649                                         break
2650                         reset_ins_mode()
2651                         process_token t
2652                         return
2653                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2654                         parse_error()
2655                         unless is_in_table_scope t.name, NS_HTML
2656                                 return
2657                         loop
2658                                 el = open_els.shift()
2659                                 if el.name is 'select' and el.namespace is NS_HTML
2660                                         break
2661                         reset_ins_mode()
2662                         process_token t
2663                         return
2664                 # Anything else
2665                 ins_mode_in_select t
2666                 return
2667
2668         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2669         ins_mode_in_template = (t) ->
2670                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2671                         ins_mode_in_body t
2672                         return
2673                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2674                         ins_mode_in_head t
2675                         return
2676                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2677                         template_ins_modes.shift()
2678                         template_ins_modes.unshift ins_mode_in_table
2679                         ins_mode = ins_mode_in_table
2680                         process_token t
2681                         return
2682                 if t.type is TYPE_START_TAG and t.name is 'col'
2683                         template_ins_modes.shift()
2684                         template_ins_modes.unshift ins_mode_in_column_group
2685                         ins_mode = ins_mode_in_column_group
2686                         process_token t
2687                         return
2688                 if t.type is TYPE_START_TAG and t.name is 'tr'
2689                         template_ins_modes.shift()
2690                         template_ins_modes.unshift ins_mode_in_table_body
2691                         ins_mode = ins_mode_in_table_body
2692                         process_token t
2693                         return
2694                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2695                         template_ins_modes.shift()
2696                         template_ins_modes.unshift ins_mode_in_row
2697                         ins_mode = ins_mode_in_row
2698                         process_token t
2699                         return
2700                 if t.type is TYPE_START_TAG
2701                         template_ins_modes.shift()
2702                         template_ins_modes.unshift ins_mode_in_body
2703                         ins_mode = ins_mode_in_body
2704                         process_token t
2705                         return
2706                 if t.type is TYPE_END_TAG
2707                         parse_error()
2708                         return
2709                 if t.type is TYPE_EOF
2710                         unless template_tag_is_open()
2711                                 stop_parsing()
2712                                 return
2713                         parse_error()
2714                         loop
2715                                 el = open_els.shift()
2716                                 if el.name is 'template' and el.namespace is NS_HTML
2717                                         break
2718                         clear_afe_to_marker()
2719                         template_ins_modes.shift()
2720                         reset_ins_mode()
2721                         process_token t
2722
2723         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2724         ins_mode_after_body = (t) ->
2725                 if is_space_tok t
2726                         ins_mode_in_body t
2727                         return
2728                 if t.type is TYPE_COMMENT
2729                         first = open_els[open_els.length - 1]
2730                         insert_comment t, [first, first.children.length]
2731                         return
2732                 if t.type is TYPE_DOCTYPE
2733                         parse_error()
2734                         return
2735                 if t.type is TYPE_START_TAG and t.name is 'html'
2736                         ins_mode_in_body t
2737                         return
2738                 if t.type is TYPE_END_TAG and t.name is 'html'
2739                         if flag_fragment_parsing
2740                                 parse_error()
2741                                 return
2742                         ins_mode = ins_mode_after_after_body
2743                         return
2744                 if t.type is TYPE_EOF
2745                         stop_parsing()
2746                         return
2747                 # Anything ELse
2748                 parse_error()
2749                 ins_mode = ins_mode_in_body
2750                 process_token t
2751
2752         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2753         ins_mode_in_frameset = (t) ->
2754                 if is_space_tok t
2755                         insert_character t
2756                         return
2757                 if t.type is TYPE_COMMENT
2758                         insert_comment t
2759                         return
2760                 if t.type is TYPE_DOCTYPE
2761                         parse_error()
2762                         return
2763                 if t.type is TYPE_START_TAG and t.name is 'html'
2764                         ins_mode_in_body t
2765                         return
2766                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2767                         insert_html_element t
2768                         return
2769                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2770                         if open_els.length is 1
2771                                 parse_error()
2772                                 return # fragment case
2773                         open_els.shift()
2774                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2775                                 ins_mode = ins_mode_after_frameset
2776                         return
2777                 if t.type is TYPE_START_TAG and t.name is 'frame'
2778                         insert_html_element t
2779                         open_els.shift()
2780                         t.acknowledge_self_closing()
2781                         return
2782                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2783                         ins_mode_in_head t
2784                         return
2785                 if t.type is TYPE_EOF
2786                         if open_els.length isnt 1
2787                                 parse_error()
2788                         stop_parsing()
2789                         return
2790                 # Anything else
2791                 parse_error()
2792                 return
2793
2794         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2795         ins_mode_after_frameset = (t) ->
2796                 if is_space_tok t
2797                         insert_character t
2798                         return
2799                 if t.type is TYPE_COMMENT
2800                         insert_comment t
2801                         return
2802                 if t.type is TYPE_DOCTYPE
2803                         parse_error()
2804                         return
2805                 if t.type is TYPE_START_TAG and t.name is 'html'
2806                         ins_mode_in_body t
2807                         return
2808                 if t.type is TYPE_END_TAG and t.name is 'html'
2809                         ins_mode = ins_mode_after_after_frameset
2810                         return
2811                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2812                         ins_mode_in_head t
2813                         return
2814                 if t.type is TYPE_EOF
2815                         stop_parsing()
2816                         return
2817                 # Anything else
2818                 parse_error()
2819                 return
2820
2821         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2822         ins_mode_after_after_body = (t) ->
2823                 if t.type is TYPE_COMMENT
2824                         insert_comment t, [doc, doc.children.length]
2825                         return
2826                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2827                         ins_mode_in_body t
2828                         return
2829                 if t.type is TYPE_EOF
2830                         stop_parsing()
2831                         return
2832                 # Anything else
2833                 parse_error()
2834                 ins_mode = ins_mode_in_body
2835                 process_token t
2836                 return
2837
2838         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2839         ins_mode_after_after_frameset = (t) ->
2840                 if t.type is TYPE_COMMENT
2841                         insert_comment t, [doc, doc.children.length]
2842                         return
2843                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2844                         ins_mode_in_body t
2845                         return
2846                 if t.type is TYPE_EOF
2847                         stop_parsing()
2848                         return
2849                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2850                         ins_mode_in_head t
2851                         return
2852                 # Anything else
2853                 parse_error()
2854                 return
2855
2856         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2857         has_color_face_or_size = (t) ->
2858                 for a in t.attrs_a
2859                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2860                                 return true
2861                 return false
2862         in_foreign_content_end_script = ->
2863                 open_els.shift()
2864                 # fixfull
2865                 return
2866         in_foreign_content_other_start = (t) ->
2867                 acn = adjusted_current_node()
2868                 if acn.namespace is NS_MATHML
2869                         adjust_mathml_attributes t
2870                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2871                         t.name = svg_name_fixes[t.name]
2872                 if acn.namespace is NS_SVG
2873                         adjust_svg_attributes t
2874                 adjust_foreign_attributes t
2875                 insert_foreign_element t, acn.namespace
2876                 if t.flag 'self-closing'
2877                         if t.name is 'script'
2878                                 t.acknowledge_self_closing()
2879                                 in_foreign_content_end_script()
2880                                 # fixfull
2881                         else
2882                                 open_els.shift()
2883                                 t.acknowledge_self_closing()
2884                 return
2885         in_foreign_content = (t) ->
2886                 if t.type is TYPE_TEXT and t.text is "\u0000"
2887                         parse_error()
2888                         insert_character new_character_token "\ufffd"
2889                         return
2890                 if is_space_tok t
2891                         insert_character t
2892                         return
2893                 if t.type is TYPE_TEXT
2894                         flag_frameset_ok = false
2895                         insert_character t
2896                         return
2897                 if t.type is TYPE_COMMENT
2898                         insert_comment t
2899                         return
2900                 if t.type is TYPE_DOCTYPE
2901                         parse_error()
2902                         return
2903                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2904                         parse_error()
2905                         if flag_fragment_parsing
2906                                 in_foreign_content_other_start t
2907                                 return
2908                         loop # is this safe?
2909                                 open_els.shift()
2910                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
2911                                         break
2912                         process_token t
2913                         return
2914                 if t.type is TYPE_START_TAG
2915                         in_foreign_content_other_start t
2916                         return
2917                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2918                         in_foreign_content_end_script()
2919                         return
2920                 if t.type is TYPE_END_TAG
2921                         i = 0
2922                         node = open_els[i]
2923                         if node.name.toLowerCase() isnt t.name
2924                                 parse_error()
2925                         loop
2926                                 if node is open_els[open_els.length - 1]
2927                                         return
2928                                 if node.name.toLowerCase() is t.name
2929                                         loop
2930                                                 el = open_els.shift()
2931                                                 if el is node
2932                                                         return
2933                                 i += 1
2934                                 node = open_els[i]
2935                                 if node.namespace is NS_HTML
2936                                         break
2937                         ins_mode t # explicitly call HTML insertion mode
2938
2939
2940         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2941         tok_state_data = ->
2942                 switch c = txt.charAt(cur++)
2943                         when '&'
2944                                 return new_text_node parse_character_reference()
2945                         when '<'
2946                                 tok_state = tok_state_tag_open
2947                         when "\u0000"
2948                                 parse_error()
2949                                 return new_text_node "\ufffd"
2950                         when '' # EOF
2951                                 return new_eof_token()
2952                         else
2953                                 return new_text_node c
2954                 return null
2955
2956         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2957         # not needed: tok_state_character_reference_in_data = ->
2958         # just call parse_character_reference()
2959
2960         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2961         tok_state_rcdata = ->
2962                 switch c = txt.charAt(cur++)
2963                         when '&'
2964                                 return new_text_node parse_character_reference()
2965                         when '<'
2966                                 tok_state = tok_state_rcdata_less_than_sign
2967                         when "\u0000"
2968                                 parse_error()
2969                                 return new_character_token "\ufffd"
2970                         when '' # EOF
2971                                 return new_eof_token()
2972                         else
2973                                 return new_character_token c
2974                 return null
2975
2976         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2977         # not needed: tok_state_character_reference_in_rcdata = ->
2978         # just call parse_character_reference()
2979
2980         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2981         tok_state_rawtext = ->
2982                 switch c = txt.charAt(cur++)
2983                         when '<'
2984                                 tok_state = tok_state_rawtext_less_than_sign
2985                         when "\u0000"
2986                                 parse_error()
2987                                 return new_character_token "\ufffd"
2988                         when '' # EOF
2989                                 return new_eof_token()
2990                         else
2991                                 return new_character_token c
2992                 return null
2993
2994         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2995         tok_state_script_data = ->
2996                 switch c = txt.charAt(cur++)
2997                         when '<'
2998                                 tok_state = tok_state_script_data_less_than_sign
2999                         when "\u0000"
3000                                 parse_error()
3001                                 return new_character_token "\ufffd"
3002                         when '' # EOF
3003                                 return new_eof_token()
3004                         else
3005                                 return new_character_token c
3006                 return null
3007
3008         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3009         tok_state_plaintext = ->
3010                 switch c = txt.charAt(cur++)
3011                         when "\u0000"
3012                                 parse_error()
3013                                 return new_character_token "\ufffd"
3014                         when '' # EOF
3015                                 return new_eof_token()
3016                         else
3017                                 return new_character_token c
3018                 return null
3019
3020
3021         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3022         tok_state_tag_open = ->
3023                 c = txt.charAt(cur++)
3024                 if c is '!'
3025                         tok_state = tok_state_markup_declaration_open
3026                         return
3027                 if c is '/'
3028                         tok_state = tok_state_end_tag_open
3029                         return
3030                 if is_uc_alpha(c)
3031                         tok_cur_tag = new_open_tag c.toLowerCase()
3032                         tok_state = tok_state_tag_name
3033                         return
3034                 if is_lc_alpha(c)
3035                         tok_cur_tag = new_open_tag c
3036                         tok_state = tok_state_tag_name
3037                         return
3038                 if c is '?'
3039                         parse_error()
3040                         tok_cur_tag = new_comment_token '?' # FIXME right?
3041                         tok_state = tok_state_bogus_comment
3042                         return
3043                 # Anything else
3044                 parse_error()
3045                 tok_state = tok_state_data
3046                 cur -= 1 # we didn't parse/handle the char after <
3047                 return new_text_node '<'
3048
3049         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3050         tok_state_end_tag_open = ->
3051                 c = txt.charAt(cur++)
3052                 if is_uc_alpha(c)
3053                         tok_cur_tag = new_end_tag c.toLowerCase()
3054                         tok_state = tok_state_tag_name
3055                         return
3056                 if is_lc_alpha(c)
3057                         tok_cur_tag = new_end_tag c
3058                         tok_state = tok_state_tag_name
3059                         return
3060                 if c is '>'
3061                         parse_error()
3062                         tok_state = tok_state_data
3063                         return
3064                 if c is '' # EOF
3065                         parse_error()
3066                         tok_state = tok_state_data
3067                         return new_text_node '</'
3068                 # Anything else
3069                 parse_error()
3070                 tok_cur_tag = new_comment_token c
3071                 tok_state = tok_state_bogus_comment
3072                 return null
3073
3074         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3075         tok_state_tag_name = ->
3076                 switch c = txt.charAt(cur++)
3077                         when "\t", "\n", "\u000c", ' '
3078                                 tok_state = tok_state_before_attribute_name
3079                         when '/'
3080                                 tok_state = tok_state_self_closing_start_tag
3081                         when '>'
3082                                 tok_state = tok_state_data
3083                                 tmp = tok_cur_tag
3084                                 tok_cur_tag = null
3085                                 return tmp
3086                         when "\u0000"
3087                                 parse_error()
3088                                 tok_cur_tag.name += "\ufffd"
3089                         when '' # EOF
3090                                 parse_error()
3091                                 tok_state = tok_state_data
3092                         else
3093                                 if is_uc_alpha(c)
3094                                         tok_cur_tag.name += c.toLowerCase()
3095                                 else
3096                                         tok_cur_tag.name += c
3097                 return null
3098
3099         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3100         tok_state_rcdata_less_than_sign = ->
3101                 c = txt.charAt(cur++)
3102                 if c is '/'
3103                         temporary_buffer = ''
3104                         tok_state = tok_state_rcdata_end_tag_open
3105                         return null
3106                 # Anything else
3107                 tok_state = tok_state_rcdata
3108                 cur -= 1 # reconsume the input character
3109                 return new_character_token '<'
3110
3111         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3112         tok_state_rcdata_end_tag_open = ->
3113                 c = txt.charAt(cur++)
3114                 if is_uc_alpha(c)
3115                         tok_cur_tag = new_end_tag c.toLowerCase()
3116                         temporary_buffer += c
3117                         tok_state = tok_state_rcdata_end_tag_name
3118                         return null
3119                 if is_lc_alpha(c)
3120                         tok_cur_tag = new_end_tag c
3121                         temporary_buffer += c
3122                         tok_state = tok_state_rcdata_end_tag_name
3123                         return null
3124                 # Anything else
3125                 tok_state = tok_state_rcdata
3126                 cur -= 1 # reconsume the input character
3127                 return new_character_token "</" # fixfull separate these
3128
3129         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3130         is_appropriate_end_tag = (t) ->
3131                 # spec says to check against "the tag name of the last start tag to
3132                 # have been emitted from this tokenizer", but this is only called from
3133                 # the various "raw" states, so it's hopefully ok to assume that
3134                 # open_els[0].name will work instead TODO: verify this after the script
3135                 # data states are implemented
3136                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3137                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3138
3139         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3140         tok_state_rcdata_end_tag_name = ->
3141                 c = txt.charAt(cur++)
3142                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3143                         if is_appropriate_end_tag tok_cur_tag
3144                                 tok_state = tok_state_before_attribute_name
3145                                 return
3146                         # else fall through to "Anything else"
3147                 if c is '/'
3148                         if is_appropriate_end_tag tok_cur_tag
3149                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3150                                 return
3151                         # else fall through to "Anything else"
3152                 if c is '>'
3153                         if is_appropriate_end_tag tok_cur_tag
3154                                 tok_state = tok_state_data
3155                                 return tok_cur_tag
3156                         # else fall through to "Anything else"
3157                 if is_uc_alpha(c)
3158                         tok_cur_tag.name += c.toLowerCase()
3159                         temporary_buffer += c
3160                         return null
3161                 if is_lc_alpha(c)
3162                         tok_cur_tag.name += c
3163                         temporary_buffer += c
3164                         return null
3165                 # Anything else
3166                 tok_state = tok_state_rcdata
3167                 cur -= 1 # reconsume the input character
3168                 return new_character_token '</' + temporary_buffer # fixfull separate these
3169
3170         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3171         tok_state_rawtext_less_than_sign = ->
3172                 c = txt.charAt(cur++)
3173                 if c is '/'
3174                         temporary_buffer = ''
3175                         tok_state = tok_state_rawtext_end_tag_open
3176                         return null
3177                 # Anything else
3178                 tok_state = tok_state_rawtext
3179                 cur -= 1 # reconsume the input character
3180                 return new_character_token '<'
3181
3182         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3183         tok_state_rawtext_end_tag_open = ->
3184                 c = txt.charAt(cur++)
3185                 if is_uc_alpha(c)
3186                         tok_cur_tag = new_end_tag c.toLowerCase()
3187                         temporary_buffer += c
3188                         tok_state = tok_state_rawtext_end_tag_name
3189                         return null
3190                 if is_lc_alpha(c)
3191                         tok_cur_tag = new_end_tag c
3192                         temporary_buffer += c
3193                         tok_state = tok_state_rawtext_end_tag_name
3194                         return null
3195                 # Anything else
3196                 tok_state = tok_state_rawtext
3197                 cur -= 1 # reconsume the input character
3198                 return new_character_token "</" # fixfull separate these
3199
3200         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3201         tok_state_rawtext_end_tag_name = ->
3202                 c = txt.charAt(cur++)
3203                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3204                         if is_appropriate_end_tag tok_cur_tag
3205                                 tok_state = tok_state_before_attribute_name
3206                                 return
3207                         # else fall through to "Anything else"
3208                 if c is '/'
3209                         if is_appropriate_end_tag tok_cur_tag
3210                                 tok_state = tok_state_self_closing_start_tag
3211                                 return
3212                         # else fall through to "Anything else"
3213                 if c is '>'
3214                         if is_appropriate_end_tag tok_cur_tag
3215                                 tok_state = tok_state_data
3216                                 return tok_cur_tag
3217                         # else fall through to "Anything else"
3218                 if is_uc_alpha(c)
3219                         tok_cur_tag.name += c.toLowerCase()
3220                         temporary_buffer += c
3221                         return null
3222                 if is_lc_alpha(c)
3223                         tok_cur_tag.name += c
3224                         temporary_buffer += c
3225                         return null
3226                 # Anything else
3227                 tok_state = tok_state_rawtext
3228                 cur -= 1 # reconsume the input character
3229                 return new_character_token '</' + temporary_buffer # fixfull separate these
3230
3231         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3232         tok_state_script_data_less_than_sign = ->
3233                 c = txt.charAt(cur++)
3234                 if c is '/'
3235                         temporary_buffer = ''
3236                         tok_state = tok_state_script_data_end_tag_open
3237                         return
3238                 if c is '!'
3239                         tok_state = tok_state_script_data_escape_start
3240                         return new_character_token '<!' # fixfull split
3241                 # Anything else
3242                 tok_state = tok_state_script_data
3243                 cur -= 1 # Reconsume
3244                 return new_character_token '<'
3245
3246         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3247         tok_state_script_data_end_tag_open = ->
3248                 c = txt.charAt(cur++)
3249                 if is_uc_alpha(c)
3250                         tok_cur_tag = new_end_tag c.toLowerCase()
3251                         temporary_buffer += c
3252                         tok_state = tok_state_script_data_end_tag_name
3253                         return
3254                 if is_lc_alpha(c)
3255                         tok_cur_tag = new_end_tag c
3256                         temporary_buffer += c
3257                         tok_state = tok_state_script_data_end_tag_name
3258                         return
3259                 # Anything else
3260                 tok_state = tok_state_script_data
3261                 cur -= 1 # Reconsume
3262                 return new_character_token '</'
3263
3264         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3265         tok_state_script_data_end_tag_name = ->
3266                 c = txt.charAt(cur++)
3267                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3268                         if is_appropriate_end_tag tok_cur_tag
3269                                 tok_state = tok_state_before_attribute_name
3270                                 return
3271                         # fall through
3272                 if c is '/'
3273                         if is_appropriate_end_tag tok_cur_tag
3274                                 tok_state = tok_state_self_closing_start_tag
3275                                 return
3276                         # fall through
3277                 if c is '>'
3278                         if is_appropriate_end_tag tok_cur_tag
3279                                 tok_state = tok_state_data
3280                                 return tok_cur_tag
3281                         # fall through
3282                 if is_uc_alpha(c)
3283                         tok_cur_tag.name += c.toLowerCase()
3284                         temporary_buffer += c
3285                         return
3286                 if is_lc_alpha(c)
3287                         tok_cur_tag.name += c
3288                         temporary_buffer += c
3289                         return
3290                 # Anything else
3291                 tok_state = tok_state_script_data
3292                 cur -= 1 # Reconsume
3293                 return new_character_token "</#{temporary_buffer}" # fixfull split
3294
3295         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3296         tok_state_script_data_escape_start = ->
3297                 c = txt.charAt(cur++)
3298                 if c is '-'
3299                         tok_state = tok_state_script_data_escape_start_dash
3300                         return new_character_token '-'
3301                 # Anything else
3302                 tok_state = tok_state_script_data
3303                 cur -= 1 # Reconsume
3304                 return
3305
3306         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3307         tok_state_script_data_escape_start_dash = ->
3308                 c = txt.charAt(cur++)
3309                 if c is '-'
3310                         tok_state = tok_state_script_data_escaped_dash_dash
3311                         return new_character_token '-'
3312                 # Anything else
3313                 tok_state = tok_state_script_data
3314                 cur -= 1 # Reconsume
3315                 return
3316
3317         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3318         tok_state_script_data_escaped = ->
3319                 c = txt.charAt(cur++)
3320                 if c is '-'
3321                         tok_state = tok_state_script_data_escaped_dash
3322                         return new_character_token '-'
3323                 if c is '<'
3324                         tok_state = tok_state_script_data_escaped_less_than_sign
3325                         return
3326                 if c is "\u0000"
3327                         parse_error()
3328                         return new_character_token "\ufffd"
3329                 if c is '' # EOF
3330                         tok_state = tok_state_data
3331                         parse_error()
3332                         cur -= 1 # Reconsume
3333                         return
3334                 # Anything else
3335                 return new_character_token c
3336
3337         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3338         tok_state_script_data_escaped_dash = ->
3339                 c = txt.charAt(cur++)
3340                 if c is '-'
3341                         tok_state = tok_state_script_data_escaped_dash_dash
3342                         return new_character_token '-'
3343                 if c is '<'
3344                         tok_state = tok_state_script_data_escaped_less_than_sign
3345                         return
3346                 if c is "\u0000"
3347                         parse_error()
3348                         tok_state = tok_state_script_data_escaped
3349                         return new_character_token "\ufffd"
3350                 if c is '' # EOF
3351                         tok_state = tok_state_data
3352                         parse_error()
3353                         cur -= 1 # Reconsume
3354                         return
3355                 # Anything else
3356                 tok_state = tok_state_script_data_escaped
3357                 return new_character_token c
3358
3359         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3360         tok_state_script_data_escaped_dash_dash = ->
3361                 c = txt.charAt(cur++)
3362                 if c is '-'
3363                         return new_character_token '-'
3364                 if c is '<'
3365                         tok_state = tok_state_script_data_escaped_less_than_sign
3366                         return
3367                 if c is '>'
3368                         tok_state = tok_state_script_data
3369                         return new_character_token '>'
3370                 if c is "\u0000"
3371                         parse_error()
3372                         tok_state = tok_state_script_data_escaped
3373                         return new_character_token "\ufffd"
3374                 if c is '' # EOF
3375                         parse_error()
3376                         tok_state = tok_state_data
3377                         cur -= 1 # Reconsume
3378                         return
3379                 # Anything else
3380                 tok_state = tok_state_script_data_escaped
3381                 return new_character_token c
3382
3383         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3384         tok_state_script_data_escaped_less_than_sign = ->
3385                 c = txt.charAt(cur++)
3386                 if c is '/'
3387                         temporary_buffer = ''
3388                         tok_state = tok_state_script_data_escaped_end_tag_open
3389                         return
3390                 if is_uc_alpha(c)
3391                         temporary_buffer = c.toLowerCase() # yes, really
3392                         tok_state = tok_state_script_data_double_escape_start
3393                         return new_character_token "<#{c}" # fixfull split
3394                 if is_lc_alpha(c)
3395                         temporary_buffer = c
3396                         tok_state = tok_state_script_data_double_escape_start
3397                         return new_character_token "<#{c}" # fixfull split
3398                 # Anything else
3399                 tok_state = tok_state_script_data_escaped
3400                 cur -= 1 # Reconsume
3401                 return new_character_token '<'
3402
3403         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3404         tok_state_script_data_escaped_end_tag_open = ->
3405                 c = txt.charAt(cur++)
3406                 if is_uc_alpha(c)
3407                         tok_cur_tag = new_end_tag c.toLowerCase()
3408                         temporary_buffer += c
3409                         tok_state = tok_state_script_data_escaped_end_tag_name
3410                         return
3411                 if is_lc_alpha(c)
3412                         tok_cur_tag = new_end_tag c
3413                         temporary_buffer += c
3414                         tok_state = tok_state_script_data_escaped_end_tag_name
3415                         return
3416                 # Anything else
3417                 tok_state = tok_state_script_data_escaped
3418                 cur -= 1 # Reconsume
3419                 return new_character_token '</' # fixfull split
3420
3421         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3422         tok_state_script_data_escaped_end_tag_name = ->
3423                 c = txt.charAt(cur++)
3424                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3425                         if is_appropriate_end_tag tok_cur_tag
3426                                 tok_state = tok_state_before_attribute_name
3427                                 return
3428                         # fall through
3429                 if c is '/'
3430                         if is_appropriate_end_tag tok_cur_tag
3431                                 tok_state = tok_state_self_closing_start_tag
3432                                 return
3433                         # fall through
3434                 if c is '>'
3435                         if is_appropriate_end_tag tok_cur_tag
3436                                 tok_state = tok_state_data
3437                                 return tok_cur_tag
3438                         # fall through
3439                 if is_uc_alpha(c)
3440                         tok_cur_tag.name += c.toLowerCase()
3441                         temporary_buffer += c.toLowerCase()
3442                         return
3443                 if is_lc_alpha(c)
3444                         tok_cur_tag.name += c
3445                         temporary_buffer += c.toLowerCase()
3446                         return
3447                 # Anything else
3448                 tok_state = tok_state_script_data_escaped
3449                 cur -= 1 # Reconsume
3450                 return new_character_token "</#{temporary_buffer}" # fixfull split
3451
3452         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3453         tok_state_script_data_double_escape_start = ->
3454                 c = txt.charAt(cur++)
3455                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3456                         if temporary_buffer is 'script'
3457                                 tok_state = tok_state_script_data_double_escaped
3458                         else
3459                                 tok_state = tok_state_script_data_escaped
3460                         return new_character_token c
3461                 if is_uc_alpha(c)
3462                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3463                         return new_character_token c
3464                 if is_lc_alpha(c)
3465                         temporary_buffer += c
3466                         return new_character_token c
3467                 # Anything else
3468                 tok_state = tok_state_script_data_escaped
3469                 cur -= 1 # Reconsume
3470                 return
3471
3472         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3473         tok_state_script_data_double_escaped = ->
3474                 c = txt.charAt(cur++)
3475                 if c is '-'
3476                         tok_state = tok_state_script_data_double_escaped_dash
3477                         return new_character_token '-'
3478                 if c is '<'
3479                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3480                         return new_character_token '<'
3481                 if c is "\u0000"
3482                         parse_error()
3483                         return new_character_token "\ufffd"
3484                 if c is '' # EOF
3485                         parse_error()
3486                         tok_state = tok_state_data
3487                         cur -= 1 # Reconsume
3488                         return
3489                 # Anything else
3490                 return new_character_token c
3491
3492         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3493         tok_state_script_data_double_escaped_dash = ->
3494                 c = txt.charAt(cur++)
3495                 if c is '-'
3496                         tok_state = tok_state_script_data_double_escaped_dash_dash
3497                         return new_character_token '-'
3498                 if c is '<'
3499                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3500                         return new_character_token '<'
3501                 if c is "\u0000"
3502                         parse_error()
3503                         tok_state = tok_state_script_data_double_escaped
3504                         return new_character_token "\ufffd"
3505                 if c is '' # EOF
3506                         parse_error()
3507                         tok_state = tok_state_data
3508                         cur -= 1 # Reconsume
3509                         return
3510                 # Anything else
3511                 tok_state = tok_state_script_data_double_escaped
3512                 return new_character_token c
3513
3514         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3515         tok_state_script_data_double_escaped_dash_dash = ->
3516                 c = txt.charAt(cur++)
3517                 if c is '-'
3518                         return new_character_token '-'
3519                 if c is '<'
3520                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3521                         return new_character_token '<'
3522                 if c is '>'
3523                         tok_state = tok_state_script_data
3524                         return new_character_token '>'
3525                 if c is "\u0000"
3526                         parse_error()
3527                         tok_state = tok_state_script_data_double_escaped
3528                         return new_character_token "\ufffd"
3529                 if c is '' # EOF
3530                         parse_error()
3531                         tok_state = tok_state_data
3532                         cur -= 1 # Reconsume
3533                         return
3534                 # Anything else
3535                 tok_state = tok_state_script_data_double_escaped
3536                 return new_character_token c
3537
3538         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3539         tok_state_script_data_double_escaped_less_than_sign = ->
3540                 c = txt.charAt(cur++)
3541                 if c is '/'
3542                         temporary_buffer = ''
3543                         tok_state = tok_state_script_data_double_escape_end
3544                         return new_character_token '/'
3545                 # Anything else
3546                 tok_state = tok_state_script_data_double_escaped
3547                 cur -= 1 # Reconsume
3548                 return
3549
3550         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3551         tok_state_script_data_double_escape_end = ->
3552                 c = txt.charAt(cur++)
3553                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3554                         if temporary_buffer is 'script'
3555                                 tok_state = tok_state_script_data_escaped
3556                         else
3557                                 tok_state = tok_state_script_data_double_escaped
3558                         return new_character_token c
3559                 if is_uc_alpha(c)
3560                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3561                         return new_character_token c
3562                 if is_lc_alpha(c)
3563                         temporary_buffer += c
3564                         return new_character_token c
3565                 # Anything else
3566                 tok_state = tok_state_script_data_double_escaped
3567                 cur -= 1 # Reconsume
3568                 return
3569
3570         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3571         tok_state_before_attribute_name = ->
3572                 attr_name = null
3573                 switch c = txt.charAt(cur++)
3574                         when "\t", "\n", "\u000c", ' '
3575                                 return null
3576                         when '/'
3577                                 tok_state = tok_state_self_closing_start_tag
3578                                 return null
3579                         when '>'
3580                                 tok_state = tok_state_data
3581                                 tmp = tok_cur_tag
3582                                 tok_cur_tag = null
3583                                 return tmp
3584                         when "\u0000"
3585                                 parse_error()
3586                                 attr_name = "\ufffd"
3587                         when '"', "'", '<', '='
3588                                 parse_error()
3589                                 attr_name = c
3590                         when '' # EOF
3591                                 parse_error()
3592                                 tok_state = tok_state_data
3593                         else
3594                                 if is_uc_alpha(c)
3595                                         attr_name = c.toLowerCase()
3596                                 else
3597                                         attr_name = c
3598                 if attr_name?
3599                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3600                         tok_state = tok_state_attribute_name
3601                 return null
3602
3603         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3604         tok_state_attribute_name = ->
3605                 switch c = txt.charAt(cur++)
3606                         when "\t", "\n", "\u000c", ' '
3607                                 tok_state = tok_state_after_attribute_name
3608                         when '/'
3609                                 tok_state = tok_state_self_closing_start_tag
3610                         when '='
3611                                 tok_state = tok_state_before_attribute_value
3612                         when '>'
3613                                 tok_state = tok_state_data
3614                                 tmp = tok_cur_tag
3615                                 tok_cur_tag = null
3616                                 return tmp
3617                         when "\u0000"
3618                                 parse_error()
3619                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3620                         when '"', "'", '<'
3621                                 parse_error()
3622                                 tok_cur_tag.attrs_a[0][0] += c
3623                         when '' # EOF
3624                                 parse_error()
3625                                 tok_state = tok_state_data
3626                         else
3627                                 if is_uc_alpha(c)
3628                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3629                                 else
3630                                         tok_cur_tag.attrs_a[0][0] += c
3631                 return null
3632
3633         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3634         tok_state_after_attribute_name = ->
3635                 c = txt.charAt(cur++)
3636                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3637                         return
3638                 if c is '/'
3639                         tok_state = tok_state_self_closing_start_tag
3640                         return
3641                 if c is '='
3642                         tok_state = tok_state_before_attribute_value
3643                         return
3644                 if c is '>'
3645                         tok_state = tok_state_data
3646                         return
3647                 if is_uc_alpha(c)
3648                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3649                         tok_state = tok_state_attribute_name
3650                         return
3651                 if c is "\u0000"
3652                         parse_error()
3653                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3654                         tok_state = tok_state_attribute_name
3655                         return
3656                 if c is '' # EOF
3657                         parse_error()
3658                         tok_state = tok_state_data
3659                         cur -= 1 # reconsume
3660                         return
3661                 if c is '"' or c is "'" or c is '<'
3662                         parse_error()
3663                         # fall through to Anything else
3664                 # Anything else
3665                 tok_cur_tag.attrs_a.unshift [c, '']
3666                 tok_state = tok_state_attribute_name
3667
3668         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3669         tok_state_before_attribute_value = ->
3670                 switch c = txt.charAt(cur++)
3671                         when "\t", "\n", "\u000c", ' '
3672                                 return null
3673                         when '"'
3674                                 tok_state = tok_state_attribute_value_double_quoted
3675                         when '&'
3676                                 tok_state = tok_state_attribute_value_unquoted
3677                                 cur -= 1
3678                         when "'"
3679                                 tok_state = tok_state_attribute_value_single_quoted
3680                         when "\u0000"
3681                                 # Parse error
3682                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3683                                 tok_state = tok_state_attribute_value_unquoted
3684                         when '>'
3685                                 # Parse error
3686                                 tok_state = tok_state_data
3687                                 tmp = tok_cur_tag
3688                                 tok_cur_tag = null
3689                                 return tmp
3690                         when '' # EOF
3691                                 parse_error()
3692                                 tok_state = tok_state_data
3693                         else
3694                                 tok_cur_tag.attrs_a[0][1] += c
3695                                 tok_state = tok_state_attribute_value_unquoted
3696                 return null
3697
3698         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3699         tok_state_attribute_value_double_quoted = ->
3700                 switch c = txt.charAt(cur++)
3701                         when '"'
3702                                 tok_state = tok_state_after_attribute_value_quoted
3703                         when '&'
3704                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3705                         when "\u0000"
3706                                 # Parse error
3707                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3708                         when '' # EOF
3709                                 parse_error()
3710                                 tok_state = tok_state_data
3711                         else
3712                                 tok_cur_tag.attrs_a[0][1] += c
3713                 return null
3714
3715         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3716         tok_state_attribute_value_single_quoted = ->
3717                 switch c = txt.charAt(cur++)
3718                         when "'"
3719                                 tok_state = tok_state_after_attribute_value_quoted
3720                         when '&'
3721                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3722                         when "\u0000"
3723                                 # Parse error
3724                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3725                         when '' # EOF
3726                                 parse_error()
3727                                 tok_state = tok_state_data
3728                         else
3729                                 tok_cur_tag.attrs_a[0][1] += c
3730                 return null
3731
3732         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3733         tok_state_attribute_value_unquoted = ->
3734                 switch c = txt.charAt(cur++)
3735                         when "\t", "\n", "\u000c", ' '
3736                                 tok_state = tok_state_before_attribute_name
3737                         when '&'
3738                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3739                         when '>'
3740                                 tok_state = tok_state_data
3741                                 tmp = tok_cur_tag
3742                                 tok_cur_tag = null
3743                                 return tmp
3744                         when "\u0000"
3745                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3746                         when '' # EOF
3747                                 parse_error()
3748                                 tok_state = tok_state_data
3749                         else
3750                                 # Parse Error if ', <, = or ` (backtick)
3751                                 tok_cur_tag.attrs_a[0][1] += c
3752                 return null
3753
3754         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3755         tok_state_after_attribute_value_quoted = ->
3756                 switch c = txt.charAt(cur++)
3757                         when "\t", "\n", "\u000c", ' '
3758                                 tok_state = tok_state_before_attribute_name
3759                         when '/'
3760                                 tok_state = tok_state_self_closing_start_tag
3761                         when '>'
3762                                 tok_state = tok_state_data
3763                                 tmp = tok_cur_tag
3764                                 tok_cur_tag = null
3765                                 return tmp
3766                         when '' # EOF
3767                                 parse_error()
3768                                 tok_state = tok_state_data
3769                         else
3770                                 # Parse Error
3771                                 tok_state = tok_state_before_attribute_name
3772                                 cur -= 1 # we didn't handle that char
3773                 return null
3774
3775         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3776         tok_state_self_closing_start_tag = ->
3777                 c = txt.charAt(cur++)
3778                 if c is '>'
3779                         tok_cur_tag.flag 'self-closing', true
3780                         tok_state = tok_state_data
3781                         return tok_cur_tag
3782                 if c is ''
3783                         parse_error()
3784                         tok_state = tok_state_data
3785                         cur -= 1 # Reconsume
3786                         return
3787                 # Anything else
3788                 parse_error()
3789                 tok_state = tok_state_before_attribute_name
3790                 cur -= 1 # Reconsume
3791                 return
3792
3793         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3794         # WARNING: put a comment token in tok_cur_tag before setting this state
3795         tok_state_bogus_comment = ->
3796                 next_gt = txt.indexOf '>', cur
3797                 if next_gt is -1
3798                         val = txt.substr cur
3799                         cur = txt.length
3800                 else
3801                         val = txt.substr cur, (next_gt - cur)
3802                         cur = next_gt + 1
3803                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3804                 tok_cur_tag.text += val
3805                 tok_state = tok_state_data
3806                 return tok_cur_tag
3807
3808         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3809         tok_state_markup_declaration_open = ->
3810                 if txt.substr(cur, 2) is '--'
3811                         cur += 2
3812                         tok_cur_tag = new_comment_token ''
3813                         tok_state = tok_state_comment_start
3814                         return
3815                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3816                         cur += 7
3817                         tok_state = tok_state_doctype
3818                         return
3819                 acn = adjusted_current_node()
3820                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3821                         cur += 7
3822                         tok_state = tok_state_cdata_section
3823                         return
3824                 # Otherwise
3825                 parse_error()
3826                 tok_cur_tag = new_comment_token ''
3827                 tok_state = tok_state_bogus_comment
3828                 return
3829
3830         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3831         tok_state_comment_start = ->
3832                 switch c = txt.charAt(cur++)
3833                         when '-'
3834                                 tok_state = tok_state_comment_start_dash
3835                         when "\u0000"
3836                                 parse_error()
3837                                 tok_state = tok_state_comment
3838                                 return new_character_token "\ufffd"
3839                         when '>'
3840                                 parse_error()
3841                                 tok_state = tok_state_data
3842                                 return tok_cur_tag
3843                         when '' # EOF
3844                                 parse_error()
3845                                 tok_state = tok_state_data
3846                                 cur -= 1 # Reconsume
3847                                 return tok_cur_tag
3848                         else
3849                                 tok_cur_tag.text += c
3850                                 tok_state = tok_state_comment
3851                 return null
3852
3853         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3854         tok_state_comment_start_dash = ->
3855                 switch c = txt.charAt(cur++)
3856                         when '-'
3857                                 tok_state = tok_state_comment_end
3858                         when "\u0000"
3859                                 parse_error()
3860                                 tok_cur_tag.text += "-\ufffd"
3861                                 tok_state = tok_state_comment
3862                         when '>'
3863                                 parse_error()
3864                                 tok_state = tok_state_data
3865                                 return tok_cur_tag
3866                         when '' # EOF
3867                                 parse_error()
3868                                 tok_state = tok_state_data
3869                                 cur -= 1 # Reconsume
3870                                 return tok_cur_tag
3871                         else
3872                                 tok_cur_tag.text += "-#{c}"
3873                                 tok_state = tok_state_comment
3874                 return null
3875
3876         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3877         tok_state_comment = ->
3878                 switch c = txt.charAt(cur++)
3879                         when '-'
3880                                 tok_state = tok_state_comment_end_dash
3881                         when "\u0000"
3882                                 parse_error()
3883                                 tok_cur_tag.text += "\ufffd"
3884                         when '' # EOF
3885                                 parse_error()
3886                                 tok_state = tok_state_data
3887                                 cur -= 1 # Reconsume
3888                                 return tok_cur_tag
3889                         else
3890                                 tok_cur_tag.text += c
3891                 return null
3892
3893         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3894         tok_state_comment_end_dash = ->
3895                 switch c = txt.charAt(cur++)
3896                         when '-'
3897                                 tok_state = tok_state_comment_end
3898                         when "\u0000"
3899                                 parse_error()
3900                                 tok_cur_tag.text += "-\ufffd"
3901                                 tok_state = tok_state_comment
3902                         when '' # EOF
3903                                 parse_error()
3904                                 tok_state = tok_state_data
3905                                 cur -= 1 # Reconsume
3906                                 return tok_cur_tag
3907                         else
3908                                 tok_cur_tag.text += "-#{c}"
3909                                 tok_state = tok_state_comment
3910                 return null
3911
3912         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3913         tok_state_comment_end = ->
3914                 switch c = txt.charAt(cur++)
3915                         when '>'
3916                                 tok_state = tok_state_data
3917                                 return tok_cur_tag
3918                         when "\u0000"
3919                                 parse_error()
3920                                 tok_cur_tag.text += "--\ufffd"
3921                                 tok_state = tok_state_comment
3922                         when '!'
3923                                 parse_error()
3924                                 tok_state = tok_state_comment_end_bang
3925                         when '-'
3926                                 parse_error()
3927                                 tok_cur_tag.text += '-'
3928                         when '' # EOF
3929                                 parse_error()
3930                                 tok_state = tok_state_data
3931                                 cur -= 1 # Reconsume
3932                                 return tok_cur_tag
3933                         else
3934                                 parse_error()
3935                                 tok_cur_tag.text += "--#{c}"
3936                                 tok_state = tok_state_comment
3937                 return null
3938
3939         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3940         tok_state_comment_end_bang = ->
3941                 switch c = txt.charAt(cur++)
3942                         when '-'
3943                                 tok_cur_tag.text += "--!#{c}"
3944                                 tok_state = tok_state_comment_end_dash
3945                         when '>'
3946                                 tok_state = tok_state_data
3947                                 return tok_cur_tag
3948                         when "\u0000"
3949                                 parse_error()
3950                                 tok_cur_tag.text += "--!\ufffd"
3951                                 tok_state = tok_state_comment
3952                         when '' # EOF
3953                                 parse_error()
3954                                 tok_state = tok_state_data
3955                                 cur -= 1 # Reconsume
3956                                 return tok_cur_tag
3957                         else
3958                                 tok_cur_tag.text += "--!#{c}"
3959                                 tok_state = tok_state_comment
3960                 return null
3961
3962         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3963         tok_state_doctype = ->
3964                 switch c = txt.charAt(cur++)
3965                         when "\t", "\u000a", "\u000c", ' '
3966                                 tok_state = tok_state_before_doctype_name
3967                         when '' # EOF
3968                                 parse_error()
3969                                 tok_state = tok_state_data
3970                                 el = new_doctype_token ''
3971                                 el.flag 'force-quirks', true
3972                                 cur -= 1 # Reconsume
3973                                 return el
3974                         else
3975                                 parse_error()
3976                                 tok_state = tok_state_before_doctype_name
3977                                 cur -= 1 # Reconsume
3978                 return null
3979
3980         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3981         tok_state_before_doctype_name = ->
3982                 c = txt.charAt(cur++)
3983                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3984                         return
3985                 if is_uc_alpha(c)
3986                         tok_cur_tag = new_doctype_token c.toLowerCase()
3987                         tok_state = tok_state_doctype_name
3988                         return
3989                 if c is "\u0000"
3990                         parse_error()
3991                         tok_cur_tag = new_doctype_token "\ufffd"
3992                         tok_state = tok_state_doctype_name
3993                         return
3994                 if c is '>'
3995                         parse_error()
3996                         el = new_doctype_token ''
3997                         el.flag 'force-quirks', true
3998                         tok_state = tok_state_data
3999                         return el
4000                 if c is '' # EOF
4001                         parse_error()
4002                         tok_state = tok_state_data
4003                         el = new_doctype_token ''
4004                         el.flag 'force-quirks', true
4005                         cur -= 1 # Reconsume
4006                         return el
4007                 # Anything else
4008                 tok_cur_tag = new_doctype_token c
4009                 tok_state = tok_state_doctype_name
4010                 return null
4011
4012         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4013         tok_state_doctype_name = ->
4014                 c = txt.charAt(cur++)
4015                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4016                         tok_state = tok_state_after_doctype_name
4017                         return
4018                 if c is '>'
4019                         tok_state = tok_state_data
4020                         return tok_cur_tag
4021                 if is_uc_alpha(c)
4022                         tok_cur_tag.name += c.toLowerCase()
4023                         return
4024                 if c is "\u0000"
4025                         parse_error()
4026                         tok_cur_tag.name += "\ufffd"
4027                         return
4028                 if c is '' # EOF
4029                         parse_error()
4030                         tok_state = tok_state_data
4031                         tok_cur_tag.flag 'force-quirks', true
4032                         cur -= 1 # Reconsume
4033                         return tok_cur_tag
4034                 # Anything else
4035                 tok_cur_tag.name += c
4036                 return null
4037
4038         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4039         tok_state_after_doctype_name = ->
4040                 c = txt.charAt(cur++)
4041                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4042                         return
4043                 if c is '>'
4044                         tok_state = tok_state_data
4045                         return tok_cur_tag
4046                 if c is '' # EOF
4047                         parse_error()
4048                         tok_state = tok_state_data
4049                         tok_cur_tag.flag 'force-quirks', true
4050                         cur -= 1 # Reconsume
4051                         return tok_cur_tag
4052                 # Anything else
4053                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4054                         cur += 5
4055                         tok_state = tok_state_after_doctype_public_keyword
4056                         return
4057                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4058                         cur += 5
4059                         tok_state = tok_state_after_doctype_system_keyword
4060                         return
4061                 parse_error()
4062                 tok_cur_tag.flag 'force-quirks', true
4063                 tok_state = tok_state_bogus_doctype
4064                 return null
4065
4066         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4067         tok_state_after_doctype_public_keyword = ->
4068                 c = txt.charAt(cur++)
4069                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4070                         tok_state = tok_state_before_doctype_public_identifier
4071                         return
4072                 if c is '"'
4073                         parse_error()
4074                         tok_cur_tag.public_identifier = ''
4075                         tok_state = tok_state_doctype_public_identifier_double_quoted
4076                         return
4077                 if c is "'"
4078                         parse_error()
4079                         tok_cur_tag.public_identifier = ''
4080                         tok_state = tok_state_doctype_public_identifier_single_quoted
4081                         return
4082                 if c is '>'
4083                         parse_error()
4084                         tok_cur_tag.flag 'force-quirks', true
4085                         tok_state = tok_state_data
4086                         return tok_cur_tag
4087                 if c is '' # EOF
4088                         parse_error()
4089                         tok_state = tok_state_data
4090                         tok_cur_tag.flag 'force-quirks', true
4091                         cur -= 1 # Reconsume
4092                         return tok_cur_tag
4093                 # Anything else
4094                 parse_error()
4095                 tok_cur_tag.flag 'force-quirks', true
4096                 tok_state = tok_state_bogus_doctype
4097                 return null
4098
4099         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4100         tok_state_before_doctype_public_identifier = ->
4101                 c = txt.charAt(cur++)
4102                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4103                         return
4104                 if c is '"'
4105                         parse_error()
4106                         tok_cur_tag.public_identifier = ''
4107                         tok_state = tok_state_doctype_public_identifier_double_quoted
4108                         return
4109                 if c is "'"
4110                         parse_error()
4111                         tok_cur_tag.public_identifier = ''
4112                         tok_state = tok_state_doctype_public_identifier_single_quoted
4113                         return
4114                 if c is '>'
4115                         parse_error()
4116                         tok_cur_tag.flag 'force-quirks', true
4117                         tok_state = tok_state_data
4118                         return tok_cur_tag
4119                 if c is '' # EOF
4120                         parse_error()
4121                         tok_state = tok_state_data
4122                         tok_cur_tag.flag 'force-quirks', true
4123                         cur -= 1 # Reconsume
4124                         return tok_cur_tag
4125                 # Anything else
4126                 parse_error()
4127                 tok_cur_tag.flag 'force-quirks', true
4128                 tok_state = tok_state_bogus_doctype
4129                 return null
4130
4131
4132         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4133         tok_state_doctype_public_identifier_double_quoted = ->
4134                 c = txt.charAt(cur++)
4135                 if c is '"'
4136                         tok_state = tok_state_after_doctype_public_identifier
4137                         return
4138                 if c is "\u0000"
4139                         parse_error()
4140                         tok_cur_tag.public_identifier += "\ufffd"
4141                         return
4142                 if c is '>'
4143                         parse_error()
4144                         tok_cur_tag.flag 'force-quirks', true
4145                         tok_state = tok_state_data
4146                         return tok_cur_tag
4147                 if c is '' # EOF
4148                         parse_error()
4149                         tok_state = tok_state_data
4150                         tok_cur_tag.flag 'force-quirks', true
4151                         cur -= 1 # Reconsume
4152                         return tok_cur_tag
4153                 # Anything else
4154                 tok_cur_tag.public_identifier += c
4155                 return null
4156
4157         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4158         tok_state_doctype_public_identifier_single_quoted = ->
4159                 c = txt.charAt(cur++)
4160                 if c is "'"
4161                         tok_state = tok_state_after_doctype_public_identifier
4162                         return
4163                 if c is "\u0000"
4164                         parse_error()
4165                         tok_cur_tag.public_identifier += "\ufffd"
4166                         return
4167                 if c is '>'
4168                         parse_error()
4169                         tok_cur_tag.flag 'force-quirks', true
4170                         tok_state = tok_state_data
4171                         return tok_cur_tag
4172                 if c is '' # EOF
4173                         parse_error()
4174                         tok_state = tok_state_data
4175                         tok_cur_tag.flag 'force-quirks', true
4176                         cur -= 1 # Reconsume
4177                         return tok_cur_tag
4178                 # Anything else
4179                 tok_cur_tag.public_identifier += c
4180                 return null
4181
4182         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4183         tok_state_after_doctype_public_identifier = ->
4184                 c = txt.charAt(cur++)
4185                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4186                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4187                         return
4188                 if c is '>'
4189                         tok_state = tok_state_data
4190                         return tok_cur_tag
4191                 if c is '"'
4192                         parse_error()
4193                         tok_cur_tag.system_identifier = ''
4194                         tok_state = tok_state_doctype_system_identifier_double_quoted
4195                         return
4196                 if c is "'"
4197                         parse_error()
4198                         tok_cur_tag.system_identifier = ''
4199                         tok_state = tok_state_doctype_system_identifier_single_quoted
4200                         return
4201                 if c is '' # EOF
4202                         parse_error()
4203                         tok_state = tok_state_data
4204                         tok_cur_tag.flag 'force-quirks', true
4205                         cur -= 1 # Reconsume
4206                         return tok_cur_tag
4207                 # Anything else
4208                 parse_error()
4209                 tok_cur_tag.flag 'force-quirks', true
4210                 tok_state = tok_state_bogus_doctype
4211                 return null
4212
4213         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4214         tok_state_between_doctype_public_and_system_identifiers = ->
4215                 c = txt.charAt(cur++)
4216                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4217                         return
4218                 if c is '>'
4219                         tok_state = tok_state_data
4220                         return tok_cur_tag
4221                 if c is '"'
4222                         parse_error()
4223                         tok_cur_tag.system_identifier = ''
4224                         tok_state = tok_state_doctype_system_identifier_double_quoted
4225                         return
4226                 if c is "'"
4227                         parse_error()
4228                         tok_cur_tag.system_identifier = ''
4229                         tok_state = tok_state_doctype_system_identifier_single_quoted
4230                         return
4231                 if c is '' # EOF
4232                         parse_error()
4233                         tok_state = tok_state_data
4234                         tok_cur_tag.flag 'force-quirks', true
4235                         cur -= 1 # Reconsume
4236                         return tok_cur_tag
4237                 # Anything else
4238                 parse_error()
4239                 tok_cur_tag.flag 'force-quirks', true
4240                 tok_state = tok_state_bogus_doctype
4241                 return null
4242
4243         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4244         tok_state_after_doctype_system_keyword = ->
4245                 c = txt.charAt(cur++)
4246                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4247                         tok_state = tok_state_before_doctype_system_identifier
4248                         return
4249                 if c is '"'
4250                         parse_error()
4251                         tok_cur_tag.system_identifier = ''
4252                         tok_state = tok_state_doctype_system_identifier_double_quoted
4253                         return
4254                 if c is "'"
4255                         parse_error()
4256                         tok_cur_tag.system_identifier = ''
4257                         tok_state = tok_state_doctype_system_identifier_single_quoted
4258                         return
4259                 if c is '>'
4260                         parse_error()
4261                         tok_cur_tag.flag 'force-quirks', true
4262                         tok_state = tok_state_data
4263                         return tok_cur_tag
4264                 if c is '' # EOF
4265                         parse_error()
4266                         tok_state = tok_state_data
4267                         tok_cur_tag.flag 'force-quirks', true
4268                         cur -= 1 # Reconsume
4269                         return tok_cur_tag
4270                 # Anything else
4271                 parse_error()
4272                 tok_cur_tag.flag 'force-quirks', true
4273                 tok_state = tok_state_bogus_doctype
4274                 return null
4275
4276         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4277         tok_state_before_doctype_system_identifier = ->
4278                 c = txt.charAt(cur++)
4279                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4280                         return
4281                 if c is '"'
4282                         tok_cur_tag.system_identifier = ''
4283                         tok_state = tok_state_doctype_system_identifier_double_quoted
4284                         return
4285                 if c is "'"
4286                         tok_cur_tag.system_identifier = ''
4287                         tok_state = tok_state_doctype_system_identifier_single_quoted
4288                         return
4289                 if c is '>'
4290                         parse_error()
4291                         tok_cur_tag.flag 'force-quirks', true
4292                         tok_state = tok_state_data
4293                         return tok_cur_tag
4294                 if c is '' # EOF
4295                         parse_error()
4296                         tok_state = tok_state_data
4297                         tok_cur_tag.flag 'force-quirks', true
4298                         cur -= 1 # Reconsume
4299                         return tok_cur_tag
4300                 # Anything else
4301                 parse_error()
4302                 tok_cur_tag.flag 'force-quirks', true
4303                 tok_state = tok_state_bogus_doctype
4304                 return null
4305
4306         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4307         tok_state_doctype_system_identifier_double_quoted = ->
4308                 c = txt.charAt(cur++)
4309                 if c is '"'
4310                         tok_state = tok_state_after_doctype_system_identifier
4311                         return
4312                 if c is "\u0000"
4313                         parse_error()
4314                         tok_cur_tag.system_identifier += "\ufffd"
4315                         return
4316                 if c is '>'
4317                         parse_error()
4318                         tok_cur_tag.flag 'force-quirks', true
4319                         tok_state = tok_state_data
4320                         return tok_cur_tag
4321                 if c is '' # EOF
4322                         parse_error()
4323                         tok_state = tok_state_data
4324                         tok_cur_tag.flag 'force-quirks', true
4325                         cur -= 1 # Reconsume
4326                         return tok_cur_tag
4327                 # Anything else
4328                 tok_cur_tag.system_identifier += c
4329                 return null
4330
4331         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4332         tok_state_doctype_system_identifier_single_quoted = ->
4333                 c = txt.charAt(cur++)
4334                 if c is "'"
4335                         tok_state = tok_state_after_doctype_system_identifier
4336                         return
4337                 if c is "\u0000"
4338                         parse_error()
4339                         tok_cur_tag.system_identifier += "\ufffd"
4340                         return
4341                 if c is '>'
4342                         parse_error()
4343                         tok_cur_tag.flag 'force-quirks', true
4344                         tok_state = tok_state_data
4345                         return tok_cur_tag
4346                 if c is '' # EOF
4347                         parse_error()
4348                         tok_state = tok_state_data
4349                         tok_cur_tag.flag 'force-quirks', true
4350                         cur -= 1 # Reconsume
4351                         return tok_cur_tag
4352                 # Anything else
4353                 tok_cur_tag.system_identifier += c
4354                 return null
4355
4356         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4357         tok_state_after_doctype_system_identifier = ->
4358                 c = txt.charAt(cur++)
4359                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4360                         return
4361                 if c is '>'
4362                         tok_state = tok_state_data
4363                         return tok_cur_tag
4364                 if c is '' # EOF
4365                         parse_error()
4366                         tok_state = tok_state_data
4367                         tok_cur_tag.flag 'force-quirks', true
4368                         cur -= 1 # Reconsume
4369                         return tok_cur_tag
4370                 # Anything else
4371                 parse_error()
4372                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4373                 tok_state = tok_state_bogus_doctype
4374                 return null
4375
4376         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4377         tok_state_bogus_doctype = ->
4378                 c = txt.charAt(cur++)
4379                 if c is '>'
4380                         tok_state = tok_state_data
4381                         return tok_cur_tag
4382                 if c is '' # EOF
4383                         tok_state = tok_state_data
4384                         cur -= 1 # Reconsume
4385                         return tok_cur_tag
4386                 # Anything else
4387                 return null
4388
4389         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4390         tok_state_cdata_section = ->
4391                 tok_state = tok_state_data
4392                 next_gt = txt.indexOf ']]>', cur
4393                 if next_gt is -1
4394                         val = txt.substr cur
4395                         cur = txt.length
4396                 else
4397                         val = txt.substr cur, (next_gt - cur)
4398                         cur = next_gt + 3
4399                 return new_character_token val # fixfull split
4400
4401         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4402         # Don't set this as a state, just call it
4403         # returns a string (NOT a text node)
4404         parse_character_reference = (allowed_char = null, in_attr = false) ->
4405                 if cur >= txt.length
4406                         return '&'
4407                 switch c = txt.charAt(cur)
4408                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4409                                 # explicitly not a parse error
4410                                 return '&'
4411                         when ';'
4412                                 # there has to be "one or more" alnums between & and ; to be a parse error
4413                                 return '&'
4414                         when '#'
4415                                 if cur + 1 >= txt.length
4416                                         return '&'
4417                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4418                                         base = 16
4419                                         charset = hex_chars
4420                                         start = cur + 2
4421                                 else
4422                                         charset = digits
4423                                         start = cur + 1
4424                                         base = 10
4425                                 i = 0
4426                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4427                                         i += 1
4428                                 if i is 0
4429                                         return '&'
4430                                 cur = start + i
4431                                 if txt.charAt(start + i) is ';'
4432                                         cur += 1
4433                                 else
4434                                         parse_error()
4435                                 code_point = txt.substr(start, i)
4436                                 while code_point.charAt(0) is '0' and code_point.length > 1
4437                                         code_point = code_point.substr 1
4438                                 code_point = parseInt(code_point, base)
4439                                 if unicode_fixes[code_point]?
4440                                         parse_error()
4441                                         return unicode_fixes[code_point]
4442                                 else
4443                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4444                                                 parse_error()
4445                                                 return "\ufffd"
4446                                         else
4447                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4448                                                         parse_error()
4449                                                 return from_code_point code_point
4450                                 return
4451                         else
4452                                 for i in [0...31]
4453                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4454                                                 break
4455                                 if i is 0
4456                                         # exit early, because parse_error() below needs at least one alnum
4457                                         return '&'
4458                                 if txt.charAt(cur + i) is ';'
4459                                         i += 1 # include ';' terminator in value
4460                                         decoded = decode_named_char_ref txt.substr(cur, i)
4461                                         if decoded?
4462                                                 cur += i
4463                                                 return decoded
4464                                         parse_error()
4465                                         return '&'
4466                                 else
4467                                         # no ';' terminator (only legacy char refs)
4468                                         max = i
4469                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4470                                                 c = legacy_char_refs[txt.substr(cur, i)]
4471                                                 if c?
4472                                                         if in_attr
4473                                                                 if txt.charAt(cur + i) is '='
4474                                                                         # "because some legacy user agents will
4475                                                                         # misinterpret the markup in those cases"
4476                                                                         parse_error()
4477                                                                         return '&'
4478                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4479                                                                         # this makes attributes forgiving about url args
4480                                                                         return '&'
4481                                                         # ok, and besides the weird exceptions for attributes...
4482                                                         # return the matching char
4483                                                         cur += i # consume entity chars
4484                                                         parse_error() # because no terminating ";"
4485                                                         return c
4486                                         parse_error()
4487                                         return '&'
4488                 return # never reached
4489
4490         # tree constructor initialization
4491         # see comments on TYPE_TAG/etc for the structure of this data
4492         txt = args.html
4493         cur = 0
4494         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4495         open_els = []
4496         afe = [] # active formatting elements
4497         template_ins_modes = []
4498         ins_mode = ins_mode_initial
4499         original_ins_mode = ins_mode # TODO check spec
4500         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4501         flag_frameset_ok = true
4502         flag_parsing = true
4503         flag_foster_parenting = false
4504         form_element_pointer = null
4505         temporary_buffer = null
4506         pending_table_character_tokens = []
4507         head_element_pointer = null
4508         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4509         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4510
4511         # tokenizer initialization
4512         tok_state = tok_state_data
4513
4514         # text pre-processing
4515         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4516         txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4517         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4518         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4519
4520         if args.name is "tests18.dat #17"
4521                 console.log "hi"
4522         # proccess input
4523         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4524         while flag_parsing
4525                 t = tok_state()
4526                 if t?
4527                         process_token t
4528                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4529         return doc.children
4530
4531 serialize_els = (els, shallow, show_ids) ->
4532         serialized = ''
4533         sep = ''
4534         for t in els
4535                 serialized += sep
4536                 sep = ','
4537                 serialized += t.serialize shallow, show_ids
4538         return serialized
4539
4540 module.exports.parse_html = parse_html
4541 module.exports.debug_log_reset = debug_log_reset
4542 module.exports.debug_log_each = debug_log_each
4543 module.exports.TYPE_TAG = TYPE_TAG
4544 module.exports.TYPE_TEXT = TYPE_TEXT
4545 module.exports.TYPE_COMMENT = TYPE_COMMENT
4546 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE
4547 module.exports.NS_HTML = NS_HTML
4548 module.exports.NS_MATHML = NS_MATHML
4549 module.exports.NS_SVG = NS_SVG