JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
update aaa to WHATWG version
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor.
20
21 # The implementation is a pretty direct implementation of the parsing algorithm
22 # described here:
23 # http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
24 #
25 # Deviations from that spec:
26 #
27 #   Purposeful: search this file for "WHATWG"
28 #
29 #   Not finished yet: search this file for "fixfull", "TODO" and "FIXME"
30
31
32 # stacks/lists
33 #
34 # the spec uses many different words to indicate which ends of lists/stacks
35 # they are talking about (and relative movement within the lists/stacks). This
36 # section explains them. I'm implementing "lists" (afe and open_els) the same
37 # way (both as stacks)
38 #
39 # stacks grow downward (current element is index=0)
40 #
41 # example: open_els = [a, b, c, d, e, f, g]
42 #
43 # "grows downwards" means it's visualized like this: (index: el, names)
44 #
45 #   6: g "start of the list", "topmost", "first"
46 #   5: f
47 #   4: e "previous" (to d), "above", "before"
48 #   3: d   (previous/next are relative to this element)
49 #   2: c "next", "after", "lower", "below"
50 #   1: b
51 #   0: a "end of the list", "current node", "bottommost", "last"
52
53
54 # browser
55 # note: to get this to run outside a browser, you'll have to write a native
56 # implementation of decode_named_char_ref()
# Browser fallback: when no `module.exports` is visible, create window.wheic
# and bind a local `module` whose `exports` points at that object.
# NOTE(review): assigning `module` below makes it a variable of this file, so
# the existence test may be inspecting that variable rather than an outer
# CommonJS `module` -- confirm before relying on the non-browser path.
unless module?.exports?
        window.wheic = {}
        module = exports: window.wheic
60
# Convert a Unicode code point (number) into a string.
# Uses the native String.fromCodePoint when the runtime has it; otherwise
# builds the string by hand, emitting a surrogate pair for code points above
# the Basic Multilingual Plane.
from_code_point = (x) ->
        return String.fromCodePoint(x) if String.fromCodePoint?
        if x <= 0xffff
                return String.fromCharCode(x)
        # split the 20-bit offset into high (top 10 bits) and low (bottom 10 bits)
        offset = x - 0x10000
        high_surrogate = 0xd800 + (offset >> 10)
        low_surrogate = 0xdc00 + (offset % 0x400)
        return String.fromCharCode(high_surrogate, low_surrogate)
69
70 # Each node is an object of the Node class. Here are the Node types:
71 TYPE_TAG = 0 # name, {attributes}, [children]
72 TYPE_TEXT = 1 # "text"
73 TYPE_COMMENT = 2
74 TYPE_DOCTYPE = 3
75 # the following types are emitted by the tokenizer, but shouldn't end up in the tree:
76 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
77 TYPE_END_TAG = 5 # name
78 TYPE_EOF = 6
79 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
80 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
81
82 # namespace constants
83 NS_HTML = 1
84 NS_MATHML = 2
85 NS_SVG = 3
86
# In-memory debug log: messages accumulate here until explicitly reset.
g_debug_log = []
# Forget everything logged so far (swaps in a fresh array).
debug_log_reset = -> g_debug_log = []
# Append one message to the log.
debug_log = (str) -> g_debug_log.push str
# Call cb once for every logged message, in the order they were logged.
debug_log_each = (cb) -> (cb message for message in g_debug_log)
95
# counter used to give every Node a unique id (pre-incremented on each use)
prev_node_id = 0
# Node is used both for nodes of the parsed tree and for the tokens emitted by
# the tokenizer; @type (one of the TYPE_* constants) tells them apart.
class Node
        # type: one of the TYPE_* constants
        # args: optional settings; every field falls back to an empty default.
        #   name, text, attrs, attrs_a, children, namespace, parent, token,
        #   flags, id
        constructor: (type, args = {}) ->
                @type = type # one of the TYPE_* constants above
                @name = args.name ? '' # tag name
                @text = args.text ? '' # contents for text/comment nodes
                @attrs = args.attrs ? {}
                # attrs in progress, TYPE_START_TAG only. The option is accepted
                # under the property's own name (attrs_a); the older spelling
                # (attr_k) is still honoured so existing callers keep working.
                @attrs_a = args.attrs_a ? (args.attr_k ? [])
                @children = args.children ? []
                @namespace = args.namespace ? NS_HTML
                @parent = args.parent ? null
                @token = args.token ? null
                @flags = args.flags ? {}
                if args.id?
                        # an id passed in gets a "+" suffix
                        @id = "#{args.id}+"
                else
                        @id = "#{++prev_node_id}"
        # Record that a self-closing flag was acknowledged: on the token this
        # node carries if it has one, otherwise on the node itself.
        acknowledge_self_closing: ->
                if @token?
                        @token.flag 'did_self_close', true
                else
                        @flag 'did_self_close', true
        # Getter/setter for flags: with a non-null value it stores the flag,
        # without one it returns the stored value (undefined if never set).
        flag: (key, value = null) ->
                if value?
                        @flags[key] = value
                else
                        return @flags[key]
        # Render this node (and, unless shallow, its attributes and children)
        # as a compact string. Attribute keys are sorted so output is stable.
        serialize: (shallow = false, show_ids = false) -> # for unit tests
                ret = ''
                switch @type
                        when TYPE_TAG
                                ret += 'tag:'
                                ret += JSON.stringify @name
                                ret += ','
                                if show_ids
                                        ret += "##{@id},"
                                if shallow
                                        break
                                attr_keys = []
                                for k of @attrs
                                        attr_keys.push k
                                attr_keys.sort()
                                ret += '{'
                                sep = ''
                                for k in attr_keys
                                        ret += sep
                                        sep = ','
                                        ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
                                ret += '},['
                                sep = ''
                                for c in @children
                                        ret += sep
                                        sep = ','
                                        ret += c.serialize shallow, show_ids
                                ret += ']'
                        when TYPE_TEXT
                                ret += 'text:'
                                ret += JSON.stringify @text
                        when TYPE_COMMENT
                                ret += 'comment:'
                                ret += JSON.stringify @text
                        when TYPE_DOCTYPE
                                ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
                        when TYPE_AFE_MARKER
                                ret += 'marker'
                        when TYPE_AAA_BOOKMARK
                                ret += 'aaa_bookmark'
                        else
                                ret += 'unknown:'
                                console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
                return ret
167
168 # helpers: (only take args that are normally known when parser creates nodes)
# Factory helpers: each builds a Node of one fixed type from the single piece
# of data the parser has on hand at creation time.
new_open_tag = (name) -> new Node(TYPE_START_TAG, {name})
new_end_tag = (name) -> new Node(TYPE_END_TAG, {name})
new_element = (name) -> new Node(TYPE_TAG, {name})
new_text_node = (txt) -> new Node(TYPE_TEXT, {text: txt})
# character tokens and text nodes are built the same way
new_character_token = new_text_node
new_comment_token = (txt) -> new Node(TYPE_COMMENT, {text: txt})
new_doctype_token = (name) -> new Node(TYPE_DOCTYPE, {name})
new_eof_token = -> new Node(TYPE_EOF)
new_afe_marker = -> new Node(TYPE_AFE_MARKER)
new_aaa_bookmark = -> new Node(TYPE_AAA_BOOKMARK)
188
# ASCII character classes used by the tokenizer
lc_alpha = "abcdefghijklmnopqrstuvwxyz"
uc_alpha = lc_alpha.toUpperCase()
digits = "0123456789"
alnum = "#{lc_alpha}#{uc_alpha}#{digits}"
hex_chars = "#{digits}abcdefABCDEF"

# true only for a single character in A-Z
is_uc_alpha = (str) ->
        return false unless str.length is 1
        return uc_alpha.indexOf(str) isnt -1
# true only for a single character in a-z
is_lc_alpha = (str) ->
        return false unless str.length is 1
        return lc_alpha.indexOf(str) isnt -1

# some SVG elements have dashes in them
tag_name_chars = "#{alnum}-"
202
203 # http://www.w3.org/TR/html5/infrastructure.html#space-character
# tab, line feed, form feed, carriage return, space
space_chars = "\u0009\u000a\u000c\u000d\u0020"
# true when txt is exactly one character from space_chars
is_space = (txt) ->
        return false unless txt.length is 1
        return space_chars.indexOf(txt) isnt -1
# true when t is a text token holding exactly one character from space_chars
is_space_tok = (t) ->
        return false unless t.type is TYPE_TEXT
        return false unless t.text.length is 1
        return space_chars.indexOf(t.text) isnt -1
209
# Does start-tag token t carry a type attribute whose value is "hidden"
# (compared case-insensitively)? Only the first 'type' entry found in
# t.attrs_a is consulted. Anything that is not a start tag yields false.
is_input_hidden_tok = (t) ->
        if t.type isnt TYPE_START_TAG
                return false
        for [key, value] in t.attrs_a when key is 'type'
                return value.toLowerCase() is 'hidden'
        return false
218
219 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
220 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
221
222 unicode_fixes = {}
223 unicode_fixes[0x00] = "\uFFFD"
224 unicode_fixes[0x80] = "\u20AC"
225 unicode_fixes[0x82] = "\u201A"
226 unicode_fixes[0x83] = "\u0192"
227 unicode_fixes[0x84] = "\u201E"
228 unicode_fixes[0x85] = "\u2026"
229 unicode_fixes[0x86] = "\u2020"
230 unicode_fixes[0x87] = "\u2021"
231 unicode_fixes[0x88] = "\u02C6"
232 unicode_fixes[0x89] = "\u2030"
233 unicode_fixes[0x8A] = "\u0160"
234 unicode_fixes[0x8B] = "\u2039"
235 unicode_fixes[0x8C] = "\u0152"
236 unicode_fixes[0x8E] = "\u017D"
237 unicode_fixes[0x91] = "\u2018"
238 unicode_fixes[0x92] = "\u2019"
239 unicode_fixes[0x93] = "\u201C"
240 unicode_fixes[0x94] = "\u201D"
241 unicode_fixes[0x95] = "\u2022"
242 unicode_fixes[0x96] = "\u2013"
243 unicode_fixes[0x97] = "\u2014"
244 unicode_fixes[0x98] = "\u02DC"
245 unicode_fixes[0x99] = "\u2122"
246 unicode_fixes[0x9A] = "\u0161"
247 unicode_fixes[0x9B] = "\u203A"
248 unicode_fixes[0x9C] = "\u0153"
249 unicode_fixes[0x9E] = "\u017E"
250 unicode_fixes[0x9F] = "\u0178"
251
252 # These are the character references that don't need a terminating semicolon
253 # min length: 2, max: 6, none are a prefix of any other.
254 legacy_char_refs = {
255         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
256         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
257         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
258         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
259         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
260         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
261         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
262         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
263         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
264         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
265         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
266         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
267         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
268         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
269         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
270         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
271         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
272         yen: '¥', yuml: 'ÿ'
273 }
274
275 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
276 raw_text_elements = ['script', 'style']
277 escapable_raw_text_elements = ['textarea', 'title']
278 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
279 svg_elements = [
280         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
281         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
282         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
283         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
284         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
285         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
286         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
287         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
288         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
289         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
290         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
291         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
292         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
293         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
294         'view', 'vkern'
295 ]
296
297 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
298 mathml_elements = [
299         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
300         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
301         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
302         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
303         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
304         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
305         'determinant', 'diff', 'divergence', 'divide', 'domain',
306         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
307         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
308         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
309         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
310         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
311         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
312         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
313         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
314         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
315         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
316         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
317         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
318         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
319         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
320         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
321         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
322         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
323         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
324         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
325         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
326         'vectorproduct', 'xor'
327 ]
328 # foreign_elements = [svg_elements..., mathml_elements...]
329 #normal_elements = All other allowed HTML elements are normal elements.
330
331 special_elements = {
332         # HTML:
333         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
334         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
335         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
336         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
337         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
338         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
339         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
340         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
341         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
342         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
343         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML,
344
345         menu:NS_HTML,menuitem:NS_HTML, # WHATWG adds these
346
347         meta:NS_HTML, nav:NS_HTML, noembed:NS_HTML, noframes:NS_HTML,
348         noscript:NS_HTML, object:NS_HTML, ol:NS_HTML, p:NS_HTML, param:NS_HTML,
349         plaintext:NS_HTML, pre:NS_HTML, script:NS_HTML, section:NS_HTML,
350         select:NS_HTML, source:NS_HTML, style:NS_HTML, summary:NS_HTML,
351         table:NS_HTML, tbody:NS_HTML, td:NS_HTML, template:NS_HTML,
352         textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML, title:NS_HTML,
353         tr:NS_HTML, track:NS_HTML, ul:NS_HTML, wbr:NS_HTML, xmp:NS_HTML,
354
355         # MathML:
356         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
357         'annotation-xml':NS_MATHML,
358
359         # SVG:
360         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
361 }
362
363 formatting_elements = {
364          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
365          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
366          u: true
367 }
368
# element names (with the namespace they must be in) that act as MathML text
# integration points
mathml_text_integration = {
        mi: NS_MATHML
        mo: NS_MATHML
        mn: NS_MATHML
        ms: NS_MATHML
        mtext: NS_MATHML
}
# true when el's name is listed above and el is in the listed namespace
is_mathml_text_integration_point = (el) ->
        required_namespace = mathml_text_integration[el.name]
        return required_namespace is el.namespace
# Is el an HTML integration point?
#   MathML: annotation-xml whose encoding attribute is (case-insensitively)
#           "text/html" or "application/xhtml+xml"
#   SVG:    foreignObject, desc or title
# Reads el.attrs, so it must be given an element, never a token.
is_html_integration = (el) -> # DON'T PASS A TOKEN
        switch el.namespace
                when NS_MATHML
                        return false unless el.name is 'annotation-xml' and el.attrs.encoding?
                        encoding = el.attrs.encoding.toLowerCase()
                        return encoding is 'text/html' or encoding is 'application/xhtml+xml'
                when NS_SVG
                        return el.name in ['foreignObject', 'desc', 'title']
        return false
387
388 h_tags = {
389         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
390 }
391
392 foster_parenting_targets = {
393         table: NS_HTML
394         tbody: NS_HTML
395         tfoot: NS_HTML
396         thead: NS_HTML
397         tr: NS_HTML
398 }
399
400 end_tag_implied = {
401         dd: NS_HTML
402         dt: NS_HTML
403         li: NS_HTML
404         option: NS_HTML
405         optgroup: NS_HTML
406         p: NS_HTML
407         rb: NS_HTML
408         rp: NS_HTML
409         rt: NS_HTML
410         rtc: NS_HTML
411 }
412
# Is element e in the "special" category?
# special_elements maps each name to a single namespace, but `title` is special
# in BOTH the HTML and the SVG namespace. The table lists `title` twice and
# only one entry can survive in the resulting object, so the HTML case is
# checked explicitly here instead of relying on the table.
el_is_special = (e) ->
        return true if e.name is 'title' and e.namespace is NS_HTML
        return special_elements[e.name] is e.namespace

# address, div and p: the three special elements that
# el_is_special_not_adp deliberately does not count
adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
# Same as el_is_special, except that HTML address, div and p are not counted.
el_is_special_not_adp = (el) ->
        return el_is_special(el) and adp_els[el.name] isnt el.namespace
419
420 svg_name_fixes = {
421         altglyph: 'altGlyph'
422         altglyphdef: 'altGlyphDef'
423         altglyphitem: 'altGlyphItem'
424         animatecolor: 'animateColor'
425         animatemotion: 'animateMotion'
426         animatetransform: 'animateTransform'
427         clippath: 'clipPath'
428         feblend: 'feBlend'
429         fecolormatrix: 'feColorMatrix'
430         fecomponenttransfer: 'feComponentTransfer'
431         fecomposite: 'feComposite'
432         feconvolvematrix: 'feConvolveMatrix'
433         fediffuselighting: 'feDiffuseLighting'
434         fedisplacementmap: 'feDisplacementMap'
435         fedistantlight: 'feDistantLight'
436         fedropshadow: 'feDropShadow'
437         feflood: 'feFlood'
438         fefunca: 'feFuncA'
439         fefuncb: 'feFuncB'
440         fefuncg: 'feFuncG'
441         fefuncr: 'feFuncR'
442         fegaussianblur: 'feGaussianBlur'
443         feimage: 'feImage'
444         femerge: 'feMerge'
445         femergenode: 'feMergeNode'
446         femorphology: 'feMorphology'
447         feoffset: 'feOffset'
448         fepointlight: 'fePointLight'
449         fespecularlighting: 'feSpecularLighting'
450         fespotlight: 'feSpotLight'
451         fetile: 'feTile'
452         feturbulence: 'feTurbulence'
453         foreignobject: 'foreignObject'
454         glyphref: 'glyphRef'
455         lineargradient: 'linearGradient'
456         radialgradient: 'radialGradient'
457         textpath: 'textPath'
458 }
459 svg_attribute_fixes = {
460         attributename: 'attributeName'
461         attributetype: 'attributeType'
462         basefrequency: 'baseFrequency'
463         baseprofile: 'baseProfile'
464         calcmode: 'calcMode'
465         clippathunits: 'clipPathUnits'
466         contentscripttype: 'contentScriptType'
467         contentstyletype: 'contentStyleType'
468         diffuseconstant: 'diffuseConstant'
469         edgemode: 'edgeMode'
470         externalresourcesrequired: 'externalResourcesRequired'
471         # WHATWG removes this: filterres: 'filterRes'
472         filterunits: 'filterUnits'
473         glyphref: 'glyphRef'
474         gradienttransform: 'gradientTransform'
475         gradientunits: 'gradientUnits'
476         kernelmatrix: 'kernelMatrix'
477         kernelunitlength: 'kernelUnitLength'
478         keypoints: 'keyPoints'
479         keysplines: 'keySplines'
480         keytimes: 'keyTimes'
481         lengthadjust: 'lengthAdjust'
482         limitingconeangle: 'limitingConeAngle'
483         markerheight: 'markerHeight'
484         markerunits: 'markerUnits'
485         markerwidth: 'markerWidth'
486         maskcontentunits: 'maskContentUnits'
487         maskunits: 'maskUnits'
488         numoctaves: 'numOctaves'
489         pathlength: 'pathLength'
490         patterncontentunits: 'patternContentUnits'
491         patterntransform: 'patternTransform'
492         patternunits: 'patternUnits'
493         pointsatx: 'pointsAtX'
494         pointsaty: 'pointsAtY'
495         pointsatz: 'pointsAtZ'
496         preservealpha: 'preserveAlpha'
497         preserveaspectratio: 'preserveAspectRatio'
498         primitiveunits: 'primitiveUnits'
499         refx: 'refX'
500         refy: 'refY'
501         repeatcount: 'repeatCount'
502         repeatdur: 'repeatDur'
503         requiredextensions: 'requiredExtensions'
504         requiredfeatures: 'requiredFeatures'
505         specularconstant: 'specularConstant'
506         specularexponent: 'specularExponent'
507         spreadmethod: 'spreadMethod'
508         startoffset: 'startOffset'
509         stddeviation: 'stdDeviation'
510         stitchtiles: 'stitchTiles'
511         surfacescale: 'surfaceScale'
512         systemlanguage: 'systemLanguage'
513         tablevalues: 'tableValues'
514         targetx: 'targetX'
515         targety: 'targetY'
516         textlength: 'textLength'
517         viewbox: 'viewBox'
518         viewtarget: 'viewTarget'
519         xchannelselector: 'xChannelSelector'
520         ychannelselector: 'yChannelSelector'
521         zoomandpan: 'zoomAndPan'
522 }
523 foreign_attr_fixes = {
524         'xlink:actuate': 'xlink actuate'
525         'xlink:arcrole': 'xlink arcrole'
526         'xlink:href': 'xlink href'
527         'xlink:role': 'xlink role'
528         'xlink:show': 'xlink show'
529         'xlink:title': 'xlink title'
530         'xlink:type': 'xlink type'
531         'xml:base': 'xml base'
532         'xml:lang': 'xml lang'
533         'xml:space': 'xml space'
534         'xmlns': 'xmlns'
535         'xmlns:xlink': 'xmlns xlink'
536 }
# Restore the mixed-case spelling of MathML's definitionURL attribute on
# token t. Entries of t.attrs_a are [name, value] pairs, renamed in place.
adjust_mathml_attributes = (t) ->
        for attr in t.attrs_a when attr[0] is 'definitionurl'
                attr[0] = 'definitionURL'
        return
# Restore the mixed-case spelling of SVG attribute names on token t, using
# svg_attribute_fixes. Entries of t.attrs_a are [name, value] pairs and are
# renamed in place.
adjust_svg_attributes = (t) ->
        for a in t.attrs_a
                # Only the table's own keys count: a plain `table[name]?` test
                # would also find inherited members such as `constructor` and
                # replace the attribute name with a non-string value.
                if Object::hasOwnProperty.call svg_attribute_fixes, a[0]
                        a[0] = svg_attribute_fixes[a[0]]
        return
# Rewrite prefixed attribute names (xlink:*, xml:*, xmlns*) on token t to the
# forms listed in foreign_attr_fixes. Entries of t.attrs_a are [name, value]
# pairs and are renamed in place.
adjust_foreign_attributes = (t) ->
        # fixfull
        for a in t.attrs_a
                # Only the table's own keys count: a plain `table[name]?` test
                # would also find inherited members such as `constructor` and
                # replace the attribute name with a non-string value.
                if Object::hasOwnProperty.call foreign_attr_fixes, a[0]
                        a[0] = foreign_attr_fixes[a[0]]
        return
553
554 # decode_named_char_ref()
555 #
556 # The list of named character references is _huge_ so ask the browser to decode
557 # for us instead of wasting bandwidth/space on including the table here.
558 #
559 # Pass without the "&" but with the ";" examples:
560 #    for "&amp" pass "amp;"
561 #    for "&#x2032" pass "#x2032;"
# state for decode_named_char_ref(): results already decoded, plus the
# textarea element the decoding is delegated to
g_dncr = {
        cache: {}
        textarea: document.createElement('textarea')
}
# TODO test this in IE8
# Decode one character reference given without its leading "&".
# Returns the decoded text, or null when the textarea hands the input back
# unchanged. Successful results are cached; failures are not.
decode_named_char_ref = (txt) ->
        entity = "&#{txt}"
        cached = g_dncr.cache[entity]
        if cached?
                return cached
        # write the reference in as markup, then read it back as plain text
        g_dncr.textarea.innerHTML = entity
        result = g_dncr.textarea.value
        if result is entity
                return null
        g_dncr.cache[entity] = result
        return result
575
576 parse_html = (args) ->
577         txt = null
578         cur = null # index of next char in txt to be parsed
579         # declare doc and tokenizer variables so they're in scope below
580         doc = null
581         open_els = null # stack of open elements
582         afe = null # active formatting elements
583         template_ins_modes = null
584         ins_mode = null
585         original_ins_mode = null
586         tok_state = null
587         tok_cur_tag = null # partially parsed tag
588         flag_scripting = null
589         flag_frameset_ok = null
590         flag_parsing = null
591         flag_foster_parenting = null
592         form_element_pointer = null
593         temporary_buffer = null
594         pending_table_character_tokens = null
595         head_element_pointer = null
596         flag_fragment_parsing = null
597         context_element = null
598
599         stop_parsing = ->
600                 flag_parsing = false
601
602         parse_error = ->
603                 if args.error_cb?
604                         args.error_cb cur
605                 else
606                         console.log "Parse error at character #{cur} of #{txt.length}"
607
608         afe_push = (new_el) ->
609                 matches = 0
610                 for el, i in afe
611                         if el.name is new_el.name and el.namespace is new_el.namespace
612                                 for k, v of el.attrs
613                                         continue unless new_el.attrs[k] is v
614                                 for k, v of new_el.attrs
615                                         continue unless el.attrs[k] is v
616                                 matches += 1
617                                 if matches is 3
618                                         afe.splice i, 1
619                                         break
620                 afe.unshift new_el
621         afe_push_marker = ->
622                 afe.unshift new_afe_marker()
623
624         # the functions below implement the Tree Construction algorithm
625         # http://www.w3.org/TR/html5/syntax.html#tree-construction
626
627         # But first... the helpers
628         template_tag_is_open = ->
629                 for t in open_els
630                         if t.name is 'template' and t.namespace is NS_HTML
631                                 return true
632                 return false
633         is_in_scope_x = (tag_name, scope, namespace) ->
634                 for t in open_els
635                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
636                                 return true
637                         if scope[t.name] is t.namespace
638                                 return false
639                 return false
640         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
641                 for t in open_els
642                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
643                                 return true
644                         if scope[t.name] is t.namespace
645                                 return false
646                         if scope2[t.name] is t.namespace
647                                 return false
648                 return false
649         standard_scopers = {
650                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
651                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
652                 template: NS_HTML,
653
654                 mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
655                 mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
656
657                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
658         }
659         button_scopers = button: NS_HTML
660         li_scopers = ol: NS_HTML, ul: NS_HTML
661         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
662         is_in_scope = (tag_name, namespace = null) ->
663                 return is_in_scope_x tag_name, standard_scopers, namespace
664         is_in_button_scope = (tag_name, namespace = null) ->
665                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
666         is_in_table_scope = (tag_name, namespace = null) ->
667                 return is_in_scope_x tag_name, table_scopers, namespace
668         # aka is_in_list_item_scope
669         is_in_li_scope = (tag_name, namespace = null) ->
670                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
671         is_in_select_scope = (tag_name, namespace = null) ->
672                 for t in open_els
673                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
674                                 return true
675                         if t.namespace isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
676                                 return false
677                 return false
678         # this checks for a particular element, not by name
679         # this requires a namespace match
680         el_is_in_scope = (needle) ->
681                 for el in open_els
682                         if el is needle
683                                 return true
684                         if standard_scopers[el.name] is el.namespace
685                                 return false
686                 return false
687
688         clear_to_table_stopers = {
689                 'table': true
690                 'template': true
691                 'html': true
692         }
693         clear_stack_to_table_context = ->
694                 loop
695                         if clear_to_table_stopers[open_els[0].name]?
696                                 break
697                         open_els.shift()
698                 return
699         clear_to_table_body_stopers =
700                 tbody: NS_HTML
701                 tfoot: NS_HTML
702                 thead: NS_HTML
703                 template: NS_HTML
704                 html: NS_HTML
705         clear_stack_to_table_body_context = ->
                    # Shifts elements off the front of open_els until the current node's
                    # name maps, in clear_to_table_body_stopers, to that node's own
                    # namespace (i.e. an HTML tbody, tfoot, thead, template or html).
706                 open_els.shift() until clear_to_table_body_stopers[open_els[0].name] is open_els[0].namespace
707                 return
712         clear_to_table_row_stopers = {
713                 tr: NS_HTML
714                 template: NS_HTML
715                 html: NS_HTML
716         }
717         clear_stack_to_table_row_context = ->
                    # "Clear the stack back to a table row context": shifts elements off
                    # the front of open_els until the current node is a tr, template or
                    # html element. The namespace is compared as well (same approach as
                    # clear_stack_to_table_body_context) so that a same-named element
                    # from another namespace does not stop the clearing.
718                 loop
719                         if clear_to_table_row_stopers[open_els[0].name] is open_els[0].namespace
720                                 break
721                         open_els.shift()
722                 return
723         clear_afe_to_marker = ->
                    # Shifts entries off the front of afe up to and including the first
                    # marker entry. Running out of entries before a marker is found just
                    # ends the loop (this happens in fragment case, ?spec error).
724                 while afe.length > 0
725                         el = afe.shift()
726                         break if el.type is TYPE_AFE_MARKER
727                 return
730
731         # 8.2.3.1 ...
732         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
733         reset_ins_mode = ->
                    # Sets ins_mode by walking open_els from the current node (index 0)
                    # toward the far end of the stack and stopping at the first element
                    # that determines a mode. When the far end is reached during fragment
                    # parsing, context_element is examined in place of that element.
                    # Takes no arguments; the result is the assignment to ins_mode.
734                 # 1. Let last be false.
735                 last = false
736                 # 2. Let node be the last node in the stack of open elements.
737                 node_i = 0
738                 node = open_els[node_i]
739                 # 3. Loop: If node is the first node in the stack of open elements,
740                 # then set last to true, and, if the parser was originally created as
741                 # part of the HTML fragment parsing algorithm (fragment case) set node
742                 # to the context element.
743                 loop
744                         if node_i is open_els.length - 1
745                                 last = true
746                                 if flag_fragment_parsing
747                                         node = context_element
748                         # 4. If node is a select element, run these substeps:
749                         if node.name is 'select' and node.namespace is NS_HTML
750                                 # 1. If last is true, jump to the step below labeled done.
751                                 unless last
752                                         # 2. Let ancestor be node.
753                                         ancestor_i = node_i
754                                         ancestor = node
755                                         # 3. Loop: If ancestor is the first node in the stack of
756                                         # open elements, jump to the step below labeled done.
757                                         loop
758                                                 if ancestor_i is open_els.length - 1
759                                                         break
760                                                 # 4. Let ancestor be the node before ancestor in the stack
761                                                 # of open elements.
762                                                 ancestor_i += 1
763                                                 ancestor = open_els[ancestor_i]
764                                                 # 5. If ancestor is a template node, jump to the step below
765                                                 # labeled done.
766                                                 if ancestor.name is 'template' and ancestor.namespace is NS_HTML
767                                                         break
768                                                 # 6. If ancestor is a table node, switch the insertion mode
769                                                 # to "in select in table" and abort these steps.
770                                                 if ancestor.name is 'table' and ancestor.namespace is NS_HTML
771                                                         ins_mode = ins_mode_in_select_in_table
772                                                         return
773                                                 # 7. Jump back to the step labeled loop.
774                                 # 8. Done: Switch the insertion mode to "in select" and abort
775                                 # these steps.
776                                 ins_mode = ins_mode_in_select
777                                 return
778                         # 5. If node is a td or th element and last is false, then switch
779                         # the insertion mode to "in cell" and abort these steps.
780                         if (node.name is 'td' or node.name is 'th') and node.namespace is NS_HTML and last is false
781                                 ins_mode = ins_mode_in_cell
782                                 return
783                         # 6. If node is a tr element, then switch the insertion mode to "in
784                         # row" and abort these steps.
785                         if node.name is 'tr' and node.namespace is NS_HTML
786                                 ins_mode = ins_mode_in_row
787                                 return
788                         # 7. If node is a tbody, thead, or tfoot element, then switch the
789                         # insertion mode to "in table body" and abort these steps.
790                         if (node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot') and node.namespace is NS_HTML
791                                 ins_mode = ins_mode_in_table_body
792                                 return
793                         # 8. If node is a caption element, then switch the insertion mode
794                         # to "in caption" and abort these steps.
795                         if node.name is 'caption' and node.namespace is NS_HTML
796                                 ins_mode = ins_mode_in_caption
797                                 return
798                         # 9. If node is a colgroup element, then switch the insertion mode
799                         # to "in column group" and abort these steps.
800                         if node.name is 'colgroup' and node.namespace is NS_HTML
801                                 ins_mode = ins_mode_in_column_group
802                                 return
803                         # 10. If node is a table element, then switch the insertion mode to
804                         # "in table" and abort these steps.
805                         if node.name is 'table' and node.namespace is NS_HTML
806                                 ins_mode = ins_mode_in_table
807                                 return
808                         # 11. If node is a template element, then switch the insertion mode
809                         # to the current template insertion mode and abort these steps.
810                         if node.name is 'template' and node.namespace is NS_HTML
811                                 ins_mode = template_ins_modes[0]
812                                 return
813                         # 12. If node is a head element and last is true, then switch the
814                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
815                         # these steps. (fragment case)
816                         if node.name is 'head' and node.namespace is NS_HTML and last
817                                 ins_mode = ins_mode_in_body
818                                 return
819                         # 13. If node is a head element and last is false, then switch the
820                         # insertion mode to "in head" and abort these steps.
821                         if node.name is 'head' and node.namespace is NS_HTML and last is false
822                                 ins_mode = ins_mode_in_head
823                                 return
824                         # 14. If node is a body element, then switch the insertion mode to
825                         # "in body" and abort these steps.
826                         if node.name is 'body' and node.namespace is NS_HTML
827                                 ins_mode = ins_mode_in_body
828                                 return
829                         # 15. If node is a frameset element, then switch the insertion mode
830                         # to "in frameset" and abort these steps. (fragment case)
831                         if node.name is 'frameset' and node.namespace is NS_HTML
832                                 ins_mode = ins_mode_in_frameset
833                                 return
834                         # 16. If node is an html element, run these substeps:
835                         if node.name is 'html' and node.namespace is NS_HTML
836                                 # 1. If the head element pointer is null, switch the insertion
837                                 # mode to "before head" and abort these steps. (fragment case)
838                                 if head_element_pointer is null
839                                         ins_mode = ins_mode_before_head
840                                 else
841                                         # 2. Otherwise, the head element pointer is not null,
842                                         # switch the insertion mode to "after head" and abort these
843                                         # steps.
844                                         ins_mode = ins_mode_after_head
845                                 return
846                         # 17. If last is true, then switch the insertion mode to "in body"
847                         # and abort these steps. (fragment case)
848                         if last
849                                 ins_mode = ins_mode_in_body
850                                 return
851                         # 18. Let node now be the node before node in the stack of open
852                         # elements.
853                         node_i += 1
854                         node = open_els[node_i]
855                         # 19. Return to the step labeled loop.
856
857         # 8.2.3.2
858
859         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
860         adjusted_current_node = ->
                    # The current node is open_els[0], except that during fragment parsing
                    # with exactly one open element the context element is returned.
861                 return open_els[0] unless open_els.length is 1 and flag_fragment_parsing
862                 context_element
864
865         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
866         # Laid out (mostly) in the same order as the steps at the link above;
867         # the capitalized comments below are that page's step labels.
868         reconstruct_afe = ->
869                 return unless afe.length > 0
870                 return if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
871                 # Rewind: move i away from the front of afe until the next entry is
872                 # a marker or is already open, or the far end of the list is reached
873                 i = 0
874                 until i is afe.length - 1
875                         i += 1
876                         continue unless afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
877                         i -= 1 # Advance
878                         break
879                 # Create: walk back toward index 0, replacing each entry with a
880                 # freshly inserted element built from that entry's token
881                 loop
882                         el = insert_html_element afe[i].token
883                         afe[i] = el
884                         break if i is 0
885                         i -= 1 # Advance
887
888         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
889         # adoption agency algorithm
890         # overview here:
891         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
892         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
893         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
894         adoption_agency = (subject) ->
                    # subject is a tag name: it is compared against element .name values
                    # and is handed to in_body_any_other_end_tag when afe holds no
                    # matching formatting element. The function works in place on
                    # open_els, afe and the parent/children links of the elements it
                    # moves. Abbreviations used below: fe = formatting element,
                    # fb = furthest block, ca = common ancestor.
895                 debug_log "adoption_agency()"
896                 debug_log "tree: #{serialize_els doc.children, false, true}"
897                 debug_log "open_els: #{serialize_els open_els, true, true}"
898                 debug_log "afe: #{serialize_els afe, true, true}"
899 # this commented-out block implements the W3C spec (the WHATWG version below is the one in use)
900 #               # 1. If the current node is an HTML element whose tag name is subject,
901 #               # then run these substeps:
902 #               #
903 #               # 1. Let element be the current node.
904 #               #
905 #               # 2. Pop element off the stack of open elements.
906 #               #
907 #               # 3. If element is also in the list of active formatting elements,
908 #               # remove the element from the list.
909 #               #
910 #               # 4. Abort the adoption agency algorithm.
911 #               if open_els[0].name is subject and open_els[0].namespace is NS_HTML
912 #                       el = open_els.shift()
913 #                       # remove it from the list of active formatting elements (if found)
914 #                       for t, i in afe
915 #                               if t is el
916 #                                       afe.splice i, 1
917 #                                       break
918 #                       debug_log "aaa: starting off with subject on top of stack, exiting"
919 #                       return
920 # WHATWG: https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
921                 # If the current node is an HTML element whose tag name is subject, and
922                 # the current node is not in the list of active formatting elements,
923                 # then pop the current node off the stack of open elements, and abort
924                 # these steps.
925                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
926                         debug_log "aaa: starting off with subject on top of stack, exiting"
927                         # check whether the current node is in the list of active formatting elements (nothing is removed from afe here)
928                         in_afe = false
929                         for el, i in afe
930                                 if el is open_els[0]
931                                         in_afe = true
932                                         break
933                         unless in_afe
934                                 debug_log "aaa: ...and not in afe, aaa done"
935                                 open_els.shift()
936                                 return
937                         # fall through: the current node is in afe, so continue with the steps below
938 # END WHATWG
939                 outer = 0 # outer loop counter
940                 loop
941                         if outer >= 8 # stop once the outer loop body has already run 8 times
942                                 return
943                         outer += 1
944                         # 5. Let formatting element be the last element in the list of
945                         # active formatting elements that: is between the end of the list
946                         # and the last scope marker in the list, if any, or the start of
947                         # the list otherwise, and has the tag name subject.
948                         fe = null
949                         for t, fe_of_afe in afe
950                                 if t.type is TYPE_AFE_MARKER
951                                         break
952                                 if t.name is subject
953                                         fe = t
954                                         break
955                         # If there is no such element, then abort these steps and instead
956                         # act as described in the "any other end tag" entry above.
957                         if fe is null
958                                 debug_log "aaa: fe not found in afe"
959                                 in_body_any_other_end_tag subject
960                                 return
961                         # 6. If formatting element is not in the stack of open elements,
962                         # then this is a parse error; remove the element from the list, and
963                         # abort these steps.
964                         in_open_els = false
965                         for t, fe_of_open_els in open_els
966                                 if t is fe
967                                         in_open_els = true
968                                         break
969                         unless in_open_els
970                                 debug_log "aaa: fe not found in open_els"
971                                 parse_error()
972                                 # "remove it from the list" must mean afe, since it's not in open_els
973                                 afe.splice fe_of_afe, 1
974                                 return
975                         # 7. If formatting element is in the stack of open elements, but
976                         # the element is not in scope, then this is a parse error; abort
977                         # these steps.
978                         unless el_is_in_scope fe
979                                 debug_log "aaa: fe not in scope"
980                                 parse_error()
981                                 return
982                         # 8. If formatting element is not the current node, this is a parse
983                         # error. (But do not abort these steps.)
984                         unless open_els[0] is fe
985                                 parse_error()
986                                 # continue
987                         # 9. Let furthest block be the topmost node in the stack of open
988                         # elements that is lower in the stack than formatting element, and
989                         # is an element in the special category. There might not be one.
990                         fb = null
991                         fb_of_open_els = null
992                         for t, i in open_els
993                                 if t is fe
994                                         break
995                                 if el_is_special t
996                                         fb = t
997                                         fb_of_open_els = i
998                                         # and continue, to see if there's one that's more "topmost"
999                         # 10. If there is no furthest block, then the UA must first pop all
1000                         # the nodes from the bottom of the stack of open elements, from the
1001                         # current node up to and including formatting element, then remove
1002                         # formatting element from the list of active formatting elements,
1003                         # and finally abort these steps.
1004                         if fb is null
1005                                 debug_log "aaa: no fb"
1006                                 loop
1007                                         t = open_els.shift()
1008                                         if t is fe
1009                                                 afe.splice fe_of_afe, 1
1010                                                 return
1011                         # 11. Let common ancestor be the element immediately above
1012                         # formatting element in the stack of open elements.
1013                         ca = open_els[fe_of_open_els + 1] # common ancestor
1014
1015                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
1016                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1017                         bookmark = new_aaa_bookmark()
1018                         for t, i in afe
1019                                 if t is fe
1020                                         afe.splice i, 0, bookmark
1021                                         break
1022                         node = last_node = fb # 13. node and last node both start out as furthest block
1023                         inner = 0 # inner loop counter
1024                         loop
1025                                 inner += 1
1026                                 # 3. Let node be the element immediately above node in the
1027                                 # stack of open elements, or if node is no longer in the stack
1028                                 # of open elements (e.g. because it got removed by this
1029                                 # algorithm), the element that was immediately above node in
1030                                 # the stack of open elements before node was removed.
1031                                 node_next = null
1032                                 for t, i in open_els
1033                                         if t is node
1034                                                 node_next = open_els[i + 1]
1035                                                 break
1036                                 node = node_next ? node_above
1037                                 debug_log "inner loop #{inner}"
1038                                 debug_log "tree: #{serialize_els doc.children, false, true}"
1039                                 debug_log "open_els: #{serialize_els open_els, true, true}"
1040                                 debug_log "afe: #{serialize_els afe, true, true}"
1041                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1042                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1043                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1044                                 debug_log "node: #{node.serialize true, true}"
1045                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
1046
1047                                 # 4. If node is formatting element, then go to the next step in
1048                                 # the overall algorithm.
1049                                 if node is fe
1050                                         break
1051                                 debug_log "the meat"
1052                                 # 5. If inner loop counter is greater than three and node is in
1053                                 # the list of active formatting elements, then remove node from
1054                                 # the list of active formatting elements.
1055                                 node_in_afe = false
1056                                 for t, i in afe
1057                                         if t is node
1058                                                 if inner > 3
1059                                                         afe.splice i, 1
1060                                                         debug_log "max out inner"
1061                                                 else
1062                                                         node_in_afe = true
1063                                                         debug_log "in afe"
1064                                                 break
1065                                 # 6. If node is not in the list of active formatting elements,
1066                                 # then remove node from the stack of open elements and then go
1067                                 # back to the step labeled inner loop.
1068                                 unless node_in_afe
1069                                         debug_log "not in afe"
1070                                         for t, i in open_els
1071                                                 if t is node
1072                                                         node_above = open_els[i + 1]
1073                                                         open_els.splice i, 1
1074                                                         break
1075                                         continue
1076                                 debug_log "the bones"
1077                                 # 7. create an element for the token for which the element node
1078                                 # was created, in the HTML namespace, with common ancestor as
1079                                 # the intended parent; replace the entry for node in the list
1080                                 # of active formatting elements with an entry for the new
1081                                 # element, replace the entry for node in the stack of open
1082                                 # elements with an entry for the new element, and let node be
1083                                 # the new element.
1084                                 new_node = token_to_element node.token, NS_HTML, ca
1085                                 for t, i in afe
1086                                         if t is node
1087                                                 afe[i] = new_node
1088                                                 debug_log "replaced in afe"
1089                                                 break
1090                                 for t, i in open_els
1091                                         if t is node
1092                                                 node_above = open_els[i + 1]
1093                                                 open_els[i] = new_node
1094                                                 debug_log "replaced in open_els"
1095                                                 break
1096                                 node = new_node
1097                                 # 8. If last node is furthest block, then move the
1098                                 # aforementioned bookmark to be immediately after the new node
1099                                 # in the list of active formatting elements.
1100                                 if last_node is fb
1101                                         for t, i in afe
1102                                                 if t is bookmark
1103                                                         afe.splice i, 1
1104                                                         debug_log "removed bookmark"
1105                                                         break
1106                                         for t, i in afe
1107                                                 if t is node
1108                                                         # "after" means lower
1109                                                         afe.splice i, 0, bookmark # bookmark takes index i, node shifts to i + 1
1110                                                         debug_log "placed bookmark after node"
1111                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
1112                                                         break
1113                                 # 9. Insert last node into node, first removing it from its
1114                                 # previous parent node if any.
1115                                 if last_node.parent?
1116                                         debug_log "last_node has parent"
1117                                         for c, i in last_node.parent.children
1118                                                 if c is last_node
1119                                                         debug_log "removing last_node from parent"
1120                                                         last_node.parent.children.splice i, 1
1121                                                         break
1122                                 node.children.push last_node
1123                                 last_node.parent = node
1124                                 # 10. Let last node be node.
1125                                 last_node = node
1126                                 debug_log "at last"
1127                                 # 11. Return to the step labeled inner loop.
1128                         # 14. Insert whatever last node ended up being in the previous step
1129                         # at the appropriate place for inserting a node, but using common
1130                         # ancestor as the override target.
1131
1132                         # In the case where fe is immediately followed by fb:
1133                         #   * inner loop exits out early (node==fe)
1134                         #   * last_node is fb
1135                         #   * last_node is still in the tree (not a duplicate)
1136                         if last_node.parent?
1137                                 debug_log "FEFIRST? last_node has parent"
1138                                 for c, i in last_node.parent.children
1139                                         if c is last_node
1140                                                 debug_log "removing last_node from parent"
1141                                                 last_node.parent.children.splice i, 1
1142                                                 break
1143
1144                         debug_log "after aaa inner loop"
1145                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1146                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1147                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1148                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1149                         debug_log "tree: #{serialize_els doc.children, false, true}"
1150
1151                         debug_log "insert"
1152
1153
1154                         # can't use standard insert token thing, because it's already in
1155                         # open_els and must stay at its current position in open_els
1156                         dest = adjusted_insertion_location ca
1157                         dest[0].children.splice dest[1], 0, last_node
1158                         last_node.parent = dest[0]
1159
1160
1161                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
1162                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
1163                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
1164                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
1165                         debug_log "tree: #{serialize_els doc.children, false, true}"
1166
1167                         # 15. Create an element for the token for which formatting element
1168                         # was created, in the HTML namespace, with furthest block as the
1169                         # intended parent.
1170                         new_element = token_to_element fe.token, NS_HTML, fb
1171                         # 16. Take all of the child nodes of furthest block and append them
1172                         # to the element created in the last step.
1173                         while fb.children.length
1174                                 t = fb.children.shift()
1175                                 t.parent = new_element
1176                                 new_element.children.push t
1177                         # 17. Append that new element to furthest block.
1178                         new_element.parent = fb
1179                         fb.children.push new_element
1180                         # 18. Remove formatting element from the list of active formatting
1181                         # elements, and insert the new element into the list of active
1182                         # formatting elements at the position of the aforementioned
1183                         # bookmark.
1184                         for t, i in afe
1185                                 if t is fe
1186                                         afe.splice i, 1
1187                                         break
1188                         for t, i in afe
1189                                 if t is bookmark
1190                                         afe[i] = new_element
1191                                         break
1192                         # 19. Remove formatting element from the stack of open elements,
1193                         # and insert the new element into the stack of open elements
1194                         # immediately below the position of furthest block in that stack.
1195                         for t, i in open_els
1196                                 if t is fe
1197                                         open_els.splice i, 1
1198                                         break
1199                         for t, i in open_els
1200                                 if t is fb
1201                                         open_els.splice i, 0, new_element
1202                                         break
1203                         # 20. Jump back to the step labeled outer loop.
1204                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
1205                         debug_log "tree: #{serialize_els doc.children, false, true}"
1206                         debug_log "open_els: #{serialize_els open_els, true, true}"
1207                         debug_log "afe: #{serialize_els afe, true, true}"
1208                 debug_log "AAA DONE"
1209
1210         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
# Pop the stack of open elements down through the nearest <p>.
#
# Implied end tags (for everything except <p>) are generated first. If that
# does not leave a <p> as the current node, a parse error is recorded before
# popping.
close_p_element = ->
        generate_implied_end_tags 'p' # 'p' is the one tag that stays open here
        top_el = open_els[0]
        parse_error() unless top_el.name is 'p' and top_el.namespace is NS_HTML
        # the last remaining entry is never popped, even if no <p> turns up
        until open_els.length <= 1
                popped_el = open_els.shift()
                return if popped_el.name is 'p' and popped_el.namespace is NS_HTML
        return
# Close the open <p> element, but only when one is in button scope.
close_p_if_in_button_scope = ->
        return unless is_in_button_scope 'p', NS_HTML
        close_p_element()
1222
1223         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
1224         # aka insert_a_character = (t) ->
# Insert text token t at the adjusted insertion location.
#
# When the node just before the insertion point is already a text node, t's
# text is appended to it instead of adding a new child.
insert_character = (t) ->
        [parent_node, insert_at] = adjusted_insertion_location()
        # fixfull check for Document node
        if insert_at > 0
                preceding = parent_node.children[insert_at - 1]
                if preceding.type is TYPE_TEXT
                        preceding.text += t.text
                        return
        parent_node.children.splice insert_at, 0, t
1234
1235
1236         # 8.2.5 http://www.w3.org/TR/html5/syntax.html#tree-construction
# Tree construction dispatcher: hand token t either to the current insertion
# mode (ins_mode) or to the rules for foreign content.
#
# The insertion mode is used when any of these hold:
#   * there is no adjusted current node
#   * the adjusted current node is in the HTML namespace
#   * it is a MathML text integration point and t is text or a start tag
#     other than mglyph/malignmark
#   * it is a MathML annotation-xml element and t is an svg start tag
#   * it is an HTML integration point and t is a start tag or text
#   * t is end-of-file
process_token = (t) ->
        acn = adjusted_current_node()
        is_start_tag = t.type is TYPE_START_TAG
        is_text_tok = t.type is TYPE_TEXT
        use_html_rules = (not acn?) or
                acn.namespace is NS_HTML or
                (is_mathml_text_integration_point(acn) and ((is_start_tag and not (t.name is 'mglyph' or t.name is 'malignmark')) or is_text_tok)) or
                (acn.namespace is NS_MATHML and acn.name is 'annotation-xml' and is_start_tag and t.name is 'svg') or
                (is_html_integration(acn) and (is_start_tag or is_text_tok)) or
                t.type is TYPE_EOF
        if use_html_rules
                ins_mode t
        else
                in_foreign_content t
        return
1264
1265         # 8.2.5.1
1266         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1267         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
# Find the "appropriate place for inserting a node".
#
# override_target: optional element to use in place of the current node
#
# Returns a two-item array [parent, index]. Callers insert the new node with
#   parent.children.splice index, 0, node
adjusted_insertion_location = (override_target = null) ->
        # 1. If there was an override target specified, then let target be the
        # override target. Otherwise, let target be the current node.
        target = override_target ? open_els[0]
        # 2. Determine the adjusted insertion location using the first matching
        # steps from the following list:
        #
        # If foster parenting is enabled and target is a table, tbody, tfoot,
        # thead, or tr element Foster parenting happens when content is
        # misnested in tables.
        if flag_foster_parenting and foster_parenting_targets[target.name] is target.namespace
                loop # once. this is here so we can ``break`` to "abort these substeps"
                        # 1. Let last template be the last template element in the
                        # stack of open elements, if any.
                        # (open_els[0] is the current node, so the first match
                        # is the most recently opened one)
                        last_template = null
                        last_template_i = null
                        for el, i in open_els
                                if el.name is 'template' and el.namespace is NS_HTML
                                        last_template = el
                                        last_template_i = i
                                        break
                        # 2. Let last table be the last table element in the stack of
                        # open elements, if any.
                        last_table = null
                        last_table_i = null
                        for el, i in open_els
                                if el.name is 'table' and el.namespace is NS_HTML
                                        last_table = el
                                        last_table_i = i
                                        break
                        # 3. If there is a last template and either there is no last
                        # table, or there is one, but last template is lower (more
                        # recently added) than last table in the stack of open
                        # elements, then: let adjusted insertion location be inside
                        # last template's template contents, after its last child (if
                        # any), and abort these substeps.
                        if last_template and (last_table is null or last_template_i < last_table_i)
                                target = last_template # fixfull should be its contents
                                target_i = target.children.length
                                break
                        # 4. If there is no last table, then let adjusted insertion
                        # location be inside the first element in the stack of open
                        # elements (the html element), after its last child (if any),
                        # and abort these substeps. (fragment case)
                        if last_table is null
                                # this is odd
                                target = open_els[open_els.length - 1]
                                target_i = target.children.length
                                break
                        # 5. If last table has a parent element, then let adjusted
                        # insertion location be inside last table's parent element,
                        # immediately before last table, and abort these substeps.
                        if last_table.parent?
                                table_i = last_table.parent.children.indexOf last_table
                                if table_i >= 0
                                        target = last_table.parent
                                        target_i = table_i
                                        break
                                # The parent link does not match the parent's child
                                # list. Carry on with steps 6 and 7 rather than
                                # returning an undefined index.
                        # 6. Let previous element be the element immediately above last
                        # table in the stack of open elements.
                        #
                        # huh? how could it not have a parent?
                        previous_element = open_els[last_table_i + 1]
                        # 7. Let adjusted insertion location be inside previous
                        # element, after its last child (if any).
                        target = previous_element
                        target_i = target.children.length
                        # Note: These steps are involved in part because it's possible
                        # for elements, the table element in this case in particular,
                        # to have been moved by a script around in the DOM, or indeed
                        # removed from the DOM entirely, after the element was inserted
                        # by the parser.
                        break # don't really loop
        else
                # Otherwise Let adjusted insertion location be inside target, after
                # its last child (if any).
                target_i = target.children.length

        # 3. If the adjusted insertion location is inside a template element,
        # let it instead be inside the template element's template contents,
        # after its last child (if any).
        # fixfull (template)

        # 4. Return the adjusted insertion location.
        return [target, target_i]
1357
1358         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1359         # aka create_an_element_for_token
# Build an element Node from a tag token. The node is only created here;
# putting it into the tree is left to the caller.
#
# t: tag token (t.name, and t.attrs_a as a list of [name, value] pairs)
# namespace: namespace to give the new element
# intended_parent: accepted to mirror the spec's signature; not used yet
token_to_element = (t, namespace, intended_parent) ->
        # flatten the [name, value] pairs into a lookup object
        attr_map = {}
        for [attr_name, attr_value] in t.attrs_a
                attr_map[attr_name] = attr_value # TODO check what to do with duplicate attrs

        # TODO 2. If the newly created element has an xmlns attribute in the
        # XMLNS namespace whose value is not exactly the same as the element's
        # namespace, that is a parse error. Similarly, if the newly created
        # element has an xmlns:xlink attribute in the XMLNS namespace whose
        # value is not the XLink Namespace, that is a parse error.

        # fixfull: the spec says stuff about form pointers and ownerDocument

        new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attr_map, token: t
1376
1377         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
# Create an element for token in the given namespace, link it into the tree
# at the adjusted insertion location and make it the current node.
#
# Returns the new element.
insert_foreign_element = (token, namespace) ->
        [parent_el, child_i] = adjusted_insertion_location()
        new_el = token_to_element token, namespace, parent_el
        # TODO skip this next step if it's broken (eg parent_el is document with child already)
        new_el.parent = parent_el
        parent_el.children.splice child_i, 0, new_el
        open_els.unshift new_el
        new_el
1388         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
# Same as insert_foreign_element, with the namespace fixed to HTML.
# Returns the new element.
insert_html_element = (token) ->
        return insert_foreign_element token, NS_HTML
1391
1392         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1393         # position should be [node, index_within_children]
# Insert comment token t into the tree.
#
# position: optional [node, index_within_children]; when omitted the adjusted
# insertion location is used
insert_comment = (t, position = null) ->
        [container, at_index] = position ? adjusted_insertion_location()
        container.children.splice at_index, 0, t
1397
1398         # 8.2.5.2
1399         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
# Generic raw text element parsing: insert an HTML element for t, put the
# tokenizer in its RAWTEXT state and switch to the "text" insertion mode.
parse_generic_raw_text = (t) ->
        insert_html_element t
        # saved so the insertion mode in use now can be restored later
        original_ins_mode = ins_mode
        tok_state = tok_state_rawtext
        ins_mode = ins_mode_text
# Generic RCDATA element parsing: insert an HTML element for t, put the
# tokenizer in its RCDATA state and switch to the "text" insertion mode.
parse_generic_rcdata_text = (t) ->
        insert_html_element t
        # saved so the insertion mode in use now can be restored later
        original_ins_mode = ins_mode
        tok_state = tok_state_rcdata
        ins_mode = ins_mode_text
1410
1411         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1412         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
# Pop elements off the stack of open elements for as long as the current
# node is listed in end_tag_implied (for its namespace).
#
# except: optional tag name; popping stops when the current node has this name
generate_implied_end_tags = (except = null) ->
        until end_tag_implied[open_els[0].name] isnt open_els[0].namespace or open_els[0].name is except
                open_els.shift()
1416
1417         # 8.2.5.4 The rules for parsing tokens in HTML content
1418         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1419
1420         # 8.2.5.4.1 The "initial" insertion mode
1421         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
# Token handler for the "initial" insertion mode.
#
# Whitespace is dropped, comments and doctypes are appended to the document.
# A doctype, or any other token, moves the parser on to "before html"; any
# other token is then reprocessed in that mode.
ins_mode_initial = (t) ->
        return if is_space_tok t
        switch t.type
                when TYPE_COMMENT
                        # ?fixfull
                        doc.children.push t
                when TYPE_DOCTYPE
                        # FIXME check identifiers, set quirks, etc
                        # fixfull
                        doc.children.push t
                        ins_mode = ins_mode_before_html
                else
                        # Anything else
                        #fixfull (iframe, quirks)
                        ins_mode = ins_mode_before_html
                        process_token t
        return
1440
1441         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
# Token handler for the "before html" insertion mode.
#
# Creates the root html element, either from an explicit <html> start tag or
# from a made-up one, and moves on to "before head". In the made-up case the
# token that triggered it is reprocessed afterwards.
ins_mode_before_html = (t) ->
        switch t.type
                when TYPE_DOCTYPE
                        parse_error()
                        return
                when TYPE_COMMENT
                        doc.children.push t
                        return
        return if is_space_tok t
        if t.type is TYPE_START_TAG and t.name is 'html'
                root_tok = t
                reprocess = false
        else
                # end tags other than these four are dropped with a parse error
                if t.type is TYPE_END_TAG and t.name not in ['head', 'body', 'html', 'br']
                        parse_error()
                        return
                # Anything else
                root_tok = new_open_tag 'html'
                reprocess = true
        root_el = token_to_element root_tok, NS_HTML, doc
        doc.children.push root_el
        open_els.unshift root_el
        # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
        # ?fixfull browsing context
        ins_mode = ins_mode_before_head
        process_token t if reprocess
        return
1473
1474         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
# Token handler for the "before head" insertion mode.
#
# Inserts the head element (from a <head> start tag, or a made-up one),
# records it in head_element_pointer and moves on to "in head".
ins_mode_before_head = (t) ->
        return if is_space_tok t
        switch t.type
                when TYPE_COMMENT
                        insert_comment t
                        return
                when TYPE_DOCTYPE
                        parse_error()
                        return
                when TYPE_START_TAG
                        if t.name is 'html'
                                ins_mode_in_body t
                                return
                        if t.name is 'head'
                                head_element_pointer = insert_html_element t
                                ins_mode = ins_mode_in_head
                                return
                when TYPE_END_TAG
                        # only these four carry on to "Anything else" below
                        unless t.name in ['head', 'body', 'html', 'br']
                                parse_error()
                                return
        # Anything else
        head_element_pointer = insert_html_element new_open_tag 'head'
        ins_mode = ins_mode_in_head
        process_token t
1504
1505         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
# "Anything else" branch of the "in head" insertion mode, split out so that
# several branches of ins_mode_in_head can share it.
ins_mode_in_head_else = (t) ->
        # the spec says the node popped here is the head element
        open_els.shift()
        ins_mode = ins_mode_after_head
        # let the new insertion mode handle the same token
        process_token t
# Token handler for the "in head" insertion mode.
#
# t: the token to process
#
# Handles the metadata elements that may appear in head (base, link, meta,
# title, style, script, noscript, template, ...). Tokens that do not belong
# in head are handed to ins_mode_in_head_else, which pops the head element
# and reprocesses the token in "after head".
ins_mode_in_head = (t) ->
        if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
                insert_character t
                return
        if t.type is TYPE_COMMENT
                insert_comment t
                return
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_START_TAG and t.name is 'html'
                ins_mode_in_body t
                return
        if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
                # inserted and popped again straight away
                insert_html_element t
                open_els.shift()
                t.acknowledge_self_closing()
                return
        if t.type is TYPE_START_TAG and t.name is 'meta'
                insert_html_element t
                open_els.shift()
                t.acknowledge_self_closing()
                # fixfull encoding stuff
                return
        if t.type is TYPE_START_TAG and t.name is 'title'
                parse_generic_rcdata_text t
                return
        if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or t.name is 'noframes' or t.name is 'style')
                parse_generic_raw_text t
                return
        if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
                insert_html_element t
                ins_mode = ins_mode_in_head_noscript
                return
        if t.type is TYPE_START_TAG and t.name is 'script'
                ail = adjusted_insertion_location()
                # ail is [node, index]; the intended parent is the node
                el = token_to_element t, NS_HTML, ail[0]
                el.flag 'parser-inserted', true
                # fixfull fragment case
                # this element is inserted by hand instead of through
                # insert_html_element, so its parent link is set here too
                el.parent = ail[0]
                ail[0].children.splice ail[1], 0, el
                open_els.unshift el
                tok_state = tok_state_script_data
                original_ins_mode = ins_mode # make sure orig... is defined
                ins_mode = ins_mode_text
                return
        if t.type is TYPE_END_TAG and t.name is 'head'
                open_els.shift() # will be a head element... spec says so
                ins_mode = ins_mode_after_head
                return
        if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
                ins_mode_in_head_else t
                return
        if t.type is TYPE_START_TAG and t.name is 'template'
                insert_html_element t
                afe_push_marker()
                flag_frameset_ok = false
                ins_mode = ins_mode_in_template
                template_ins_modes.unshift ins_mode_in_template
                return
        if t.type is TYPE_END_TAG and t.name is 'template'
                if template_tag_is_open()
                        # the parentheses matter: with no arguments, the bare
                        # name would only reference the function, not call it
                        generate_implied_end_tags()
                        if open_els[0].name isnt 'template'
                                parse_error()
                        # pop up to and including the template element
                        loop
                                el = open_els.shift()
                                if el.name is 'template' and el.namespace is NS_HTML
                                        break
                        clear_afe_to_marker()
                        template_ins_modes.shift()
                        reset_ins_mode()
                else
                        parse_error()
                return
        if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
                parse_error()
                return
        # Anything else
        ins_mode_in_head_else t
1588
1589         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
# "Anything else" branch of the "in head noscript" insertion mode: record a
# parse error, pop the current node, go back to "in head" and reprocess t.
ins_mode_in_head_noscript_else = (t) ->
        parse_error()
        open_els.shift()
        ins_mode = ins_mode_in_head
        # let "in head" handle the same token
        process_token t
# Token handler for the "in head noscript" insertion mode.
#
# The branches are tried in order and the first match wins.
ins_mode_in_head_noscript = (t) ->
        is_start_tag = t.type is TYPE_START_TAG
        is_end_tag = t.type is TYPE_END_TAG
        if t.type is TYPE_DOCTYPE
                parse_error()
        else if is_start_tag and t.name is 'html'
                ins_mode_in_body t
        else if is_end_tag and t.name is 'noscript'
                open_els.shift()
                ins_mode = ins_mode_in_head
        else if is_space_tok(t) or t.type is TYPE_COMMENT or (is_start_tag and t.name in ['basefont', 'bgsound', 'link', 'meta', 'noframes', 'style'])
                # these are handled exactly as they are in "in head"
                ins_mode_in_head t
        else if is_end_tag and t.name is 'br'
                ins_mode_in_head_noscript_else t
        else if (is_start_tag and (t.name is 'head' or t.name is 'noscript')) or is_end_tag
                parse_error()
        else
                # Anything else
                ins_mode_in_head_noscript_else t
        return
1618
1619
1620
1621         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
# "Anything else" branch of the "after head" insertion mode: insert a
# made-up body element, switch to "in body" and reprocess t there.
ins_mode_after_head_else = (t) ->
        insert_html_element new_open_tag 'body'
        ins_mode = ins_mode_in_body
        process_token t
        return
# Token handler for the "after head" insertion mode.
#
# t: the token to process
#
# Waits for the body (or frameset) to start. Head-only elements that show up
# here are a parse error, but are still processed with the "in head" rules,
# with the head element temporarily put back on the stack of open elements.
ins_mode_after_head = (t) ->
        if is_space_tok t
                insert_character t
                return
        if t.type is TYPE_COMMENT
                insert_comment t
                return
        if t.type is TYPE_DOCTYPE
                parse_error()
                return
        if t.type is TYPE_START_TAG and t.name is 'html'
                ins_mode_in_body t
                return
        if t.type is TYPE_START_TAG and t.name is 'body'
                insert_html_element t
                flag_frameset_ok = false
                ins_mode = ins_mode_in_body
                return
        if t.type is TYPE_START_TAG and t.name is 'frameset'
                insert_html_element t
                ins_mode = ins_mode_in_frameset
                return
        if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
                parse_error()
                # put head back on the stack while the "in head" rules run
                open_els.unshift head_element_pointer
                ins_mode_in_head t
                # head need not be the current node any more (eg <title> leaves
                # its own element open), so look it up instead of shifting.
                # indexOf compares the elements themselves; a "for ... of" loop
                # here would compare index strings and never match.
                head_i = open_els.indexOf head_element_pointer
                if head_i >= 0
                        open_els.splice head_i, 1
                else
                        console.log "warning: 23904 couldn't find head element in open_els"
                return
        if t.type is TYPE_END_TAG and t.name is 'template'
                ins_mode_in_head t
                return
        if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
                ins_mode_after_head_else t
                return
        if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
                parse_error()
                return
        # Anything else
        ins_mode_after_head_else t
1671
1672         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
# "Any other end tag" steps of the "in body" insertion mode.
# Factored out because the adoption agency algorithm calls it too.
#
# name: tag name of the end tag being processed
#
# Walks the stack of open elements starting at the current node:
#   * at the first HTML element called name: generate implied end tags
#     (except for name), record a parse error if that element is still not
#     the current node, then pop up to and including it
#   * at a special element that does not match: parse error, and the end tag
#     is ignored
in_body_any_other_end_tag = (name) ->
        for el in open_els
                if el.name is name and el.namespace is NS_HTML
                        generate_implied_end_tags name # arg is exception
                        # generate_implied_end_tags may have popped entries, so
                        # positions found before the call are stale. Compare
                        # and pop by element identity instead.
                        parse_error() unless open_els[0] is el
                        while open_els.length > 0
                                break if open_els.shift() is el
                        return
                if special_elements[el.name] is el.namespace
                        parse_error()
                        return
        return
1686         ins_mode_in_body = (t) ->
1687                 if t.type is TYPE_TEXT and t.text is "\u0000"
1688                         parse_error()
1689                         return
1690                 if is_space_tok t
1691                         reconstruct_afe()
1692                         insert_character t
1693                         return
1694                 if t.type is TYPE_TEXT
1695                         reconstruct_afe()
1696                         insert_character t
1697                         flag_frameset_ok = false
1698                         return
1699                 if t.type is TYPE_COMMENT
1700                         insert_comment t
1701                         return
1702                 if t.type is TYPE_DOCTYPE
1703                         parse_error()
1704                         return
1705                 if t.type is TYPE_START_TAG and t.name is 'html'
1706                         parse_error()
1707                         return if template_tag_is_open()
1708                         root_attrs = open_els[open_els.length - 1].attrs
1709                         for a in t.attrs_a
1710                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1711                         return
1712
1713                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1714                         ins_mode_in_head t
1715                         return
1716                 if t.type is TYPE_START_TAG and t.name is 'body'
1717                         parse_error()
1718                         return if open_els.length < 2
1719                         second = open_els[open_els.length - 2]
1720                         return unless second.namespace is NS_HTML
1721                         return unless second.name is 'body'
1722                         return if template_tag_is_open()
1723                         flag_frameset_ok = false
1724                         for a of t.attrs_a
1725                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1726                         return
1727                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1728                         parse_error()
1729                         return if open_els.length < 2
1730                         second_i = open_els.length - 2
1731                         second = open_els[second_i]
1732                         return unless second.namespace is NS_HTML
1733                         return unless second.name is 'body'
1734                         if flag_frameset_ok is false
1735                                 return
1736                         if second.parent?
1737                                 for el, i in second.parent.children
1738                                         if el is second
1739                                                 second.parent.children.splice i, 1
1740                                                 break
1741                         open_els.splice second_i, 1
1742                         # pop everything except the "root html element"
1743                         while open_els.length > 1
1744                                 open_els.shift()
1745                         insert_html_element t
1746                         ins_mode = ins_mode_in_frameset
1747                         return
1748                 if t.type is TYPE_EOF
1749                         ok_tags = {
1750                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1751                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1752                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1753                         }
1754                         for el in open_els
1755                                 unless ok_tags[t.name] is el.namespace
1756                                         parse_error()
1757                                         break
1758                         if template_ins_modes.length > 0
1759                                 ins_mode_in_template t
1760                         else
1761                                 stop_parsing()
1762                         return
1763                 if t.type is TYPE_END_TAG and t.name is 'body'
1764                         unless is_in_scope 'body', NS_HTML
1765                                 parse_error()
1766                                 return
1767                         ok_tags = {
1768                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1769                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1770                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1771                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1772                                 html:NS_HTML
1773                         }
1774                         for el in open_els
1775                                 unless ok_tags[t.name] is el.namespace
1776                                         parse_error()
1777                                         break
1778                         ins_mode = ins_mode_after_body
1779                         return
1780                 if t.type is TYPE_END_TAG and t.name is 'html'
1781                         unless is_in_scope 'body', NS_HTML
1782                                 parse_error()
1783                                 return
1784                         ok_tags = {
1785                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1786                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1787                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1788                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1789                                 html:NS_HTML
1790                         }
1791                         for el in open_els
1792                                 unless ok_tags[t.name] is el.namespace
1793                                         parse_error()
1794                                         break
1795                         ins_mode = ins_mode_after_body
1796                         process_token t
1797                         return
1798                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1799                         close_p_if_in_button_scope()
1800                         insert_html_element t
1801                         return
1802                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1803                         close_p_if_in_button_scope()
1804                         if h_tags[open_els[0].name] is open_els[0].namespace
1805                                 parse_error()
1806                                 open_els.shift()
1807                         insert_html_element t
1808                         return
1809                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1810                         close_p_if_in_button_scope()
1811                         insert_html_element t
1812                         # spec: If the next token is a "LF" (U+000A) character token, then
1813                         # ignore that token and move on to the next one. (Newlines at the
1814                         # start of pre blocks are ignored as an authoring convenience.)
1815                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1816                                 cur += 1
1817                         flag_frameset_ok = false
1818                         return
1819                 if t.type is TYPE_START_TAG and t.name is 'form'
1820                         unless form_element_pointer is null or template_tag_is_open()
1821                                 parse_error()
1822                                 return
1823                         close_p_if_in_button_scope()
1824                         el = insert_html_element t
1825                         unless template_tag_is_open()
1826                                 form_element_pointer = el
1827                         return
1828                 if t.type is TYPE_START_TAG and t.name is 'li'
1829                         flag_frameset_ok = false
1830                         for node in open_els
1831                                 if node.name is 'li' and node.namespace is NS_HTML
1832                                         generate_implied_end_tags 'li' # arg is exception
1833                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1834                                                 parse_error()
1835                                         loop
1836                                                 el = open_els.shift()
1837                                                 if el.name is 'li' and el.namespace is NS_HTML
1838                                                         break
1839                                         break
1840                                 if el_is_special_not_adp node
1841                                                 break
1842                         close_p_if_in_button_scope()
1843                         insert_html_element t
1844                         return
1845                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1846                         flag_frameset_ok = false
1847                         for node in open_els
1848                                 if node.name is 'dd' and node.namespace is NS_HTML
1849                                         generate_implied_end_tags 'dd' # arg is exception
1850                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1851                                                 parse_error()
1852                                         loop
1853                                                 el = open_els.shift()
1854                                                 if el.name is 'dd' and el.namespace is NS_HTML
1855                                                         break
1856                                         break
1857                                 if node.name is 'dt' and node.namespace is NS_HTML
1858                                         generate_implied_end_tags 'dt' # arg is exception
1859                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1860                                                 parse_error()
1861                                         loop
1862                                                 el = open_els.shift()
1863                                                 if el.name is 'dt' and el.namespace is NS_HTML
1864                                                         break
1865                                         break
1866                                 if el_is_special_not_adp node
1867                                         break
1868                         close_p_if_in_button_scope()
1869                         insert_html_element t
1870                         return
1871                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1872                         close_p_if_in_button_scope()
1873                         insert_html_element t
1874                         tok_state = tok_state_plaintext
1875                         return
1876                 if t.type is TYPE_START_TAG and t.name is 'button'
1877                         if is_in_scope 'button', NS_HTML
1878                                 parse_error()
1879                                 generate_implied_end_tags()
1880                                 loop
1881                                         el = open_els.shift()
1882                                         if el.name is 'button' and el.namespace is NS_HTML
1883                                                 break
1884                         reconstruct_afe()
1885                         insert_html_element t
1886                         flag_frameset_ok = false
1887                         return
1888                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1889                         unless is_in_scope t.name, NS_HTML
1890                                 parse_error()
1891                                 return
1892                         generate_implied_end_tags()
1893                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1894                                 parse_error()
1895                         loop
1896                                 el = open_els.shift()
1897                                 if el.name is t.name and el.namespace is NS_HTML
1898                                         return
1899                         return
1900                 if t.type is TYPE_END_TAG and t.name is 'form'
1901                         unless template_tag_is_open()
1902                                 node = form_element_pointer
1903                                 form_element_pointer = null
1904                                 if node is null or not el_is_in_scope node
1905                                         parse_error()
1906                                         return
1907                                 generate_implied_end_tags()
1908                                 if open_els[0] isnt node
1909                                         parse_error()
1910                                 for el, i in open_els
1911                                         if el is node
1912                                                 open_els.splice i, 1
1913                                                 break
1914                         else
1915                                 unless is_in_scope 'form', NS_HTML
1916                                         parse_error()
1917                                         return
1918                                 generate_implied_end_tags()
1919                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1920                                         parse_error()
1921                                 loop
1922                                         el = open_els.shift()
1923                                         if el.name is 'form' and el.namespace is NS_HTML
1924                                                 break
1925                         return
1926                 if t.type is TYPE_END_TAG and t.name is 'p'
1927                         unless is_in_button_scope 'p', NS_HTML
1928                                 parse_error()
1929                                 insert_html_element new_open_tag 'p'
1930                         close_p_element()
1931                         return
1932                 if t.type is TYPE_END_TAG and t.name is 'li'
1933                         unless is_in_li_scope 'li', NS_HTML
1934                                 parse_error()
1935                                 return
1936                         generate_implied_end_tags 'li' # arg is exception
1937                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1938                                 parse_error()
1939                         loop
1940                                 el = open_els.shift()
1941                                 if el.name is 'li' and el.namespace is NS_HTML
1942                                         break
1943                         return
1944                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1945                         unless is_in_scope t.name, NS_HTML
1946                                 parse_error()
1947                                 return
1948                         generate_implied_end_tags t.name # arg is exception
1949                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1950                                 parse_error()
1951                         loop
1952                                 el = open_els.shift()
1953                                 if el.name is t.name and el.namespace is NS_HTML
1954                                         break
1955                         return
1956                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1957                         h_in_scope = false
1958                         for el in open_els
1959                                 if h_tags[el.name] is el.namespace
1960                                         h_in_scope = true
1961                                         break
1962                                 if standard_scopers[el.name] is el.namespace
1963                                         break
1964                         unless h_in_scope
1965                                 parse_error()
1966                                 return
1967                         generate_implied_end_tags()
1968                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1969                                 parse_error()
1970                         loop
1971                                 el = open_els.shift()
1972                                 if h_tags[el.name] is el.namespace
1973                                         break
1974                         return
1975                 # deep breath!
1976                 if t.type is TYPE_START_TAG and t.name is 'a'
1977                         # If the list of active formatting elements contains an a element
1978                         # between the end of the list and the last marker on the list (or
1979                         # the start of the list if there is no marker on the list), then
1980                         # this is a parse error; run the adoption agency algorithm for the
1981                         # tag name "a", then remove that element from the list of active
1982                         # formatting elements and the stack of open elements if the
1983                         # adoption agency algorithm didn't already remove it (it might not
1984                         # have if the element is not in table scope).
1985                         found = false
1986                         for el in afe
1987                                 if el.type is TYPE_AFE_MARKER
1988                                         break
1989                                 if el.name is 'a' and el.namespace is NS_HTML
1990                                         found = el
1991                         if found?
1992                                 parse_error()
1993                                 adoption_agency 'a'
1994                                 for el, i in afe
1995                                         if el is found
1996                                                 afe.splice i, 1
1997                                 for el, i in open_els
1998                                         if el is found
1999                                                 open_els.splice i, 1
2000                         reconstruct_afe()
2001                         el = insert_html_element t
2002                         afe_push el
2003                         return
2004                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2005                         reconstruct_afe()
2006                         el = insert_html_element t
2007                         afe_push el
2008                         return
2009                 if t.type is TYPE_START_TAG and t.name is 'nobr'
2010                         reconstruct_afe()
2011                         el = insert_html_element t
2012                         afe_push el
2013                         return
2014                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
2015                         adoption_agency t.name
2016                         return
2017                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2018                         reconstruct_afe()
2019                         insert_html_element t
2020                         afe_push_marker()
2021                         flag_frameset_ok = false
2022                         return
2023                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
2024                         unless is_in_scope t.name, NS_HTML
2025                                 parse_error()
2026                                 return
2027                         generate_implied_end_tags()
2028                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
2029                                 parse_error()
2030                         loop
2031                                 el = open_els.shift()
2032                                 if el.name is t.name and el.namespace is NS_HTML
2033                                         break
2034                         clear_afe_to_marker()
2035                         return
2036                 if t.type is TYPE_START_TAG and t.name is 'table'
2037                         close_p_if_in_button_scope() # fixfull quirksmode thing
2038                         insert_html_element t
2039                         flag_frameset_ok = false
2040                         ins_mode = ins_mode_in_table
2041                         return
2042                 if t.type is TYPE_END_TAG and t.name is 'br'
2043                         parse_error()
2044                         t.type is TYPE_START_TAG
2045                         # fall through
2046                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
2047                         reconstruct_afe()
2048                         insert_html_element t
2049                         open_els.shift()
2050                         t.acknowledge_self_closing()
2051                         flag_frameset_ok = false
2052                         return
2053                 if t.type is TYPE_START_TAG and t.name is 'input'
2054                         reconstruct_afe()
2055                         insert_html_element t
2056                         open_els.shift()
2057                         t.acknowledge_self_closing()
2058                         unless is_input_hidden_tok t
2059                                 flag_frameset_ok = false
2060                         return
2061                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
2062                         insert_html_element t
2063                         open_els.shift()
2064                         t.acknowledge_self_closing()
2065                         return
2066                 if t.type is TYPE_START_TAG and t.name is 'hr'
2067                         close_p_if_in_button_scope()
2068                         insert_html_element t
2069                         open_els.shift()
2070                         t.acknowledge_self_closing()
2071                         flag_frameset_ok = false
2072                         return
2073                 if t.type is TYPE_START_TAG and t.name is 'image'
2074                         parse_error()
2075                         t.name = 'img'
2076                         process_token t
2077                         return
2078                 if t.type is TYPE_START_TAG and t.name is 'isindex'
2079                         parse_error()
2080                         if template_tag_is_open() is false and form_element_pointer isnt null
2081                                 return
2082                         t.acknowledge_self_closing()
2083                         flag_frameset_ok = false
2084                         close_p_if_in_button_scope()
2085                         el = insert_html_element new_open_tag 'form'
2086                         unless template_tag_is_open()
2087                                 form_element_pointer = el
2088                         for a in t.attrs_a
2089                                 if a[0] is 'action'
2090                                         el.attrs['action'] = a[1]
2091                                         break
2092                         insert_html_element new_open_tag 'hr'
2093                         open_els.shift()
2094                         reconstruct_afe()
2095                         insert_html_element new_open_tag 'label'
2096                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
2097                         input_el = new_open_tag 'input'
2098                         prompt = null
2099                         for a in t.attrs_a
2100                                 if a[0] is 'prompt'
2101                                         prompt = a[1]
2102                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
2103                                         input_el.attrs_a.push [a[0], a[1]]
2104                         input_el.attrs_a.push ['name', 'isindex']
2105                         # fixfull this next bit is in english... internationalize?
2106                         prompt ?= "This is a searchable index. Enter search keywords: "
2107                         insert_character new_character_token prompt # fixfull split
2108                         # TODO submit typo "balue" in spec
2109                         insert_html_element input_el
2110                         open_els.shift()
2111                         # insert_character '' # you can put chars here if prompt attr missing
2112                         open_els.shift()
2113                         insert_html_element new_open_tag 'hr'
2114                         open_els.shift()
2115                         open_els.shift()
2116                         unless template_tag_is_open()
2117                                 form_element_pointer = null
2118                         return
2119                 if t.type is TYPE_START_TAG and t.name is 'textarea'
2120                         insert_html_element t
2121                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
2122                                 cur += 1
2123                         tok_state = tok_state_rcdata
2124                         original_ins_mode = ins_mode
2125                         flag_frameset_ok = false
2126                         ins_mode = ins_mode_text
2127                         return
2128                 if t.type is TYPE_START_TAG and t.name is 'xmp'
2129                         close_p_if_in_button_scope()
2130                         reconstruct_afe()
2131                         flag_frameset_ok = false
2132                         parse_generic_raw_text t
2133                         return
2134                 if t.type is TYPE_START_TAG and t.name is 'iframe'
2135                         flag_frameset_ok = false
2136                         parse_generic_raw_text t
2137                         return
2138                 if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
2139                         parse_generic_raw_text t
2140                         return
2141                 if t.type is TYPE_START_TAG and t.name is 'select'
2142                         reconstruct_afe()
2143                         insert_html_element t
2144                         flag_frameset_ok = false
2145                         if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
2146                                 ins_mode = ins_mode_in_select_in_table
2147                         else
2148                                 ins_mode = ins_mode_in_select
2149                         return
2150                 if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
2151                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2152                                 open_els.shift()
2153                         reconstruct_afe()
2154                         insert_html_element t
2155                         return
2156 # this comment block implements the W3C spec
2157 #               if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
2158 #                       if is_in_scope 'ruby', NS_HTML
2159 #                               generate_implied_end_tags()
2160 #                               unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2161 #                                       parse_error()
2162 #                       insert_html_element t
2163 #                       return
2164 #               if t.type is TYPE_START_TAG and t.name is 'rt'
2165 #                       if is_in_scope 'ruby', NS_HTML
2166 #                               generate_implied_end_tags 'rtc' # arg is exception
2167 #                               unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2168 #                                       parse_error()
2169 #                       insert_html_element t
2170 #                       return
2171 # below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
2172                 if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
2173                         if is_in_scope 'ruby', NS_HTML
2174                                 generate_implied_end_tags()
2175                                 unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
2176                                         parse_error()
2177                         insert_html_element t
2178                         return
2179                 if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
2180                         if is_in_scope 'ruby', NS_HTML
2181                                 generate_implied_end_tags 'rtc'
2182                                 unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
2183                                         parse_error()
2184                         insert_html_element t
2185                         return
2186 # end WHATWG chunk
2187                 if t.type is TYPE_START_TAG and t.name is 'math'
2188                         reconstruct_afe()
2189                         adjust_mathml_attributes t
2190                         adjust_foreign_attributes t
2191                         insert_foreign_element t, NS_MATHML
2192                         if t.flag 'self-closing'
2193                                 open_els.shift()
2194                                 t.acknowledge_self_closing()
2195                         return
2196                 if t.type is TYPE_START_TAG and t.name is 'svg'
2197                         reconstruct_afe()
2198                         adjust_svg_attributes t
2199                         adjust_foreign_attributes t
2200                         insert_foreign_element t, NS_SVG
2201                         if t.flag 'self-closing'
2202                                 open_els.shift()
2203                                 t.acknowledge_self_closing()
2204                         return
2205                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2206                         parse_error()
2207                         return
2208                 if t.type is TYPE_START_TAG # any other start tag
2209                         reconstruct_afe()
2210                         insert_html_element t
2211                         return
2212                 if t.type is TYPE_END_TAG # any other end tag
2213                         in_body_any_other_end_tag t.name
2214                         return
2215                 return
2216
2217         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
2218         ins_mode_text = (t) ->
2219                 # "text" insertion mode: appends character tokens to the current node;
2220                 # EOF or any end tag pops that node and restores original_ins_mode
2221                 switch t.type
2222                         when TYPE_TEXT
2223                                 insert_character t
2224                         when TYPE_EOF
2225                                 # input ended before the closing tag: report it, drop the
2226                                 # unfinished element, then re-dispatch EOF in the restored mode
2227                                 parse_error()
2228                                 if open_els[0].namespace is NS_HTML and open_els[0].name is 'script'
2229                                         open_els[0].flag 'already started', true
2230                                 open_els.shift()
2231                                 ins_mode = original_ins_mode
2232                                 process_token t
2233                         when TYPE_END_TAG
2234                                 # fixfull for </script> the spec seems to assume that I'm going to
2235                                 # run the script: http://www.w3.org/TR/html5/syntax.html#scriptEndTag
2236                                 open_els.shift()
2237                                 ins_mode = original_ins_mode
2238                         else
2239                                 console.log 'warning: end of ins_mode_text reached'
2240                 return
2241
2242         # the functions below implement the tokenizer states described here:
2243         # http://www.w3.org/TR/html5/syntax.html#tokenization
2244
2245         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
2246         ins_mode_in_table_else = (t) ->
                     # "Anything else" branch used by ins_mode_in_table and
                     # ins_mode_in_table_text: reports a parse error, then processes the
                     # token with the in-body rules while flag_foster_parenting is set.
2247                 parse_error()
2248                 flag_foster_parenting = true
2249                 ins_mode_in_body t
                     # the flag is only meant to be on for the duration of the call above
2250                 flag_foster_parenting = false
2251                 return
2252         ins_mode_in_table = (t) ->
                     # Token handler for the "in table" insertion mode.
                     # Dispatches on the token type and, for tags, on the tag name. Tokens
                     # with no specific rule here are passed to ins_mode_in_table_else.
2253                 switch t.type
2254                         when TYPE_TEXT
                                     # text directly inside a table-structure element is collected
                                     # by ins_mode_in_table_text; the token is reprocessed there
2255                                 if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
2256                                         pending_table_character_tokens = []
2257                                         original_ins_mode = ins_mode
2258                                         ins_mode = ins_mode_in_table_text
2259                                         process_token t
2260                                 else
2261                                         ins_mode_in_table_else t
2262                         when TYPE_COMMENT
2263                                 insert_comment t
2264                         when TYPE_DOCTYPE
2265                                 parse_error()
2266                         when TYPE_START_TAG
2267                                 switch t.name
2268                                         when 'caption'
2269                                                 clear_stack_to_table_context()
2270                                                 afe_push_marker()
2271                                                 insert_html_element t
2272                                                 ins_mode = ins_mode_in_caption
2273                                         when 'colgroup'
2274                                                 clear_stack_to_table_context()
2275                                                 insert_html_element t
2276                                                 ins_mode = ins_mode_in_column_group
2277                                         when 'col'
                                                     # a colgroup is inserted on the token's behalf, then the
                                                     # col token itself is reprocessed in the column-group mode
2278                                                 clear_stack_to_table_context()
2279                                                 insert_html_element new_open_tag 'colgroup'
2280                                                 ins_mode = ins_mode_in_column_group
2281                                                 process_token t
2282                                         when 'tbody', 'tfoot', 'thead'
2283                                                 clear_stack_to_table_context()
2284                                                 insert_html_element t
2285                                                 ins_mode = ins_mode_in_table_body
2286                                         when 'td', 'th', 'tr'
                                                     # a tbody is inserted on the token's behalf, then the
                                                     # token is reprocessed in the table-body mode
2287                                                 clear_stack_to_table_context()
2288                                                 insert_html_element new_open_tag 'tbody'
2289                                                 ins_mode = ins_mode_in_table_body
2290                                                 process_token t
2291                                         when 'table'
                                                     # <table> while a table is open: pop through the open
                                                     # table (if one is in table scope) and reprocess the token
2292                                                 parse_error()
2293                                                 if is_in_table_scope 'table', NS_HTML
2294                                                         loop
2295                                                                 el = open_els.shift()
2296                                                                 if el.name is 'table' and el.namespace is NS_HTML
2297                                                                         break
2298                                                         reset_ins_mode()
2299                                                         process_token t
2300                                         when 'style', 'script', 'template'
2301                                                 ins_mode_in_head t
2302                                         when 'input'
                                                     # only tokens accepted by is_input_hidden_tok are inserted
                                                     # here; the new element is popped again right away
2303                                                 unless is_input_hidden_tok t
2304                                                         ins_mode_in_table_else t
2305                                                 else
2306                                                         parse_error()
2307                                                         el = insert_html_element t
2308                                                         open_els.shift()
2309                                                         t.acknowledge_self_closing()
2310                                         when 'form'
                                                     # ignored when a form pointer is already set or a template
                                                     # is open; otherwise the form is inserted, remembered in
                                                     # form_element_pointer and popped again right away
2311                                                 parse_error()
2312                                                 if form_element_pointer?
2313                                                         return
2314                                                 if template_tag_is_open()
2315                                                         return
2316                                                 form_element_pointer = insert_html_element t
2317                                                 open_els.shift()
2318                                         else
2319                                                 ins_mode_in_table_else t
2320                         when TYPE_END_TAG
2321                                 switch t.name
2322                                         when 'table'
                                                     # pop through the open table, or report an error when
                                                     # no table is in table scope
2323                                                 if is_in_table_scope 'table', NS_HTML
2324                                                         loop
2325                                                                 el = open_els.shift()
2326                                                                 if el.name is 'table' and el.namespace is NS_HTML
2327                                                                         break
2328                                                         reset_ins_mode()
2329                                                 else
2330                                                         parse_error()
2331                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
                                                     # these end tags are reported and otherwise ignored
2332                                                 parse_error()
2333                                         when 'template'
2334                                                 ins_mode_in_head t
2335                                         else
2336                                                 ins_mode_in_table_else t
2337                         when TYPE_EOF
2338                                 ins_mode_in_body t
2339                         else
2340                                 ins_mode_in_table_else t
2341
2342
2343         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2344         ins_mode_in_table_text = (t) ->
                     # Token handler for the "in table text" insertion mode.
                     # Text tokens are buffered in pending_table_character_tokens. The first
                     # non-text token flushes the buffer, restores original_ins_mode and is
                     # then reprocessed under that mode.
2345                 if t.type is TYPE_TEXT and t.text is "\u0000"
2346                         # from javascript?
2347                         parse_error()
2348                         return
2349                 if t.type is TYPE_TEXT
2350                         pending_table_character_tokens.push t
2351                         return
2352                 # Anything else
                     # decide whether every buffered token passes is_space_tok
2353                 all_space = true
2354                 for old in pending_table_character_tokens
2355                         unless is_space_tok old
2356                                 all_space = false
2357                                 break
                     # all-space buffers are inserted directly; otherwise every buffered
                     # token goes through the "anything else" rule of the in-table mode
2358                 if all_space
2359                         for old in pending_table_character_tokens
2360                                 insert_character old
2361                 else
2362                         for old in pending_table_character_tokens
2363                                 ins_mode_in_table_else old
2364                 pending_table_character_tokens = []
2365                 ins_mode = original_ins_mode
2366                 process_token t
2367
2368         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2369         ins_mode_in_caption = (t) ->
                     # Token handler for the "in caption" insertion mode.
                     # </caption> and table-structure tags close the caption and return to
                     # ins_mode_in_table; everything else is handled by the in-body rules.
2370                 if t.type is TYPE_END_TAG and t.name is 'caption'
2371                         if is_in_table_scope 'caption', NS_HTML
2372                                 generate_implied_end_tags()
                                     # a current node other than caption is reported, but the
                                     # stack is still popped through the caption below
2373                                 if open_els[0].name isnt 'caption'
2374                                         parse_error()
2375                                 loop
2376                                         el = open_els.shift()
2377                                         if el.name is 'caption' and el.namespace is NS_HTML
2378                                                 break
2379                                 clear_afe_to_marker()
2380                                 ins_mode = ins_mode_in_table
2381                         else
2382                                 parse_error()
2383                                 # fragment case
2384                         return
                     # table-structure start tags and </table>: close the caption as above
                     # (without implied end tags), then reprocess the token in the in-table mode
2385                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2386                         parse_error()
2387                         if is_in_table_scope 'caption', NS_HTML
2388                                 loop
2389                                         el = open_els.shift()
2390                                         if el.name is 'caption' and el.namespace is NS_HTML
2391                                                 break
2392                                 clear_afe_to_marker()
2393                                 ins_mode = ins_mode_in_table
2394                                 process_token t
2395                         # else fragment case
2396                         return
                     # these end tags are reported and otherwise ignored
2397                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2398                         parse_error()
2399                         return
2400                 # Anything else
2401                 ins_mode_in_body t
2402
2403         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2404         ins_mode_in_column_group = (t) ->
                     # Token handler for the "in column group" insertion mode.
                     # Accepts whitespace, comments and <col>; </colgroup> (or any token
                     # without a rule here) closes the colgroup and returns to the
                     # in-table mode.
2405                 if is_space_tok t
2406                         insert_character t
2407                         return
2408                 if t.type is TYPE_COMMENT
2409                         insert_comment t
2410                         return
2411                 if t.type is TYPE_DOCTYPE
2412                         parse_error()
2413                         return
2414                 if t.type is TYPE_START_TAG and t.name is 'html'
2415                         ins_mode_in_body t
2416                         return
2417                 if t.type is TYPE_START_TAG and t.name is 'col'
                             # col is inserted and popped again right away
2418                         el = insert_html_element t
2419                         open_els.shift()
2420                         t.acknowledge_self_closing()
2421                         return
2422                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
                             # the namespace must be read from the current node (open_els[0]);
                             # reading it from the open_els array itself is always undefined,
                             # which made every </colgroup> a parse error
2423                         if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
2424                                 open_els.shift()
2425                                 ins_mode = ins_mode_in_table
2426                         else
2427                                 parse_error()
2428                         return
2429                 if t.type is TYPE_END_TAG and t.name is 'col'
2430                         parse_error()
2431                         return
2432                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2433                         ins_mode_in_head t
2434                         return
2435                 if t.type is TYPE_EOF
2436                         ins_mode_in_body t
2437                         return
2438                 # Anything else
                     # with a colgroup as current node: pop it and reprocess the token in
                     # the in-table mode; otherwise the token is reported and ignored
2439                 if open_els[0].name isnt 'colgroup'
2440                         parse_error()
2441                         return
2442                 open_els.shift()
2443                 ins_mode = ins_mode_in_table
2444                 process_token t
2445                 return
2446
2447         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2448         ins_mode_in_table_body = (t) ->
                     # Token handler for the "in table body" insertion mode.
                     # Opens rows, closes the current tbody/tfoot/thead, and hands anything
                     # without a rule here to ins_mode_in_table.
2449                 if t.type is TYPE_START_TAG and t.name is 'tr'
2450                         clear_stack_to_table_body_context()
2451                         insert_html_element t
2452                         ins_mode = ins_mode_in_row
2453                         return
2454                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
                             # a cell without a row: a tr is inserted on its behalf and the
                             # token is reprocessed in the in-row mode
2455                         parse_error()
2456                         clear_stack_to_table_body_context()
2457                         insert_html_element new_open_tag 'tr'
2458                         ins_mode = ins_mode_in_row
2459                         process_token t
2460                         return
2461                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2462                         unless is_in_table_scope t.name, NS_HTML
2463                                 parse_error()
2464                                 return
2465                         clear_stack_to_table_body_context()
2466                         open_els.shift()
2467                         ins_mode = ins_mode_in_table
2468                         return
2469                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
                             # walk the stack from the current node looking for an HTML
                             # tbody/tfoot/thead, stopping at the first element that
                             # table_scopers lists for its namespace
2470                         has = false
2471                         for el in open_els
2472                                 if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
2473                                         has = true
2474                                         break
2475                                 if table_scopers[el.name] is el.namespace
2476                                         break
2477                         if !has
2478                                 parse_error()
2479                                 return
                             # close the table section, then reprocess the token in the in-table mode
2480                         clear_stack_to_table_body_context()
2481                         open_els.shift()
2482                         ins_mode = ins_mode_in_table
2483                         process_token t
2484                         return
                     # these end tags are reported and otherwise ignored
2485                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2486                         parse_error()
2487                         return
2488                 # Anything else
2489                 ins_mode_in_table t
2490
2491         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2492         ins_mode_in_row = (t) ->
                     # Token handler for the "in row" insertion mode.
                     # Opens cells, closes the current tr, and hands anything without a
                     # rule here to ins_mode_in_table.
2493                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2494                         clear_stack_to_table_row_context()
2495                         insert_html_element t
2496                         ins_mode = ins_mode_in_cell
                             # marker is removed again by clear_afe_to_marker when the cell closes
2497                         afe_push_marker()
2498                         return
2499                 if t.type is TYPE_END_TAG and t.name is 'tr'
2500                         if is_in_table_scope 'tr', NS_HTML
2501                                 clear_stack_to_table_row_context()
2502                                 open_els.shift()
2503                                 ins_mode = ins_mode_in_table_body
2504                         else
2505                                 parse_error()
2506                         return
                     # table-structure start tags and </table>: close the row as for </tr>,
                     # then reprocess the token in the table-body mode
2507                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2508                         if is_in_table_scope 'tr', NS_HTML
2509                                 clear_stack_to_table_row_context()
2510                                 open_els.shift()
2511                                 ins_mode = ins_mode_in_table_body
2512                                 process_token t
2513                         else
2514                                 parse_error()
2515                         return
2516                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
                             # error when the named section is not in table scope; silently
                             # ignored when the section is in scope but no tr is
2517                         if is_in_table_scope t.name, NS_HTML
2518                                 if is_in_table_scope 'tr', NS_HTML
2519                                         clear_stack_to_table_row_context()
2520                                         open_els.shift()
2521                                         ins_mode = ins_mode_in_table_body
2522                                         process_token t
2523                         else
2524                                 parse_error()
2525                         return
                     # these end tags are reported and otherwise ignored
2526                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2527                         parse_error()
2528                         return
2529                 # Anything else
2530                 ins_mode_in_table t
2531
2532         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2533         close_the_cell = ->
                     # "Close the cell": generates implied end tags, pops the stack through
                     # the nearest HTML td/th, clears the active formatting elements up to
                     # the last marker and switches to the in-row mode.
2534                 generate_implied_end_tags()
                     # report an error when the current node is not the cell itself; the
                     # 'th' test must compare the element's name, not the element object
2535                 unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
2536                         parse_error()
2537                 loop
2538                         el = open_els.shift()
2539                         if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2540                                 break
2541                 clear_afe_to_marker()
2542                 ins_mode = ins_mode_in_row
2543
2544         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2545         ins_mode_in_cell = (t) ->
                     # Token handler for the "in cell" insertion mode.
                     # </td> and </th> close the cell; table-structure tags close the cell
                     # via close_the_cell and are reprocessed; everything else is handled
                     # by the in-body rules.
2546                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2547                         if is_in_table_scope t.name, NS_HTML
2548                                 generate_implied_end_tags()
                                     # a current node other than the named cell is reported, but
                                     # the stack is still popped through that cell below
2549                                 unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
2550                                         parse_error()
2551                                 loop
2552                                         el = open_els.shift()
2553                                         if el.name is t.name and el.namespace is NS_HTML
2554                                                 break
2555                                 clear_afe_to_marker()
2556                                 ins_mode = ins_mode_in_row
2557                         else
2558                                 parse_error()
2559                         return
2560                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
                             # walk the stack from the current node looking for an HTML td/th,
                             # stopping at the first element that table_scopers lists for its
                             # namespace
2561                         has = false
2562                         for el in open_els
2563                                 if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
2564                                         has = true
2565                                         break
2566                                 if table_scopers[el.name] is el.namespace
2567                                         break
2568                         if !has
2569                                 parse_error()
2570                                 return
2571                         close_the_cell()
2572                         process_token t
2573                         return
                     # these end tags are reported and otherwise ignored
2574                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2575                         parse_error()
2576                         return
2577                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
                             # when the named element is in table scope, close the cell and
                             # reprocess the end tag in the mode close_the_cell switched to
2578                         if is_in_table_scope t.name, NS_HTML
2579                                 close_the_cell()
2580                                 process_token t
2581                         else
2582                                 parse_error()
2583                         return
2584                 # Anything Else
2585                 ins_mode_in_body t
2586
2587         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2588         ins_mode_in_select = (t) ->
                     # Token handler for the "in select" insertion mode.
                     # Only text, comments, option and optgroup are accepted as content;
                     # select/input/keygen/textarea tags close the select; script and
                     # template tags use the in-head rules; other tokens are reported and
                     # ignored.
2589                 if t.type is TYPE_TEXT and t.text is "\u0000"
2590                         parse_error()
2591                         return
2592                 if t.type is TYPE_TEXT
2593                         insert_character t
2594                         return
2595                 if t.type is TYPE_COMMENT
2596                         insert_comment t
2597                         return
2598                 if t.type is TYPE_DOCTYPE
2599                         parse_error()
2600                         return
2601                 if t.type is TYPE_START_TAG and t.name is 'html'
2602                         ins_mode_in_body t
2603                         return
2604                 if t.type is TYPE_START_TAG and t.name is 'option'
                             # a new option implicitly closes an open option
2605                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2606                                 open_els.shift()
2607                         insert_html_element t
2608                         return
2609                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
                             # a new optgroup implicitly closes an open option, then an open optgroup
2610                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2611                                 open_els.shift()
2612                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2613                                 open_els.shift()
2614                         insert_html_element t
2615                         return
2616                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
                             # an option sitting directly inside an optgroup is closed first;
                             # both tests compare with `is`, and the optgroup test reads name
                             # and namespace from the same stack entry (open_els[1])
2617                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2618                                 if open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
2619                                         open_els.shift()
2620                         if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
2621                                 open_els.shift()
2622                         else
2623                                 parse_error()
2624                         return
2625                 if t.type is TYPE_END_TAG and t.name is 'option'
2626                         if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
2627                                 open_els.shift()
2628                         else
2629                                 parse_error()
2630                         return
2631                 if t.type is TYPE_END_TAG and t.name is 'select'
2632                         if is_in_select_scope 'select', NS_HTML
2633                                 loop
2634                                         el = open_els.shift()
2635                                         if el.name is 'select' and el.namespace is NS_HTML
2636                                                 break
2637                                 reset_ins_mode()
2638                         else
2639                                 parse_error()
2640                         return
2641                 if t.type is TYPE_START_TAG and t.name is 'select'
                             # NOTE(review): unlike the </select> branch, nothing checks that a
                             # select is on the stack before this loop pops; confirm that
                             # cannot happen (e.g. in the fragment case)
2642                         parse_error()
2643                         loop
2644                                 el = open_els.shift()
2645                                 if el.name is 'select' and el.namespace is NS_HTML
2646                                         break
2647                         reset_ins_mode()
2648                         # spec says that this is the same as </select> but it doesn't say
2649                         # to check scope first
2650                         return
2651                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2652                         parse_error()
                             # ignore the token only when NO select is in select scope (fragment
                             # case); otherwise close the select and reprocess the token
2653                         unless is_in_select_scope 'select', NS_HTML
2654                                 return
2655                         loop
2656                                 el = open_els.shift()
2657                                 if el.name is 'select' and el.namespace is NS_HTML
2658                                         break
2659                         reset_ins_mode()
2660                         process_token t
2661                         return
                     # script/template start tags and the template end tag use the in-head rules
2662                 if (t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')) or (t.type is TYPE_END_TAG and t.name is 'template')
2663                         ins_mode_in_head t
2664                         return
2665                 if t.type is TYPE_EOF
2666                         ins_mode_in_body t
2667                         return
2668                 # Anything else
2669                 parse_error()
2670                 return
2671
2672         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2673         ins_mode_in_select_in_table = (t) ->
                     # Token handler for the "in select in table" insertion mode.
                     # Table-structure tags close the open select and are reprocessed;
                     # every other token is handled by ins_mode_in_select.
2674                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2675                         parse_error()
2676                         loop
2677                                 el = open_els.shift()
2678                                 if el.name is 'select' and el.namespace is NS_HTML
2679                                         break
2680                         reset_ins_mode()
2681                         process_token t
2682                         return
2683                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2684                         parse_error()
                             # an end tag whose element is not in table scope is ignored
2685                         unless is_in_table_scope t.name, NS_HTML
2686                                 return
2687                         loop
2688                                 el = open_els.shift()
2689                                 if el.name is 'select' and el.namespace is NS_HTML
2690                                         break
2691                         reset_ins_mode()
2692                         process_token t
2693                         return
2694                 # Anything else
2695                 ins_mode_in_select t
2696                 return
2697
2698         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2699         ins_mode_in_template = (t) ->
                     # Token handler for the "in template" insertion mode.
                     # For most start tags the entry at the front of template_ins_modes is
                     # replaced with the mode that suits the tag, ins_mode is switched to
                     # that mode, and the token is reprocessed there.
2700                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2701                         ins_mode_in_body t
2702                         return
2703                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2704                         ins_mode_in_head t
2705                         return
2706                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2707                         template_ins_modes.shift()
2708                         template_ins_modes.unshift ins_mode_in_table
2709                         ins_mode = ins_mode_in_table
2710                         process_token t
2711                         return
2712                 if t.type is TYPE_START_TAG and t.name is 'col'
2713                         template_ins_modes.shift()
2714                         template_ins_modes.unshift ins_mode_in_column_group
2715                         ins_mode = ins_mode_in_column_group
2716                         process_token t
2717                         return
2718                 if t.type is TYPE_START_TAG and t.name is 'tr'
2719                         template_ins_modes.shift()
2720                         template_ins_modes.unshift ins_mode_in_table_body
2721                         ins_mode = ins_mode_in_table_body
2722                         process_token t
2723                         return
2724                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2725                         template_ins_modes.shift()
2726                         template_ins_modes.unshift ins_mode_in_row
2727                         ins_mode = ins_mode_in_row
2728                         process_token t
2729                         return
                     # any other start tag
2730                 if t.type is TYPE_START_TAG
2731                         template_ins_modes.shift()
2732                         template_ins_modes.unshift ins_mode_in_body
2733                         ins_mode = ins_mode_in_body
2734                         process_token t
2735                         return
                     # any other end tag is reported and ignored
2736                 if t.type is TYPE_END_TAG
2737                         parse_error()
2738                         return
2739                 if t.type is TYPE_EOF
                             # with no template open, parsing simply stops; otherwise the
                             # unclosed template is popped and EOF is reprocessed
2740                         unless template_tag_is_open()
2741                                 stop_parsing()
2742                                 return
2743                         parse_error()
2744                         loop
2745                                 el = open_els.shift()
2746                                 if el.name is 'template' and el.namespace is NS_HTML
2747                                         break
2748                         clear_afe_to_marker()
2749                         template_ins_modes.shift()
2750                         reset_ins_mode()
2751                         process_token t
2752
2753         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2754         ins_mode_after_body = (t) ->
                     # Token handler for the "after body" insertion mode.
                     # Whitespace and <html> use the in-body rules, </html> moves on to
                     # ins_mode_after_after_body, and any other content switches back to
                     # the in-body mode with a parse error.
2755                 if is_space_tok t
2756                         ins_mode_in_body t
2757                         return
2758                 if t.type is TYPE_COMMENT
                             # the comment is appended as the last child of the oldest entry
                             # on the stack (the last element of open_els)
2759                         first = open_els[open_els.length - 1]
2760                         insert_comment t, [first, first.children.length]
2761                         return
2762                 if t.type is TYPE_DOCTYPE
2763                         parse_error()
2764                         return
2765                 if t.type is TYPE_START_TAG and t.name is 'html'
2766                         ins_mode_in_body t
2767                         return
2768                 if t.type is TYPE_END_TAG and t.name is 'html'
                             # when parsing a fragment, </html> is reported and ignored
2769                         if flag_fragment_parsing
2770                                 parse_error()
2771                                 return
2772                         ins_mode = ins_mode_after_after_body
2773                         return
2774                 if t.type is TYPE_EOF
2775                         stop_parsing()
2776                         return
2777                 # Anything else
2778                 parse_error()
2779                 ins_mode = ins_mode_in_body
2780                 process_token t
2781
2782         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2783         ins_mode_in_frameset = (t) ->
2784                 if is_space_tok t
2785                         insert_character t
2786                         return
2787                 if t.type is TYPE_COMMENT
2788                         insert_comment t
2789                         return
2790                 if t.type is TYPE_DOCTYPE
2791                         parse_error()
2792                         return
2793                 if t.type is TYPE_START_TAG and t.name is 'html'
2794                         ins_mode_in_body t
2795                         return
2796                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2797                         insert_html_element t
2798                         return
2799                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2800                         if open_els.length is 1
2801                                 parse_error()
2802                                 return # fragment case
2803                         open_els.shift()
2804                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2805                                 ins_mode = ins_mode_after_frameset
2806                         return
2807                 if t.type is TYPE_START_TAG and t.name is 'frame'
2808                         insert_html_element t
2809                         open_els.shift()
2810                         t.acknowledge_self_closing()
2811                         return
2812                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2813                         ins_mode_in_head t
2814                         return
2815                 if t.type is TYPE_EOF
2816                         if open_els.length isnt 1
2817                                 parse_error()
2818                         stop_parsing()
2819                         return
2820                 # Anything else
2821                 parse_error()
2822                 return
2823
2824         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2825         ins_mode_after_frameset = (t) ->
2826                 if is_space_tok t
2827                         insert_character t
2828                         return
2829                 if t.type is TYPE_COMMENT
2830                         insert_comment t
2831                         return
2832                 if t.type is TYPE_DOCTYPE
2833                         parse_error()
2834                         return
2835                 if t.type is TYPE_START_TAG and t.name is 'html'
2836                         ins_mode_in_body t
2837                         return
2838                 if t.type is TYPE_END_TAG and t.name is 'html'
2839                         ins_mode = ins_mode_after_after_frameset
2840                         return
2841                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2842                         ins_mode_in_head t
2843                         return
2844                 if t.type is TYPE_EOF
2845                         stop_parsing()
2846                         return
2847                 # Anything else
2848                 parse_error()
2849                 return
2850
2851         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2852         ins_mode_after_after_body = (t) ->
2853                 if t.type is TYPE_COMMENT
2854                         insert_comment t, [doc, doc.children.length]
2855                         return
2856                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2857                         ins_mode_in_body t
2858                         return
2859                 if t.type is TYPE_EOF
2860                         stop_parsing()
2861                         return
2862                 # Anything else
2863                 parse_error()
2864                 ins_mode = ins_mode_in_body
2865                 process_token t
2866                 return
2867
2868         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2869         ins_mode_after_after_frameset = (t) ->
2870                 if t.type is TYPE_COMMENT
2871                         insert_comment t, [doc, doc.children.length]
2872                         return
2873                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2874                         ins_mode_in_body t
2875                         return
2876                 if t.type is TYPE_EOF
2877                         stop_parsing()
2878                         return
2879                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2880                         ins_mode_in_head t
2881                         return
2882                 # Anything else
2883                 parse_error()
2884                 return
2885
2886         # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
2887         has_color_face_or_size = (t) ->
2888                 for a in t.attrs_a
2889                         if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
2890                                 return true
2891                 return false
2892         in_foreign_content_end_script = ->
2893                 open_els.shift()
2894                 # fixfull
2895                 return
2896         in_foreign_content_other_start = (t) ->
2897                 acn = adjusted_current_node()
2898                 if acn.namespace is NS_MATHML
2899                         adjust_mathml_attributes t
2900                 if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
2901                         t.name = svg_name_fixes[t.name]
2902                 if acn.namespace is NS_SVG
2903                         adjust_svg_attributes t
2904                 adjust_foreign_attributes t
2905                 insert_foreign_element t, acn.namespace
2906                 if t.flag 'self-closing'
2907                         if t.name is 'script'
2908                                 t.acknowledge_self_closing()
2909                                 in_foreign_content_end_script()
2910                                 # fixfull
2911                         else
2912                                 open_els.shift()
2913                                 t.acknowledge_self_closing()
2914                 return
2915         in_foreign_content = (t) ->
2916                 if t.type is TYPE_TEXT and t.text is "\u0000"
2917                         parse_error()
2918                         insert_character new_character_token "\ufffd"
2919                         return
2920                 if is_space_tok t
2921                         insert_character t
2922                         return
2923                 if t.type is TYPE_TEXT
2924                         flag_frameset_ok = false
2925                         insert_character t
2926                         return
2927                 if t.type is TYPE_COMMENT
2928                         insert_comment t
2929                         return
2930                 if t.type is TYPE_DOCTYPE
2931                         parse_error()
2932                         return
2933                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
2934                         parse_error()
2935                         if flag_fragment_parsing
2936                                 in_foreign_content_other_start t
2937                                 return
2938                         loop # is this safe?
2939                                 open_els.shift()
2940                                 if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
2941                                         break
2942                         process_token t
2943                         return
2944                 if t.type is TYPE_START_TAG
2945                         in_foreign_content_other_start t
2946                         return
2947                 if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
2948                         in_foreign_content_end_script()
2949                         return
2950                 if t.type is TYPE_END_TAG
2951                         i = 0
2952                         node = open_els[i]
2953                         if node.name.toLowerCase() isnt t.name
2954                                 parse_error()
2955                         loop
2956                                 if node is open_els[open_els.length - 1]
2957                                         return
2958                                 if node.name.toLowerCase() is t.name
2959                                         loop
2960                                                 el = open_els.shift()
2961                                                 if el is node
2962                                                         return
2963                                 i += 1
2964                                 node = open_els[i]
2965                                 if node.namespace is NS_HTML
2966                                         break
2967                         ins_mode t # explicitly call HTML insertion mode
2968
2969
2970         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2971         tok_state_data = ->
2972                 switch c = txt.charAt(cur++)
2973                         when '&'
2974                                 return new_text_node parse_character_reference()
2975                         when '<'
2976                                 tok_state = tok_state_tag_open
2977                         when "\u0000"
2978                                 parse_error()
2979                                 return new_text_node "\ufffd"
2980                         when '' # EOF
2981                                 return new_eof_token()
2982                         else
2983                                 return new_text_node c
2984                 return null
2985
2986         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2987         # not needed: tok_state_character_reference_in_data = ->
2988         # just call parse_character_reference()
2989
2990         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2991         tok_state_rcdata = ->
2992                 switch c = txt.charAt(cur++)
2993                         when '&'
2994                                 return new_text_node parse_character_reference()
2995                         when '<'
2996                                 tok_state = tok_state_rcdata_less_than_sign
2997                         when "\u0000"
2998                                 parse_error()
2999                                 return new_character_token "\ufffd"
3000                         when '' # EOF
3001                                 return new_eof_token()
3002                         else
3003                                 return new_character_token c
3004                 return null
3005
3006         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
3007         # not needed: tok_state_character_reference_in_rcdata = ->
3008         # just call parse_character_reference()
3009
3010         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
3011         tok_state_rawtext = ->
3012                 switch c = txt.charAt(cur++)
3013                         when '<'
3014                                 tok_state = tok_state_rawtext_less_than_sign
3015                         when "\u0000"
3016                                 parse_error()
3017                                 return new_character_token "\ufffd"
3018                         when '' # EOF
3019                                 return new_eof_token()
3020                         else
3021                                 return new_character_token c
3022                 return null
3023
3024         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
3025         tok_state_script_data = ->
3026                 switch c = txt.charAt(cur++)
3027                         when '<'
3028                                 tok_state = tok_state_script_data_less_than_sign
3029                         when "\u0000"
3030                                 parse_error()
3031                                 return new_character_token "\ufffd"
3032                         when '' # EOF
3033                                 return new_eof_token()
3034                         else
3035                                 return new_character_token c
3036                 return null
3037
3038         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
3039         tok_state_plaintext = ->
3040                 switch c = txt.charAt(cur++)
3041                         when "\u0000"
3042                                 parse_error()
3043                                 return new_character_token "\ufffd"
3044                         when '' # EOF
3045                                 return new_eof_token()
3046                         else
3047                                 return new_character_token c
3048                 return null
3049
3050
3051         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
3052         tok_state_tag_open = ->
3053                 c = txt.charAt(cur++)
3054                 if c is '!'
3055                         tok_state = tok_state_markup_declaration_open
3056                         return
3057                 if c is '/'
3058                         tok_state = tok_state_end_tag_open
3059                         return
3060                 if is_uc_alpha(c)
3061                         tok_cur_tag = new_open_tag c.toLowerCase()
3062                         tok_state = tok_state_tag_name
3063                         return
3064                 if is_lc_alpha(c)
3065                         tok_cur_tag = new_open_tag c
3066                         tok_state = tok_state_tag_name
3067                         return
3068                 if c is '?'
3069                         parse_error()
3070                         tok_cur_tag = new_comment_token '?' # FIXME right?
3071                         tok_state = tok_state_bogus_comment
3072                         return
3073                 # Anything else
3074                 parse_error()
3075                 tok_state = tok_state_data
3076                 cur -= 1 # we didn't parse/handle the char after <
3077                 return new_text_node '<'
3078
3079         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
3080         tok_state_end_tag_open = ->
3081                 c = txt.charAt(cur++)
3082                 if is_uc_alpha(c)
3083                         tok_cur_tag = new_end_tag c.toLowerCase()
3084                         tok_state = tok_state_tag_name
3085                         return
3086                 if is_lc_alpha(c)
3087                         tok_cur_tag = new_end_tag c
3088                         tok_state = tok_state_tag_name
3089                         return
3090                 if c is '>'
3091                         parse_error()
3092                         tok_state = tok_state_data
3093                         return
3094                 if c is '' # EOF
3095                         parse_error()
3096                         tok_state = tok_state_data
3097                         return new_text_node '</'
3098                 # Anything else
3099                 parse_error()
3100                 tok_cur_tag = new_comment_token c
3101                 tok_state = tok_state_bogus_comment
3102                 return null
3103
3104         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
3105         tok_state_tag_name = ->
3106                 switch c = txt.charAt(cur++)
3107                         when "\t", "\n", "\u000c", ' '
3108                                 tok_state = tok_state_before_attribute_name
3109                         when '/'
3110                                 tok_state = tok_state_self_closing_start_tag
3111                         when '>'
3112                                 tok_state = tok_state_data
3113                                 tmp = tok_cur_tag
3114                                 tok_cur_tag = null
3115                                 return tmp
3116                         when "\u0000"
3117                                 parse_error()
3118                                 tok_cur_tag.name += "\ufffd"
3119                         when '' # EOF
3120                                 parse_error()
3121                                 tok_state = tok_state_data
3122                         else
3123                                 if is_uc_alpha(c)
3124                                         tok_cur_tag.name += c.toLowerCase()
3125                                 else
3126                                         tok_cur_tag.name += c
3127                 return null
3128
3129         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
3130         tok_state_rcdata_less_than_sign = ->
3131                 c = txt.charAt(cur++)
3132                 if c is '/'
3133                         temporary_buffer = ''
3134                         tok_state = tok_state_rcdata_end_tag_open
3135                         return null
3136                 # Anything else
3137                 tok_state = tok_state_rcdata
3138                 cur -= 1 # reconsume the input character
3139                 return new_character_token '<'
3140
3141         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
3142         tok_state_rcdata_end_tag_open = ->
3143                 c = txt.charAt(cur++)
3144                 if is_uc_alpha(c)
3145                         tok_cur_tag = new_end_tag c.toLowerCase()
3146                         temporary_buffer += c
3147                         tok_state = tok_state_rcdata_end_tag_name
3148                         return null
3149                 if is_lc_alpha(c)
3150                         tok_cur_tag = new_end_tag c
3151                         temporary_buffer += c
3152                         tok_state = tok_state_rcdata_end_tag_name
3153                         return null
3154                 # Anything else
3155                 tok_state = tok_state_rcdata
3156                 cur -= 1 # reconsume the input character
3157                 return new_character_token "</" # fixfull separate these
3158
3159         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
3160         is_appropriate_end_tag = (t) ->
3161                 # spec says to check against "the tag name of the last start tag to
3162                 # have been emitted from this tokenizer", but this is only called from
3163                 # the various "raw" states, so it's hopefully ok to assume that
3164                 # open_els[0].name will work instead TODO: verify this after the script
3165                 # data states are implemented
3166                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
3167                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
3168
3169         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
3170         tok_state_rcdata_end_tag_name = ->
3171                 c = txt.charAt(cur++)
3172                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3173                         if is_appropriate_end_tag tok_cur_tag
3174                                 tok_state = tok_state_before_attribute_name
3175                                 return
3176                         # else fall through to "Anything else"
3177                 if c is '/'
3178                         if is_appropriate_end_tag tok_cur_tag
3179                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
3180                                 return
3181                         # else fall through to "Anything else"
3182                 if c is '>'
3183                         if is_appropriate_end_tag tok_cur_tag
3184                                 tok_state = tok_state_data
3185                                 return tok_cur_tag
3186                         # else fall through to "Anything else"
3187                 if is_uc_alpha(c)
3188                         tok_cur_tag.name += c.toLowerCase()
3189                         temporary_buffer += c
3190                         return null
3191                 if is_lc_alpha(c)
3192                         tok_cur_tag.name += c
3193                         temporary_buffer += c
3194                         return null
3195                 # Anything else
3196                 tok_state = tok_state_rcdata
3197                 cur -= 1 # reconsume the input character
3198                 return new_character_token '</' + temporary_buffer # fixfull separate these
3199
3200         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
3201         tok_state_rawtext_less_than_sign = ->
3202                 c = txt.charAt(cur++)
3203                 if c is '/'
3204                         temporary_buffer = ''
3205                         tok_state = tok_state_rawtext_end_tag_open
3206                         return null
3207                 # Anything else
3208                 tok_state = tok_state_rawtext
3209                 cur -= 1 # reconsume the input character
3210                 return new_character_token '<'
3211
3212         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
3213         tok_state_rawtext_end_tag_open = ->
3214                 c = txt.charAt(cur++)
3215                 if is_uc_alpha(c)
3216                         tok_cur_tag = new_end_tag c.toLowerCase()
3217                         temporary_buffer += c
3218                         tok_state = tok_state_rawtext_end_tag_name
3219                         return null
3220                 if is_lc_alpha(c)
3221                         tok_cur_tag = new_end_tag c
3222                         temporary_buffer += c
3223                         tok_state = tok_state_rawtext_end_tag_name
3224                         return null
3225                 # Anything else
3226                 tok_state = tok_state_rawtext
3227                 cur -= 1 # reconsume the input character
3228                 return new_character_token "</" # fixfull separate these
3229
3230         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
3231         tok_state_rawtext_end_tag_name = ->
3232                 c = txt.charAt(cur++)
3233                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3234                         if is_appropriate_end_tag tok_cur_tag
3235                                 tok_state = tok_state_before_attribute_name
3236                                 return
3237                         # else fall through to "Anything else"
3238                 if c is '/'
3239                         if is_appropriate_end_tag tok_cur_tag
3240                                 tok_state = tok_state_self_closing_start_tag
3241                                 return
3242                         # else fall through to "Anything else"
3243                 if c is '>'
3244                         if is_appropriate_end_tag tok_cur_tag
3245                                 tok_state = tok_state_data
3246                                 return tok_cur_tag
3247                         # else fall through to "Anything else"
3248                 if is_uc_alpha(c)
3249                         tok_cur_tag.name += c.toLowerCase()
3250                         temporary_buffer += c
3251                         return null
3252                 if is_lc_alpha(c)
3253                         tok_cur_tag.name += c
3254                         temporary_buffer += c
3255                         return null
3256                 # Anything else
3257                 tok_state = tok_state_rawtext
3258                 cur -= 1 # reconsume the input character
3259                 return new_character_token '</' + temporary_buffer # fixfull separate these
3260
3261         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
3262         tok_state_script_data_less_than_sign = ->
3263                 c = txt.charAt(cur++)
3264                 if c is '/'
3265                         temporary_buffer = ''
3266                         tok_state = tok_state_script_data_end_tag_open
3267                         return
3268                 if c is '!'
3269                         tok_state = tok_state_script_data_escape_start
3270                         return new_character_token '<!' # fixfull split
3271                 # Anything else
3272                 tok_state = tok_state_script_data
3273                 cur -= 1 # Reconsume
3274                 return new_character_token '<'
3275
3276         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
3277         tok_state_script_data_end_tag_open = ->
3278                 c = txt.charAt(cur++)
3279                 if is_uc_alpha(c)
3280                         tok_cur_tag = new_end_tag c.toLowerCase()
3281                         temporary_buffer += c
3282                         tok_state = tok_state_script_data_end_tag_name
3283                         return
3284                 if is_lc_alpha(c)
3285                         tok_cur_tag = new_end_tag c
3286                         temporary_buffer += c
3287                         tok_state = tok_state_script_data_end_tag_name
3288                         return
3289                 # Anything else
3290                 tok_state = tok_state_script_data
3291                 cur -= 1 # Reconsume
3292                 return new_character_token '</'
3293
3294         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
3295         tok_state_script_data_end_tag_name = ->
3296                 c = txt.charAt(cur++)
3297                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3298                         if is_appropriate_end_tag tok_cur_tag
3299                                 tok_state = tok_state_before_attribute_name
3300                                 return
3301                         # fall through
3302                 if c is '/'
3303                         if is_appropriate_end_tag tok_cur_tag
3304                                 tok_state = tok_state_self_closing_start_tag
3305                                 return
3306                         # fall through
3307                 if c is '>'
3308                         if is_appropriate_end_tag tok_cur_tag
3309                                 tok_state = tok_state_data
3310                                 return tok_cur_tag
3311                         # fall through
3312                 if is_uc_alpha(c)
3313                         tok_cur_tag.name += c.toLowerCase()
3314                         temporary_buffer += c
3315                         return
3316                 if is_lc_alpha(c)
3317                         tok_cur_tag.name += c
3318                         temporary_buffer += c
3319                         return
3320                 # Anything else
3321                 tok_state = tok_state_script_data
3322                 cur -= 1 # Reconsume
3323                 return new_character_token "</#{temporary_buffer}" # fixfull split
3324
3325         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
3326         tok_state_script_data_escape_start = ->
3327                 c = txt.charAt(cur++)
3328                 if c is '-'
3329                         tok_state = tok_state_script_data_escape_start_dash
3330                         return new_character_token '-'
3331                 # Anything else
3332                 tok_state = tok_state_script_data
3333                 cur -= 1 # Reconsume
3334                 return
3335
3336         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
3337         tok_state_script_data_escape_start_dash = ->
3338                 c = txt.charAt(cur++)
3339                 if c is '-'
3340                         tok_state = tok_state_script_data_escaped_dash_dash
3341                         return new_character_token '-'
3342                 # Anything else
3343                 tok_state = tok_state_script_data
3344                 cur -= 1 # Reconsume
3345                 return
3346
3347         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
3348         tok_state_script_data_escaped = ->
3349                 c = txt.charAt(cur++)
3350                 if c is '-'
3351                         tok_state = tok_state_script_data_escaped_dash
3352                         return new_character_token '-'
3353                 if c is '<'
3354                         tok_state = tok_state_script_data_escaped_less_than_sign
3355                         return
3356                 if c is "\u0000"
3357                         parse_error()
3358                         return new_character_token "\ufffd"
3359                 if c is '' # EOF
3360                         tok_state = tok_state_data
3361                         parse_error()
3362                         cur -= 1 # Reconsume
3363                         return
3364                 # Anything else
3365                 return new_character_token c
3366
3367         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
3368         tok_state_script_data_escaped_dash = ->
3369                 c = txt.charAt(cur++)
3370                 if c is '-'
3371                         tok_state = tok_state_script_data_escaped_dash_dash
3372                         return new_character_token '-'
3373                 if c is '<'
3374                         tok_state = tok_state_script_data_escaped_less_than_sign
3375                         return
3376                 if c is "\u0000"
3377                         parse_error()
3378                         tok_state = tok_state_script_data_escaped
3379                         return new_character_token "\ufffd"
3380                 if c is '' # EOF
3381                         tok_state = tok_state_data
3382                         parse_error()
3383                         cur -= 1 # Reconsume
3384                         return
3385                 # Anything else
3386                 tok_state = tok_state_script_data_escaped
3387                 return new_character_token c
3388
3389         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
3390         tok_state_script_data_escaped_dash_dash = ->
3391                 c = txt.charAt(cur++)
3392                 if c is '-'
3393                         return new_character_token '-'
3394                 if c is '<'
3395                         tok_state = tok_state_script_data_escaped_less_than_sign
3396                         return
3397                 if c is '>'
3398                         tok_state = tok_state_script_data
3399                         return new_character_token '>'
3400                 if c is "\u0000"
3401                         parse_error()
3402                         tok_state = tok_state_script_data_escaped
3403                         return new_character_token "\ufffd"
3404                 if c is '' # EOF
3405                         parse_error()
3406                         tok_state = tok_state_data
3407                         cur -= 1 # Reconsume
3408                         return
3409                 # Anything else
3410                 tok_state = tok_state_script_data_escaped
3411                 return new_character_token c
3412
3413         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
3414         tok_state_script_data_escaped_less_than_sign = ->
3415                 c = txt.charAt(cur++)
3416                 if c is '/'
3417                         temporary_buffer = ''
3418                         tok_state = tok_state_script_data_escaped_end_tag_open
3419                         return
3420                 if is_uc_alpha(c)
3421                         temporary_buffer = c.toLowerCase() # yes, really
3422                         tok_state = tok_state_script_data_double_escape_start
3423                         return new_character_token "<#{c}" # fixfull split
3424                 if is_lc_alpha(c)
3425                         temporary_buffer = c
3426                         tok_state = tok_state_script_data_double_escape_start
3427                         return new_character_token "<#{c}" # fixfull split
3428                 # Anything else
3429                 tok_state = tok_state_script_data_escaped
3430                 cur -= 1 # Reconsume
3431                 return new_character_token '<'
3432
3433         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3434         tok_state_script_data_escaped_end_tag_open = ->
3435                 c = txt.charAt(cur++)
3436                 if is_uc_alpha(c)
3437                         tok_cur_tag = new_end_tag c.toLowerCase()
3438                         temporary_buffer += c
3439                         tok_state = tok_state_script_data_escaped_end_tag_name
3440                         return
3441                 if is_lc_alpha(c)
3442                         tok_cur_tag = new_end_tag c
3443                         temporary_buffer += c
3444                         tok_state = tok_state_script_data_escaped_end_tag_name
3445                         return
3446                 # Anything else
3447                 tok_state = tok_state_script_data_escaped
3448                 cur -= 1 # Reconsume
3449                 return new_character_token '</' # fixfull split
3450
3451         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3452         tok_state_script_data_escaped_end_tag_name = ->
3453                 c = txt.charAt(cur++)
3454                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3455                         if is_appropriate_end_tag tok_cur_tag
3456                                 tok_state = tok_state_before_attribute_name
3457                                 return
3458                         # fall through
3459                 if c is '/'
3460                         if is_appropriate_end_tag tok_cur_tag
3461                                 tok_state = tok_state_self_closing_start_tag
3462                                 return
3463                         # fall through
3464                 if c is '>'
3465                         if is_appropriate_end_tag tok_cur_tag
3466                                 tok_state = tok_state_data
3467                                 return tok_cur_tag
3468                         # fall through
3469                 if is_uc_alpha(c)
3470                         tok_cur_tag.name += c.toLowerCase()
3471                         temporary_buffer += c.toLowerCase()
3472                         return
3473                 if is_lc_alpha(c)
3474                         tok_cur_tag.name += c
3475                         temporary_buffer += c.toLowerCase()
3476                         return
3477                 # Anything else
3478                 tok_state = tok_state_script_data_escaped
3479                 cur -= 1 # Reconsume
3480                 return new_character_token "</#{temporary_buffer}" # fixfull split
3481
3482         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3483         tok_state_script_data_double_escape_start = ->
3484                 c = txt.charAt(cur++)
3485                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3486                         if temporary_buffer is 'script'
3487                                 tok_state = tok_state_script_data_double_escaped
3488                         else
3489                                 tok_state = tok_state_script_data_escaped
3490                         return new_character_token c
3491                 if is_uc_alpha(c)
3492                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3493                         return new_character_token c
3494                 if is_lc_alpha(c)
3495                         temporary_buffer += c
3496                         return new_character_token c
3497                 # Anything else
3498                 tok_state = tok_state_script_data_escaped
3499                 cur -= 1 # Reconsume
3500                 return
3501
3502         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3503         tok_state_script_data_double_escaped = ->
3504                 c = txt.charAt(cur++)
3505                 if c is '-'
3506                         tok_state = tok_state_script_data_double_escaped_dash
3507                         return new_character_token '-'
3508                 if c is '<'
3509                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3510                         return new_character_token '<'
3511                 if c is "\u0000"
3512                         parse_error()
3513                         return new_character_token "\ufffd"
3514                 if c is '' # EOF
3515                         parse_error()
3516                         tok_state = tok_state_data
3517                         cur -= 1 # Reconsume
3518                         return
3519                 # Anything else
3520                 return new_character_token c
3521
3522         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3523         tok_state_script_data_double_escaped_dash = ->
3524                 c = txt.charAt(cur++)
3525                 if c is '-'
3526                         tok_state = tok_state_script_data_double_escaped_dash_dash
3527                         return new_character_token '-'
3528                 if c is '<'
3529                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3530                         return new_character_token '<'
3531                 if c is "\u0000"
3532                         parse_error()
3533                         tok_state = tok_state_script_data_double_escaped
3534                         return new_character_token "\ufffd"
3535                 if c is '' # EOF
3536                         parse_error()
3537                         tok_state = tok_state_data
3538                         cur -= 1 # Reconsume
3539                         return
3540                 # Anything else
3541                 tok_state = tok_state_script_data_double_escaped
3542                 return new_character_token c
3543
3544         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3545         tok_state_script_data_double_escaped_dash_dash = ->
3546                 c = txt.charAt(cur++)
3547                 if c is '-'
3548                         return new_character_token '-'
3549                 if c is '<'
3550                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3551                         return new_character_token '<'
3552                 if c is '>'
3553                         tok_state = tok_state_script_data
3554                         return new_character_token '>'
3555                 if c is "\u0000"
3556                         parse_error()
3557                         tok_state = tok_state_script_data_double_escaped
3558                         return new_character_token "\ufffd"
3559                 if c is '' # EOF
3560                         parse_error()
3561                         tok_state = tok_state_data
3562                         cur -= 1 # Reconsume
3563                         return
3564                 # Anything else
3565                 tok_state = tok_state_script_data_double_escaped
3566                 return new_character_token c
3567
3568         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3569         tok_state_script_data_double_escaped_less_than_sign = ->
3570                 c = txt.charAt(cur++)
3571                 if c is '/'
3572                         temporary_buffer = ''
3573                         tok_state = tok_state_script_data_double_escape_end
3574                         return new_character_token '/'
3575                 # Anything else
3576                 tok_state = tok_state_script_data_double_escaped
3577                 cur -= 1 # Reconsume
3578                 return
3579
3580         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3581         tok_state_script_data_double_escape_end = ->
3582                 c = txt.charAt(cur++)
3583                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3584                         if temporary_buffer is 'script'
3585                                 tok_state = tok_state_script_data_escaped
3586                         else
3587                                 tok_state = tok_state_script_data_double_escaped
3588                         return new_character_token c
3589                 if is_uc_alpha(c)
3590                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3591                         return new_character_token c
3592                 if is_lc_alpha(c)
3593                         temporary_buffer += c
3594                         return new_character_token c
3595                 # Anything else
3596                 tok_state = tok_state_script_data_double_escaped
3597                 cur -= 1 # Reconsume
3598                 return
3599
3600         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3601         tok_state_before_attribute_name = ->
3602                 attr_name = null
3603                 switch c = txt.charAt(cur++)
3604                         when "\t", "\n", "\u000c", ' '
3605                                 return null
3606                         when '/'
3607                                 tok_state = tok_state_self_closing_start_tag
3608                                 return null
3609                         when '>'
3610                                 tok_state = tok_state_data
3611                                 tmp = tok_cur_tag
3612                                 tok_cur_tag = null
3613                                 return tmp
3614                         when "\u0000"
3615                                 parse_error()
3616                                 attr_name = "\ufffd"
3617                         when '"', "'", '<', '='
3618                                 parse_error()
3619                                 attr_name = c
3620                         when '' # EOF
3621                                 parse_error()
3622                                 tok_state = tok_state_data
3623                         else
3624                                 if is_uc_alpha(c)
3625                                         attr_name = c.toLowerCase()
3626                                 else
3627                                         attr_name = c
3628                 if attr_name?
3629                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3630                         tok_state = tok_state_attribute_name
3631                 return null
3632
3633         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3634         tok_state_attribute_name = ->
3635                 switch c = txt.charAt(cur++)
3636                         when "\t", "\n", "\u000c", ' '
3637                                 tok_state = tok_state_after_attribute_name
3638                         when '/'
3639                                 tok_state = tok_state_self_closing_start_tag
3640                         when '='
3641                                 tok_state = tok_state_before_attribute_value
3642                         when '>'
3643                                 tok_state = tok_state_data
3644                                 tmp = tok_cur_tag
3645                                 tok_cur_tag = null
3646                                 return tmp
3647                         when "\u0000"
3648                                 parse_error()
3649                                 tok_cur_tag.attrs_a[0][0] += "\ufffd"
3650                         when '"', "'", '<'
3651                                 parse_error()
3652                                 tok_cur_tag.attrs_a[0][0] += c
3653                         when '' # EOF
3654                                 parse_error()
3655                                 tok_state = tok_state_data
3656                         else
3657                                 if is_uc_alpha(c)
3658                                         tok_cur_tag.attrs_a[0][0] += c.toLowerCase()
3659                                 else
3660                                         tok_cur_tag.attrs_a[0][0] += c
3661                 return null
3662
3663         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3664         tok_state_after_attribute_name = ->
3665                 c = txt.charAt(cur++)
3666                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3667                         return
3668                 if c is '/'
3669                         tok_state = tok_state_self_closing_start_tag
3670                         return
3671                 if c is '='
3672                         tok_state = tok_state_before_attribute_value
3673                         return
3674                 if c is '>'
3675                         tok_state = tok_state_data
3676                         return
3677                 if is_uc_alpha(c)
3678                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3679                         tok_state = tok_state_attribute_name
3680                         return
3681                 if c is "\u0000"
3682                         parse_error()
3683                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3684                         tok_state = tok_state_attribute_name
3685                         return
3686                 if c is '' # EOF
3687                         parse_error()
3688                         tok_state = tok_state_data
3689                         cur -= 1 # reconsume
3690                         return
3691                 if c is '"' or c is "'" or c is '<'
3692                         parse_error()
3693                         # fall through to Anything else
3694                 # Anything else
3695                 tok_cur_tag.attrs_a.unshift [c, '']
3696                 tok_state = tok_state_attribute_name
3697
3698         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3699         tok_state_before_attribute_value = ->
3700                 switch c = txt.charAt(cur++)
3701                         when "\t", "\n", "\u000c", ' '
3702                                 return null
3703                         when '"'
3704                                 tok_state = tok_state_attribute_value_double_quoted
3705                         when '&'
3706                                 tok_state = tok_state_attribute_value_unquoted
3707                                 cur -= 1
3708                         when "'"
3709                                 tok_state = tok_state_attribute_value_single_quoted
3710                         when "\u0000"
3711                                 # Parse error
3712                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3713                                 tok_state = tok_state_attribute_value_unquoted
3714                         when '>'
3715                                 # Parse error
3716                                 tok_state = tok_state_data
3717                                 tmp = tok_cur_tag
3718                                 tok_cur_tag = null
3719                                 return tmp
3720                         when '' # EOF
3721                                 parse_error()
3722                                 tok_state = tok_state_data
3723                         else
3724                                 tok_cur_tag.attrs_a[0][1] += c
3725                                 tok_state = tok_state_attribute_value_unquoted
3726                 return null
3727
3728         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3729         tok_state_attribute_value_double_quoted = ->
3730                 switch c = txt.charAt(cur++)
3731                         when '"'
3732                                 tok_state = tok_state_after_attribute_value_quoted
3733                         when '&'
3734                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3735                         when "\u0000"
3736                                 # Parse error
3737                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3738                         when '' # EOF
3739                                 parse_error()
3740                                 tok_state = tok_state_data
3741                         else
3742                                 tok_cur_tag.attrs_a[0][1] += c
3743                 return null
3744
3745         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3746         tok_state_attribute_value_single_quoted = ->
3747                 switch c = txt.charAt(cur++)
3748                         when "'"
3749                                 tok_state = tok_state_after_attribute_value_quoted
3750                         when '&'
3751                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3752                         when "\u0000"
3753                                 # Parse error
3754                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3755                         when '' # EOF
3756                                 parse_error()
3757                                 tok_state = tok_state_data
3758                         else
3759                                 tok_cur_tag.attrs_a[0][1] += c
3760                 return null
3761
3762         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3763         tok_state_attribute_value_unquoted = ->
3764                 switch c = txt.charAt(cur++)
3765                         when "\t", "\n", "\u000c", ' '
3766                                 tok_state = tok_state_before_attribute_name
3767                         when '&'
3768                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3769                         when '>'
3770                                 tok_state = tok_state_data
3771                                 tmp = tok_cur_tag
3772                                 tok_cur_tag = null
3773                                 return tmp
3774                         when "\u0000"
3775                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3776                         when '' # EOF
3777                                 parse_error()
3778                                 tok_state = tok_state_data
3779                         else
3780                                 # Parse Error if ', <, = or ` (backtick)
3781                                 tok_cur_tag.attrs_a[0][1] += c
3782                 return null
3783
3784         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3785         tok_state_after_attribute_value_quoted = ->
3786                 switch c = txt.charAt(cur++)
3787                         when "\t", "\n", "\u000c", ' '
3788                                 tok_state = tok_state_before_attribute_name
3789                         when '/'
3790                                 tok_state = tok_state_self_closing_start_tag
3791                         when '>'
3792                                 tok_state = tok_state_data
3793                                 tmp = tok_cur_tag
3794                                 tok_cur_tag = null
3795                                 return tmp
3796                         when '' # EOF
3797                                 parse_error()
3798                                 tok_state = tok_state_data
3799                         else
3800                                 # Parse Error
3801                                 tok_state = tok_state_before_attribute_name
3802                                 cur -= 1 # we didn't handle that char
3803                 return null
3804
3805         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3806         tok_state_self_closing_start_tag = ->
3807                 c = txt.charAt(cur++)
3808                 if c is '>'
3809                         tok_cur_tag.flag 'self-closing', true
3810                         tok_state = tok_state_data
3811                         return tok_cur_tag
3812                 if c is ''
3813                         parse_error()
3814                         tok_state = tok_state_data
3815                         cur -= 1 # Reconsume
3816                         return
3817                 # Anything else
3818                 parse_error()
3819                 tok_state = tok_state_before_attribute_name
3820                 cur -= 1 # Reconsume
3821                 return
3822
3823         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3824         # WARNING: put a comment token in tok_cur_tag before setting this state
3825         tok_state_bogus_comment = ->
3826                 next_gt = txt.indexOf '>', cur
3827                 if next_gt is -1
3828                         val = txt.substr cur
3829                         cur = txt.length
3830                 else
3831                         val = txt.substr cur, (next_gt - cur)
3832                         cur = next_gt + 1
3833                 val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
3834                 tok_cur_tag.text += val
3835                 tok_state = tok_state_data
3836                 return tok_cur_tag
3837
3838         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3839         tok_state_markup_declaration_open = ->
3840                 if txt.substr(cur, 2) is '--'
3841                         cur += 2
3842                         tok_cur_tag = new_comment_token ''
3843                         tok_state = tok_state_comment_start
3844                         return
3845                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3846                         cur += 7
3847                         tok_state = tok_state_doctype
3848                         return
3849                 acn = adjusted_current_node()
3850                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3851                         cur += 7
3852                         tok_state = tok_state_cdata_section
3853                         return
3854                 # Otherwise
3855                 parse_error()
3856                 tok_cur_tag = new_comment_token ''
3857                 tok_state = tok_state_bogus_comment
3858                 return
3859
3860         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3861         tok_state_comment_start = ->
3862                 switch c = txt.charAt(cur++)
3863                         when '-'
3864                                 tok_state = tok_state_comment_start_dash
3865                         when "\u0000"
3866                                 parse_error()
3867                                 tok_state = tok_state_comment
3868                                 return new_character_token "\ufffd"
3869                         when '>'
3870                                 parse_error()
3871                                 tok_state = tok_state_data
3872                                 return tok_cur_tag
3873                         when '' # EOF
3874                                 parse_error()
3875                                 tok_state = tok_state_data
3876                                 cur -= 1 # Reconsume
3877                                 return tok_cur_tag
3878                         else
3879                                 tok_cur_tag.text += c
3880                                 tok_state = tok_state_comment
3881                 return null
3882
3883         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3884         tok_state_comment_start_dash = ->
3885                 switch c = txt.charAt(cur++)
3886                         when '-'
3887                                 tok_state = tok_state_comment_end
3888                         when "\u0000"
3889                                 parse_error()
3890                                 tok_cur_tag.text += "-\ufffd"
3891                                 tok_state = tok_state_comment
3892                         when '>'
3893                                 parse_error()
3894                                 tok_state = tok_state_data
3895                                 return tok_cur_tag
3896                         when '' # EOF
3897                                 parse_error()
3898                                 tok_state = tok_state_data
3899                                 cur -= 1 # Reconsume
3900                                 return tok_cur_tag
3901                         else
3902                                 tok_cur_tag.text += "-#{c}"
3903                                 tok_state = tok_state_comment
3904                 return null
3905
3906         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3907         tok_state_comment = ->
3908                 switch c = txt.charAt(cur++)
3909                         when '-'
3910                                 tok_state = tok_state_comment_end_dash
3911                         when "\u0000"
3912                                 parse_error()
3913                                 tok_cur_tag.text += "\ufffd"
3914                         when '' # EOF
3915                                 parse_error()
3916                                 tok_state = tok_state_data
3917                                 cur -= 1 # Reconsume
3918                                 return tok_cur_tag
3919                         else
3920                                 tok_cur_tag.text += c
3921                 return null
3922
3923         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3924         tok_state_comment_end_dash = ->
3925                 switch c = txt.charAt(cur++)
3926                         when '-'
3927                                 tok_state = tok_state_comment_end
3928                         when "\u0000"
3929                                 parse_error()
3930                                 tok_cur_tag.text += "-\ufffd"
3931                                 tok_state = tok_state_comment
3932                         when '' # EOF
3933                                 parse_error()
3934                                 tok_state = tok_state_data
3935                                 cur -= 1 # Reconsume
3936                                 return tok_cur_tag
3937                         else
3938                                 tok_cur_tag.text += "-#{c}"
3939                                 tok_state = tok_state_comment
3940                 return null
3941
3942         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3943         tok_state_comment_end = ->
3944                 switch c = txt.charAt(cur++)
3945                         when '>'
3946                                 tok_state = tok_state_data
3947                                 return tok_cur_tag
3948                         when "\u0000"
3949                                 parse_error()
3950                                 tok_cur_tag.text += "--\ufffd"
3951                                 tok_state = tok_state_comment
3952                         when '!'
3953                                 parse_error()
3954                                 tok_state = tok_state_comment_end_bang
3955                         when '-'
3956                                 parse_error()
3957                                 tok_cur_tag.text += '-'
3958                         when '' # EOF
3959                                 parse_error()
3960                                 tok_state = tok_state_data
3961                                 cur -= 1 # Reconsume
3962                                 return tok_cur_tag
3963                         else
3964                                 parse_error()
3965                                 tok_cur_tag.text += "--#{c}"
3966                                 tok_state = tok_state_comment
3967                 return null
3968
3969         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3970         tok_state_comment_end_bang = ->
3971                 switch c = txt.charAt(cur++)
3972                         when '-'
3973                                 tok_cur_tag.text += "--!#{c}"
3974                                 tok_state = tok_state_comment_end_dash
3975                         when '>'
3976                                 tok_state = tok_state_data
3977                                 return tok_cur_tag
3978                         when "\u0000"
3979                                 parse_error()
3980                                 tok_cur_tag.text += "--!\ufffd"
3981                                 tok_state = tok_state_comment
3982                         when '' # EOF
3983                                 parse_error()
3984                                 tok_state = tok_state_data
3985                                 cur -= 1 # Reconsume
3986                                 return tok_cur_tag
3987                         else
3988                                 tok_cur_tag.text += "--!#{c}"
3989                                 tok_state = tok_state_comment
3990                 return null
3991
3992         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3993         tok_state_doctype = ->
3994                 switch c = txt.charAt(cur++)
3995                         when "\t", "\u000a", "\u000c", ' '
3996                                 tok_state = tok_state_before_doctype_name
3997                         when '' # EOF
3998                                 parse_error()
3999                                 tok_state = tok_state_data
4000                                 el = new_doctype_token ''
4001                                 el.flag 'force-quirks', true
4002                                 cur -= 1 # Reconsume
4003                                 return el
4004                         else
4005                                 parse_error()
4006                                 tok_state = tok_state_before_doctype_name
4007                                 cur -= 1 # Reconsume
4008                 return null
4009
4010         # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
4011         tok_state_before_doctype_name = ->
4012                 c = txt.charAt(cur++)
4013                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4014                         return
4015                 if is_uc_alpha(c)
4016                         tok_cur_tag = new_doctype_token c.toLowerCase()
4017                         tok_state = tok_state_doctype_name
4018                         return
4019                 if c is "\u0000"
4020                         parse_error()
4021                         tok_cur_tag = new_doctype_token "\ufffd"
4022                         tok_state = tok_state_doctype_name
4023                         return
4024                 if c is '>'
4025                         parse_error()
4026                         el = new_doctype_token ''
4027                         el.flag 'force-quirks', true
4028                         tok_state = tok_state_data
4029                         return el
4030                 if c is '' # EOF
4031                         parse_error()
4032                         tok_state = tok_state_data
4033                         el = new_doctype_token ''
4034                         el.flag 'force-quirks', true
4035                         cur -= 1 # Reconsume
4036                         return el
4037                 # Anything else
4038                 tok_cur_tag = new_doctype_token c
4039                 tok_state = tok_state_doctype_name
4040                 return null
4041
4042         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
4043         tok_state_doctype_name = ->
4044                 c = txt.charAt(cur++)
4045                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4046                         tok_state = tok_state_after_doctype_name
4047                         return
4048                 if c is '>'
4049                         tok_state = tok_state_data
4050                         return tok_cur_tag
4051                 if is_uc_alpha(c)
4052                         tok_cur_tag.name += c.toLowerCase()
4053                         return
4054                 if c is "\u0000"
4055                         parse_error()
4056                         tok_cur_tag.name += "\ufffd"
4057                         return
4058                 if c is '' # EOF
4059                         parse_error()
4060                         tok_state = tok_state_data
4061                         tok_cur_tag.flag 'force-quirks', true
4062                         cur -= 1 # Reconsume
4063                         return tok_cur_tag
4064                 # Anything else
4065                 tok_cur_tag.name += c
4066                 return null
4067
4068         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
4069         tok_state_after_doctype_name = ->
4070                 c = txt.charAt(cur++)
4071                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4072                         return
4073                 if c is '>'
4074                         tok_state = tok_state_data
4075                         return tok_cur_tag
4076                 if c is '' # EOF
4077                         parse_error()
4078                         tok_state = tok_state_data
4079                         tok_cur_tag.flag 'force-quirks', true
4080                         cur -= 1 # Reconsume
4081                         return tok_cur_tag
4082                 # Anything else
4083                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
4084                         cur += 5
4085                         tok_state = tok_state_after_doctype_public_keyword
4086                         return
4087                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
4088                         cur += 5
4089                         tok_state = tok_state_after_doctype_system_keyword
4090                         return
4091                 parse_error()
4092                 tok_cur_tag.flag 'force-quirks', true
4093                 tok_state = tok_state_bogus_doctype
4094                 return null
4095
4096         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
4097         tok_state_after_doctype_public_keyword = ->
4098                 c = txt.charAt(cur++)
4099                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4100                         tok_state = tok_state_before_doctype_public_identifier
4101                         return
4102                 if c is '"'
4103                         parse_error()
4104                         tok_cur_tag.public_identifier = ''
4105                         tok_state = tok_state_doctype_public_identifier_double_quoted
4106                         return
4107                 if c is "'"
4108                         parse_error()
4109                         tok_cur_tag.public_identifier = ''
4110                         tok_state = tok_state_doctype_public_identifier_single_quoted
4111                         return
4112                 if c is '>'
4113                         parse_error()
4114                         tok_cur_tag.flag 'force-quirks', true
4115                         tok_state = tok_state_data
4116                         return tok_cur_tag
4117                 if c is '' # EOF
4118                         parse_error()
4119                         tok_state = tok_state_data
4120                         tok_cur_tag.flag 'force-quirks', true
4121                         cur -= 1 # Reconsume
4122                         return tok_cur_tag
4123                 # Anything else
4124                 parse_error()
4125                 tok_cur_tag.flag 'force-quirks', true
4126                 tok_state = tok_state_bogus_doctype
4127                 return null
4128
4129         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
4130         tok_state_before_doctype_public_identifier = ->
4131                 c = txt.charAt(cur++)
4132                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4133                         return
4134                 if c is '"'
4135                         parse_error()
4136                         tok_cur_tag.public_identifier = ''
4137                         tok_state = tok_state_doctype_public_identifier_double_quoted
4138                         return
4139                 if c is "'"
4140                         parse_error()
4141                         tok_cur_tag.public_identifier = ''
4142                         tok_state = tok_state_doctype_public_identifier_single_quoted
4143                         return
4144                 if c is '>'
4145                         parse_error()
4146                         tok_cur_tag.flag 'force-quirks', true
4147                         tok_state = tok_state_data
4148                         return tok_cur_tag
4149                 if c is '' # EOF
4150                         parse_error()
4151                         tok_state = tok_state_data
4152                         tok_cur_tag.flag 'force-quirks', true
4153                         cur -= 1 # Reconsume
4154                         return tok_cur_tag
4155                 # Anything else
4156                 parse_error()
4157                 tok_cur_tag.flag 'force-quirks', true
4158                 tok_state = tok_state_bogus_doctype
4159                 return null
4160
4161
4162         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
4163         tok_state_doctype_public_identifier_double_quoted = ->
4164                 c = txt.charAt(cur++)
4165                 if c is '"'
4166                         tok_state = tok_state_after_doctype_public_identifier
4167                         return
4168                 if c is "\u0000"
4169                         parse_error()
4170                         tok_cur_tag.public_identifier += "\ufffd"
4171                         return
4172                 if c is '>'
4173                         parse_error()
4174                         tok_cur_tag.flag 'force-quirks', true
4175                         tok_state = tok_state_data
4176                         return tok_cur_tag
4177                 if c is '' # EOF
4178                         parse_error()
4179                         tok_state = tok_state_data
4180                         tok_cur_tag.flag 'force-quirks', true
4181                         cur -= 1 # Reconsume
4182                         return tok_cur_tag
4183                 # Anything else
4184                 tok_cur_tag.public_identifier += c
4185                 return null
4186
4187         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
4188         tok_state_doctype_public_identifier_single_quoted = ->
4189                 c = txt.charAt(cur++)
4190                 if c is "'"
4191                         tok_state = tok_state_after_doctype_public_identifier
4192                         return
4193                 if c is "\u0000"
4194                         parse_error()
4195                         tok_cur_tag.public_identifier += "\ufffd"
4196                         return
4197                 if c is '>'
4198                         parse_error()
4199                         tok_cur_tag.flag 'force-quirks', true
4200                         tok_state = tok_state_data
4201                         return tok_cur_tag
4202                 if c is '' # EOF
4203                         parse_error()
4204                         tok_state = tok_state_data
4205                         tok_cur_tag.flag 'force-quirks', true
4206                         cur -= 1 # Reconsume
4207                         return tok_cur_tag
4208                 # Anything else
4209                 tok_cur_tag.public_identifier += c
4210                 return null
4211
4212         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
4213         tok_state_after_doctype_public_identifier = ->
4214                 c = txt.charAt(cur++)
4215                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4216                         tok_state = tok_state_between_doctype_public_and_system_identifiers
4217                         return
4218                 if c is '>'
4219                         tok_state = tok_state_data
4220                         return tok_cur_tag
4221                 if c is '"'
4222                         parse_error()
4223                         tok_cur_tag.system_identifier = ''
4224                         tok_state = tok_state_doctype_system_identifier_double_quoted
4225                         return
4226                 if c is "'"
4227                         parse_error()
4228                         tok_cur_tag.system_identifier = ''
4229                         tok_state = tok_state_doctype_system_identifier_single_quoted
4230                         return
4231                 if c is '' # EOF
4232                         parse_error()
4233                         tok_state = tok_state_data
4234                         tok_cur_tag.flag 'force-quirks', true
4235                         cur -= 1 # Reconsume
4236                         return tok_cur_tag
4237                 # Anything else
4238                 parse_error()
4239                 tok_cur_tag.flag 'force-quirks', true
4240                 tok_state = tok_state_bogus_doctype
4241                 return null
4242
4243         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
4244         tok_state_between_doctype_public_and_system_identifiers = ->
4245                 c = txt.charAt(cur++)
4246                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4247                         return
4248                 if c is '>'
4249                         tok_state = tok_state_data
4250                         return tok_cur_tag
4251                 if c is '"'
4252                         parse_error()
4253                         tok_cur_tag.system_identifier = ''
4254                         tok_state = tok_state_doctype_system_identifier_double_quoted
4255                         return
4256                 if c is "'"
4257                         parse_error()
4258                         tok_cur_tag.system_identifier = ''
4259                         tok_state = tok_state_doctype_system_identifier_single_quoted
4260                         return
4261                 if c is '' # EOF
4262                         parse_error()
4263                         tok_state = tok_state_data
4264                         tok_cur_tag.flag 'force-quirks', true
4265                         cur -= 1 # Reconsume
4266                         return tok_cur_tag
4267                 # Anything else
4268                 parse_error()
4269                 tok_cur_tag.flag 'force-quirks', true
4270                 tok_state = tok_state_bogus_doctype
4271                 return null
4272
4273         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
4274         tok_state_after_doctype_system_keyword = ->
4275                 c = txt.charAt(cur++)
4276                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4277                         tok_state = tok_state_before_doctype_system_identifier
4278                         return
4279                 if c is '"'
4280                         parse_error()
4281                         tok_cur_tag.system_identifier = ''
4282                         tok_state = tok_state_doctype_system_identifier_double_quoted
4283                         return
4284                 if c is "'"
4285                         parse_error()
4286                         tok_cur_tag.system_identifier = ''
4287                         tok_state = tok_state_doctype_system_identifier_single_quoted
4288                         return
4289                 if c is '>'
4290                         parse_error()
4291                         tok_cur_tag.flag 'force-quirks', true
4292                         tok_state = tok_state_data
4293                         return tok_cur_tag
4294                 if c is '' # EOF
4295                         parse_error()
4296                         tok_state = tok_state_data
4297                         tok_cur_tag.flag 'force-quirks', true
4298                         cur -= 1 # Reconsume
4299                         return tok_cur_tag
4300                 # Anything else
4301                 parse_error()
4302                 tok_cur_tag.flag 'force-quirks', true
4303                 tok_state = tok_state_bogus_doctype
4304                 return null
4305
4306         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
4307         tok_state_before_doctype_system_identifier = ->
4308                 c = txt.charAt(cur++)
4309                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4310                         return
4311                 if c is '"'
4312                         tok_cur_tag.system_identifier = ''
4313                         tok_state = tok_state_doctype_system_identifier_double_quoted
4314                         return
4315                 if c is "'"
4316                         tok_cur_tag.system_identifier = ''
4317                         tok_state = tok_state_doctype_system_identifier_single_quoted
4318                         return
4319                 if c is '>'
4320                         parse_error()
4321                         tok_cur_tag.flag 'force-quirks', true
4322                         tok_state = tok_state_data
4323                         return tok_cur_tag
4324                 if c is '' # EOF
4325                         parse_error()
4326                         tok_state = tok_state_data
4327                         tok_cur_tag.flag 'force-quirks', true
4328                         cur -= 1 # Reconsume
4329                         return tok_cur_tag
4330                 # Anything else
4331                 parse_error()
4332                 tok_cur_tag.flag 'force-quirks', true
4333                 tok_state = tok_state_bogus_doctype
4334                 return null
4335
4336         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
4337         tok_state_doctype_system_identifier_double_quoted = ->
4338                 c = txt.charAt(cur++)
4339                 if c is '"'
4340                         tok_state = tok_state_after_doctype_system_identifier
4341                         return
4342                 if c is "\u0000"
4343                         parse_error()
4344                         tok_cur_tag.system_identifier += "\ufffd"
4345                         return
4346                 if c is '>'
4347                         parse_error()
4348                         tok_cur_tag.flag 'force-quirks', true
4349                         tok_state = tok_state_data
4350                         return tok_cur_tag
4351                 if c is '' # EOF
4352                         parse_error()
4353                         tok_state = tok_state_data
4354                         tok_cur_tag.flag 'force-quirks', true
4355                         cur -= 1 # Reconsume
4356                         return tok_cur_tag
4357                 # Anything else
4358                 tok_cur_tag.system_identifier += c
4359                 return null
4360
4361         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
4362         tok_state_doctype_system_identifier_single_quoted = ->
4363                 c = txt.charAt(cur++)
4364                 if c is "'"
4365                         tok_state = tok_state_after_doctype_system_identifier
4366                         return
4367                 if c is "\u0000"
4368                         parse_error()
4369                         tok_cur_tag.system_identifier += "\ufffd"
4370                         return
4371                 if c is '>'
4372                         parse_error()
4373                         tok_cur_tag.flag 'force-quirks', true
4374                         tok_state = tok_state_data
4375                         return tok_cur_tag
4376                 if c is '' # EOF
4377                         parse_error()
4378                         tok_state = tok_state_data
4379                         tok_cur_tag.flag 'force-quirks', true
4380                         cur -= 1 # Reconsume
4381                         return tok_cur_tag
4382                 # Anything else
4383                 tok_cur_tag.system_identifier += c
4384                 return null
4385
4386         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
4387         tok_state_after_doctype_system_identifier = ->
4388                 c = txt.charAt(cur++)
4389                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
4390                         return
4391                 if c is '>'
4392                         tok_state = tok_state_data
4393                         return tok_cur_tag
4394                 if c is '' # EOF
4395                         parse_error()
4396                         tok_state = tok_state_data
4397                         tok_cur_tag.flag 'force-quirks', true
4398                         cur -= 1 # Reconsume
4399                         return tok_cur_tag
4400                 # Anything else
4401                 parse_error()
4402                 # do _not_ tok_cur_tag.flag 'force-quirks', true
4403                 tok_state = tok_state_bogus_doctype
4404                 return null
4405
4406         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
4407         tok_state_bogus_doctype = ->
4408                 c = txt.charAt(cur++)
4409                 if c is '>'
4410                         tok_state = tok_state_data
4411                         return tok_cur_tag
4412                 if c is '' # EOF
4413                         tok_state = tok_state_data
4414                         cur -= 1 # Reconsume
4415                         return tok_cur_tag
4416                 # Anything else
4417                 return null
4418
4419         # 8.2.4.68 http://www.w3.org/TR/html5/syntax.html#cdata-section-state
4420         tok_state_cdata_section = ->
4421                 tok_state = tok_state_data
4422                 next_gt = txt.indexOf ']]>', cur
4423                 if next_gt is -1
4424                         val = txt.substr cur
4425                         cur = txt.length
4426                 else
4427                         val = txt.substr cur, (next_gt - cur)
4428                         cur = next_gt + 3
4429                 return new_character_token val # fixfull split
4430
4431         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
4432         # Don't set this as a state, just call it
4433         # returns a string (NOT a text node)
4434         parse_character_reference = (allowed_char = null, in_attr = false) ->
4435                 if cur >= txt.length
4436                         return '&'
4437                 switch c = txt.charAt(cur)
4438                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
4439                                 # explicitly not a parse error
4440                                 return '&'
4441                         when ';'
4442                                 # there has to be "one or more" alnums between & and ; to be a parse error
4443                                 return '&'
4444                         when '#'
4445                                 if cur + 1 >= txt.length
4446                                         return '&'
4447                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4448                                         base = 16
4449                                         charset = hex_chars
4450                                         start = cur + 2
4451                                 else
4452                                         charset = digits
4453                                         start = cur + 1
4454                                         base = 10
4455                                 i = 0
4456                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4457                                         i += 1
4458                                 if i is 0
4459                                         return '&'
4460                                 cur = start + i
4461                                 if txt.charAt(start + i) is ';'
4462                                         cur += 1
4463                                 else
4464                                         parse_error()
4465                                 code_point = txt.substr(start, i)
4466                                 while code_point.charAt(0) is '0' and code_point.length > 1
4467                                         code_point = code_point.substr 1
4468                                 code_point = parseInt(code_point, base)
4469                                 if unicode_fixes[code_point]?
4470                                         parse_error()
4471                                         return unicode_fixes[code_point]
4472                                 else
4473                                         if (code_point >= 0xd800 and code_point <= 0xdfff) or code_point > 0x10ffff
4474                                                 parse_error()
4475                                                 return "\ufffd"
4476                                         else
4477                                                 if (code_point >= 0x0001 and code_point <= 0x0008) or (code_point >= 0x000D and code_point <= 0x001F) or (code_point >= 0x007F and code_point <= 0x009F) or (code_point >= 0xFDD0 and code_point <= 0xFDEF) or code_point is 0x000B or code_point is 0xFFFE or code_point is 0xFFFF or code_point is 0x1FFFE or code_point is 0x1FFFF or code_point is 0x2FFFE or code_point is 0x2FFFF or code_point is 0x3FFFE or code_point is 0x3FFFF or code_point is 0x4FFFE or code_point is 0x4FFFF or code_point is 0x5FFFE or code_point is 0x5FFFF or code_point is 0x6FFFE or code_point is 0x6FFFF or code_point is 0x7FFFE or code_point is 0x7FFFF or code_point is 0x8FFFE or code_point is 0x8FFFF or code_point is 0x9FFFE or code_point is 0x9FFFF or code_point is 0xAFFFE or code_point is 0xAFFFF or code_point is 0xBFFFE or code_point is 0xBFFFF or code_point is 0xCFFFE or code_point is 0xCFFFF or code_point is 0xDFFFE or code_point is 0xDFFFF or code_point is 0xEFFFE or code_point is 0xEFFFF or code_point is 0xFFFFE or code_point is 0xFFFFF or code_point is 0x10FFFE or code_point is 0x10FFFF
4478                                                         parse_error()
4479                                                 return from_code_point code_point
4480                                 return
4481                         else
4482                                 for i in [0...31]
4483                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4484                                                 break
4485                                 if i is 0
4486                                         # exit early, because parse_error() below needs at least one alnum
4487                                         return '&'
4488                                 if txt.charAt(cur + i) is ';'
4489                                         i += 1 # include ';' terminator in value
4490                                         decoded = decode_named_char_ref txt.substr(cur, i)
4491                                         if decoded?
4492                                                 cur += i
4493                                                 return decoded
4494                                         parse_error()
4495                                         return '&'
4496                                 else
4497                                         # no ';' terminator (only legacy char refs)
4498                                         max = i
4499                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4500                                                 c = legacy_char_refs[txt.substr(cur, i)]
4501                                                 if c?
4502                                                         if in_attr
4503                                                                 if txt.charAt(cur + i) is '='
4504                                                                         # "because some legacy user agents will
4505                                                                         # misinterpret the markup in those cases"
4506                                                                         parse_error()
4507                                                                         return '&'
4508                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4509                                                                         # this makes attributes forgiving about url args
4510                                                                         return '&'
4511                                                         # ok, and besides the weird exceptions for attributes...
4512                                                         # return the matching char
4513                                                         cur += i # consume entity chars
4514                                                         parse_error() # because no terminating ";"
4515                                                         return c
4516                                         parse_error()
4517                                         return '&'
4518                 return # never reached
4519
4520         # tree constructor initialization
4521         # see comments on TYPE_TAG/etc for the structure of this data
4522         txt = args.html
4523         cur = 0
4524         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4525         open_els = []
4526         afe = [] # active formatting elements
4527         template_ins_modes = []
4528         ins_mode = ins_mode_initial
4529         original_ins_mode = ins_mode # TODO check spec
4530         flag_scripting = args.scripting ? true # TODO might need an extra flag to get <noscript> to parse correctly
4531         flag_frameset_ok = true
4532         flag_parsing = true
4533         flag_foster_parenting = false
4534         form_element_pointer = null
4535         temporary_buffer = null
4536         pending_table_character_tokens = []
4537         head_element_pointer = null
4538         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4539         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4540         prev_node_id = 0 # just for debugging
4541
4542         # tokenizer initialization
4543         tok_state = tok_state_data
4544
4545         # text pre-processing
4546         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
4547         txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
4548         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
4549         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
4550
4551         if args.name is "tests18.dat #17"
4552                 console.log "hi"
4553         # proccess input
4554         # http://www.w3.org/TR/html5/syntax.html#tree-construction
4555         while flag_parsing
4556                 t = tok_state()
4557                 if t?
4558                         process_token t
4559                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4560         return doc.children
4561
serialize_els = (els, shallow, show_ids) ->
        # Serialize every element of ``els`` (each via its own ``serialize``
        # method, passing ``shallow`` and ``show_ids`` through) and join the
        # results with commas. An empty list gives the empty string.
        # Interpolation keeps the plain string concatenation semantics.
        pieces = ("#{node.serialize shallow, show_ids}" for node in els)
        return pieces.join ','
4570
# public interface of this module: the parser entry point, the debug-log
# helpers, and the node type / namespace constants
public_api =
        parse_html: parse_html
        debug_log_reset: debug_log_reset
        debug_log_each: debug_log_each
        TYPE_TAG: TYPE_TAG
        TYPE_TEXT: TYPE_TEXT
        TYPE_COMMENT: TYPE_COMMENT
        TYPE_DOCTYPE: TYPE_DOCTYPE
        NS_HTML: NS_HTML
        NS_MATHML: NS_MATHML
        NS_SVG: NS_SVG
for own api_name, api_value of public_api
        module.exports[api_name] = api_value