JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
more ins_mode_in_table
[peach-html5-editor.git] / parse-html.coffee
1 # HTML parser meant to run in a browser, in support of WYSIWYG editor
2 # Copyright 2015 Jason Woofenden
3 #
4 # This program is free software: you can redistribute it and/or modify it under
5 # the terms of the GNU Affero General Public License as published by the Free
6 # Software Foundation, either version 3 of the License, or (at your option) any
7 # later version.
8 #
9 # This program is distributed in the hope that it will be useful, but WITHOUT
10 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 # FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
12 # details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17
18 # This file implements a parser for html snippets, meant to be used by a
19 # WYSIWYG editor. Hence it does not attempt to parse doctypes, <html>, <head>
20 # or <body> tags, nor does it produce the top level "document" node in the dom
21 # tree, nor nodes for html, head or body. Comments containing "fixfull"
22 # indicate places where additional code is needed for full HTML document
23 # parsing.
24 #
25 # Instead, the data structure produced by this parser is an array of Nodes.
26
27
28 # stacks/lists
29 #
30 # the spec uses a many different words do indicate which ends of lists/stacks
31 # they are talking about (and relative movement within the lists/stacks). This
32 # section splains. I'm implementing "lists" (afe and open_els) the same way
33 # (both as stacks)
34 #
35 # stacks grow downward (current element is index=0)
36 #
37 # example: open_els = [a, b, c, d, e, f, g]
38 #
39 # "grows downwards" means it's visualized like this: (index: el, names)
40 #
41 #   6: g "start of the list", "topmost", "first"
42 #   5: f
43 #   4: e "previous" (to d), "above", "before"
44 #   3: d   (previous/next are relative to this element)
45 #   2: c "next", "after", "lower", "below"
46 #   1: b
47 #   0: a "end of the list", "current node", "bottommost", "last"
48
49
50 # browser
51 # note: to get this to run outside a browser, you'll have to write a native
52 # implementation of decode_named_char_ref()
53 unless module?.exports?
54         window.wheic = {}
55         module = exports: window.wheic
56
57 # Each node is an obect of the Node class. Here are the Node types:
58 TYPE_TAG = 0 # name, {attributes}, [children]
59 TYPE_TEXT = 1 # "text"
60 TYPE_COMMENT = 2
61 TYPE_DOCTYPE = 3
62 # the following types are emited by the tokenizer, but shouldn't end up in the tree:
63 TYPE_START_TAG = 4 # name, [attributes ([key,value]...) in reverse order], [children]
64 TYPE_END_TAG = 5 # name
65 TYPE_EOF = 6
66 TYPE_AFE_MARKER = 7 # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
67 TYPE_AAA_BOOKMARK = 8 # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
68
69 # namespace constants
70 NS_HTML = 1
71 NS_MATHML = 2
72 NS_SVG = 3
73
74 g_debug_log = []
75 debug_log_reset = ->
76         g_debug_log = []
77 debug_log = (str) ->
78         g_debug_log.push str
79 debug_log_each = (cb) ->
80         for str in g_debug_log
81                 cb str
82
83 prev_node_id = 0
84 class Node
85         constructor: (type, args = {}) ->
86                 @type = type # one of the TYPE_* constants above
87                 @name = args.name ? '' # tag name
88                 @text = args.text ? '' # contents for text/comment nodes
89                 @attrs = args.attrs ? {}
90                 @attrs_a = args.attr_k ? [] # attrs in progress, TYPE_START_TAG only
91                 @children = args.children ? []
92                 @namespace = args.namespace ? NS_HTML
93                 @parent = args.parent ? null
94                 @token = args.token ? null
95                 if args.id?
96                         @id = "#{args.id}+"
97                 else
98                         @id = "#{++prev_node_id}"
99         acknowledge_self_closing: ->
100                 if @token?
101                         @token.flag 'did_self_close'
102                 else
103                         @flag 'did_self_close', true
104         flag: ->
105                 # fixfull
106         serialize: (shallow = false, show_ids = false) -> # for unit tests
107                 ret = ''
108                 switch @type
109                         when TYPE_TAG
110                                 ret += 'tag:'
111                                 ret += JSON.stringify @name
112                                 ret += ','
113                                 if show_ids
114                                         ret += "##{@id},"
115                                 if shallow
116                                         break
117                                 attr_keys = []
118                                 for k of @attrs
119                                         attr_keys.push k
120                                 attr_keys.sort()
121                                 ret += '{'
122                                 sep = ''
123                                 for k in attr_keys
124                                         ret += sep
125                                         sep = ','
126                                         ret += "#{JSON.stringify k}:#{JSON.stringify @attrs[k]}"
127                                 ret += '},['
128                                 sep = ''
129                                 for c in @children
130                                         ret += sep
131                                         sep = ','
132                                         ret += c.serialize shallow, show_ids
133                                 ret += ']'
134                         when TYPE_TEXT
135                                 ret += 'text:'
136                                 ret += JSON.stringify @text
137                         when TYPE_COMMENT
138                                 ret += 'comment:'
139                                 ret += JSON.stringify @text
140                         when TYPE_DOCTYPE
141                                 ret += "doctype:#{@name},#{JSON.stringify(@public_identifier ? '')},#{JSON.stringify(@system_identifier ? '')}"
142                         when TYPE_AFE_MARKER
143                                 ret += 'marker'
144                         when TYPE_AAA_BOOKMARK
145                                 ret += 'aaa_bookmark'
146                         else
147                                 ret += 'unknown:'
148                                 console.log "unknown: #{JSON.stringify @}" # backtrace is just as well
149                 return ret
150
151 # helpers: (only take args that are normally known when parser creates nodes)
152 new_open_tag = (name) ->
153         return new Node TYPE_START_TAG, name: name
154 new_end_tag = (name) ->
155         return new Node TYPE_END_TAG, name: name
156 new_element = (name) ->
157         return new Node TYPE_TAG, name: name
158 new_text_node = (txt) ->
159         return new Node TYPE_TEXT, text: txt
160 new_character_token = new_text_node
161 new_comment_token = (txt) ->
162         return new Node TYPE_COMMENT, text: txt
163 new_doctype_token = (name) ->
164         return new Node TYPE_DOCTYPE, name: name
165 new_eof_token = ->
166         return new Node TYPE_EOF
167 new_afe_marker = ->
168         return new Node TYPE_AFE_MARKER
169 new_aaa_bookmark = ->
170         return new Node TYPE_AAA_BOOKMARK
171
172 lc_alpha = "abcdefghijklmnopqrstuvwxyz"
173 uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
174 digits = "0123456789"
175 alnum = lc_alpha + uc_alpha + digits
176 hex_chars = digits + "abcdefABCDEF"
177
178 is_uc_alpha = (str) ->
179         return str.length is 1 and uc_alpha.indexOf(str) > -1
180 is_lc_alpha = (str) ->
181         return str.length is 1 and lc_alpha.indexOf(str) > -1
182
183 # some SVG elements have dashes in them
184 tag_name_chars = alnum + "-"
185
186 # http://www.w3.org/TR/html5/infrastructure.html#space-character
187 space_chars = "\u0009\u000a\u000c\u000d\u0020"
188 is_space = (txt) ->
189         return txt.length is 1 and space_chars.indexOf(txt) > -1
190 is_space_tok = (t) ->
191         return t.type is TYPE_TEXT && t.text.length is 1 and space_chars.indexOf(t.text) > -1
192
193 is_input_hidden_tok = (t) ->
194         return unless t.type is TYPE_START_TAG
195         for a of t.attrs_a
196                 if a[0] is 'type'
197                         if a[1].toLowerCase() is 'hidden'
198                                 return true
199                         return false
200         return false
201
202 # https://en.wikipedia.org/wiki/Whitespace_character#Unicode
203 whitespace_chars = "\u0009\u000a\u000b\u000c\u000d\u0020\u0085\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000"
204
205 # These are the character references that don't need a terminating semicolon
206 # min length: 2, max: 6, none are a prefix of any other.
207 legacy_char_refs = {
208         Aacute: 'Á', aacute: 'á', Acirc: 'Â', acirc: 'â', acute: '´', AElig: 'Æ',
209         aelig: 'æ', Agrave: 'À', agrave: 'à', AMP: '&', amp: '&', Aring: 'Å',
210         aring: 'å', Atilde: 'Ã', atilde: 'ã', Auml: 'Ä', auml: 'ä', brvbar: '¦',
211         Ccedil: 'Ç', ccedil: 'ç', cedil: '¸', cent: '¢', COPY: '©', copy: '©',
212         curren: '¤', deg: '°', divide: '÷', Eacute: 'É', eacute: 'é', Ecirc: 'Ê',
213         ecirc: 'ê', Egrave: 'È', egrave: 'è', ETH: 'Ð', eth: 'ð', Euml: 'Ë',
214         euml: 'ë', frac12: '½', frac14: '¼', frac34: '¾', GT: '>', gt: '>',
215         Iacute: 'Í', iacute: 'í', Icirc: 'Î', icirc: 'î', iexcl: '¡', Igrave: 'Ì',
216         igrave: 'ì', iquest: '¿', Iuml: 'Ï', iuml: 'ï', laquo: '«', LT: '<',
217         lt: '<', macr: '¯', micro: 'µ', middot: '·', nbsp: "\u00a0", not: '¬',
218         Ntilde: 'Ñ', ntilde: 'ñ', Oacute: 'Ó', oacute: 'ó', Ocirc: 'Ô', ocirc: 'ô',
219         Ograve: 'Ò', ograve: 'ò', ordf: 'ª', ordm: 'º', Oslash: 'Ø', oslash: 'ø',
220         Otilde: 'Õ', otilde: 'õ', Ouml: 'Ö', ouml: 'ö', para: '¶', plusmn: '±',
221         pound: '£', QUOT: '"', quot: '"', raquo: '»', REG: '®', reg: '®', sect: '§',
222         shy: '­', sup1: '¹', sup2: '²', sup3: '³', szlig: 'ß', THORN: 'Þ', thorn: 'þ',
223         times: '×', Uacute: 'Ú', uacute: 'ú', Ucirc: 'Û', ucirc: 'û', Ugrave: 'Ù',
224         ugrave: 'ù', uml: '¨', Uuml: 'Ü', uuml: 'ü', Yacute: 'Ý', yacute: 'ý',
225         yen: '¥', yuml: 'ÿ'
226 }
227
228 void_elements = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
229 raw_text_elements = ['script', 'style']
230 escapable_raw_text_elements = ['textarea', 'title']
231 # http://www.w3.org/TR/SVG/ 1.1 (Second Edition)
232 svg_elements = [
233         'a', 'altGlyph', 'altGlyphDef', 'altGlyphItem', 'animate', 'animateColor',
234         'animateMotion', 'animateTransform', 'circle', 'clipPath', 'color-profile',
235         'cursor', 'defs', 'desc', 'ellipse', 'feBlend', 'feColorMatrix',
236         'feComponentTransfer', 'feComposite', 'feConvolveMatrix',
237         'feDiffuseLighting', 'feDisplacementMap', 'feDistantLight', 'feFlood',
238         'feFuncA', 'feFuncB', 'feFuncG', 'feFuncR', 'feGaussianBlur', 'feImage',
239         'feMerge', 'feMergeNode', 'feMorphology', 'feOffset', 'fePointLight',
240         'feSpecularLighting', 'feSpotLight', 'feTile', 'feTurbulence', 'filter',
241         'font', 'font-face', 'font-face-format', 'font-face-name', 'font-face-src',
242         'font-face-uri', 'foreignObject', 'g', 'glyph', 'glyphRef', 'hkern',
243         'image', 'line', 'linearGradient', 'marker', 'mask', 'metadata',
244         'missing-glyph', 'mpath', 'path', 'pattern', 'polygon', 'polyline',
245         'radialGradient', 'rect', 'script', 'set', 'stop', 'style', 'svg',
246         'switch', 'symbol', 'text', 'textPath', 'title', 'tref', 'tspan', 'use',
247         'view', 'vkern'
248 ]
249
250 # http://www.w3.org/TR/MathML/ Version 3.0 2nd Edition
251 mathml_elements = [
252         'abs', 'and', 'annotation', 'annotation-xml', 'apply', 'approx', 'arccos',
253         'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec', 'arcsech',
254         'arcsin', 'arcsinh', 'arctan', 'arctanh', 'arg', 'bind', 'bvar', 'card',
255         'cartesianproduct', 'cbytes', 'ceiling', 'cerror', 'ci', 'cn', 'codomain',
256         'complexes', 'compose', 'condition', 'conjugate', 'cos', 'cosh', 'cot',
257         'coth', 'cs', 'csc', 'csch', 'csymbol', 'curl', 'declare', 'degree',
258         'determinant', 'diff', 'divergence', 'divide', 'domain',
259         'domainofapplication', 'emptyset', 'eq', 'equivalent', 'eulergamma',
260         'exists', 'exp', 'exponentiale', 'factorial', 'factorof', 'false', 'floor',
261         'fn', 'forall', 'gcd', 'geq', 'grad', 'gt', 'ident', 'image', 'imaginary',
262         'imaginaryi', 'implies', 'in', 'infinity', 'int', 'integers', 'intersect',
263         'interval', 'inverse', 'lambda', 'laplacian', 'lcm', 'leq', 'limit',
264         'list', 'ln', 'log', 'logbase', 'lowlimit', 'lt', 'maction', 'maligngroup',
265         'malignmark', 'math', 'matrix', 'matrixrow', 'max', 'mean', 'median',
266         'menclose', 'merror', 'mfenced', 'mfrac', 'mglyph', 'mi', 'mi', 'min',
267         'minus', 'mlabeledtr', 'mlongdiv', 'mmultiscripts', 'mn', 'mo', 'mode',
268         'moment', 'momentabout', 'mover', 'mpadded', 'mphantom', 'mprescripts',
269         'mroot', 'mrow', 'ms', 'mscarries', 'mscarry', 'msgroup', 'msline',
270         'mspace', 'msqrt', 'msrow', 'mstack', 'mstyle', 'msub', 'msubsup', 'msup',
271         'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'naturalnumbers',
272         'neq', 'none', 'not', 'notanumber', 'notin', 'notprsubset', 'notsubset',
273         'or', 'otherwise', 'outerproduct', 'partialdiff', 'pi', 'piece',
274         'piecewise', 'plus', 'power', 'primes', 'product', 'prsubset', 'quotient',
275         'rationals', 'real', 'reals', 'reln', 'rem', 'root', 'scalarproduct',
276         'sdev', 'sec', 'sech', 'selector', 'semantics', 'sep', 'set', 'setdiff',
277         'share', 'sin', 'sinh', 'span', 'subset', 'sum', 'tan', 'tanh', 'tendsto',
278         'times', 'transpose', 'true', 'union', 'uplimit', 'variance', 'vector',
279         'vectorproduct', 'xor'
280 ]
281 # foreign_elements = [svg_elements..., mathml_elements...]
282 #normal_elements = All other allowed HTML elements are normal elements.
283
284 special_elements = {
285         # HTML:
286         address:NS_HTML, applet:NS_HTML, area:NS_HTML, article:NS_HTML,
287         aside:NS_HTML, base:NS_HTML, basefont:NS_HTML, bgsound:NS_HTML,
288         blockquote:NS_HTML, body:NS_HTML, br:NS_HTML, button:NS_HTML,
289         caption:NS_HTML, center:NS_HTML, col:NS_HTML, colgroup:NS_HTML, dd:NS_HTML,
290         details:NS_HTML, dir:NS_HTML, div:NS_HTML, dl:NS_HTML, dt:NS_HTML,
291         embed:NS_HTML, fieldset:NS_HTML, figcaption:NS_HTML, figure:NS_HTML,
292         footer:NS_HTML, form:NS_HTML, frame:NS_HTML, frameset:NS_HTML, h1:NS_HTML,
293         h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML, head:NS_HTML,
294         header:NS_HTML, hgroup:NS_HTML, hr:NS_HTML, html:NS_HTML, iframe:NS_HTML,
295         img:NS_HTML, input:NS_HTML, isindex:NS_HTML, li:NS_HTML, link:NS_HTML,
296         listing:NS_HTML, main:NS_HTML, marquee:NS_HTML, meta:NS_HTML, nav:NS_HTML,
297         noembed:NS_HTML, noframes:NS_HTML, noscript:NS_HTML, object:NS_HTML,
298         ol:NS_HTML, p:NS_HTML, param:NS_HTML, plaintext:NS_HTML, pre:NS_HTML,
299         script:NS_HTML, section:NS_HTML, select:NS_HTML, source:NS_HTML,
300         style:NS_HTML, summary:NS_HTML, table:NS_HTML, tbody:NS_HTML, td:NS_HTML,
301         template:NS_HTML, textarea:NS_HTML, tfoot:NS_HTML, th:NS_HTML,
302         thead:NS_HTML, title:NS_HTML, tr:NS_HTML, track:NS_HTML, ul:NS_HTML,
303         wbr:NS_HTML, xmp:NS_HTML,
304
305         # MathML:
306         mi:NS_MATHML, mo:NS_MATHML, mn:NS_MATHML, ms:NS_MATHML, mtext:NS_MATHML,
307         'annotation-xml':NS_MATHML,
308
309         # SVG:
310         foreignObject:NS_SVG, desc:NS_SVG, title:NS_SVG
311 }
312
313 formatting_elements = {
314          a: true, b: true, big: true, code: true, em: true, font: true, i: true,
315          nobr: true, s: true, small: true, strike: true, strong: true, tt: true,
316          u: true
317 }
318
319 h_tags = {
320         h1:NS_HTML, h2:NS_HTML, h3:NS_HTML, h4:NS_HTML, h5:NS_HTML, h6:NS_HTML
321 }
322
323 foster_parenting_targets = {
324         table: true
325         tbody: true
326         tfoot: true
327         thead: true
328         tr: true
329 }
330
331 # all html I presume
332 end_tag_implied = {
333         dd: true
334         dt: true
335         li: true
336         option: true
337         optgroup: true
338         p: true
339         rb: true
340         rp: true
341         rt: true
342         rtc: true
343 }
344
345 el_is_special = (e) ->
346         return special_elements[e.name] is e.namespace
347
348 adp_els = { address: NS_HTML, div: NS_HTML, p: NS_HTML }
349 el_is_special_not_adp = (el) ->
350         return special_elements[el.name] is el.namespace and adp_els[el.name] isnt el.namespace
351
352 # decode_named_char_ref()
353 #
354 # The list of named character references is _huge_ so ask the browser to decode
355 # for us instead of wasting bandwidth/space on including the table here.
356 #
357 # Pass without the "&" but with the ";" examples:
358 #    for "&amp" pass "amp;"
359 #    for "&#x2032" pass "x2032;"
360 g_dncr = {
361         cache: {}
362         textarea: document.createElement('textarea')
363 }
364 # TODO test this in IE8
365 decode_named_char_ref = (txt) ->
366         txt = "&#{txt}"
367         decoded = g_dncr.cache[txt]
368         return decoded if decoded?
369         g_dncr.textarea.innerHTML = txt
370         decoded = g_dncr.textarea.value
371         return null if decoded is txt
372         return g_dncr.cache[txt] = decoded
373
374 parse_html = (txt, parse_error_cb = null) ->
375         cur = 0 # index of next char in txt to be parsed
376         # declare doc and tokenizer variables so they're in scope below
377         doc = null
378         open_els = null # stack of open elements
379         afe = null # active formatting elements
380         template_ins_modes = null
381         ins_mode = null
382         original_ins_mode = null
383         tok_state = null
384         tok_cur_tag = null # partially parsed tag
385         flag_scripting = null
386         flag_frameset_ok = null
387         flag_parsing = null
388         flag_foster_parenting = null
389         form_element_pointer = null
390         temporary_buffer = null
391         pending_table_character_tokens = null
392         head_element_pointer = null
393         flag_fragment_parsing = null
394         context_element = null
395
396         stop_parsing = ->
397                 flag_parsing = false
398
399         parse_error = ->
400                 if parse_error_cb?
401                         parse_error_cb cur
402                 else
403                         console.log "Parse error at character #{cur} of #{txt.length}"
404
405         afe_push = (new_el) ->
406                 matches = 0
407                 for el, i in afe
408                         if el.name is new_el.name and el.namespace is new_el.namespace
409                                 for k, v of el.attrs
410                                         continue unless new_el.attrs[k] is v
411                                 for k, v of new_el.attrs
412                                         continue unless el.attrs[k] is v
413                                 matches += 1
414                                 if matches is 3
415                                         afe.splice i, 1
416                                         break
417                 afe.unshift new_el
418         afe_push_marker = ->
419                 afe.unshift new_afe_marker()
420
421         # the functions below impliment the Tree Contstruction algorithm
422         # http://www.w3.org/TR/html5/syntax.html#tree-construction
423
424         # But first... the helpers
425         template_tag_is_open = ->
426                 for t in open_els
427                         if t.name is 'template' # maybe should also check: and t.namespace is 'html'
428                                 return true
429                 return false
430         is_in_scope_x = (tag_name, scope, namespace) ->
431                 for t in open_els
432                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
433                                 return true
434                         if scope[t.name] is t.namespace
435                                 return false
436                 return false
437         is_in_scope_x_y = (tag_name, scope, scope2, namespace) ->
438                 for t in open_els
439                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
440                                 return true
441                         if scope[t.name] is t.namespace
442                                 return false
443                         if scope2[t.name] is t.namespace
444                                 return false
445                 return false
446         standard_scopers = {
447                 applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
448                 td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
449                 template: NS_HTML, mi: NS_MATHML,
450
451                 mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
452                 'annotation-xml': NS_MATHML,
453
454                 foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
455         }
456         button_scopers = button: NS_HTML
457         li_scopers = ol: NS_HTML, ul: NS_HTML
458         table_scopers = html: NS_HTML, table: NS_HTML, template: NS_HTML
459         is_in_scope = (tag_name, namespace = null) ->
460                 return is_in_scope_x tag_name, standard_scopers, namespace
461         is_in_button_scope = (tag_name, namespace = null) ->
462                 return is_in_scope_x_y tag_name, standard_scopers, button_scopers, namespace
463         is_in_table_scope = (tag_name, namespace = null) ->
464                 return is_in_scope_x tag_name, table_scopers, namespace
465         # aka is_in_list_item_scope
466         is_in_li_scope = (tag_name, namespace = null) ->
467                 return is_in_scope_x_y tag_name, standard_scopers, li_scopers, namespace
468         is_in_select_scope = (tag_name, namespace = null) ->
469                 for t in open_els
470                         if t.name is tag_name and (namespace is null or namespace is t.namespace)
471                                 return true
472                         if t.ns isnt NS_HTML and t.name isnt 'optgroup' and t.name isnt 'option'
473                                 return false
474                 return false
475         # this checks for a particular element, not by name
476         el_is_in_scope = (el) ->
477                 for t in open_els
478                         if t is el
479                                 return true
480                         if standard_scopers[t.name] is t.namespace
481                                 return false
482                 return false
483
484         clear_to_table_stopers = {
485                 'table': true
486                 'template': true
487                 'html': true
488         }
489         clear_stack_to_table_context = ->
490                 loop
491                         if clear_to_table_stopers[open_els[0].name]?
492                                 break
493                         open_els.shift()
494                 return
495         clear_to_table_body_stopers = {
496                 'tbody': true
497                 'tfoot': true
498                 'thead': true
499                 'template': true
500                 'html': true
501         }
502         clear_stack_to_table_body_context = ->
503                 loop
504                         if clear_to_table_body_stopers[open_els[0].name]?
505                                 break
506                         open_els.shift()
507                 return
508         clear_to_table_row_stopers = {
509                 'tr': true
510                 'template': true
511                 'html': true
512         }
513         clear_stack_to_table_row_context = ->
514                 loop
515                         if clear_to_table_row_stopers[open_els[0].name]?
516                                 break
517                         open_els.shift()
518                 return
519         clear_afe_to_marker = ->
520                 loop
521                         return unless afe.length > 0 # this happens in fragment case, ?spec error
522                         el = afe.shift()
523                         if el.type is TYPE_AFE_MARKER
524                                 return
525                 return
526
527         # 8.2.3.1 ...
528         # http://www.w3.org/TR/html5/syntax.html#reset-the-insertion-mode-appropriately
529         reset_ins_mode = ->
530                 # 1. Let last be false.
531                 last = false
532                 # 2. Let node be the last node in the stack of open elements.
533                 node_i = 0
534                 node = open_els[node_i]
535                 # 3. Loop: If node is the first node in the stack of open elements,
536                 # then set last to true, and, if the parser was originally created as
537                 # part of the HTML fragment parsing algorithm (fragment case) set node
538                 # to the context element.
539                 loop
540                         if node_i is open_els.length - 1
541                                 last = true
542                                 # fixfull (fragment case)
543
544                         # 4. If node is a select element, run these substeps:
545                         if node.name is 'select'
546                                 # 1. If last is true, jump to the step below labeled done.
547                                 unless last
548                                         # 2. Let ancestor be node.
549                                         ancestor_i = node_i
550                                         ancestor = node
551                                         # 3. Loop: If ancestor is the first node in the stack of
552                                         # open elements, jump to the step below labeled done.
553                                         loop
554                                                 if ancestor_i is open_els.length - 1
555                                                         break
556                                                 # 4. Let ancestor be the node before ancestor in the stack
557                                                 # of open elements.
558                                                 ancestor_i += 1
559                                                 ancestor = open_els[ancestor_i]
560                                                 # 5. If ancestor is a template node, jump to the step below
561                                                 # labeled done.
562                                                 if ancestor.name is 'template'
563                                                         break
564                                                 # 6. If ancestor is a table node, switch the insertion mode
565                                                 # to "in select in table" and abort these steps.
566                                                 if ancestor.name is 'table'
567                                                         ins_mode = ins_mode_in_select_in_table
568                                                         return
569                                                 # 7. Jump back to the step labeled loop.
570                                 # 8. Done: Switch the insertion mode to "in select" and abort
571                                 # these steps.
572                                 ins_mode = ins_mode_in_select
573                                 return
574                         # 5. If node is a td or th element and last is false, then switch
575                         # the insertion mode to "in cell" and abort these steps.
576                         if (node.name is 'td' or node.name is 'th') and last is false
577                                 ins_mode = ins_mode_in_cell
578                                 return
579                         # 6. If node is a tr element, then switch the insertion mode to "in
580                         # row" and abort these steps.
581                         if node.name is 'tr'
582                                 ins_mode = ins_mode_in_row
583                                 return
584                         # 7. If node is a tbody, thead, or tfoot element, then switch the
585                         # insertion mode to "in table body" and abort these steps.
586                         if node.name is 'tbody' or node.name is 'thead' or node.name is 'tfoot'
587                                 ins_mode = ins_mode_in_table_body
588                                 return
589                         # 8. If node is a caption element, then switch the insertion mode
590                         # to "in caption" and abort these steps.
591                         if node.name is 'caption'
592                                 ins_mode = ins_mode_in_caption
593                                 return
594                         # 9. If node is a colgroup element, then switch the insertion mode
595                         # to "in column group" and abort these steps.
596                         if node.name is 'colgroup'
597                                 ins_mode = ins_mode_in_column_group
598                                 return
599                         # 10. If node is a table element, then switch the insertion mode to
600                         # "in table" and abort these steps.
601                         if node.name is 'table'
602                                 ins_mode = ins_mode_in_table
603                                 return
604                         # 11. If node is a template element, then switch the insertion mode
605                         # to the current template insertion mode and abort these steps.
606                         # fixfull (template insertion mode stack)
607
608                         # 12. If node is a head element and last is true, then switch the
609                         # insertion mode to "in body" ("in body"! not "in head"!) and abort
610                         # these steps. (fragment case)
611                         if node.name is 'head' and last
612                                 ins_mode = ins_mode_in_body
613                                 return
614                         # 13. If node is a head element and last is false, then switch the
615                         # insertion mode to "in head" and abort these steps.
616                         if node.name is 'head' and last is false
617                                 ins_mode = ins_mode_in_head
618                                 return
619                         # 14. If node is a body element, then switch the insertion mode to
620                         # "in body" and abort these steps.
621                         if node.name is 'body'
622                                 ins_mode = ins_mode_in_body
623                                 return
624                         # 15. If node is a frameset element, then switch the insertion mode
625                         # to "in frameset" and abort these steps. (fragment case)
626                         if node.name is 'frameset'
627                                 ins_mode = ins_mode_in_frameset
628                                 return
629                         # 16. If node is an html element, run these substeps:
630                         if node.name is 'html'
631                                 # 1. If the head element pointer is null, switch the insertion
632                                 # mode to "before head" and abort these steps. (fragment case)
633                                 if head_element_pointer is null
634                                         ins_mode = ins_mode_before_head
635                                 else
636                                         # 2. Otherwise, the head element pointer is not null,
637                                         # switch the insertion mode to "after head" and abort these
638                                         # steps.
639                                         ins_mode = ins_mode_after_head
640                                 return
641                         # 17. If last is true, then switch the insertion mode to "in body"
642                         # and abort these steps. (fragment case)
643                         if last
644                                 ins_mode = ins_mode_in_body
645                                 return
646                         # 18. Let node now be the node before node in the stack of open
647                         # elements.
648                         node_i += 1
649                         node = open_els[node_i]
650                         # 19. Return to the step labeled loop.
651
652         # 8.2.3.2
653
654         # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
655         adjusted_current_node = ->
656                 if open_els.length is 1 and flag_fragment_parsing
657                         return context_element
658                 return open_els[0]
659
660         # http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
661         # this implementation is structured (mostly) as described at the link above.
662         # capitalized comments are the "labels" described at the link above.
663         reconstruct_afe = ->
664                 return if afe.length is 0
665                 if afe[0].type is TYPE_AFE_MARKER or afe[0] in open_els
666                         return
667                 # Rewind
668                 i = 0
669                 loop
670                         if i is afe.length - 1
671                                 break
672                         i += 1
673                         if afe[i].type is TYPE_AFE_MARKER or afe[i] in open_els
674                                 i -= 1 # Advance
675                                 break
676                 # Create
677                 loop
678                         el = insert_html_element afe[i].token
679                         afe[i] = el
680                         break if i is 0
681                         i -= 1 # Advance
682
683         # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
684         # adoption agency algorithm
685         # overview here:
686         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-i-/b-/i
687         #   http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
688         #   http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
689         adoption_agency = (subject) ->
690                 debug_log "adoption_agency()"
691                 debug_log "tree: #{serialize_els doc.children, false, true}"
692                 debug_log "open_els: #{serialize_els open_els, true, true}"
693                 debug_log "afe: #{serialize_els afe, true, true}"
694                 if open_els[0].name is subject
695                         el = open_els[0]
696                         open_els.shift()
697                         # remove it from the list of active formatting elements (if found)
698                         for t, i in afe
699                                 if t is el
700                                         afe.splice i, 1
701                                         break
702                         debug_log "aaa: starting off with subject on top of stack, exiting"
703                         return
704                 outer = 0
705                 loop
706                         if outer >= 8
707                                 return
708                         outer += 1
709                         # 5. Let formatting element be the last element in the list of
710                         # active formatting elements that: is between the end of the list
711                         # and the last scope marker in the list, if any, or the start of
712                         # the list otherwise, and  has the tag name subject.
713                         fe = null
714                         for t, fe_of_afe in afe
715                                 if t.type is TYPE_AFE_MARKER
716                                         break
717                                 if t.name is subject
718                                         fe = t
719                                         break
720                         # If there is no such element, then abort these steps and instead
721                         # act as described in the "any other end tag" entry above.
722                         if fe is null
723                                 debug_log "aaa: fe not found in afe"
724                                 in_body_any_other_end_tag subject
725                                 return
726                         # 6. If formatting element is not in the stack of open elements,
727                         # then this is a parse error; remove the element from the list, and
728                         # abort these steps.
729                         in_open_els = false
730                         for t, fe_of_open_els in open_els
731                                 if t is fe
732                                         in_open_els = true
733                                         break
734                         unless in_open_els
735                                 debug_log "aaa: fe not found in open_els"
736                                 parse_error()
737                                 # "remove it from the list" must mean afe, since it's not in open_els
738                                 afe.splice fe_of_afe, 1
739                                 return
740                         # 7. If formatting element is in the stack of open elements, but
741                         # the element is not in scope, then this is a parse error; abort
742                         # these steps.
743                         unless el_is_in_scope fe
744                                 debug_log "aaa: fe not in scope"
745                                 parse_error()
746                                 return
747                         # 8. If formatting element is not the current node, this is a parse
748                         # error. (But do not abort these steps.)
749                         unless open_els[0] is fe
750                                 parse_error()
751                                 # continue
752                         # 9. Let furthest block be the topmost node in the stack of open
753                         # elements that is lower in the stack than formatting element, and
754                         # is an element in the special category. There might not be one.
755                         fb = null
756                         fb_of_open_els = null
757                         for t, i in open_els
758                                 if t is fe
759                                         break
760                                 if el_is_special t
761                                         fb = t
762                                         fb_of_open_els = i
763                                         # and continue, to see if there's one that's more "topmost"
764                         # 10. If there is no furthest block, then the UA must first pop all
765                         # the nodes from the bottom of the stack of open elements, from the
766                         # current node up to and including formatting element, then remove
767                         # formatting element from the list of active formatting elements,
768                         # and finally abort these steps.
769                         if fb is null
770                                 debug_log "aaa: no fb"
771                                 loop
772                                         t = open_els.shift()
773                                         if t is fe
774                                                 afe.splice fe_of_afe, 1
775                                                 return
776                         # 11. Let common ancestor be the element immediately above
777                         # formatting element in the stack of open elements.
778                         ca = open_els[fe_of_open_els + 1] # common ancestor
779
780                         node_above = open_els[fb_of_open_els + 1] # next node if node isn't in open_els anymore
781                         # 12. Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
782                         bookmark = new_aaa_bookmark()
783                         for t, i in afe
784                                 if t is fe
785                                         afe.splice i, 0, bookmark
786                                         break
787                         node = last_node = fb
788                         inner = 0
789                         loop
790                                 inner += 1
791                                 # 3. Let node be the element immediately above node in the
792                                 # stack of open elements, or if node is no longer in the stack
793                                 # of open elements (e.g. because it got removed by this
794                                 # algorithm), the element that was immediately above node in
795                                 # the stack of open elements before node was removed.
796                                 node_next = null
797                                 for t, i in open_els
798                                         if t is node
799                                                 node_next = open_els[i + 1]
800                                                 break
801                                 node = node_next ? node_above
802                                 debug_log "inner loop #{inner}"
803                                 debug_log "tree: #{serialize_els doc.children, false, true}"
804                                 debug_log "open_els: #{serialize_els open_els, true, true}"
805                                 debug_log "afe: #{serialize_els afe, true, true}"
806                                 debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
807                                 debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
808                                 debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
809                                 debug_log "node: #{node.serialize true, true}"
810                                 # TODO make sure node_above gets re-set if/when node is removed from open_els
811
812                                 # 4. If node is formatting element, then go to the next step in
813                                 # the overall algorithm.
814                                 if node is fe
815                                         break
816                                 debug_log "the meat"
817                                 # 5. If inner loop counter is greater than three and node is in
818                                 # the list of active formatting elements, then remove node from
819                                 # the list of active formatting elements.
820                                 node_in_afe = false
821                                 for t, i in afe
822                                         if t is node
823                                                 if inner > 3
824                                                         afe.splice i, 1
825                                                         debug_log "max out inner"
826                                                 else
827                                                         node_in_afe = true
828                                                         debug_log "in afe"
829                                                 break
830                                 # 6. If node is not in the list of active formatting elements,
831                                 # then remove node from the stack of open elements and then go
832                                 # back to the step labeled inner loop.
833                                 unless node_in_afe
834                                         debug_log "not in afe"
835                                         for t, i in open_els
836                                                 if t is node
837                                                         node_above = open_els[i + 1]
838                                                         open_els.splice i, 1
839                                                         break
840                                         continue
841                                 debug_log "the bones"
842                                 # 7. create an element for the token for which the element node
843                                 # was created, in the HTML namespace, with common ancestor as
844                                 # the intended parent; replace the entry for node in the list
845                                 # of active formatting elements with an entry for the new
846                                 # element, replace the entry for node in the stack of open
847                                 # elements with an entry for the new element, and let node be
848                                 # the new element.
849                                 new_node = token_to_element node.token, NS_HTML, ca
850                                 for t, i in afe
851                                         if t is node
852                                                 afe[i] = new_node
853                                                 debug_log "replaced in afe"
854                                                 break
855                                 for t, i in open_els
856                                         if t is node
857                                                 node_above = open_els[i + 1]
858                                                 open_els[i] = new_node
859                                                 debug_log "replaced in open_els"
860                                                 break
861                                 node = new_node
862                                 # 8. If last node is furthest block, then move the
863                                 # aforementioned bookmark to be immediately after the new node
864                                 # in the list of active formatting elements.
865                                 if last_node is fb
866                                         for t, i in afe
867                                                 if t is bookmark
868                                                         afe.splice i, 1
869                                                         debug_log "removed bookmark"
870                                                         break
871                                         for t, i in afe
872                                                 if t is node
873                                                         # "after" means lower
874                                                         afe.splice i, 0, bookmark # "after as <-
875                                                         debug_log "placed bookmark after node"
876                                                         debug_log "node: #{node.id} afe: #{serialize_els afe, true, true}"
877                                                         break
878                                 # 9. Insert last node into node, first removing it from its
879                                 # previous parent node if any.
880                                 if last_node.parent?
881                                         debug_log "last_node has parent"
882                                         for c, i in last_node.parent.children
883                                                 if c is last_node
884                                                         debug_log "removing last_node from parent"
885                                                         last_node.parent.children.splice i, 1
886                                                         break
887                                 node.children.push last_node
888                                 last_node.parent = node
889                                 # 10. Let last node be node.
890                                 last_node = node
891                                 debug_log "at last"
892                                 # 11. Return to the step labeled inner loop.
893                         # 14. Insert whatever last node ended up being in the previous step
894                         # at the appropriate place for inserting a node, but using common
895                         # ancestor as the override target.
896
897                         # In the case where fe is immediately followed by fb:
898                         #   * inner loop exits out early (node==fe)
899                         #   * last_node is fb
900                         #   * last_node is still in the tree (not a duplicate)
901                         if last_node.parent?
902                                 debug_log "FEFIRST? last_node has parent"
903                                 for c, i in last_node.parent.children
904                                         if c is last_node
905                                                 debug_log "removing last_node from parent"
906                                                 last_node.parent.children.splice i, 1
907                                                 break
908
909                         debug_log "after aaa inner loop"
910                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
911                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
912                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
913                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
914                         debug_log "tree: #{serialize_els doc.children, false, true}"
915
916                         debug_log "insert"
917
918
919                         # can't use standard insert token thing, because it's already in
920                         # open_els and must stay at it's current position in open_els
921                         dest = adjusted_insertion_location ca
922                         dest[0].children.splice dest[1], 0, last_node
923                         last_node.parent = dest[0]
924
925
926                         debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
927                         debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
928                         debug_log "fb: #{fb.name}##{fb.id} children: #{serialize_els fb.children, true, true}"
929                         debug_log "last_node: #{last_node.name}##{last_node.id} children: #{serialize_els last_node.children, true, true}"
930                         debug_log "tree: #{serialize_els doc.children, false, true}"
931
932                         # 15. Create an element for the token for which formatting element
933                         # was created, in the HTML namespace, with furthest block as the
934                         # intended parent.
935                         new_element = token_to_element fe.token, NS_HTML, fb
936                         # 16. Take all of the child nodes of furthest block and append them
937                         # to the element created in the last step.
938                         while fb.children.length
939                                 t = fb.children.shift()
940                                 t.parent = new_element
941                                 new_element.children.push t
942                         # 17. Append that new element to furthest block.
943                         new_element.parent = fb
944                         fb.children.push new_element
945                         # 18. Remove formatting element from the list of active formatting
946                         # elements, and insert the new element into the list of active
947                         # formatting elements at the position of the aforementioned
948                         # bookmark.
949                         for t, i in afe
950                                 if t is fe
951                                         afe.splice i, 1
952                                         break
953                         for t, i in afe
954                                 if t is bookmark
955                                         afe[i] = new_element
956                                         break
957                         # 19. Remove formatting element from the stack of open elements,
958                         # and insert the new element into the stack of open elements
959                         # immediately below the position of furthest block in that stack.
960                         for t, i in open_els
961                                 if t is fe
962                                         open_els.splice i, 1
963                                         break
964                         for t, i in open_els
965                                 if t is fb
966                                         open_els.splice i, 0, new_element
967                                         break
968                         # 20. Jump back to the step labeled outer loop.
969                         debug_log "done wrapping fb's children. new_element: #{new_element.name}##{new_element.id}"
970                         debug_log "tree: #{serialize_els doc.children, false, true}"
971                         debug_log "open_els: #{serialize_els open_els, true, true}"
972                         debug_log "afe: #{serialize_els afe, true, true}"
973                 debug_log "AAA DONE"
974
975         # http://www.w3.org/TR/html5/syntax.html#close-a-p-element
976         close_p_element = ->
977                 generate_implied_end_tags 'p' # arg is exception
978                 if open_els[0].name isnt 'p'
979                         parse_error()
980                 while open_els.length > 1 # just in case
981                         el = open_els.shift()
982                         if el.name is 'p'
983                                 return
984         close_p_if_in_button_scope = ->
985                 if is_in_button_scope 'p'
986                         close_p_element()
987
988         # http://www.w3.org/TR/html5/syntax.html#insert-a-character
989         # aka insert_a_character = (t) ->
990         insert_character = (t) ->
991                 dest = adjusted_insertion_location()
992                 # fixfull check for Document node
993                 if dest[1] > 0
994                         prev = dest[0].children[dest[1] - 1]
995                         if prev.type is TYPE_TEXT
996                                 prev.text += t.text
997                                 return
998                 dest[0].children.splice dest[1], 0, t
999
1000         # 8.2.5.1
1001         # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
1002         # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
1003         adjusted_insertion_location = (override_target = null) ->
1004                 # 1. If there was an override target specified, then let target be the
1005                 # override target.
1006                 if override_target?
1007                         target = override_target
1008                 else # Otherwise, let target be the current node.
1009                         target = open_els[0]
1010                 # 2. Determine the adjusted insertion location using the first matching
1011                 # steps from the following list:
1012                 #
1013                 # If foster parenting is enabled and target is a table, tbody, tfoot,
1014                 # thead, or tr element Foster parenting happens when content is
1015                 # misnested in tables.
1016                 if flag_foster_parenting and foster_parenting_targets[target.name]
1017                         loop # once. this is here so we can ``break`` to "abort these substeps"
1018                                 # 1. Let last template be the last template element in the
1019                                 # stack of open elements, if any.
1020                                 last_template = null
1021                                 last_template_i = null
1022                                 for el, i in open_els
1023                                         if el.name is 'template'
1024                                                 last_template = el
1025                                                 last_template_i = i
1026                                                 break
1027                                 # 2. Let last table be the last table element in the stack of
1028                                 # open elements, if any.
1029                                 last_table = null
1030                                 last_table_i
1031                                 for el, i in open_els
1032                                         if el.name is 'table'
1033                                                 last_table = el
1034                                                 last_table_i = i
1035                                                 break
1036                                 # 3. If there is a last template and either there is no last
1037                                 # table, or there is one, but last template is lower (more
1038                                 # recently added) than last table in the stack of open
1039                                 # elements, then: let adjusted insertion location be inside
1040                                 # last template's template contents, after its last child (if
1041                                 # any), and abort these substeps.
1042                                 if last_template and (last_table is null or last_template_i < last_table_i)
1043                                         target = last_template # fixfull should be it's contents
1044                                         target_i = target.children.length
1045                                         break
1046                                 # 4. If there is no last table, then let adjusted insertion
1047                                 # location be inside the first element in the stack of open
1048                                 # elements (the html element), after its last child (if any),
1049                                 # and abort these substeps. (fragment case)
1050                                 if last_table is null
1051                                         # this is odd
1052                                         target = open_els[open_els.length - 1]
1053                                         target_i = target.children.length
1054                                 # 5. If last table has a parent element, then let adjusted
1055                                 # insertion location be inside last table's parent element,
1056                                 # immediately before last table, and abort these substeps.
1057                                 if last_table.parent?
1058                                         for c, i in last_table.parent.children
1059                                                 if c is last_table
1060                                                         target = last_table.parent
1061                                                         target_i = i
1062                                                         break
1063                                         break
1064                                 # 6. Let previous element be the element immediately above last
1065                                 # table in the stack of open elements.
1066                                 #
1067                                 # huh? how could it not have a parent?
1068                                 previous_element = open_els[last_table_i + 1]
1069                                 # 7. Let adjusted insertion location be inside previous
1070                                 # element, after its last child (if any).
1071                                 target = previous_element
1072                                 target_i = target.children.length
1073                                 # Note: These steps are involved in part because it's possible
1074                                 # for elements, the table element in this case in particular,
1075                                 # to have been moved by a script around in the DOM, or indeed
1076                                 # removed from the DOM entirely, after the element was inserted
1077                                 # by the parser.
1078                                 break # don't really loop
1079                 else
1080                         # Otherwise Let adjusted insertion location be inside target, after
1081                         # its last child (if any).
1082                         target_i = target.children.length
1083
1084                 # 3. If the adjusted insertion location is inside a template element,
1085                 # let it instead be inside the template element's template contents,
1086                 # after its last child (if any).
1087                 # fixfull (template)
1088
1089                 # 4. Return the adjusted insertion location.
1090                 return [target, target_i]
1091
1092         # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
1093         # aka create_an_element_for_token
1094         token_to_element = (t, namespace, intended_parent) ->
1095                 # convert attributes into a hash
1096                 attrs = {}
1097                 for a in t.attrs_a
1098                         attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
1099                 el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
1100
1101                 # TODO 2. If the newly created element has an xmlns attribute in the
1102                 # XMLNS namespace whose value is not exactly the same as the element's
1103                 # namespace, that is a parse error. Similarly, if the newly created
1104                 # element has an xmlns:xlink attribute in the XMLNS namespace whose
1105                 # value is not the XLink Namespace, that is a parse error.
1106
1107                 # fixfull: the spec says stuff about form pointers and ownerDocument
1108
1109                 return el
1110
1111         # http://www.w3.org/TR/html5/syntax.html#insert-a-foreign-element
1112         insert_foreign_element = (token, namespace) ->
1113                 ail = adjusted_insertion_location()
1114                 ail_el = ail[0]
1115                 ail_i = ail[1]
1116                 el = token_to_element token, namespace, ail_el
1117                 # TODO skip this next step if it's broken (eg ail_el is document with child already)
1118                 el.parent = ail_el
1119                 ail_el.children.splice ail_i, 0, el
1120                 open_els.unshift el
1121                 return el
1122         # http://www.w3.org/TR/html5/syntax.html#insert-an-html-element
1123         insert_html_element = insert_foreign_element # (token, namespace) ->
1124
1125         # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
1126         # position should be [node, index_within_children]
1127         insert_comment = (t, position = null) ->
1128                 position ?= adjusted_insertion_location()
1129                 position[0].children.splice position[1], 0, t
1130
1131         # 8.2.5.2
1132         # http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm
1133         parse_generic_raw_text = (t) ->
1134                 insert_html_element t
1135                 tok_state = tok_state_rawtext
1136                 original_ins_mode = ins_mode
1137                 ins_mode = ins_mode_text
1138         parse_generic_rcdata_text = (t) ->
1139                 insert_html_element t
1140                 tok_state = tok_state_rcdata
1141                 original_ins_mode = ins_mode
1142                 ins_mode = ins_mode_text
1143
1144         # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
1145         # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
1146         generate_implied_end_tags = (except = null) ->
1147                 while end_tag_implied[open_els[0].name] and open_els[0].name isnt except
1148                         open_els.shift()
1149
1150         # 8.2.5.4 The rules for parsing tokens in HTML content
1151         # http://www.w3.org/TR/html5/syntax.html#parsing-main-inhtml
1152
1153         # 8.2.5.4.1 The "initial" insertion mode
1154         # http://www.w3.org/TR/html5/syntax.html#the-initial-insertion-mode
1155         ins_mode_initial = (t) ->
1156                 if is_space_tok t
1157                         return
1158                 if t.type is TYPE_COMMENT
1159                         # ?fixfull
1160                         doc.children.push t
1161                         return
1162                 if t.type is TYPE_DOCTYPE
1163                         # FIXME check identifiers, set quirks, etc
1164                         # fixfull
1165                         doc.children.push t
1166                         ins_mode = ins_mode_before_html
1167                         return
1168                 # Anything else
1169                 #fixfull (iframe, quirks)
1170                 ins_mode = ins_mode_before_html
1171                 ins_mode t # reprocess the token
1172                 return
1173
1174         # 8.2.5.4.2 http://www.w3.org/TR/html5/syntax.html#the-before-html-insertion-mode
1175         ins_mode_before_html = (t) ->
1176                 if t.type is TYPE_DOCTYPE
1177                         parse_error()
1178                         return
1179                 if t.type is TYPE_COMMENT
1180                         doc.children.push t
1181                         return
1182                 if is_space_tok t
1183                         return
1184                 if t.type is TYPE_START_TAG and t.name is 'html'
1185                         el = token_to_element t, NS_HTML, doc
1186                         doc.children.push el
1187                         open_els.unshift(el)
1188                         # fixfull (big paragraph in spec about manifest, fragment, urls, etc)
1189                         ins_mode = ins_mode_before_head
1190                         return
1191                 if t.type is TYPE_END_TAG
1192                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1193                                 # fall through to "anything else"
1194                         else
1195                                 parse_error()
1196                                 return
1197                 # Anything else
1198                 html_tok = new_open_tag 'html'
1199                 el = token_to_element html_tok, NS_HTML, doc
1200                 doc.children.push el
1201                 open_els.unshift el
1202                 # ?fixfull browsing context
1203                 ins_mode = ins_mode_before_head
1204                 ins_mode t
1205                 return
1206
1207         # 8.2.5.4.3 http://www.w3.org/TR/html5/syntax.html#the-before-head-insertion-mode
1208         ins_mode_before_head = (t) ->
1209                 if is_space_tok t
1210                         return
1211                 if t.type is TYPE_COMMENT
1212                         insert_comment t
1213                         return
1214                 if t.type is TYPE_DOCTYPE
1215                         parse_error()
1216                         return
1217                 if t.type is TYPE_START_TAG and t.name is 'html'
1218                         ins_mode_in_body t
1219                         return
1220                 if t.type is TYPE_START_TAG and t.name is 'head'
1221                         el = insert_html_element t
1222                         head_element_pointer = el
1223                         ins_mode = ins_mode_in_head
1224                 if t.type is TYPE_END_TAG
1225                         if t.name is 'head' or t.name is 'body' or t.name is 'html' or t.name is 'br'
1226                                 # fall through to Anything else below
1227                         else
1228                                 parse_error()
1229                                 return
1230                 # Anything else
1231                 head_tok = new_open_tag 'head'
1232                 el = insert_html_element head_tok
1233                 head_element_pointer = el
1234                 ins_mode = ins_mode_in_head
1235                 ins_mode t # reprocess current token
1236
1237         # 8.2.5.4.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead
1238         ins_mode_in_head_else = (t) -> # factored out for same-as-spec flow control
1239                 open_els.shift() # spec says this will be a 'head' node
1240                 ins_mode = ins_mode_after_head
1241                 ins_mode t
1242         ins_mode_in_head = (t) ->
1243                 if t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\n" or t.text is "\u000c" or t.text is ' ')
1244                         insert_character t
1245                         return
1246                 if t.type is TYPE_COMMENT
1247                         insert_comment t
1248                         return
1249                 if t.type is TYPE_DOCTYPE
1250                         parse_error()
1251                         return
1252                 if t.type is TYPE_START_TAG and t.name is 'html'
1253                         ins_mode_in_body t
1254                         return
1255                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
1256                         el = insert_html_element t
1257                         open_els.shift()
1258                         t.acknowledge_self_closing()
1259                         return
1260                 if t.type is TYPE_START_TAG and t.name is 'meta'
1261                         el = insert_html_element t
1262                         open_els.shift()
1263                         t.acknowledge_self_closing()
1264                         # fixfull encoding stuff
1265                         return
1266                 if t.type is TYPE_START_TAG and t.name is 'title'
1267                         parse_generic_rcdata_text t
1268                         return
1269                 if t.type is TYPE_START_TAG and ((t.name is 'noscript' and flag_scripting) or (t.name is 'noframes' or t.name is 'style'))
1270                         parse_generic_raw_text t
1271                         return
1272                 if t.type is TYPE_START_TAG and t.name is 'noscript' and flag_scripting is false
1273                         insert_html_element t
1274                         ins_mode = ins_mode_in_head_noscript
1275                         return
1276                 if t.type is TYPE_START_TAG and t.name is 'script'
1277                         ail = adjusted_insertion_location()
1278                         el = token_to_element t, NS_HTML, ail
1279                         el.flag 'parser-inserted', true
1280                         # fixfull frament case
1281                         ail[0].children.splice ail[1], 0, el
1282                         open_els.unshift el
1283                         tok_state = tok_state_script_data
1284                         original_ins_mode = ins_mode # make sure orig... is defined
1285                         ins_mode = ins_mode_text
1286                         return
1287                 if t.type is TYPE_END_TAG and t.name is 'head'
1288                         open_els.shift() # will be a head element... spec says so
1289                         ins_mode = ins_mode_after_head
1290                         return
1291                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1292                         ins_mode_in_head_else t
1293                         return
1294                 if t.type is TYPE_START_TAG and t.name is 'template'
1295                         insert_html_element t
1296                         afe_push_marker()
1297                         flag_frameset_ok = false
1298                         ins_mode = ins_mode_in_template
1299                         template_ins_modes.unshift ins_mode_in_template
1300                         return
1301                 if t.type is TYPE_END_TAG and t.name is 'template'
1302                         if template_tag_is_open()
1303                                 generate_implied_end_tags
1304                                 if open_els[0].name isnt 'template'
1305                                         parse_error()
1306                                 loop
1307                                         el = open_els.shift()
1308                                         if el.name is 'template'
1309                                                 break
1310                                 clear_afe_to_marker()
1311                                 template_ins_modes.shift()
1312                                 reset_ins_mode()
1313                         else
1314                                 parse_error()
1315                         return
1316                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1317                         parse_error()
1318                         return
1319                 ins_mode_in_head_else t
1320
1321         # 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
1322         ins_mode_in_head_noscript_else = (t) ->
1323                 parse_error()
1324                 open_els.shift()
1325                 ins_mode = ins_mode_in_head
1326                 ins_mode t
1327         ins_mode_in_head_noscript = (t) ->
1328                 if t.type is TYPE_DOCTYPE
1329                         parse_error()
1330                         return
1331                 if t.type is TYPE_START_TAG
1332                         ins_mode_in_body t
1333                         return
1334                 if t.type is TYPE_END_TAG and t.name is 'noscript'
1335                         open_els.shift()
1336                         ins_mode = ins_mode_in_head
1337                         return
1338                 if (t.type is TYPE_TEXT and (t.text is "\t" or t.text is "\u000a" or t.text is "\u000c" or t.text is "\u000d" or t.text is ' ')) or t.type is TYPE_COMMENT or (t.type is TYPE_START_TAG and (t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'style'))
1339                         ins_mode_in_head t
1340                         return
1341                 if t.type is TYPE_END_TAG and t.name is 'br'
1342                         ins_mode_in_head_noscript_else t
1343                         return
1344                 if (t.type is TYPE_START_TAG and (t.name is 'head' or t.name is 'noscript')) or t.type is TYPE_END_TAG
1345                         parse_error()
1346                         return
1347                 # Anything else
1348                 ins_mode_in_head_noscript_else t
1349                 return
1350
1351
1352
1353         # 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
1354         ins_mode_after_head_else = (t) ->
1355                 body_tok = new_open_tag 'body'
1356                 insert_html_element body_tok
1357                 ins_mode = ins_mode_in_body
1358                 ins_mode t # reprocess token
1359                 return
1360         ins_mode_after_head = (t) ->
1361                 if is_space_tok t
1362                         insert_character t
1363                         return
1364                 if t.type is TYPE_COMMENT
1365                         insert_comment t
1366                         return
1367                 if t.type is TYPE_DOCTYPE
1368                         parse_error()
1369                         return
1370                 if t.type is TYPE_START_TAG and t.name is 'html'
1371                         ins_mode_in_body t
1372                         return
1373                 if t.type is TYPE_START_TAG and t.name is 'body'
1374                         insert_html_element t
1375                         flag_frameset_ok = false
1376                         ins_mode = ins_mode_in_body
1377                         return
1378                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1379                         insert_html_element t
1380                         ins_mode = ins_mode_in_frameset
1381                         return
1382                 if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')
1383                         parse_error()
1384                         open_els.unshift head_element_pointer
1385                         ins_mode_in_head t
1386                         for el, i of open_els
1387                                 if el is head_element_pointer
1388                                         open_els.splice i, 1
1389                                         return
1390                         console.log "warning: 23904 couldn't find head element in open_els"
1391                         return
1392                 if t.type is TYPE_END_TAG and t.name is 'template'
1393                         ins_mode_in_head t
1394                         return
1395                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'html' or t.name is 'br')
1396                         ins_mode_after_head_else t
1397                         return
1398                 if (t.type is TYPE_START_TAG and t.name is 'head') or t.type is TYPE_END_TAG
1399                         parse_error()
1400                         return
1401                 # Anything else
1402                 ins_mode_after_head_else t
1403
1404         # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
1405         in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
1406                 for el, i in open_els
1407                         if el.namespace is NS_HTML and el.name is name
1408                                 generate_implied_end_tags name # arg is exception
1409                                 parse_error() unless i is 0
1410                                 while i >= 0
1411                                         open_els.shift()
1412                                         i -= 1
1413                                 return
1414                         if special_elements[el.name] is el.namespace
1415                                 parse_error()
1416                                 return
1417                 return
1418         ins_mode_in_body = (t) ->
1419                 if t.type is TYPE_TEXT and t.text is "\u0000"
1420                         parse_error()
1421                         return
1422                 if is_space_tok t
1423                         reconstruct_afe()
1424                         insert_character t
1425                         return
1426                 if t.type is TYPE_TEXT
1427                         reconstruct_afe()
1428                         insert_character t
1429                         flag_frameset_ok = false
1430                         return
1431                 if t.type is TYPE_COMMENT
1432                         insert_comment t
1433                         return
1434                 if t.type is TYPE_DOCTYPE
1435                         parse_error()
1436                         return
1437                 if t.type is TYPE_START_TAG and t.name is 'html'
1438                         parse_error()
1439                         return if template_tag_is_open()
1440                         root_attrs = open_els[open_els.length - 1].attrs
1441                         for a of t.attrs_a
1442                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
1443                         return
1444
1445                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
1446                         ins_mode_in_head t
1447                         return
1448                 if t.type is TYPE_START_TAG and t.name is 'body'
1449                         parse_error()
1450                         return if open_els.length < 2
1451                         second = open_els[open_els.length - 2]
1452                         return unless second.ns is NS_HTML
1453                         return unless second.name is 'body'
1454                         return if template_tag_is_open()
1455                         frameset_ok_flag = false
1456                         for a of t.attrs_a
1457                                 second.attrs[a[0]] = a[1] unless second.attrs[a[0]]?
1458                         return
1459                 if t.type is TYPE_START_TAG and t.name is 'frameset'
1460                         parse_error()
1461                         return if open_els.length < 2
1462                         second_i = open_els.length - 2
1463                         second = open_els[second_i]
1464                         return unless second.ns is NS_HTML
1465                         return unless second.name is 'body'
1466                         flag_frameset_ok = false
1467                         if second.parent?
1468                                 for el, i in second.parent.children
1469                                         if el is second
1470                                                 second.parent.children.splice i, 1
1471                                                 break
1472                         open_els.splice second_i, 1
1473                         # pop everything except the "root html element"
1474                         while open_els.length > 1
1475                                 open_els.shift()
1476                         insert_html_element t
1477                         ins_mode = ins_mode_in_frameset
1478                         return
1479                 if t.type is TYPE_EOF
1480                         ok_tags = {
1481                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, p:NS_HTML, tbody:NS_HTML,
1482                                 td:NS_HTML, tfoot:NS_HTML, th:NS_HTML, thead:NS_HTML,
1483                                 tr:NS_HTML, body:NS_HTML, html:NS_HTML,
1484                         }
1485                         for el in open_els
1486                                 unless ok_tags[t.name] is el.namespace
1487                                         parse_error()
1488                                         break
1489                         if template_ins_modes.length > 0
1490                                 ins_mode_in_template t
1491                         else
1492                                 stop_parsing()
1493                         return
1494                 if t.type is TYPE_END_TAG and t.name is 'body'
1495                         unless is_in_scope 'body'
1496                                 parse_error()
1497                                 return
1498                         ok_tags = {
1499                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1500                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1501                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1502                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1503                                 html:NS_HTML
1504                         }
1505                         for el in open_els
1506                                 unless ok_tags[t.name] is el.namespace
1507                                         parse_error()
1508                                         break
1509                         ins_mode = ins_mode_after_body
1510                         return
1511                 if t.type is TYPE_END_TAG and t.name is 'html'
1512                         unless is_in_scope 'body'
1513                                 parse_error()
1514                                 return
1515                         ok_tags = {
1516                                 dd:NS_HTML, dt:NS_HTML, li:NS_HTML, optgroup:NS_HTML,
1517                                 option:NS_HTML, p:NS_HTML, rb:NS_HTML, rp:NS_HTML, rt:NS_HTML,
1518                                 rtc:NS_HTML, tbody:NS_HTML, td:NS_HTML, tfoot:NS_HTML,
1519                                 th:NS_HTML, thead:NS_HTML, tr:NS_HTML, body:NS_HTML,
1520                                 html:NS_HTML
1521                         }
1522                         for el in open_els
1523                                 unless ok_tags[t.name] is el.namespace
1524                                         parse_error()
1525                                         break
1526                         ins_mode = ins_mode_after_body
1527                         ins_mode t
1528                         return
1529                 if t.type is TYPE_START_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'p' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1530                         close_p_if_in_button_scope()
1531                         insert_html_element t
1532                         return
1533                 if t.type is TYPE_START_TAG and h_tags[t.name]?
1534                         close_p_if_in_button_scope()
1535                         if h_tags[open_els[0]] is NS_HTML
1536                                 parse_error()
1537                                 open_els.shift()
1538                         insert_html_element t
1539                         return
1540                 if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
1541                         close_p_if_in_button_scope()
1542                         insert_html_element t
1543                         # spec: If the next token is a "LF" (U+000A) character token, then
1544                         # ignore that token and move on to the next one. (Newlines at the
1545                         # start of pre blocks are ignored as an authoring convenience.)
1546                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1547                                 cur += 1
1548                         flag_frameset_ok = false
1549                         return
1550                 if t.type is TYPE_START_TAG and t.name is 'form'
1551                         unless form_element_pointer is null or template_tag_is_open()
1552                                 parse_error()
1553                                 return
1554                         close_p_if_in_button_scope()
1555                         el = insert_html_element t
1556                         unless template_tag_is_open()
1557                                 form_element_pointer = el
1558                         return
1559                 if t.type is TYPE_START_TAG and t.name is 'li'
1560                         flag_frameset_ok = false
1561                         for node in open_els
1562                                 if node.name is 'li' and node.namespace is NS_HTML
1563                                         generate_implied_end_tags 'li' # arg is exception
1564                                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1565                                                 parse_error()
1566                                         loop
1567                                                 el = open_els.shift()
1568                                                 if el.name is 'li' and el.namespace is NS_HTML
1569                                                         break
1570                                         break
1571                                 if el_is_special_not_adp node
1572                                                 break
1573                         close_p_if_in_button_scope()
1574                         insert_html_element t
1575                         return
1576                 if t.type is TYPE_START_TAG and (t.name is 'dd' or t.name is 'dt')
1577                         flag_frameset_ok = false
1578                         for node in open_els
1579                                 if node.name is 'dd' and node.namespace is NS_HTML
1580                                         generate_implied_end_tags 'dd' # arg is exception
1581                                         if open_els[0].name isnt 'dd' or open_els[0].namespace isnt NS_HTML
1582                                                 parse_error()
1583                                         loop
1584                                                 el = open_els.shift()
1585                                                 if el.name is 'dd' and el.namespace is NS_HTML
1586                                                         break
1587                                         break
1588                                 if node.name is 'dt' and node.namespace is NS_HTML
1589                                         generate_implied_end_tags 'dt' # arg is exception
1590                                         if open_els[0].name isnt 'dt' or open_els[0].namespace isnt NS_HTML
1591                                                 parse_error()
1592                                         loop
1593                                                 el = open_els.shift()
1594                                                 if el.name is 'dt' and el.namespace is NS_HTML
1595                                                         break
1596                                         break
1597                                 if el_is_special_not_adp node
1598                                         break
1599                         close_p_if_in_button_scope()
1600                         insert_html_element t
1601                         return
1602                 if t.type is TYPE_START_TAG and t.name is 'plaintext'
1603                         close_p_if_in_button_scope()
1604                         insert_html_element t
1605                         tok_state = tok_state_plaintext
1606                         return
1607                 if t.type is TYPE_START_TAG and t.name is 'button'
1608                         if is_in_scope 'button', NS_HTML
1609                                 parse_error()
1610                                 generate_implied_end_tags()
1611                                 loop
1612                                         el = open_els.shift()
1613                                         if el.name is 'button' and el.namespace is NS_HTML
1614                                                 break
1615                         reconstruct_afe()
1616                         insert_html_element t
1617                         flag_frameset_ok = false
1618                         return
1619                 if t.type is TYPE_END_TAG and (t.name is 'address' or t.name is 'article' or t.name is 'aside' or t.name is 'blockquote' or t.name is 'button' or t.name is 'center' or t.name is 'details' or t.name is 'dialog' or t.name is 'dir' or t.name is 'div' or t.name is 'dl' or t.name is 'fieldset' or t.name is 'figcaption' or t.name is 'figure' or t.name is 'footer' or t.name is 'header' or t.name is 'hgroup' or t.name is 'listing' or t.name is 'main' or t.name is 'nav' or t.name is 'ol' or t.name is 'pre' or t.name is 'section' or t.name is 'summary' or t.name is 'ul')
1620                         unless is_in_scope t.name, NS_HTML
1621                                 parse_error()
1622                                 return
1623                         generate_implied_end_tags()
1624                         unless open_els[0].name is t.name and open_els[0].namespace is NS_HTML
1625                                 parse_error()
1626                         loop
1627                                 el = open_els.shift()
1628                                 if el.name is t.name and el.namespace is NS_HTML
1629                                         return
1630                         return
1631                 if t.type is TYPE_END_TAG and t.name is 'form'
1632                         unless template_tag_is_open()
1633                                 node = form_element_pointer
1634                                 form_element_pointer = null
1635                                 if node is null or not el_is_in_scope node
1636                                         parse_error()
1637                                         return
1638                                 generate_implied_end_tags()
1639                                 if open_els[0] isnt node
1640                                         parse_error()
1641                                 for el, i in open_els
1642                                         if el is node
1643                                                 open_els.splice i, 1
1644                                                 break
1645                         else
1646                                 unless is_in_scope 'form', NS_HTML
1647                                         parse_error()
1648                                         return
1649                                 generate_implied_end_tags()
1650                                 if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
1651                                         parse_error()
1652                                 loop
1653                                         el = open_els.shift()
1654                                         if el.name is 'form' and el.namespace is NS_HTML
1655                                                 break
1656                         return
1657                 if t.type is TYPE_END_TAG and t.name is 'p'
1658                         unless is_in_button_scope 'p', NS_HTML
1659                                 parse_error()
1660                                 insert_html_element new_open_tag 'p'
1661                         close_p_element()
1662                         return
1663                 if t.type is TYPE_END_TAG and t.name is 'li'
1664                         unless is_in_li_scope 'li', NS_HTML
1665                                 parse_error()
1666                                 return
1667                         generate_implied_end_tags 'li' # arg is exception
1668                         if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
1669                                 parse_error()
1670                         loop
1671                                 el = open_els.shift()
1672                                 if el.name is 'li' and el.namespace is NS_HTML
1673                                         break
1674                         return
1675                 if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
1676                         unless is_in_scope t.name, NS_HTML
1677                                 parse_error()
1678                                 return
1679                         generate_implied_end_tags t.name # arg is exception
1680                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1681                                 parse_error()
1682                         loop
1683                                 el = open_els.shift()
1684                                 if el.name is t.name and el.namespace is NS_HTML
1685                                         break
1686                         return
1687                 if t.type is TYPE_END_TAG and h_tags[t.name]?
1688                         h_in_scope = false
1689                         for el in open_els
1690                                 if h_tags[el.name] is el.namespace
1691                                         h_in_scope = true
1692                                         break
1693                                 if standard_scopers[el.name] is el.namespace
1694                                         break
1695                         unless h_in_scope
1696                                 parse_error()
1697                                 return
1698                         generate_implied_end_tags()
1699                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1700                                 parse_error()
1701                         loop
1702                                 el = open_els.shift()
1703                                 if h_tags[el.name] is el.namespace
1704                                         break
1705                         return
1706                 # deep breath!
1707                 if t.type is TYPE_START_TAG and t.name is 'a'
1708                         # If the list of active formatting elements contains an a element
1709                         # between the end of the list and the last marker on the list (or
1710                         # the start of the list if there is no marker on the list), then
1711                         # this is a parse error; run the adoption agency algorithm for the
1712                         # tag name "a", then remove that element from the list of active
1713                         # formatting elements and the stack of open elements if the
1714                         # adoption agency algorithm didn't already remove it (it might not
1715                         # have if the element is not in table scope).
1716                         found = false
1717                         for el in afe
1718                                 if el.type is TYPE_AFE_MARKER
1719                                         break
1720                                 if el.name is 'a' and el.namespace is NS_HTML
1721                                         found = el
1722                         if found?
1723                                 parse_error()
1724                                 adoption_agency 'a'
1725                                 for el, i in afe
1726                                         if el is found
1727                                                 afe.splice i, 1
1728                                 for el, i in open_els
1729                                         if el is found
1730                                                 open_els.splice i, 1
1731                         reconstruct_afe()
1732                         el = insert_html_element t
1733                         afe_push el
1734                         return
1735                 if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1736                         reconstruct_afe()
1737                         el = insert_html_element t
1738                         afe_push el
1739                         return
1740                 if t.type is TYPE_START_TAG and t.name is 'nobr'
1741                         reconstruct_afe()
1742                         el = insert_html_element t
1743                         afe_push el
1744                         return
1745                 if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
1746                         adoption_agency t.name
1747                         return
1748                 if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1749                         reconstruct_afe()
1750                         insert_html_element t
1751                         afe_push_marker()
1752                         flag_frameset_ok = false
1753                         return
1754                 if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
1755                         unless is_in_scope t.name, NS_HTML
1756                                 parse_error()
1757                                 return
1758                         generate_implied_end_tags()
1759                         if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
1760                                 parse_error()
1761                         loop
1762                                 el = open_els.shift()
1763                                 if el.name is t.name and el.namespace is NS_HTML
1764                                         break
1765                         clear_afe_to_marker()
1766                         return
1767                 if t.type is TYPE_START_TAG and t.name is 'table'
1768                         close_p_if_in_button_scope() # fixfull quirksmode thing
1769                         insert_html_element t
1770                         flag_frameset_ok = false
1771                         ins_mode = ins_mode_in_table
1772                         return
1773                 if t.type is TYPE_END_TAG and t.name is 'br'
1774                         parse_error()
1775                         t.type is TYPE_START_TAG
1776                         # fall through
1777                 if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
1778                         reconstruct_afe()
1779                         insert_html_element t
1780                         open_els.shift()
1781                         t.acknowledge_self_closing()
1782                         flag_frameset_ok = false
1783                         return
1784                 if t.type is TYPE_START_TAG and t.name is 'input'
1785                         reconstruct_afe()
1786                         insert_html_element t
1787                         open_els.shift()
1788                         t.acknowledge_self_closing()
1789                         unless is_input_hidden_tok t
1790                                 flag_frameset_ok = false
1791                         return
1792                 if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
1793                         insert_html_element t
1794                         open_els.shift()
1795                         t.acknowledge_self_closing()
1796                         return
1797                 if t.type is TYPE_START_TAG and t.name is 'hr'
1798                         close_p_if_in_button_scope()
1799                         insert_html_element t
1800                         open_els.shift()
1801                         t.acknowledge_self_closing()
1802                         flag_frameset_ok = false
1803                         return
1804                 if t.type is TYPE_START_TAG and t.name is 'image'
1805                         parse_error()
1806                         t.name = 'img'
1807                         ins_mode t
1808                         return
1809                 if t.type is TYPE_START_TAG and t.name is 'isindex'
1810                         parse_error()
1811                         if template_tag_is_open() is false and form_element_pointer isnt null
1812                                 return
1813                         t.acknowledge_self_closing()
1814                         flag_frameset_ok = false
1815                         close_p_if_in_button_scope()
1816                         el = insert_html_element new_open_tag 'form'
1817                         unless template_tag_is_open()
1818                                 form_element_pointer = el
1819                         for a in t.attrs_a
1820                                 if a[0] is 'action'
1821                                         el.attrs['action'] = a[1]
1822                                         break
1823                         insert_html_element new_open_tag 'hr'
1824                         open_els.shift()
1825                         reconstruct_afe()
1826                         insert_html_element new_open_tag 'label'
1827                         # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
1828                         input_el = new_open_tag 'input'
1829                         prompt = null
1830                         for a in t.attrs_a
1831                                 if a[0] is 'prompt'
1832                                         prompt = a[1]
1833                                 if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
1834                                         input_el.attrs_a.push [a[0], a[1]]
1835                         input_el.attrs_a.push ['name', 'isindex']
1836                         # fixfull this next bit is in english... internationalize?
1837                         prompt ?= "This is a searchable index. Enter search keywords: "
1838                         insert_character prompt # fixfull split
1839                         # TODO submit typo "balue" in spec
1840                         insert_html_element input_el
1841                         open_els.shift()
1842                         # insert_character '' # you can put chars here if promt attr missing
1843                         open_els.shift()
1844                         insert_html_element new_open_tag 'hr'
1845                         open_els.shift()
1846                         open_els.shift()
1847                         unless template_tag_is_open()
1848                                 form_element_pointer = null
1849                         return
1850                 if t.type is TYPE_START_TAG and t.name is 'textarea'
1851                         insert_html_element t
1852                         if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
1853                                 cur += 1
1854                         tok_state = tok_state_rcdata
1855                         original_ins_mode = ins_mode
1856                         flag_frameset_ok = false
1857                         ins_mode = ins_mode_text
1858                         return
1859                 if t.type is TYPE_START_TAG and t.name is 'xmp'
1860                         close_p_if_in_button_scope()
1861                         reconstruct_afe()
1862                         flag_frameset_ok = false
1863                         parse_generic_raw_text t
1864                         return
1865
1866                 # FIXME CONTINUE
1867
1868                 if t.type is TYPE_START_TAG # any other start tag
1869                         reconstruct_afe()
1870                         insert_html_element t
1871                         return
1872                 if t.type is TYPE_END_TAG # any other end tag
1873                         in_body_any_other_end_tag t.name
1874                 return
1875
1876         ins_mode_in_table_else = (t) ->
1877                 parse_error()
1878                 flag_foster_parenting = true # FIXME
1879                 ins_mode_in_body t
1880                 flag_foster_parenting = false
1881         can_in_table = { # FIXME do this inline like everywhere else
1882                 'table': true
1883                 'tbody': true
1884                 'tfoot': true
1885                 'thead': true
1886                 'tr': true
1887         }
1888
1889         # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
1890         ins_mode_text = (t) ->
1891                 if t.type is TYPE_TEXT
1892                         insert_character t
1893                         return
1894                 if t.type is TYPE_EOF
1895                         parse_error()
1896                         if open_els[0].name is 'script'
1897                                 open_els[0].flag 'already started', true
1898                         open_els.shift()
1899                         ins_mode = original_ins_mode
1900                         ins_mode t
1901                         return
1902                 if t.type is TYPE_END_TAG and t.name is 'script'
1903                         open_els.shift()
1904                         ins_mode = original_ins_mode
1905                         # fixfull the spec seems to assume that I'm going to run the script
1906                         # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
1907                         return
1908                 if t.type is TYPE_END_TAG
1909                         open_els.shift()
1910                         ins_mode = original_ins_mode
1911                         return
1912                 console.log 'warning: end of ins_mode_text reached'
1913
1914         # the functions below implement the tokenizer stats described here:
1915         # http://www.w3.org/TR/html5/syntax.html#tokenization
1916
1917         # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
1918         ins_mode_in_table = (t) ->
1919                 switch t.type
1920                         when TYPE_TEXT
1921                                 if can_in_table[t.name]
1922                                         original_ins_mode = ins_mode
1923                                         ins_mode = ins_mode_in_table_text
1924                                         ins_mode t
1925                                 else
1926                                         ins_mode_in_table_else t
1927                         when TYPE_COMMENT
1928                                 insert_comment t
1929                         when TYPE_DOCTYPE
1930                                 parse_error()
1931                         when TYPE_START_TAG
1932                                 switch t.name
1933                                         when 'caption'
1934                                                 clear_stack_to_table_context()
1935                                                 afe_push_marker()
1936                                                 insert_html_element t
1937                                                 ins_mode = ins_mode_in_caption
1938                                         when 'colgroup'
1939                                                 clear_stack_to_table_context()
1940                                                 insert_html_element t
1941                                                 ins_mode = ins_mode_in_column_group
1942                                         when 'col'
1943                                                 clear_stack_to_table_context()
1944                                                 insert_html_element new_open_tag 'colgroup'
1945                                                 ins_mode = ins_mode_in_column_group
1946                                                 ins_mode t
1947                                         when 'tbody', 'tfoot', 'thead'
1948                                                 clear_stack_to_table_context()
1949                                                 insert_html_element t
1950                                                 ins_mode = ins_mode_in_table_body
1951                                         when 'td', 'th', 'tr'
1952                                                 clear_stack_to_table_context()
1953                                                 insert_html_element new_open_tag 'tbody'
1954                                                 ins_mode = ins_mode_in_table_body
1955                                                 ins_mode t
1956                                         when 'table'
1957                                                 parse_error()
1958                                                 if is_in_table_scope 'table'
1959                                                         loop
1960                                                                 el = open_els.shift()
1961                                                                 if el.name is 'table'
1962                                                                         break
1963                                                         reset_ins_mode()
1964                                                         ins_mode t
1965                                         when 'style', 'script', 'template'
1966                                                 ins_mode_in_head t
1967                                         when 'input'
1968                                                 if is_input_hidden_tok t
1969                                                         ins_mode_in_table_else t
1970                                                 else
1971                                                         parse_error()
1972                                                         el = insert_html_element t
1973                                                         open_els.shift()
1974                                                         t.acknowledge_self_closing()
1975                                         when 'form'
1976                                                 parse_error()
1977                                                 if form_element_pointer?
1978                                                         return
1979                                                 if template_tag_is_open()
1980                                                         return
1981                                                 form_element_pointer = insert_html_element t
1982                                                 open_els.shift()
1983                                         else
1984                                                 ins_mode_in_table_else t
1985                         when TYPE_END_TAG
1986                                 switch t.name
1987                                         when 'table'
1988                                                 if is_in_table_scope 'table'
1989                                                         loop
1990                                                                 el = open_els.shift()
1991                                                                 if el.name is 'table'
1992                                                                         break
1993                                                         reset_ins_mode()
1994                                                 else
1995                                                         parse_error
1996                                         when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
1997                                                 parse_error()
1998                                         when 'template'
1999                                                 ins_mode_in_head t
2000                                         else
2001                                                 ins_mode_in_table_else t
2002                         when TYPE_EOF
2003                                 ins_mode_in_body t
2004                         else
2005                                 ins_mode_in_table_else t
2006
2007
2008         # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
2009         ins_mode_in_table_text = (t) ->
2010                 if t.type is TYPE_TEXT and t.text is "\u0000"
2011                         # huh? I thought the tokenizer didn't emit these
2012                         parse_error()
2013                         return
2014                 if t.type is TYPE_TEXT
2015                         pending_table_character_tokens.push t
2016                         return
2017                 # Anything else
2018                 all_space = true
2019                 for old in pending_table_character_tokens
2020                         unless is_space_tok old
2021                                 all_space = false
2022                                 break
2023                 if all_space
2024                         for old in pending_table_character_tokens
2025                                 insert_character old
2026                 else
2027                         for old in pending_table_character_tokens
2028                                 ins_mode_table_else old
2029                 pending_table_character_tokens = [] # FIXME test (spec doesn't say this)
2030                 ins_mode = original_ins_mode
2031                 ins_mode t
2032
2033         # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
2034         ins_mode_in_caption = (t) ->
2035                 if t.type is TYPE_END_TAG and t.name is 'caption'
2036                         if is_in_table_scope 'caption'
2037                                 generate_implied_end_tags()
2038                                 if open_els[0].name isnt 'caption'
2039                                         parse_error()
2040                                 loop
2041                                         el = open_els.shift()
2042                                         if el.name is 'caption'
2043                                                 break
2044                                 clear_afe_to_marker()
2045                                 ins_mode = ins_mode_in_table
2046                         else
2047                                 parse_error()
2048                                 # fragment case
2049                         return
2050                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2051                         parse_error()
2052                         if is_in_table_scope 'caption'
2053                                 loop
2054                                         el = open_els.shift()
2055                                         if el.name is 'caption'
2056                                                 break
2057                                 clear_afe_to_marker()
2058                                 ins_mode = ins_mode_in_table
2059                                 ins_mode t
2060                         # else fragment case
2061                         return
2062                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2063                         parse_error()
2064                         return
2065                 # Anything else
2066                 ins_mode_in_body t
2067
2068         # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
2069         ins_mode_in_column_group = (t) ->
2070                 if is_space_tok t
2071                         insert_character t
2072                         return
2073                 if t.type is TYPE_COMMENT
2074                         insert_comment t
2075                         return
2076                 if t.type is TYPE_DOCTYPE
2077                         parse_error()
2078                         return
2079                 if t.type is TYPE_START_TAG and t.name is 'html'
2080                         ins_mode_in_body t
2081                         return
2082                 if t.type is TYPE_START_TAG and t.name is 'col'
2083                         el = insert_html_element t
2084                         open_els.shift()
2085                         t.acknowledge_self_closing()
2086                         return
2087                 if t.type is TYPE_END_TAG and t.name is 'colgroup'
2088                         if open_els[0].name is 'colgroup'
2089                                 open_els.shift()
2090                                 ins_mode = ins_mode_in_table
2091                         else
2092                                 parse_error()
2093                         return
2094                 if t.type is TYPE_END_TAG and t.name is 'col'
2095                         parse_error()
2096                         return
2097                 if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
2098                         ins_mode_in_head t
2099                         return
2100                 if t.type is TYPE_EOF
2101                         ins_mode_in_body t
2102                         return
2103                 # Anything else
2104                 if open_els[0].name isnt 'colgroup'
2105                         parse_error()
2106                         return
2107                 open_els.shift()
2108                 ins_mode = ins_mode_in_table
2109                 ins_mode t
2110                 return
2111
2112         # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
2113         ins_mode_in_table_body = (t) ->
2114                 if t.type is TYPE_START_TAG and t.name is 'tr'
2115                         clear_stack_to_table_body_context()
2116                         insert_html_element t
2117                         ins_mode = ins_mode_in_row
2118                         return
2119                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2120                         parse_error()
2121                         clear_stack_to_table_body_context()
2122                         insert_html_element new_open_tag 'tr'
2123                         ins_mode = ins_mode_in_row
2124                         ins_mode t
2125                         return
2126                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2127                         unless is_in_table_scope t.name # fixfull check namespace
2128                                 parse_error()
2129                                 return
2130                         clear_stack_to_table_body_context()
2131                         open_els.shift()
2132                         ins_mode = ins_mode_in_table
2133                         return
2134                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
2135                         has = false
2136                         for el in open_els
2137                                 if el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead'
2138                                         has = true
2139                                         break
2140                                 if table_scopers[el.name]
2141                                         break
2142                         if !has
2143                                 parse_error()
2144                                 return
2145                         clear_stack_to_table_body_context()
2146                         open_els.shift()
2147                         ins_mode = ins_mode_in_table
2148                         ins_mode t
2149                         return
2150                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
2151                         parse_error()
2152                         return
2153                 # Anything else
2154                 ins_mode_in_table t
2155
2156         # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
2157         ins_mode_in_row = (t) ->
2158                 if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
2159                         clear_stack_to_table_row_context()
2160                         insert_html_element t
2161                         ins_mode = ins_mode_in_cell
2162                         afe_push_marker()
2163                         return
2164                 if t.type is TYPE_END_TAG and t.name is 'tr'
2165                         if is_in_table_scope 'tr'
2166                                 clear_stack_to_table_row_context()
2167                                 open_els.shift()
2168                                 ins_mode = ins_mode_in_table_body
2169                         else
2170                                 parse_error()
2171                         return
2172                 if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
2173                         if is_in_table_scope 'tr'
2174                                 clear_stack_to_table_row_context()
2175                                 open_els.shift()
2176                                 ins_mode = ins_mode_in_table_body
2177                                 ins_mode t
2178                         else
2179                                 parse_error()
2180                         return
2181                 if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2182                         if is_in_table_scope t.name # fixfull namespace
2183                                 if is_in_table_scope 'tr'
2184                                         clear_stack_to_table_row_context()
2185                                         open_els.shift()
2186                                         ins_mode = ins_mode_in_table_body
2187                                         ins_mode t
2188                         else
2189                                 parse_error()
2190                         return
2191                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
2192                         parse_error()
2193                         return
2194                 # Anything else
2195                 ins_mode_in_table t
2196
2197         # http://www.w3.org/TR/html5/syntax.html#close-the-cell
2198         close_the_cell = ->
2199                 generate_implied_end_tags()
2200                 unless open_els[0].name is 'td' or open_els[0] is 'th'
2201                         parse_error()
2202                 loop
2203                         el = open_els.shift()
2204                         if el.name is 'td' or el.name is 'th'
2205                                 break
2206                 clear_afe_to_marker()
2207                 ins_mode = ins_mode_in_row
2208
2209         # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
2210         ins_mode_in_cell = (t) ->
2211                 if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
2212                         if is_in_table_scope t.name
2213                                 generate_implied_end_tags()
2214                                 if open_els[0].name isnt t.name
2215                                         parse_error
2216                                 loop
2217                                         el = open_els.shift()
2218                                         if el.name is t.name
2219                                                 break
2220                                 clear_afe_to_marker()
2221                                 ins_mode = ins_mode_in_row
2222                         else
2223                                 parse_error()
2224                         return
2225                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
2226                         has = false
2227                         for el in open_els
2228                                 if el.name is 'td' or el.name is 'th'
2229                                         has = true
2230                                         break
2231                                 if table_scopers[el.name]
2232                                         break
2233                         if !has
2234                                 parse_error()
2235                                 return
2236                         close_the_cell()
2237                         ins_mode t
2238                         return
2239                 if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
2240                         parse_error()
2241                         return
2242                 if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
2243                         if is_in_table_scope t.name # fixfull namespace
2244                                 close_the_cell()
2245                                 ins_mode t
2246                         else
2247                                 parse_error()
2248                         return
2249                 # Anything Else
2250                 ins_mode_in_body t
2251
2252         # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
2253         ins_mode_in_select = (t) ->
2254                 if t.type is TYPE_TEXT and t.text is "\u0000"
2255                         parse_error()
2256                         return
2257                 if t.type is TYPE_TEXT
2258                         insert_character t
2259                         return
2260                 if t.type is TYPE_COMMENT
2261                         insert_comment t
2262                         return
2263                 if t.type is TYPE_DOCTYPE
2264                         parse_error()
2265                         return
2266                 if t.type is TYPE_START_TAG and t.name is 'html'
2267                         ins_mode_in_body t
2268                         return
2269                 if t.type is TYPE_START_TAG and t.name is 'option'
2270                         if open_els[0].name is 'option'
2271                                 open_els.shift()
2272                         insert_html_element t
2273                         return
2274                 if t.type is TYPE_START_TAG and t.name is 'optgroup'
2275                         if open_els[0].name is 'option'
2276                                 open_els.shift()
2277                         if open_els[0].name is 'optgroup'
2278                                 open_els.shift()
2279                         insert_html_element t
2280                         return
2281                 if t.type is TYPE_END_TAG and t.name is 'optgroup'
2282                         if open_els[0].name is 'option' and open_els[1].name is 'optgroup'
2283                                 open_els.shift()
2284                         if open_els[0].name is 'optgroup'
2285                                 open_els.shift()
2286                         else
2287                                 parse_error()
2288                         return
2289                 if t.type is TYPE_END_TAG and t.name is 'option'
2290                         if open_els[0].name is 'option'
2291                                 open_els.shift()
2292                         else
2293                                 parse_error()
2294                         return
2295                 if t.type is TYPE_END_TAG and t.name is 'select'
2296                         if is_in_select_scope 'select'
2297                                 loop
2298                                         el = open_els.shift()
2299                                         if el.name is 'select'
2300                                                 break
2301                                 reset_ins_mode()
2302                         else
2303                                 parse_error()
2304                         return
2305                 if t.type is TYPE_START_TAG and t.name is 'select'
2306                         parse_error()
2307                         loop
2308                                 el = open_els.shift()
2309                                 if el.name is 'select'
2310                                         break
2311                         reset_ins_mode()
2312                         # spec says that this is the same as </select> but it doesn't say
2313                         # to check scope first
2314                         return
2315                 if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
2316                         parse_error()
2317                         if is_in_select_scope 'select'
2318                                 return
2319                         loop
2320                                 el = open_els.shift()
2321                                 if el.name is 'select'
2322                                         break
2323                         reset_ins_mode()
2324                         ins_mode t
2325                         return
2326                 if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
2327                         ins_mode_in_head t
2328                         return
2329                 if t.type is TYPE_EOF
2330                         ins_mode_in_body t
2331                         return
2332                 # Anything else
2333                 parse_error()
2334                 return
2335
2336         # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
2337         ins_mode_in_select_in_table = (t) ->
2338                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2339                         parse_error()
2340                         loop
2341                                 el = open_els.shift()
2342                                 if el.name is 'select'
2343                                         break
2344                         reset_ins_mode()
2345                         ins_mode t
2346                         return
2347                 if t.type is TYPE_END_TAG and (t.name is 'caption' or t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr' or t.name is 'td' or t.name is 'th')
2348                         parse_error()
2349                         unless is_in_table_scope t.name, NS_HTML
2350                                 return
2351                         loop
2352                                 el = open_els.shift()
2353                                 if el.name is 'select'
2354                                         break
2355                         reset_ins_mode()
2356                         ins_mode t
2357                         return
2358                 # Anything else
2359                 ins_mode_in_select t
2360                 return
2361
2362         # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
2363         ins_mode_in_template = (t) ->
2364                 if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
2365                         ins_mode_in_body t
2366                         return
2367                 if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
2368                         ins_mode_in_head t
2369                         return
2370                 if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
2371                         template_ins_modes.shift()
2372                         template_ins_modes.unshift ins_mode_in_table
2373                         ins_mode = ins_mode_in_table
2374                         ins_mode t
2375                         return
2376                 if t.type is TYPE_START_TAG and t.name is 'col'
2377                         template_ins_modes.shift()
2378                         template_ins_modes.unshift ins_mode_in_column_group
2379                         ins_mode = ins_mode_in_column_group
2380                         ins_mode t
2381                         return
2382                 if t.type is TYPE_START_TAG and t.name is 'tr'
2383                         template_ins_modes.shift()
2384                         template_ins_modes.unshift ins_mode_in_table_body
2385                         ins_mode = ins_mode_in_table_body
2386                         ins_mode t
2387                         return
2388                 if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
2389                         template_ins_modes.shift()
2390                         template_ins_modes.unshift ins_mode_in_row
2391                         ins_mode = ins_mode_in_row
2392                         ins_mode t
2393                         return
2394                 if t.type is TYPE_START_TAG
2395                         template_ins_modes.shift()
2396                         template_ins_modes.unshift ins_mode_in_body
2397                         ins_mode = ins_mode_in_body
2398                         ins_mode t
2399                         return
2400                 if t.type is TYPE_END_TAG
2401                         parse_error()
2402                         return
2403                 if t.type is TYPE_EOF
2404                         unless template_tag_is_open()
2405                                 stop_parsing()
2406                                 return
2407                         parse_error()
2408                         loop
2409                                 el = open_els.shift()
2410                                 if el.name is 'template' # fixfull check namespace
2411                                         break
2412                         clear_afe_to_marker()
2413                         template_ins_modes.shift()
2414                         reset_ins_mode()
2415                         ins_mode t
2416
2417         # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
2418         ins_mode_after_body = (t) ->
2419                 if is_space_tok t
2420                         ins_mode_in_body t
2421                         return
2422                 if t.type is TYPE_COMMENT
2423                         insert_comment t, [open_els[0], open_els[0].children.length]
2424                         return
2425                 if t.type is TYPE_DOCTYPE
2426                         parse_error()
2427                         return
2428                 if t.type is TYPE_START_TAG and t.name is 'html'
2429                         ins_mode_in_body t
2430                         return
2431                 if t.type is TYPE_END_TAG and t.name is 'html'
2432                         # fixfull fragment case
2433                         ins_mode = ins_mode_after_after_body
2434                         return
2435                 if t.type is TYPE_EOF
2436                         stop_parsing()
2437                         return
2438                 # Anything ELse
2439                 parse_error()
2440                 ins_mode = ins_mode_in_body
2441                 ins_mode t
2442
2443         # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
2444         ins_mode_in_frameset = (t) ->
2445                 if is_space_tok t
2446                         insert_character t
2447                         return
2448                 if t.type is TYPE_COMMENT
2449                         insert_comment t
2450                         return
2451                 if t.type is TYPE_DOCTYPE
2452                         parse_error()
2453                         return
2454                 if t.type is TYPE_START_TAG and t.name is 'html'
2455                         ins_mode_in_body t
2456                         return
2457                 if t.type is TYPE_START_TAG and t.name is 'frameset'
2458                         insert_html_element t
2459                         return
2460                 if t.type is TYPE_END_TAG and t.name is 'frameset'
2461                         # TODO ?correct for: "if the current node is the root html element"
2462                         if open_els.length is 1
2463                                 parse_error()
2464                                 return # fragment case
2465                         open_els.shift()
2466                         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
2467                                 ins_mode = ins_mode_after_frameset
2468                         return
2469                 if t.type is TYPE_START_TAG and t.name is 'frame'
2470                         insert_html_element t
2471                         open_els.shift()
2472                         t.acknowledge_self_closing()
2473                         return
2474                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2475                         ins_mode_in_head t
2476                         return
2477                 if t.type is TYPE_EOF
2478                         # TODO ?correct for: "if the current node is not the root html element"
2479                         if open_els.length isnt 1
2480                                 parse_error()
2481                         stop_parsing()
2482                         return
2483                 # Anything else
2484                 parse_error()
2485                 return
2486
2487         # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
2488         ins_mode_after_frameset = (t) ->
2489                 if is_space_tok t
2490                         insert_character t
2491                         return
2492                 if t.type is TYPE_COMMENT
2493                         insert_comment t
2494                         return
2495                 if t.type is TYPE_DOCTYPE
2496                         parse_error()
2497                         return
2498                 if t.type is TYPE_START_TAG and t.name is 'html'
2499                         ins_mode_in_body t
2500                         return
2501                 if t.type is TYPE_END_TAG and t.name is 'html'
2502                         insert_mode = ins_mode_after_after_frameset
2503                         return
2504                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2505                         ins_mode_in_head t
2506                         return
2507                 if t.type is TYPE_EOF
2508                         stop_parsing()
2509                         return
2510                 # Anything else
2511                 parse_error()
2512                 return
2513
2514         # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
2515         ins_mode_after_after_body = (t) ->
2516                 if t.type is TYPE_COMMENT
2517                         insert_comment t, [doc, doc.children.length]
2518                         return
2519                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2520                         ins_mode_in_body t
2521                         return
2522                 if t.type is TYPE_EOF
2523                         stop_parsing()
2524                         return
2525                 # Anything else
2526                 parse_error()
2527                 ins_mode = ins_mode_in_body
2528                 return
2529
2530         # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
2531         ins_mode_after_after_frameset = (t) ->
2532                 if t.type is TYPE_COMMENT
2533                         insert_comment t, [doc, doc.children.length]
2534                         return
2535                 if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
2536                         ins_mode_in_body t
2537                         return
2538                 if t.type is TYPE_EOF
2539                         stop_parsing()
2540                         return
2541                 if t.type is TYPE_START_TAG and t.name is 'noframes'
2542                         ins_mode_in_head t
2543                         return
2544                 # Anything else
2545                 parse_error()
2546                 return
2547
2548
2549
2550
2551
2552         # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
2553         tok_state_data = ->
2554                 switch c = txt.charAt(cur++)
2555                         when '&'
2556                                 return new_text_node parse_character_reference()
2557                         when '<'
2558                                 tok_state = tok_state_tag_open
2559                         when "\u0000"
2560                                 parse_error()
2561                                 return new_text_node c
2562                         when '' # EOF
2563                                 return new_eof_token()
2564                         else
2565                                 return new_text_node c
2566                 return null
2567
2568         # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
2569         # not needed: tok_state_character_reference_in_data = ->
2570         # just call parse_character_reference()
2571
2572         # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
2573         tok_state_rcdata = ->
2574                 switch c = txt.charAt(cur++)
2575                         when '&'
2576                                 return new_text_node parse_character_reference()
2577                         when '<'
2578                                 tok_state = tok_state_rcdata_less_than_sign
2579                         when "\u0000"
2580                                 parse_error()
2581                                 return new_character_token "\ufffd"
2582                         when '' # EOF
2583                                 return new_eof_token()
2584                         else
2585                                 return new_character_token c
2586                 return null
2587
2588         # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
2589         # not needed: tok_state_character_reference_in_rcdata = ->
2590         # just call parse_character_reference()
2591
2592         # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
2593         tok_state_rawtext = ->
2594                 switch c = txt.charAt(cur++)
2595                         when '<'
2596                                 tok_state = tok_state_rawtext_less_than_sign
2597                         when "\u0000"
2598                                 parse_error()
2599                                 return new_character_token "\ufffd"
2600                         when '' # EOF
2601                                 return new_eof_token()
2602                         else
2603                                 return new_character_token c
2604                 return null
2605
2606         # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
2607         tok_state_script_data = ->
2608                 switch c = txt.charAt(cur++)
2609                         when '<'
2610                                 tok_state = tok_state_script_data_less_than_sign
2611                         when "\u0000"
2612                                 parse_error()
2613                                 return new_character_token "\ufffd"
2614                         when '' # EOF
2615                                 return new_eof_token()
2616                         else
2617                                 return new_character_token c
2618                 return null
2619
2620         # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
2621         tok_state_plaintext = ->
2622                 switch c = txt.charAt(cur++)
2623                         when "\u0000"
2624                                 parse_error()
2625                                 return new_character_token "\ufffd"
2626                         when '' # EOF
2627                                 return new_eof_token()
2628                         else
2629                                 return new_character_token c
2630                 return null
2631
2632
2633         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
2634         tok_state_tag_open = ->
2635                 switch c = txt.charAt(cur++)
2636                         when '!'
2637                                 tok_state = tok_state_markup_declaration_open
2638                         when '/'
2639                                 tok_state = tok_state_end_tag_open
2640                         when '?'
2641                                 parse_error()
2642                                 tok_cur_tag = new_comment_token '?'
2643                                 tok_state = tok_state_bogus_comment
2644                         else
2645                                 if is_lc_alpha(c)
2646                                         tok_cur_tag = new_open_tag c
2647                                         tok_state = tok_state_tag_name
2648                                 else if is_uc_alpha(c)
2649                                         tok_cur_tag = new_open_tag c.toLowerCase()
2650                                         tok_state = tok_state_tag_name
2651                                 else
2652                                         parse_error()
2653                                         tok_state = tok_state_data
2654                                         cur -= 1 # we didn't parse/handle the char after <
2655                                         return new_text_node '<'
2656                 return null
2657
2658         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
2659         tok_state_end_tag_open = ->
2660                 switch c = txt.charAt(cur++)
2661                         when '>'
2662                                 parse_error()
2663                                 tok_state = tok_state_data
2664                         when '' # EOF
2665                                 parse_error()
2666                                 tok_state = tok_state_data
2667                                 return new_text_node '</'
2668                         else
2669                                 if is_uc_alpha(c)
2670                                         tok_cur_tag = new_end_tag c.toLowerCase()
2671                                         tok_state = tok_state_tag_name
2672                                 else if is_lc_alpha(c)
2673                                         tok_cur_tag = new_end_tag c
2674                                         tok_state = tok_state_tag_name
2675                                 else
2676                                         parse_error()
2677                                         tok_cur_tag = new_comment_token '/'
2678                                         tok_state = tok_state_bogus_comment
2679                 return null
2680
2681         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
2682         tok_state_tag_name = ->
2683                 switch c = txt.charAt(cur++)
2684                         when "\t", "\n", "\u000c", ' '
2685                                 tok_state = tok_state_before_attribute_name
2686                         when '/'
2687                                 tok_state = tok_state_self_closing_start_tag
2688                         when '>'
2689                                 tok_state = tok_state_data
2690                                 tmp = tok_cur_tag
2691                                 tok_cur_tag = null
2692                                 return tmp
2693                         when "\u0000"
2694                                 parse_error()
2695                                 tok_cur_tag.name += "\ufffd"
2696                         when '' # EOF
2697                                 parse_error()
2698                                 tok_state = tok_state_data
2699                         else
2700                                 if is_uc_alpha(c)
2701                                         tok_cur_tag.name += c.toLowerCase()
2702                                 else
2703                                         tok_cur_tag.name += c
2704                 return null
2705
2706         # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
2707         tok_state_rcdata_less_than_sign = ->
2708                 c = txt.charAt(cur++)
2709                 if c is '/'
2710                         temporary_buffer = ''
2711                         tok_state = tok_state_rcdata_end_tag_open
2712                         return null
2713                 # Anything else
2714                 tok_state = tok_state_rcdata
2715                 cur -= 1 # reconsume the input character
2716                 return new_character_token '<'
2717
2718         # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
2719         tok_state_rcdata_end_tag_open = ->
2720                 c = txt.charAt(cur++)
2721                 if is_uc_alpha(c)
2722                         tok_cur_tag = new_end_tag c.toLowerCase()
2723                         temporary_buffer += c
2724                         tok_state = tok_state_rcdata_end_tag_name
2725                         return null
2726                 if is_lc_alpha(c)
2727                         tok_cur_tag = new_end_tag c
2728                         temporary_buffer += c
2729                         tok_state = tok_state_rcdata_end_tag_name
2730                         return null
2731                 # Anything else
2732                 tok_state = tok_state_rcdata
2733                 cur -= 1 # reconsume the input character
2734                 return new_character_token "</" # fixfull separate these
2735
2736         # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
2737         is_appropriate_end_tag = (t) ->
2738                 # spec says to check against "the tag name of the last start tag to
2739                 # have been emitted from this tokenizer", but this is only called from
2740                 # the various "raw" states, which I'm pretty sure all push the start
2741                 # token onto open_els. TODO: verify this after the script data states
2742                 # are implemented
2743                 debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
2744                 return t.type is TYPE_END_TAG and t.name is open_els[0].name
2745
2746         # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
2747         tok_state_rcdata_end_tag_name = ->
2748                 c = txt.charAt(cur++)
2749                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
2750                         if is_appropriate_end_tag tok_cur_tag
2751                                 tok_state = tok_state_before_attribute_name
2752                                 return
2753                         # else fall through to "Anything else"
2754                 if c is '/'
2755                         if is_appropriate_end_tag tok_cur_tag
2756                                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
2757                                 return
2758                         # else fall through to "Anything else"
2759                 if c is '>'
2760                         if is_appropriate_end_tag tok_cur_tag
2761                                 tok_state = tok_state_data
2762                                 return tok_cur_tag
2763                         # else fall through to "Anything else"
2764                 if is_uc_alpha(c)
2765                         tok_cur_tag.name += c.toLowerCase()
2766                         temporary_buffer += c
2767                         return null
2768                 if is_lc_alpha(c)
2769                         tok_cur_tag.name += c
2770                         temporary_buffer += c
2771                         return null
2772                 # Anything else
2773                 tok_state = tok_state_rcdata
2774                 cur -= 1 # reconsume the input character
2775                 return new_character_token '</' + temporary_buffer # fixfull separate these
2776
2777         # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
2778         tok_state_rawtext_less_than_sign = ->
2779                 c = txt.charAt(cur++)
2780                 if c is '/'
2781                         temporary_buffer = ''
2782                         tok_state = tok_state_rawtext_end_tag_open
2783                         return null
2784                 # Anything else
2785                 tok_state = tok_state_rawtext
2786                 cur -= 1 # reconsume the input character
2787                 return new_character_token '<'
2788
2789         # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
2790         tok_state_rawtext_end_tag_open = ->
2791                 c = txt.charAt(cur++)
2792                 if is_uc_alpha(c)
2793                         tok_cur_tag = new_end_tag c.toLowerCase()
2794                         temporary_buffer += c
2795                         tok_state = tok_state_rawtext_end_tag_name
2796                         return null
2797                 if is_lc_alpha(c)
2798                         tok_cur_tag = new_end_tag c
2799                         temporary_buffer += c
2800                         tok_state = tok_state_rawtext_end_tag_name
2801                         return null
2802                 # Anything else
2803                 tok_state = tok_state_rawtext
2804                 cur -= 1 # reconsume the input character
2805                 return new_character_token "</" # fixfull separate these
2806
2807         # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
2808         tok_state_rawtext_end_tag_name = ->
2809                 c = txt.charAt(cur++)
2810                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
2811                         if is_appropriate_end_tag tok_cur_tag
2812                                 tok_state = tok_state_before_attribute_name
2813                                 return
2814                         # else fall through to "Anything else"
2815                 if c is '/'
2816                         if is_appropriate_end_tag tok_cur_tag
2817                                 tok_state = tok_state_self_closing_start_tag
2818                                 return
2819                         # else fall through to "Anything else"
2820                 if c is '>'
2821                         if is_appropriate_end_tag tok_cur_tag
2822                                 tok_state = tok_state_data
2823                                 return tok_cur_tag
2824                         # else fall through to "Anything else"
2825                 if is_uc_alpha(c)
2826                         tok_cur_tag.name += c.toLowerCase()
2827                         temporary_buffer += c
2828                         return null
2829                 if is_lc_alpha(c)
2830                         tok_cur_tag.name += c
2831                         temporary_buffer += c
2832                         return null
2833                 # Anything else
2834                 tok_state = tok_state_rawtext
2835                 cur -= 1 # reconsume the input character
2836                 return new_character_token '</' + temporary_buffer # fixfull separate these
2837
2838         # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
2839         tok_state_script_data_less_than_sign = ->
2840                 c = txt.charAt(cur++)
2841                 if c is '/'
2842                         temporary_buffer = ''
2843                         tok_state = tok_state_script_data_end_tag_open
2844                         return
2845                 if c is '!'
2846                         tok_state = tok_state_script_data_escape_start
2847                         return new_character_token '<!' # fixfull split
2848                 # Anything else
2849                 tok_state = tok_state_script_data
2850                 cur -= 1 # Reconsume
2851                 return new_character_token '<'
2852
2853         # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
2854         tok_state_script_data_end_tag_open = ->
2855                 c = txt.charAt(cur++)
2856                 if is_uc_alpha(c)
2857                         tok_cur_tag = new_end_tag c.toLowerCase()
2858                         temporary_buffer += c
2859                         tok_state = tok_state_script_data_end_tag_name
2860                         return
2861                 if is_lc_alpha(c)
2862                         tok_cur_tag = new_end_tag c
2863                         temporary_buffer += c
2864                         tok_state = tok_state_script_data_end_tag_name
2865                         return
2866                 # Anything else
2867                 tok_state = tok_state_script_data
2868                 cur -= 1 # Reconsume
2869                 return new_character_token '</'
2870
2871         # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
2872         tok_state_script_data_end_tag_name = ->
2873                 c = txt.charAt(cur++)
2874                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
2875                         if is_appropriate_end_tag tok_cur_tag
2876                                 tok_state = tok_state_before_attribute_name
2877                                 return
2878                         # fall through
2879                 if c is '/'
2880                         if is_appropriate_end_tag tok_cur_tag
2881                                 tok_state = tok_state_self_closing_start_tag
2882                                 return
2883                         # fall through
2884                 if is_uc_alpha(c)
2885                         tok_cur_tag.name += c.toLowerCase()
2886                         temporary_buffer += c
2887                         return
2888                 if is_lc_alpha(c)
2889                         tok_cur_tag.name += c
2890                         temporary_buffer += c
2891                         return
2892                 # Anything else
2893                 tok_state = tok_state_script_data
2894                 cur -= 1 # Reconsume
2895                 return new_character_token "</#{temporary_buffer}" # fixfull split
2896
2897         # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
2898         tok_state_script_data_escape_start = ->
2899                 c = txt.charAt(cur++)
2900                 if c is '-'
2901                         tok_state = tok_state_script_data_escape_start_dash
2902                         return new_character_token '-'
2903                 # Anything else
2904                 tok_state = tok_state_script_data
2905                 cur -= 1 # Reconsume
2906                 return
2907
2908         # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
2909         tok_state_script_data_escape_start_dash = ->
2910                 c = txt.charAt(cur++)
2911                 if c is '-'
2912                         tok_state = tok_state_script_data_escaped_dash_dash
2913                         return new_character_token '-'
2914                 # Anything else
2915                 tok_state = tok_state_script_data
2916                 cur -= 1 # Reconsume
2917                 return
2918
2919         # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
2920         tok_state_script_data_escaped = ->
2921                 c = txt.charAt(cur++)
2922                 if c is '-'
2923                         tok_state = tok_state_script_data_escaped_dash
2924                         return new_character_token '-'
2925                 if c is '<'
2926                         tok_state = tok_state_script_data_escaped_less_than_sign
2927                         return
2928                 if c is "\u0000"
2929                         parse_error()
2930                         return new_character_token "\ufffd"
2931                 if c is '' # EOF
2932                         tok_state = tok_state_data
2933                         parse_error()
2934                         cur -= 1 # Reconsume
2935                         return
2936                 # Anything else
2937                 return new_character_token c
2938
2939         # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
2940         tok_state_script_data_escaped_dash = ->
2941                 c = txt.charAt(cur++)
2942                 if c is '-'
2943                         tok_state = tok_state_script_data_escaped_dash_dash
2944                         return new_character_token '-'
2945                 if c is '<'
2946                         tok_state = tok_state_script_data_escaped_less_than_sign
2947                         return
2948                 if c is "\u0000"
2949                         parse_error()
2950                         tok_state = tok_state_script_data_escaped
2951                         return new_character_token "\ufffd"
2952                 if c is '' # EOF
2953                         tok_state = tok_state_data
2954                         parse_error()
2955                         cur -= 1 # Reconsume
2956                         return
2957                 # Anything else
2958                 tok_state = tok_state_script_data_escaped
2959                 return new_character_token c
2960
2961         # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
2962         tok_state_script_data_escaped_dash_dash = ->
2963                 c = txt.charAt(cur++)
2964                 if c is '-'
2965                         return new_character_token '-'
2966                 if c is '<'
2967                         tok_state = tok_state_script_data_escaped_less_than_sign
2968                         return
2969                 if c is '>'
2970                         tok_state = tok_state_script_data
2971                         return new_character_token '>'
2972                 if c is "\u0000"
2973                         parse_error()
2974                         tok_state = tok_state_script_data_escaped
2975                         return new_character_token "\ufffd"
2976                 if c is '' # EOF
2977                         parse_error()
2978                         tok_state = tok_state_data
2979                         cur -= 1 # Reconsume
2980                         return
2981                 # Anything else
2982                 tok_state = tok_state_script_data_escaped
2983                 return new_character_token c
2984
2985         # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
2986         tok_state_script_data_escaped_less_than_sign = ->
2987                 c = txt.charAt(cur++)
2988                 if c is '/'
2989                         temporary_buffer = ''
2990                         tok_state = tok_state_script_data_escaped_end_tag_open
2991                         return
2992                 if is_uc_alpha(c)
2993                         temporary_buffer = c.toLowerCase() # yes, really
2994                         tok_state = tok_state_script_data_double_escape_start
2995                         return new_character_token "<#{c}" # fixfull split
2996                 if is_lc_alpha(c)
2997                         temporary_buffer = c
2998                         tok_state = tok_state_script_data_double_escape_start
2999                         return new_character_token "<#{c}" # fixfull split
3000                 # Anything else
3001                 tok_state = tok_state_script_data_escaped
3002                 cur -= 1 # Reconsume
3003                 return new_character_token c
3004
3005         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
3006         tok_state_script_data_escaped_end_tag_open = ->
3007                 c = txt.charAt(cur++)
3008                 if is_uc_alpha(c)
3009                         tok_cur_tag = new_end_tag c.toLowerCase()
3010                         temporary_buffer += c
3011                         tok_state = tok_state_script_data_escaped_end_tag_name
3012                         return
3013                 if is_lc_alpha(c)
3014                         tok_cur_tag = new_end_tag c
3015                         temporary_buffer += c
3016                         tok_state = tok_state_script_data_escaped_end_tag_name
3017                         return
3018                 # Anything else
3019                 tok_state = tok_state_script_data_escaped
3020                 cur -= 1 # Reconsume
3021                 return new_character_token '</' # fixfull split
3022
3023         # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
3024         tok_state_script_data_escaped_end_tag_name = ->
3025                 c = txt.charAt(cur++)
3026                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3027                         if is_appropriate_end_tag tok_cur_tag
3028                                 tok_state = tok_state_before_attribute_name
3029                                 return
3030                         # fall through
3031                 if c is '/'
3032                         if is_appropriate_end_tag tok_cur_tag
3033                                 tok_state = tok_state_self_closing_start_tag
3034                                 return
3035                         # fall through
3036                 if is_uc_alpha(c)
3037                         tok_cur_tag.name += c.toLowerCase()
3038                         temporary_buffer += c.toLowerCase()
3039                         return
3040                 if is_lc_alpha(c)
3041                         tok_cur_tag.name += c
3042                         temporary_buffer += c.toLowerCase()
3043                         return
3044                 # Anything else
3045                 tok_state = tok_state_script_data_escaped
3046                 cur -= 1 # Reconsume
3047                 return new_character_token "</#{temporary_buffer}" # fixfull split
3048
3049         # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
3050         tok_state_script_data_double_escape_start = ->
3051                 c = txt.charAt(cur++)
3052                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3053                         if temporary_buffer is 'script'
3054                                 tok_state = tok_state_script_data_double_escaped
3055                         else
3056                                 tok_state = tok_state_script_data_escaped
3057                         return new_character_token c
3058                 if is_uc_alpha(c)
3059                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3060                         return new_character_token c
3061                 if is_lc_alpha(c)
3062                         temporary_buffer += c
3063                         return new_character_token c
3064                 # Anything else
3065                 tok_state = tok_state_script_data_escaped
3066                 cur -= 1 # Reconsume
3067                 return
3068
3069         # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
3070         tok_state_script_data_double_escaped = ->
3071                 c = txt.charAt(cur++)
3072                 if c is '-'
3073                         tok_state = tok_state_script_data_double_escaped_dash
3074                         return new_character_token '-'
3075                 if c is '<'
3076                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3077                         return new_character_token '<'
3078                 if c is "\u0000"
3079                         parse_error()
3080                         return new_character_token "\ufffd"
3081                 if c is '' # EOF
3082                         parse_error()
3083                         tok_state = tok_state_data
3084                         cur -= 1 # Reconsume
3085                         return
3086                 # Anything else
3087                 return new_character_token c
3088
3089         # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
3090         tok_state_script_data_double_escaped_dash = ->
3091                 c = txt.charAt(cur++)
3092                 if c is '-'
3093                         tok_state = tok_state_script_data_double_escaped_dash_dash
3094                         return new_character_token '-'
3095                 if c is '<'
3096                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3097                         return new_character_token '<'
3098                 if c is "\u0000"
3099                         parse_error()
3100                         tok_state = tok_state_script_data_double_escaped
3101                         return new_character_token "\ufffd"
3102                 if c is '' # EOF
3103                         parse_error()
3104                         tok_state = tok_state_data
3105                         cur -= 1 # Reconsume
3106                         return
3107                 # Anything else
3108                 tok_state = tok_state_script_data_double_escaped
3109                 return new_character_token c
3110
3111         # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
3112         tok_state_script_data_double_escaped_dash_dash = ->
3113                 c = txt.charAt(cur++)
3114                 if c is '-'
3115                         return new_character_token '-'
3116                 if c is '<'
3117                         tok_state = tok_state_script_data_double_escaped_less_than_sign
3118                         return new_character_token '<'
3119                 if c is '>'
3120                         tok_state = tok_state_script_data
3121                         return new_character_token '>'
3122                 if c is "\u0000"
3123                         parse_error()
3124                         tok_state = tok_state_script_data_double_escaped
3125                         return new_character_token "\ufffd"
3126                 if c is '' # EOF
3127                         parse_error()
3128                         tok_state = tok_state_data
3129                         cur -= 1 # Reconsume
3130                         return
3131                 # Anything else
3132                 tok_state = tok_state_script_data_double_escaped
3133                 return new_character_token c
3134
3135         # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
3136         tok_state_script_data_double_escaped_less_than_sign = ->
3137                 c = txt.charAt(cur++)
3138                 if c is '/'
3139                         temporary_buffer = ''
3140                         tok_state = tok_state_script_data_double_escape_end
3141                         return new_character_token '/'
3142                 # Anything else
3143                 tok_state = tok_state_script_data_double_escaped
3144                 cur -= 1 # Reconsume
3145                 return
3146
3147         # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
3148         tok_state_script_data_double_escape_end = ->
3149                 c = txt.charAt(cur++)
3150                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
3151                         if temporary_buffer is 'script'
3152                                 tok_state = tok_state_script_data_escaped
3153                         else
3154                                 tok_state = tok_state_script_data_double_escaped
3155                         return new_character_token c
3156                 if is_uc_alpha(c)
3157                         temporary_buffer += c.toLowerCase() # yes, really lowercase
3158                         return new_character_token c
3159                 if is_lc_alpha(c)
3160                         temporary_buffer += c
3161                         return new_character_token c
3162                 # Anything else
3163                 tok_state = tok_state_script_data_double_escaped
3164                 cur -= 1 # Reconsume
3165                 return
3166
3167         # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
3168         tok_state_before_attribute_name = ->
3169                 attr_name = null
3170                 switch c = txt.charAt(cur++)
3171                         when "\t", "\n", "\u000c", ' '
3172                                 return null
3173                         when '/'
3174                                 tok_state = tok_state_self_closing_start_tag
3175                                 return null
3176                         when '>'
3177                                 tok_state = tok_state_data
3178                                 tmp = tok_cur_tag
3179                                 tok_cur_tag = null
3180                                 return tmp
3181                         when "\u0000"
3182                                 parse_error()
3183                                 attr_name = "\ufffd"
3184                         when '"', "'", '<', '='
3185                                 parse_error()
3186                                 attr_name = c
3187                         when '' # EOF
3188                                 parse_error()
3189                                 tok_state = tok_state_data
3190                         else
3191                                 if is_uc_alpha(c)
3192                                         attr_name = c.toLowerCase()
3193                                 else
3194                                         attr_name = c
3195                 if attr_name?
3196                         tok_cur_tag.attrs_a.unshift [attr_name, '']
3197                         tok_state = tok_state_attribute_name
3198                 return null
3199
3200         # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
3201         tok_state_attribute_name = ->
3202                 switch c = txt.charAt(cur++)
3203                         when "\t", "\n", "\u000c", ' '
3204                                 tok_state = tok_state_after_attribute_name
3205                         when '/'
3206                                 tok_state = tok_state_self_closing_start_tag
3207                         when '='
3208                                 tok_state = tok_state_before_attribute_value
3209                         when '>'
3210                                 tok_state = tok_state_data
3211                                 tmp = tok_cur_tag
3212                                 tok_cur_tag = null
3213                                 return tmp
3214                         when "\u0000"
3215                                 parse_error()
3216                                 tok_cur_tag.attrs_a[0][0] = "\ufffd"
3217                         when '"', "'", '<'
3218                                 parse_error()
3219                                 tok_cur_tag.attrs_a[0][0] = c
3220                         when '' # EOF
3221                                 parse_error()
3222                                 tok_state = tok_state_data
3223                         else
3224                                 if is_uc_alpha(c)
3225                                         tok_cur_tag.attrs_a[0][0] = c.toLowerCase()
3226                                 else
3227                                         tok_cur_tag.attrs_a[0][0] += c
3228                 return null
3229
3230         # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
3231         tok_state_after_attribute_name = ->
3232                 c = txt.charAt(cur++)
3233                 if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
3234                         return
3235                 if c is '/'
3236                         tok_state = tok_state_self_closing_start_tag
3237                         return
3238                 if c is '='
3239                         tok_state = tok_state_before_attribute_value
3240                         return
3241                 if c is '>'
3242                         tok_state = tok_state_data
3243                         return
3244                 if is_uc_alpha(c)
3245                         tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
3246                         tok_state = tok_state_attribute_name
3247                         return
3248                 if c is "\u0000"
3249                         parse_error()
3250                         tok_cur_tag.attrs_a.unshift ["\ufffd", '']
3251                         tok_state = tok_state_attribute_name
3252                         return
3253                 if c is '' # EOF
3254                         parse_error()
3255                         tok_state = tok_state_data
3256                         cur -= 1 # reconsume
3257                         return
3258                 if c is '"' or c is "'" or c is '<'
3259                         parse_error()
3260                         # fall through to Anything else
3261                 # Anything else
3262                 tok_cur_tag.attrs_a.unshift [c, '']
3263                 tok_state = tok_state_attribute_name
3264
3265         # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
3266         tok_state_before_attribute_value = ->
3267                 switch c = txt.charAt(cur++)
3268                         when "\t", "\n", "\u000c", ' '
3269                                 return null
3270                         when '"'
3271                                 tok_state = tok_state_attribute_value_double_quoted
3272                         when '&'
3273                                 tok_state = tok_state_attribute_value_unquoted
3274                                 cur -= 1
3275                         when "'"
3276                                 tok_state = tok_state_attribute_value_single_quoted
3277                         when "\u0000"
3278                                 # Parse error
3279                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3280                                 tok_state = tok_state_attribute_value_unquoted
3281                         when '>'
3282                                 # Parse error
3283                                 tok_state = tok_state_data
3284                                 tmp = tok_cur_tag
3285                                 tok_cur_tag = null
3286                                 return tmp
3287                         when '' # EOF
3288                                 parse_error()
3289                                 tok_state = tok_state_data
3290                         else
3291                                 tok_cur_tag.attrs_a[0][1] += c
3292                                 tok_state = tok_state_attribute_value_unquoted
3293                 return null
3294
3295         # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
3296         tok_state_attribute_value_double_quoted = ->
3297                 switch c = txt.charAt(cur++)
3298                         when '"'
3299                                 tok_state = tok_state_after_attribute_value_quoted
3300                         when '&'
3301                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
3302                         when "\u0000"
3303                                 # Parse error
3304                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3305                         when '' # EOF
3306                                 parse_error()
3307                                 tok_state = tok_state_data
3308                         else
3309                                 tok_cur_tag.attrs_a[0][1] += c
3310                 return null
3311
3312         # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
3313         tok_state_attribute_value_single_quoted = ->
3314                 switch c = txt.charAt(cur++)
3315                         when "'"
3316                                 tok_state = tok_state_after_attribute_value_quoted
3317                         when '&'
3318                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
3319                         when "\u0000"
3320                                 # Parse error
3321                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3322                         when '' # EOF
3323                                 parse_error()
3324                                 tok_state = tok_state_data
3325                         else
3326                                 tok_cur_tag.attrs_a[0][1] += c
3327                 return null
3328
3329         # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
3330         tok_state_attribute_value_unquoted = ->
3331                 switch c = txt.charAt(cur++)
3332                         when "\t", "\n", "\u000c", ' '
3333                                 tok_state = tok_state_before_attribute_name
3334                         when '&'
3335                                 tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
3336                         when '>'
3337                                 tok_state = tok_state_data
3338                                 tmp = tok_cur_tag
3339                                 tok_cur_tag = null
3340                                 return tmp
3341                         when "\u0000"
3342                                 tok_cur_tag.attrs_a[0][1] += "\ufffd"
3343                         when '' # EOF
3344                                 parse_error()
3345                                 tok_state = tok_state_data
3346                         else
3347                                 # Parse Error if ', <, = or ` (backtick)
3348                                 tok_cur_tag.attrs_a[0][1] += c
3349                 return null
3350
3351         # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
3352         tok_state_after_attribute_value_quoted = ->
3353                 switch c = txt.charAt(cur++)
3354                         when "\t", "\n", "\u000c", ' '
3355                                 tok_state = tok_state_before_attribute_name
3356                         when '/'
3357                                 tok_state = tok_state_self_closing_start_tag
3358                         when '>'
3359                                 tok_state = tok_state_data
3360                                 tmp = tok_cur_tag
3361                                 tok_cur_tag = null
3362                                 return tmp
3363                         when '' # EOF
3364                                 parse_error()
3365                                 tok_state = tok_state_data
3366                         else
3367                                 # Parse Error
3368                                 tok_state = tok_state_before_attribute_name
3369                                 cur -= 1 # we didn't handle that char
3370                 return null
3371
3372         # 8.2.4.43 http://www.w3.org/TR/html5/syntax.html#self-closing-start-tag-state
3373         tok_state_self_closing_start_tag = ->
3374                 c = txt.charAt(cur++)
3375                 if c is '>'
3376                         tok_cur_tag.flag 'self-closing'
3377                         tok_state = tok_state_data
3378                         return tok_cur_tag
3379                 if c is ''
3380                         parse_error()
3381                         tok_state = tok_state_data
3382                         cur -= 1 # Reconsume
3383                         return
3384                 # Anything else
3385                 parse_error()
3386                 tok_state = tok_state_before_attribute_name
3387                 cur -= 1 # Reconsume
3388                 return
3389
3390         # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
3391         # WARNING: put a comment token in tok_cur_tag before setting this state
3392         tok_state_bogus_comment = ->
3393                 next_gt = txt.indexOf '>', cur
3394                 if next_gt is -1
3395                         val = txt.substr cur
3396                         cur = txt.length
3397                 else
3398                         val = txt.substr cur, (next_gt - cur)
3399                         cur = next_gt + 1
3400                 val = val.replace "\u0000", "\ufffd"
3401                 tok_cur_tag.text += val
3402                 tok_state = tok_state_data
3403                 return tok_cur_tag
3404
3405         # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
3406         tok_state_markup_declaration_open = ->
3407                 if txt.substr(cur, 2) is '--'
3408                         cur += 2
3409                         tok_cur_tag = new_comment_token ''
3410                         tok_state = tok_state_comment_start
3411                         return
3412                 if txt.substr(cur, 7).toLowerCase() is 'doctype'
3413                         cur += 7
3414                         tok_state = tok_state_doctype
3415                         return
3416                 acn = adjusted_current_node()
3417                 if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
3418                         cur += 7
3419                         tok_state = tok_state_cdata_section
3420                         return
3421                 # Otherwise
3422                 parse_error()
3423                 tok_cur_tag = new_comment_token '!' # TODO test ("!" right?)
3424                 tok_state = tok_state_bogus_comment
3425                 return
3426
3427         # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
3428         tok_state_comment_start = ->
3429                 switch c = txt.charAt(cur++)
3430                         when '-'
3431                                 tok_state = tok_state_comment_start_dash
3432                         when "\u0000"
3433                                 parse_error()
3434                                 return new_character_token "\ufffd"
3435                         when '>'
3436                                 parse_error()
3437                                 tok_state = tok_state_data
3438                                 return tok_cur_tag
3439                         when '' # EOF
3440                                 parse_error()
3441                                 tok_state = tok_state_data
3442                                 cur -= 1 # Reconsume
3443                                 return tok_cur_tag
3444                         else
3445                                 tok_cur_tag.text += c
3446                 return null
3447
3448         # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
3449         tok_state_comment_start_dash = ->
3450                 switch c = txt.charAt(cur++)
3451                         when '-'
3452                                 tok_state = tok_state_comment_end
3453                         when "\u0000"
3454                                 parse_error()
3455                                 tok_cur_tag.text += "-\ufffd"
3456                                 tok_state = tok_state_comment
3457                         when '>'
3458                                 parse_error()
3459                                 tok_state = tok_state_data
3460                                 return tok_cur_tag
3461                         when '' # EOF
3462                                 parse_error()
3463                                 tok_state = tok_state_data
3464                                 cur -= 1 # Reconsume
3465                                 return tok_cur_tag
3466                         else
3467                                 tok_cur_tag.text += "-#{c}"
3468                                 tok_state = tok_state_comment
3469                 return null
3470
3471         # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
3472         tok_state_comment = ->
3473                 switch c = txt.charAt(cur++)
3474                         when '-'
3475                                 tok_state = tok_state_comment_end_dash
3476                         when "\u0000"
3477                                 parse_error()
3478                                 tok_cur_tag.text += "\ufffd"
3479                         when '' # EOF
3480                                 parse_error()
3481                                 tok_state = tok_state_data
3482                                 cur -= 1 # Reconsume
3483                                 return tok_cur_tag
3484                         else
3485                                 tok_cur_tag.text += c
3486                 return null
3487
3488         # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
3489         tok_state_comment_end_dash = ->
3490                 switch c = txt.charAt(cur++)
3491                         when '-'
3492                                 tok_state = tok_state_comment_end
3493                         when "\u0000"
3494                                 parse_error()
3495                                 tok_cur_tag.text += "-\ufffd"
3496                                 tok_state = tok_state_comment
3497                         when '' # EOF
3498                                 parse_error()
3499                                 tok_state = tok_state_data
3500                                 cur -= 1 # Reconsume
3501                                 return tok_cur_tag
3502                         else
3503                                 tok_cur_tag.text += "-#{c}"
3504                                 tok_state = tok_state_comment
3505                 return null
3506
3507         # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
3508         tok_state_comment_end = ->
3509                 switch c = txt.charAt(cur++)
3510                         when '>'
3511                                 tok_state = tok_state_data
3512                                 return tok_cur_tag
3513                         when "\u0000"
3514                                 parse_error()
3515                                 tok_cur_tag.text += "--\ufffd"
3516                                 tok_state = tok_state_comment
3517                         when '!'
3518                                 parse_error()
3519                                 tok_state = tok_state_comment_end_bang
3520                         when '-'
3521                                 parse_error()
3522                                 tok_cur_tag.text += '-'
3523                         when '' # EOF
3524                                 parse_error()
3525                                 tok_state = tok_state_data
3526                                 cur -= 1 # Reconsume
3527                                 return tok_cur_tag
3528                         else
3529                                 parse_error()
3530                                 tok_cur_tag.text += "--#{c}"
3531                                 tok_state = tok_state_comment
3532                 return null
3533
3534         # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
3535         tok_state_comment_end_bang = ->
3536                 switch c = txt.charAt(cur++)
3537                         when '-'
3538                                 tok_cur_tag.text += "--!#{c}"
3539                                 tok_state = tok_state_comment_end_dash
3540                         when '>'
3541                                 tok_state = tok_state_data
3542                                 return tok_cur_tag
3543                         when "\u0000"
3544                                 parse_error()
3545                                 tok_cur_tag.text += "--!\ufffd"
3546                                 tok_state = tok_state_comment
3547                         when '' # EOF
3548                                 parse_error()
3549                                 tok_state = tok_state_data
3550                                 cur -= 1 # Reconsume
3551                                 return tok_cur_tag
3552                         else
3553                                 tok_cur_tag.text += "--!#{c}"
3554                                 tok_state = tok_state_comment
3555                 return null
3556
3557         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3558         tok_state_doctype = ->
3559                 switch c = txt.charAt(cur++)
3560                         when "\t", "\u000a", "\u000c", ' '
3561                                 tok_state = tok_state_before_doctype_name
3562                         when '' # EOF
3563                                 parse_error()
3564                                 tok_state = tok_state_data
3565                                 el = new_doctype_token ''
3566                                 el.flag 'force-quirks', true
3567                                 cur -= 1 # Reconsume
3568                                 return el
3569                         else
3570                                 parse_error()
3571                                 tok_state = tok_state_before_doctype_name
3572                                 cur -= 1 # Reconsume
3573                 return null
3574
3575         # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
3576         tok_state_before_doctype_name = ->
3577                 c = txt.charAt(cur++)
3578                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3579                         return
3580                 if is_uc_alpha(c)
3581                         tok_cur_tag = new_doctype_token c.toLowerCase()
3582                         tok_state = tok_state_doctype_name
3583                         return
3584                 if c is "\u0000"
3585                         parse_error()
3586                         tok_cur_tag = new_doctype_token "\ufffd"
3587                         tok_state = tok_state_doctype_name
3588                         return
3589                 if c is '>'
3590                         parse_error()
3591                         el = new_doctype_token ''
3592                         el.flag 'force-quirks', true
3593                         tok_state = tok_state_data
3594                         return el
3595                 if c is '' # EOF
3596                         parse_error()
3597                         tok_state = tok_state_data
3598                         el = new_doctype_token ''
3599                         el.flag 'force-quirks', true
3600                         cur -= 1 # Reconsume
3601                         return el
3602                 # Anything else
3603                 tok_cur_tag = new_doctype_token c
3604                 tok_state = tok_state_doctype_name
3605                 return null
3606
3607         # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
3608         tok_state_doctype_name = ->
3609                 c = txt.charAt(cur++)
3610                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3611                         tok_state = tok_state_after_doctype_name
3612                         return
3613                 if c is '>'
3614                         tok_state = tok_state_data
3615                         return tok_cur_tag
3616                 if is_uc_alpha(c)
3617                         tok_cur_tag.name += c.toLowerCase()
3618                         return
3619                 if c is "\u0000"
3620                         parse_error()
3621                         tok_cur_tag.name += "\ufffd"
3622                         return
3623                 if c is '' # EOF
3624                         parse_error()
3625                         tok_state = tok_state_data
3626                         tok_cur_tag.flag 'force-quirks', true
3627                         cur -= 1 # Reconsume
3628                         return tok_cur_tag
3629                 # Anything else
3630                 tok_cur_tag.name += c
3631                 return null
3632
3633         # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
3634         tok_state_after_doctype_name = ->
3635                 c = txt.charAt(cur++)
3636                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3637                         return
3638                 if c is '>'
3639                         tok_state = tok_state_data
3640                         return tok_cur_tag
3641                 if c is '' # EOF
3642                         parse_error()
3643                         tok_state = tok_state_data
3644                         tok_cur_tag.flag 'force-quirks', true
3645                         cur -= 1 # Reconsume
3646                         return tok_cur_tag
3647                 # Anything else
3648                 if txt.substr(cur - 1, 6).toLowerCase() is 'public'
3649                         cur += 5
3650                         tok_state = tok_state_after_doctype_public_keyword
3651                         return
3652                 if txt.substr(cur - 1, 6).toLowerCase() is 'system'
3653                         cur += 5
3654                         tok_state = tok_state_after_doctype_system_keyword
3655                         return
3656                 parse_error()
3657                 tok_cur_tag.flag 'force-quirks', true
3658                 tok_state = tok_state_bogus_doctype
3659                 return null
3660
3661         # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
3662         tok_state_after_doctype_public_keyword = ->
3663                 c = txt.charAt(cur++)
3664                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3665                         tok_state = tok_state_before_doctype_public_identifier
3666                         return
3667                 if c is '"'
3668                         parse_error()
3669                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
3670                         tok_state = tok_state_doctype_public_identifier_double_quoted
3671                         return
3672                 if c is "'"
3673                         parse_error()
3674                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
3675                         tok_state = tok_state_doctype_public_identifier_single_quoted
3676                         return
3677                 if c is '>'
3678                         parse_error()
3679                         tok_cur_tag.flag 'force-quirks', true
3680                         tok_state = tok_state_data
3681                         return tok_cur_tag
3682                 if c is '' # EOF
3683                         parse_error()
3684                         tok_state = tok_state_data
3685                         tok_cur_tag.flag 'force-quirks', true
3686                         cur -= 1 # Reconsume
3687                         return tok_cur_tag
3688                 # Anything else
3689                 parse_error()
3690                 tok_cur_tag.flag 'force-quirks', true
3691                 tok_state = tok_state_bogus_doctype
3692                 return null
3693
3694         # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
3695         tok_state_before_doctype_public_identifier = ->
3696                 c = txt.charAt(cur++)
3697                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3698                         return
3699                 if c is '"'
3700                         parse_error()
3701                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
3702                         tok_state = tok_state_doctype_public_identifier_double_quoted
3703                         return
3704                 if c is "'"
3705                         parse_error()
3706                         tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
3707                         tok_state = tok_state_doctype_public_identifier_single_quoted
3708                         return
3709                 if c is '>'
3710                         parse_error()
3711                         tok_cur_tag.flag 'force-quirks', true
3712                         tok_state = tok_state_data
3713                         return tok_cur_tag
3714                 if c is '' # EOF
3715                         parse_error()
3716                         tok_state = tok_state_data
3717                         tok_cur_tag.flag 'force-quirks', true
3718                         cur -= 1 # Reconsume
3719                         return tok_cur_tag
3720                 # Anything else
3721                 parse_error()
3722                 tok_cur_tag.flag 'force-quirks', true
3723                 tok_state = tok_state_bogus_doctype
3724                 return null
3725
3726
3727         # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
3728         tok_state_doctype_public_identifier_double_quoted = ->
3729                 c = txt.charAt(cur++)
3730                 if c is '"'
3731                         tok_state = tok_state_after_doctype_public_identifier
3732                         return
3733                 if c is "\u0000"
3734                         parse_error()
3735                         tok_cur_tag.public_identifier += "\ufffd"
3736                         return
3737                 if c is '>'
3738                         parse_error()
3739                         tok_cur_tag.flag 'force-quirks', true
3740                         tok_state = tok_state_data
3741                         return tok_cur_tag
3742                 if c is '' # EOF
3743                         parse_error()
3744                         tok_state = tok_state_data
3745                         tok_cur_tag.flag 'force-quirks', true
3746                         cur -= 1 # Reconsume
3747                         return tok_cur_tag
3748                 # Anything else
3749                 tok_cur_tag.public_identifier += c
3750                 return null
3751
3752         # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
3753         tok_state_doctype_public_identifier_single_quoted = ->
3754                 c = txt.charAt(cur++)
3755                 if c is "'"
3756                         tok_state = tok_state_after_doctype_public_identifier
3757                         return
3758                 if c is "\u0000"
3759                         parse_error()
3760                         tok_cur_tag.public_identifier += "\ufffd"
3761                         return
3762                 if c is '>'
3763                         parse_error()
3764                         tok_cur_tag.flag 'force-quirks', true
3765                         tok_state = tok_state_data
3766                         return tok_cur_tag
3767                 if c is '' # EOF
3768                         parse_error()
3769                         tok_state = tok_state_data
3770                         tok_cur_tag.flag 'force-quirks', true
3771                         cur -= 1 # Reconsume
3772                         return tok_cur_tag
3773                 # Anything else
3774                 tok_cur_tag.public_identifier += c
3775                 return null
3776
3777         # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
3778         tok_state_after_doctype_public_identifier = ->
3779                 c = txt.charAt(cur++)
3780                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3781                         tok_state = tok_state_between_doctype_public_and_system_identifiers
3782                         return
3783                 if c is '>'
3784                         tok_state = tok_state_data
3785                         return tok_cur_tag
3786                 if c is '"'
3787                         parse_error()
3788                         tok_cur_tag.system_identifier = ''
3789                         tok_state = tok_state_doctype_system_identifier_double_quoted
3790                         return
3791                 if c is "'"
3792                         parse_error()
3793                         tok_cur_tag.system_identifier = ''
3794                         tok_state = tok_state_doctype_system_identifier_single_quoted
3795                         return
3796                 if c is '' # EOF
3797                         parse_error()
3798                         tok_state = tok_state_data
3799                         tok_cur_tag.flag 'force-quirks', true
3800                         cur -= 1 # Reconsume
3801                         return tok_cur_tag
3802                 # Anything else
3803                 parse_error()
3804                 tok_cur_tag.flag 'force-quirks', true
3805                 tok_state = tok_state_bogus_doctype
3806                 return null
3807
3808         # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
3809         tok_state_between_doctype_public_and_system_identifiers = ->
3810                 c = txt.charAt(cur++)
3811                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3812                         return
3813                 if c is '>'
3814                         tok_state = tok_state_data
3815                         return tok_cur_tag
3816                 if c is '"'
3817                         parse_error()
3818                         tok_cur_tag.system_identifier = ''
3819                         tok_state = tok_state_doctype_system_identifier_double_quoted
3820                         return
3821                 if c is "'"
3822                         parse_error()
3823                         tok_cur_tag.system_identifier = ''
3824                         tok_state = tok_state_doctype_system_identifier_single_quoted
3825                         return
3826                 if c is '' # EOF
3827                         parse_error()
3828                         tok_state = tok_state_data
3829                         tok_cur_tag.flag 'force-quirks', true
3830                         cur -= 1 # Reconsume
3831                         return tok_cur_tag
3832                 # Anything else
3833                 parse_error()
3834                 tok_cur_tag.flag 'force-quirks', true
3835                 tok_state = tok_state_bogus_doctype
3836                 return null
3837
3838         # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
3839         tok_state_after_doctype_system_keyword = ->
3840                 c = txt.charAt(cur++)
3841                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3842                         tok_state = tok_state_before_doctype_system_identifier
3843                         return
3844                 if c is '"'
3845                         parse_error()
3846                         tok_cur_tag.system_identifier = ''
3847                         tok_state = tok_state_doctype_system_identifier_double_quoted
3848                         return
3849                 if c is "'"
3850                         parse_error()
3851                         tok_cur_tag.system_identifier = ''
3852                         tok_state = tok_state_doctype_system_identifier_single_quoted
3853                         return
3854                 if c is '>'
3855                         parse_error()
3856                         tok_cur_tag.flag 'force-quirks', true
3857                         tok_state = tok_state_data
3858                         return tok_cur_tag
3859                 if c is '' # EOF
3860                         parse_error()
3861                         tok_state = tok_state_data
3862                         tok_cur_tag.flag 'force-quirks', true
3863                         cur -= 1 # Reconsume
3864                         return tok_cur_tag
3865                 # Anything else
3866                 parse_error()
3867                 tok_cur_tag.flag 'force-quirks', true
3868                 tok_state = tok_state_bogus_doctype
3869                 return null
3870
3871         # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
3872         tok_state_before_doctype_system_identifier = ->
3873                 c = txt.charAt(cur++)
3874                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3875                         return
3876                 if c is '"'
3877                         tok_cur_tag.system_identifier = ''
3878                         tok_state = tok_state_doctype_system_identifier_double_quoted
3879                         return
3880                 if c is "'"
3881                         tok_cur_tag.system_identifier = ''
3882                         tok_state = tok_state_doctype_system_identifier_single_quoted
3883                         return
3884                 if c is '>'
3885                         parse_error()
3886                         tok_cur_tag.flag 'force-quirks', true
3887                         tok_state = tok_state_data
3888                         return tok_cur_tag
3889                 if c is '' # EOF
3890                         parse_error()
3891                         tok_state = tok_state_data
3892                         tok_cur_tag.flag 'force-quirks', true
3893                         cur -= 1 # Reconsume
3894                         return tok_cur_tag
3895                 # Anything else
3896                 parse_error()
3897                 tok_cur_tag.flag 'force-quirks', true
3898                 tok_state = tok_state_bogus_doctype
3899                 return null
3900
3901         # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
3902         tok_state_doctype_system_identifier_double_quoted = ->
3903                 c = txt.charAt(cur++)
3904                 if c is '"'
3905                         tok_state = tok_state_after_doctype_system_identifier
3906                         return
3907                 if c is "\u0000"
3908                         parse_error()
3909                         tok_cur_tag.system_identifier += "\ufffd"
3910                         return
3911                 if c is '>'
3912                         parse_error()
3913                         tok_cur_tag.flag 'force-quirks', true
3914                         tok_state = tok_state_data
3915                         return tok_cur_tag
3916                 if c is '' # EOF
3917                         parse_error()
3918                         tok_state = tok_state_data
3919                         tok_cur_tag.flag 'force-quirks', true
3920                         cur -= 1 # Reconsume
3921                         return tok_cur_tag
3922                 # Anything else
3923                 tok_cur_tag.system_identifier += c
3924                 return null
3925
3926         # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
3927         tok_state_doctype_system_identifier_single_quoted = ->
3928                 c = txt.charAt(cur++)
3929                 if c is "'"
3930                         tok_state = tok_state_after_doctype_system_identifier
3931                         return
3932                 if c is "\u0000"
3933                         parse_error()
3934                         tok_cur_tag.system_identifier += "\ufffd"
3935                         return
3936                 if c is '>'
3937                         parse_error()
3938                         tok_cur_tag.flag 'force-quirks', true
3939                         tok_state = tok_state_data
3940                         return tok_cur_tag
3941                 if c is '' # EOF
3942                         parse_error()
3943                         tok_state = tok_state_data
3944                         tok_cur_tag.flag 'force-quirks', true
3945                         cur -= 1 # Reconsume
3946                         return tok_cur_tag
3947                 # Anything else
3948                 tok_cur_tag.system_identifier += c
3949                 return null
3950
3951         # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
3952         tok_state_after_doctype_system_identifier = ->
3953                 c = txt.charAt(cur++)
3954                 if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
3955                         return
3956                 if c is '>'
3957                         tok_state = tok_state_data
3958                         return tok_cur_tag
3959                 if c is '' # EOF
3960                         parse_error()
3961                         tok_state = tok_state_data
3962                         tok_cur_tag.flag 'force-quirks', true
3963                         cur -= 1 # Reconsume
3964                         return tok_cur_tag
3965                 # Anything else
3966                 parse_error()
3967                 # do _not_ tok_cur_tag.flag 'force-quirks', true
3968                 tok_state = tok_state_bogus_doctype
3969                 return null
3970
3971         # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
3972         tok_state_bogus_doctype = ->
3973                 c = txt.charAt(cur++)
3974                 if c is '>'
3975                         tok_state = tok_state_data
3976                         return tok_cur_tag
3977                 if c is '' # EOF
3978                         tok_state = tok_state_data
3979                         cur -= 1 # Reconsume
3980                         return tok_cur_tag
3981                 # Anything else
3982                 return null
3983
3984
3985         # 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
3986         # Don't set this as a state, just call it
3987         # returns a string (NOT a text node)
3988         parse_character_reference = (allowed_char = null, in_attr = false) ->
3989                 if cur >= txt.length
3990                         return '&'
3991                 switch c = txt.charAt(cur)
3992                         when "\t", "\n", "\u000c", ' ', '<', '&', '', allowed_char
3993                                 # explicitly not a parse error
3994                                 return '&'
3995                         when ';'
3996                                 # there has to be "one or more" alnums between & and ; to be a parse error
3997                                 return '&'
3998                         when '#'
3999                                 if cur + 1 >= txt.length
4000                                         return '&'
4001                                 if txt.charAt(cur + 1).toLowerCase() is 'x'
4002                                         prefix = '#x'
4003                                         charset = hex_chars
4004                                         start = cur + 2
4005                                 else
4006                                         charset = digits
4007                                         start = cur + 1
4008                                         prefix = '#'
4009                                 i = 0
4010                                 while start + i < txt.length and charset.indexOf(txt.charAt(start + i)) > -1
4011                                         i += 1
4012                                 if i is 0
4013                                         return '&'
4014                                 if txt.charAt(start + i) is ';'
4015                                         i += 1
4016                                 # FIXME This is supposed to generate parse errors for some chars
4017                                 decoded = decode_named_char_ref(prefix + txt.substr(start, i).toLowerCase())
4018                                 if decoded?
4019                                         cur = start + i
4020                                         return decoded
4021                                 return '&'
4022                         else
4023                                 for i in [0...31]
4024                                         if alnum.indexOf(txt.charAt(cur + i)) is -1
4025                                                 break
4026                                 if i is 0
4027                                         # exit early, because parse_error() below needs at least one alnum
4028                                         return '&'
4029                                 if txt.charAt(cur + i) is ';'
4030                                         i += 1 # include ';' terminator in value
4031                                         decoded = decode_named_char_ref txt.substr(cur, i)
4032                                         if decoded?
4033                                                 cur += i
4034                                                 return decoded
4035                                         parse_error()
4036                                         return '&'
4037                                 else
4038                                         # no ';' terminator (only legacy char refs)
4039                                         max = i
4040                                         for i in [2..max] # no prefix matches, so ok to check shortest first
4041                                                 c = legacy_char_refs[txt.substr(cur, i)]
4042                                                 if c?
4043                                                         if in_attr
4044                                                                 if txt.charAt(cur + i) is '='
4045                                                                         # "because some legacy user agents will
4046                                                                         # misinterpret the markup in those cases"
4047                                                                         parse_error()
4048                                                                         return '&'
4049                                                                 if alnum.indexOf(txt.charAt(cur + i)) > -1
4050                                                                         # this makes attributes forgiving about url args
4051                                                                         return '&'
4052                                                         # ok, and besides the weird exceptions for attributes...
4053                                                         # return the matching char
4054                                                         cur += i # consume entity chars
4055                                                         parse_error() # because no terminating ";"
4056                                                         return c
4057                                         parse_error()
4058                                         return '&'
4059                 return # never reached
4060
4061         # tree constructor initialization
4062         # see comments on TYPE_TAG/etc for the structure of this data
4063         doc = new Node TYPE_TAG, name: 'html', namespace: NS_HTML
4064         open_els = []
4065         afe = [] # active formatting elements
4066         template_ins_modes = []
4067         ins_mode = ins_mode_initial
4068         original_ins_mode = ins_mode # TODO check spec
4069         flag_scripting = true # TODO might need an extra flag to get <noscript> to parse correctly
4070         flag_frameset_ok = true
4071         flag_parsing = true
4072         flag_foster_parenting = false
4073         form_element_pointer = null
4074         temporary_buffer = null
4075         pending_table_character_tokens = []
4076         head_element_pointer = null
4077         flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
4078         context_element = null # FIXME initialize from args.fragment http://www.w3.org/TR/html5/syntax.html#parsing-html-fragments
4079
4080         # tokenizer initialization
4081         tok_state = tok_state_data
4082
4083         # proccess input
4084         while flag_parsing
4085                 t = tok_state()
4086                 if t?
4087                         ins_mode t
4088                         # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
4089         return doc.children
4090
4091 serialize_els = (els, shallow, show_ids) ->
4092         serialized = ''
4093         sep = ''
4094         for t in els
4095                 serialized += sep
4096                 sep = ','
4097                 serialized += t.serialize shallow, show_ids
4098         return serialized
4099
4100 # TODO export TYPE_*
4101 module.exports.parse_html = parse_html
4102 module.exports.debug_log_reset = debug_log_reset
4103 module.exports.debug_log_each = debug_log_each
4104 module.exports.TYPE_TAG = TYPE_TAG
4105 module.exports.TYPE_TEXT = TYPE_TEXT
4106 module.exports.TYPE_COMMENT = TYPE_COMMENT
4107 module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE