+ return
+ if t.type is TYPE_END_TAG and t.name is 'col'
+ parse_error()
+ return
+ if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
+ ins_mode_in_head t
+ return
+ if t.type is TYPE_EOF
+ ins_mode_in_body t
+ return
+ # Anything else
+ if open_els[0].name isnt 'colgroup'
+ parse_error()
+ return
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ process_token t
+ return
+
+ # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
+ # "In table body" insertion mode. Dispatches on t.type / t.name, updating
+ # open_els and ins_mode as it goes; any token not matched by one of the
+ # branches below is handed to ins_mode_in_table.
+ ins_mode_in_table_body = (t) ->
+ if t.type is TYPE_START_TAG and t.name is 'tr'
+ clear_stack_to_table_body_context()
+ insert_html_element t
+ ins_mode = ins_mode_in_row
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
+ parse_error()
+ clear_stack_to_table_body_context()
+ # cell without a row: insert a synthetic <tr>, then reprocess the cell tag
+ insert_html_element new_open_tag 'tr'
+ ins_mode = ins_mode_in_row
+ process_token t
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
+ unless is_in_table_scope t.name # fixfull check namespace
+ parse_error()
+ return
+ clear_stack_to_table_body_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ return
+ if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
+ # walk open_els from index 0: stop with has = true at the first
+ # tbody/tfoot/thead, or give up at the first name listed in table_scopers
+ has = false
+ for el in open_els
+ if el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead'
+ has = true
+ break
+ if table_scopers[el.name]
+ break
+ if !has
+ parse_error()
+ return
+ clear_stack_to_table_body_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ # the token is handled again, this time by the "in table" mode
+ process_token t
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
+ parse_error()
+ return
+ # Anything else
+ ins_mode_in_table t
+
+ # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
+ # "In row" insertion mode. Dispatches on t.type / t.name, updating open_els
+ # and ins_mode; tokens not matched below are handed to ins_mode_in_table.
+ ins_mode_in_row = (t) ->
+ if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
+ clear_stack_to_table_row_context()
+ insert_html_element t
+ ins_mode = ins_mode_in_cell
+ # marker pushed after switching to cell mode
+ afe_push_marker()
+ return
+ if t.type is TYPE_END_TAG and t.name is 'tr'
+ if is_in_table_scope 'tr'
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ else
+ parse_error()
+ return
+ # note: `and` binds tighter than `or`, so the trailing end-tag test
+ # (TYPE_END_TAG and 'table') forms its own alternative
+ if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
+ if is_in_table_scope 'tr'
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ # row closed; hand the same token to the table body mode
+ process_token t
+ else
+ parse_error()
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
+ if is_in_table_scope t.name # fixfull namespace
+ if is_in_table_scope 'tr'
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ process_token t
+ else
+ parse_error()
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
+ parse_error()
+ return
+ # Anything else
+ ins_mode_in_table t
+
+ # http://www.w3.org/TR/html5/syntax.html#close-the-cell
+ # Closes the open table cell: generates implied end tags, pops open_els up to
+ # and including the first td/th, clears the active formatting elements back
+ # to the last marker and switches the insertion mode to "in row".
+ close_the_cell = ->
+     generate_implied_end_tags()
+     # Compare the element's *name* on both sides. The previous code compared
+     # the element object itself with 'th', which can never be true, so a
+     # well-formed <th> was always reported as a parse error.
+     unless open_els[0].name is 'td' or open_els[0].name is 'th'
+         parse_error()
+     loop
+         el = open_els.shift()
+         if el.name is 'td' or el.name is 'th'
+             break
+     clear_afe_to_marker()
+     ins_mode = ins_mode_in_row
+
+ # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
+ # "In cell" insertion mode. Handles the end of the current td/th, closes the
+ # cell implicitly when table-structure tags show up, and hands every other
+ # token to ins_mode_in_body.
+ ins_mode_in_cell = (t) ->
+     if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
+         if is_in_table_scope t.name
+             generate_implied_end_tags()
+             if open_els[0].name isnt t.name
+                 # must be a call: a bare `parse_error` only references the
+                 # function and never reports the error
+                 parse_error()
+             # pop up to and including the cell named by the end tag
+             loop
+                 el = open_els.shift()
+                 if el.name is t.name
+                     break
+             clear_afe_to_marker()
+             ins_mode = ins_mode_in_row
+         else
+             parse_error()
+         return
+     if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
+         # look for an open td/th, giving up at the first name in table_scopers
+         has = false
+         for el in open_els
+             if el.name is 'td' or el.name is 'th'
+                 has = true
+                 break
+             if table_scopers[el.name]
+                 break
+         if !has
+             parse_error()
+             return
+         close_the_cell()
+         process_token t
+         return
+     if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
+         parse_error()
+         return
+     if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
+         if is_in_table_scope t.name # fixfull namespace
+             close_the_cell()
+             process_token t
+         else
+             parse_error()
+         return
+     # Anything else
+     ins_mode_in_body t
+
+ # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
+ # "In select" insertion mode. Inserts text, comments, option and optgroup
+ # elements, closes the select on </select> (and on the start tags that
+ # implicitly end it), and reports a parse error for everything else.
+ ins_mode_in_select = (t) ->
+     if t.type is TYPE_TEXT and t.text is "\u0000"
+         parse_error()
+         return
+     if t.type is TYPE_TEXT
+         insert_character t
+         return
+     if t.type is TYPE_COMMENT
+         insert_comment t
+         return
+     if t.type is TYPE_DOCTYPE
+         parse_error()
+         return
+     if t.type is TYPE_START_TAG and t.name is 'html'
+         ins_mode_in_body t
+         return
+     if t.type is TYPE_START_TAG and t.name is 'option'
+         # a new option implicitly closes the previous one
+         if open_els[0].name is 'option'
+             open_els.shift()
+         insert_html_element t
+         return
+     if t.type is TYPE_START_TAG and t.name is 'optgroup'
+         # close an open option, then an open optgroup, before inserting
+         if open_els[0].name is 'option'
+             open_els.shift()
+         if open_els[0].name is 'optgroup'
+             open_els.shift()
+         insert_html_element t
+         return
+     if t.type is TYPE_END_TAG and t.name is 'optgroup'
+         if open_els[0].name is 'option' and open_els[1].name is 'optgroup'
+             open_els.shift()
+         if open_els[0].name is 'optgroup'
+             open_els.shift()
+         else
+             parse_error()
+         return
+     if t.type is TYPE_END_TAG and t.name is 'option'
+         if open_els[0].name is 'option'
+             open_els.shift()
+         else
+             parse_error()
+         return
+     if t.type is TYPE_END_TAG and t.name is 'select'
+         if is_in_select_scope 'select'
+             loop
+                 el = open_els.shift()
+                 if el.name is 'select'
+                     break
+             reset_ins_mode()
+         else
+             parse_error()
+         return
+     if t.type is TYPE_START_TAG and t.name is 'select'
+         parse_error()
+         loop
+             el = open_els.shift()
+             if el.name is 'select'
+                 break
+         reset_ins_mode()
+         # spec says that this is the same as </select> but it doesn't say
+         # to check scope first
+         return
+     if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
+         parse_error()
+         # The token is ignored only when there is NO select in select scope.
+         # The earlier `if` had the test inverted, which dropped these tags
+         # in the normal case and popped the whole stack in the abnormal one.
+         unless is_in_select_scope 'select'
+             return
+         loop
+             el = open_els.shift()
+             if el.name is 'select'
+                 break
+         reset_ins_mode()
+         process_token t
+         return
+     if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
+         ins_mode_in_head t
+         return
+     if t.type is TYPE_EOF
+         ins_mode_in_body t
+         return
+     # Anything else
+     parse_error()
+     return
+
+ # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
+ # "In select in table" insertion mode. Table-structure start/end tags are a
+ # parse error that closes the select (for end tags only when the named
+ # element is in table scope) and are then reprocessed; every other token is
+ # handled by ins_mode_in_select.
+ ins_mode_in_select_in_table = (t) ->
+     is_tag = t.type is TYPE_START_TAG or t.type is TYPE_END_TAG
+     unless is_tag and t.name in ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th']
+         # Anything else
+         ins_mode_in_select t
+         return
+     parse_error()
+     if t.type is TYPE_END_TAG
+         # an end tag whose element is not in table scope is ignored
+         return unless is_in_table_scope t.name, NS_HTML
+     # pop up to and including the select element
+     el = open_els.shift()
+     el = open_els.shift() until el.name is 'select'
+     reset_ins_mode()
+     process_token t
+     return
+
+ # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
+ # "In template" insertion mode. Text, comments and doctypes go to the
+ # in-body rules and head-level tags to the in-head rules. Any other start
+ # tag replaces the first entry of template_ins_modes with a mode chosen from
+ # the tag name and is reprocessed there. EOF unwinds an open template.
+ ins_mode_in_template = (t) ->
+     if t.type in [TYPE_TEXT, TYPE_COMMENT, TYPE_DOCTYPE]
+         ins_mode_in_body t
+         return
+     head_start = t.type is TYPE_START_TAG and t.name in ['base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title']
+     if head_start or (t.type is TYPE_END_TAG and t.name is 'template')
+         ins_mode_in_head t
+         return
+     if t.type is TYPE_START_TAG
+         # choose the mode that will actually handle this tag
+         switch t.name
+             when 'caption', 'colgroup', 'tbody', 'tfoot', 'thead'
+                 next_mode = ins_mode_in_table
+             when 'col'
+                 next_mode = ins_mode_in_column_group
+             when 'tr'
+                 next_mode = ins_mode_in_table_body
+             when 'td', 'th'
+                 next_mode = ins_mode_in_row
+             else
+                 next_mode = ins_mode_in_body
+         template_ins_modes.shift()
+         template_ins_modes.unshift next_mode
+         ins_mode = next_mode
+         process_token t
+         return
+     if t.type is TYPE_END_TAG
+         parse_error()
+         return
+     if t.type is TYPE_EOF
+         unless template_tag_is_open()
+             stop_parsing()
+             return
+         parse_error()
+         # pop up to and including the template element
+         el = open_els.shift()
+         el = open_els.shift() until el.name is 'template' # fixfull check namespace
+         clear_afe_to_marker()
+         template_ins_modes.shift()
+         reset_ins_mode()
+         process_token t
+
+ # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
+ # "After body" insertion mode. Whitespace and <html> start tags use the
+ # in-body rules, comments are appended to the root element, </html> moves to
+ # "after after body", EOF stops parsing, and anything else is a parse error
+ # that is reprocessed in "in body" mode.
+ ins_mode_after_body = (t) ->
+     if is_space_tok t
+         ins_mode_in_body t
+         return
+     if t.type is TYPE_COMMENT
+         # The spec attaches the comment to the FIRST element on the stack
+         # of open elements (the html element). This file keeps the current
+         # node at index 0 (elements are popped with shift()), so the root
+         # is the last entry of open_els, not open_els[0].
+         root = open_els[open_els.length - 1]
+         insert_comment t, [root, root.children.length]
+         return
+     if t.type is TYPE_DOCTYPE
+         parse_error()
+         return
+     if t.type is TYPE_START_TAG and t.name is 'html'
+         ins_mode_in_body t
+         return
+     if t.type is TYPE_END_TAG and t.name is 'html'
+         # fixfull fragment case
+         ins_mode = ins_mode_after_after_body
+         return
+     if t.type is TYPE_EOF
+         stop_parsing()
+         return
+     # Anything else
+     parse_error()
+     ins_mode = ins_mode_in_body
+     process_token t
+
+ # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
+ # "In frameset" insertion mode. Inserts whitespace, comments, frameset and
+ # frame elements, closes framesets on </frameset>, and treats every token
+ # not listed below as a parse error.
+ ins_mode_in_frameset = (t) ->
+     if is_space_tok t
+         insert_character t
+         return
+     switch t.type
+         when TYPE_COMMENT
+             insert_comment t
+             return
+         when TYPE_DOCTYPE
+             parse_error()
+             return
+         when TYPE_EOF
+             # TODO ?correct for: "if the current node is not the root html element"
+             parse_error() if open_els.length isnt 1
+             stop_parsing()
+             return
+         when TYPE_START_TAG
+             switch t.name
+                 when 'html'
+                     ins_mode_in_body t
+                     return
+                 when 'frameset'
+                     insert_html_element t
+                     return
+                 when 'frame'
+                     # frame elements never stay on the stack
+                     insert_html_element t
+                     open_els.shift()
+                     t.acknowledge_self_closing()
+                     return
+                 when 'noframes'
+                     ins_mode_in_head t
+                     return
+         when TYPE_END_TAG
+             if t.name is 'frameset'
+                 # TODO ?correct for: "if the current node is the root html element"
+                 if open_els.length is 1
+                     parse_error()
+                     return # fragment case
+                 open_els.shift()
+                 still_in_frameset = open_els[0].name is 'frameset'
+                 if flag_fragment_parsing is false and not still_in_frameset
+                     ins_mode = ins_mode_after_frameset
+                 return
+     # Anything else
+     parse_error()
+     return
+
+ # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
+ # "After frameset" insertion mode. Inserts whitespace and comments, moves to
+ # "after after frameset" on </html>, stops at EOF, and reports a parse error
+ # for anything not listed below.
+ ins_mode_after_frameset = (t) ->
+     if is_space_tok t
+         insert_character t
+         return
+     if t.type is TYPE_COMMENT
+         insert_comment t
+         return
+     if t.type is TYPE_DOCTYPE
+         parse_error()
+         return
+     if t.type is TYPE_START_TAG and t.name is 'html'
+         ins_mode_in_body t
+         return
+     if t.type is TYPE_END_TAG and t.name is 'html'
+         # The mode variable is `ins_mode` (as in every other insertion mode
+         # here). The previous `insert_mode = ...` wrote to an unrelated name
+         # and left the parser stuck in this mode.
+         ins_mode = ins_mode_after_after_frameset
+         return
+     if t.type is TYPE_START_TAG and t.name is 'noframes'
+         ins_mode_in_head t
+         return
+     if t.type is TYPE_EOF
+         stop_parsing()
+         return
+     # Anything else
+     parse_error()
+     return
+
+ # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
+ # "After after body" insertion mode. Comments are appended to the document,
+ # doctypes / whitespace / <html> start tags use the in-body rules, EOF stops
+ # parsing, and anything else is a parse error that is reprocessed in
+ # "in body" mode.
+ ins_mode_after_after_body = (t) ->
+     if t.type is TYPE_COMMENT
+         insert_comment t, [doc, doc.children.length]
+         return
+     if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+         ins_mode_in_body t
+         return
+     if t.type is TYPE_EOF
+         stop_parsing()
+         return
+     # Anything else
+     parse_error()
+     ins_mode = ins_mode_in_body
+     # The spec says "switch the insertion mode to in body and reprocess the
+     # token". Without this call the token was silently dropped
+     # (ins_mode_after_body already reprocesses in the same situation).
+     process_token t
+     return
+
+ # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
+ # "After after frameset" insertion mode. Exactly one of the branches below
+ # runs for each token; unmatched tokens are a parse error and are ignored.
+ ins_mode_after_after_frameset = (t) ->
+     if t.type is TYPE_COMMENT
+         # comments at this point become children of the document itself
+         insert_comment t, [doc, doc.children.length]
+     else if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+         ins_mode_in_body t
+     else if t.type is TYPE_EOF
+         stop_parsing()
+     else if t.type is TYPE_START_TAG and t.name is 'noframes'
+         ins_mode_in_head t
+     else
+         # Anything else
+         parse_error()
+     return
+
+ # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
+ # Returns true when any entry of t.attrs_a has 'color', 'face' or 'size' as
+ # its first item (the attribute name), false otherwise.
+ has_color_face_or_size = (t) ->
+     for attr in t.attrs_a
+         return true if attr[0] in ['color', 'face', 'size']
+     return false
+ # Handles the end of a script element in foreign content. Currently it only
+ # pops the current node off open_els; the remaining steps are still to be
+ # written (see the fixfull marker).
+ in_foreign_content_end_script = ->
+ open_els.shift()
+ # fixfull
+ return
+ # "Any other start tag" handling for foreign content: adjusts the token's
+ # name/attributes according to the namespace of the adjusted current node,
+ # inserts it as a foreign element in that namespace, and deals with the
+ # self-closing flag.
+ in_foreign_content_other_start = (t) ->
+ acn = adjusted_current_node()
+ if acn.namespace is NS_MATHML
+ adjust_mathml_attributes t
+ # SVG tag names listed in svg_name_fixes are replaced by the mapped name
+ if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
+ t.name = svg_name_fixes[t.name]
+ if acn.namespace is NS_SVG
+ adjust_svg_attributes t
+ adjust_foreign_attributes t
+ insert_foreign_element t, acn.namespace
+ if t.flag 'self-closing'
+ if t.name is 'script'
+ t.acknowledge_self_closing()
+ in_foreign_content_end_script()
+ else
+ # self-closed element: take it straight back off the stack
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ # Tree-construction rules for tokens in foreign (MathML / SVG) content.
+ # Inserts text and comments, breaks out to HTML parsing for the listed HTML
+ # start tags, inserts other start tags as foreign elements, and closes
+ # matching elements on end tags.
+ in_foreign_content = (t) ->
+ if t.type is TYPE_TEXT and t.text is "\u0000"
+ parse_error()
+ # NUL is replaced by U+FFFD rather than dropped
+ insert_character new_character_token "\ufffd"
+ return
+ if is_space_tok t
+ insert_character t
+ return
+ if t.type is TYPE_TEXT
+ # non-whitespace text clears the frameset-ok flag
+ flag_frameset_ok = false
+ insert_character t
+ return
+ if t.type is TYPE_COMMENT
+ insert_comment t
+ return
+ if t.type is TYPE_DOCTYPE
+ parse_error()
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
+ parse_error()
+ # when parsing a fragment these tags are treated like any other start tag
+ if flag_fragment_parsing
+ in_foreign_content_other_start t
+ return
+ # pop until the current node is an integration point or an HTML element,
+ # then reprocess the token
+ loop # is this safe?
+ open_els.shift()
+ cn = open_els[0]
+ if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
+ break
+ process_token t
+ return
+ if t.type is TYPE_START_TAG
+ in_foreign_content_other_start t
+ return
+ if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
+ in_foreign_content_end_script()
+ return
+ if t.type is TYPE_END_TAG
+ # element names are lower-cased before comparing with the token name
+ if open_els[0].name.toLowerCase() isnt t.name
+ parse_error()
+ for node in open_els
+ # the last entry of open_els is treated as the end of the walk
+ if node is open_els[open_els.length - 1]
+ return
+ if node.name.toLowerCase() is t.name
+ loop
+ el = open_els.shift()
+ if el is node
+ return
+ # NOTE(review): the spec tests the namespace of the NEXT node before
+ # comparing its name; here the name of an HTML node is compared first.
+ # Confirm whether that ordering is intended.
+ if node.namespace is NS_HTML
+ break
+ ins_mode t # explicitly call HTML insertion mode
+
+
+ # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
+ # Data state: consumes one character of txt at cur. Returns a text node or
+ # EOF token, or null after switching to the tag-open state on '<'.
+ tok_state_data = ->
+     c = txt.charAt(cur++)
+     if c is '&'
+         return new_text_node parse_character_reference()
+     if c is '<'
+         tok_state = tok_state_tag_open
+         return null
+     if c is '' # EOF
+         return new_eof_token()
+     # NUL is a parse error but is still passed through as text
+     parse_error() if c is "\u0000"
+     return new_text_node c
+
+ # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
+ # not needed: tok_state_character_reference_in_data = ->
+ # just call parse_character_reference()
+
+ # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
+ # RCDATA state: consumes one character. Character references are parsed,
+ # '<' moves to the RCDATA less-than-sign state (returning null), NUL becomes
+ # U+FFFD, and everything else is returned as a character token.
+ tok_state_rcdata = ->
+     c = txt.charAt(cur++)
+     if c is '&'
+         return new_text_node parse_character_reference()
+     if c is '<'
+         tok_state = tok_state_rcdata_less_than_sign
+         return null
+     if c is '' # EOF
+         return new_eof_token()
+     if c is "\u0000"
+         parse_error()
+         return new_character_token "\ufffd"
+     return new_character_token c
+
+ # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
+ # not needed: tok_state_character_reference_in_rcdata = ->
+ # just call parse_character_reference()
+
+ # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
+ # RAWTEXT state: consumes one character. '<' moves to the RAWTEXT
+ # less-than-sign state (returning null), NUL becomes U+FFFD, and everything
+ # else is returned as a character token.
+ tok_state_rawtext = ->
+     c = txt.charAt(cur++)
+     if c is '<'
+         tok_state = tok_state_rawtext_less_than_sign
+         return null
+     if c is '' # EOF
+         return new_eof_token()
+     if c is "\u0000"
+         parse_error()
+         return new_character_token "\ufffd"
+     return new_character_token c
+
+ # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
+ # Script data state: consumes one character. '<' moves to the script data
+ # less-than-sign state (returning null), NUL becomes U+FFFD, and everything
+ # else is returned as a character token.
+ tok_state_script_data = ->
+     c = txt.charAt(cur++)
+     if c is '<'
+         tok_state = tok_state_script_data_less_than_sign
+         return null
+     if c is '' # EOF
+         return new_eof_token()
+     if c is "\u0000"
+         parse_error()
+         return new_character_token "\ufffd"
+     return new_character_token c
+
+ # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
+ # PLAINTEXT state: every character is returned as a character token (NUL as
+ # U+FFFD, with a parse error) until EOF.
+ tok_state_plaintext = ->
+     c = txt.charAt(cur++)
+     if c is '' # EOF
+         return new_eof_token()
+     if c is "\u0000"
+         parse_error()
+         return new_character_token "\ufffd"
+     return new_character_token c
+
+
+ # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
+ # Tag open state: looks at the character after '<' and chooses the next
+ # tokenizer state. Returns null, except when the '<' turns out to be plain
+ # text, in which case a '<' text node is returned.
+ tok_state_tag_open = ->
+     c = txt.charAt(cur++)
+     if c is '!'
+         tok_state = tok_state_markup_declaration_open
+     else if c is '/'
+         tok_state = tok_state_end_tag_open
+     else if c is '?'
+         parse_error()
+         tok_cur_tag = new_comment_token '?'
+         tok_state = tok_state_bogus_comment
+     else if is_lc_alpha(c)
+         tok_cur_tag = new_open_tag c
+         tok_state = tok_state_tag_name
+     else if is_uc_alpha(c)
+         # tag names are stored lower-cased
+         tok_cur_tag = new_open_tag c.toLowerCase()
+         tok_state = tok_state_tag_name
+     else
+         parse_error()
+         tok_state = tok_state_data
+         cur -= 1 # we didn't parse/handle the char after <
+         return new_text_node '<'
+     return null
+
+ # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
+ # End tag open state: looks at the character after '</'. Starts an end tag
+ # for letters, returns the literal '</' as text at EOF, and otherwise starts
+ # a bogus comment. Returns null unless a text node is produced.
+ tok_state_end_tag_open = ->
+     c = txt.charAt(cur++)
+     if c is '>'
+         parse_error()
+         tok_state = tok_state_data
+     else if c is '' # EOF
+         parse_error()
+         tok_state = tok_state_data
+         return new_text_node '</'
+     else if is_uc_alpha(c)
+         tok_cur_tag = new_end_tag c.toLowerCase()
+         tok_state = tok_state_tag_name
+     else if is_lc_alpha(c)
+         tok_cur_tag = new_end_tag c
+         tok_state = tok_state_tag_name
+     else
+         parse_error()
+         tok_cur_tag = new_comment_token '/'
+         tok_state = tok_state_bogus_comment
+     return null
+
+ # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
+ # Tag name state: appends characters to tok_cur_tag.name (lower-casing
+ # upper-case letters) until whitespace, '/', '>' or EOF. On '>' the finished
+ # tag token is returned and tok_cur_tag is cleared; otherwise returns null.
+ tok_state_tag_name = ->
+     c = txt.charAt(cur++)
+     if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
+         tok_state = tok_state_before_attribute_name
+     else if c is '/'
+         tok_state = tok_state_self_closing_start_tag
+     else if c is '>'
+         tok_state = tok_state_data
+         emitted_tag = tok_cur_tag
+         tok_cur_tag = null
+         return emitted_tag
+     else if c is "\u0000"
+         parse_error()
+         tok_cur_tag.name += "\ufffd"
+     else if c is '' # EOF
+         parse_error()
+         tok_state = tok_state_data
+     else if is_uc_alpha(c)
+         tok_cur_tag.name += c.toLowerCase()
+     else
+         tok_cur_tag.name += c
+     return null
+
+ # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
+ # RCDATA less-than-sign state: '/' starts a possible end tag (temporary
+ # buffer reset, returns null); anything else makes the '<' plain text and
+ # the character is reconsumed in the RCDATA state.
+ tok_state_rcdata_less_than_sign = ->
+     c = txt.charAt(cur++)
+     unless c is '/'
+         # Anything else
+         tok_state = tok_state_rcdata
+         cur -= 1 # reconsume the input character
+         return new_character_token '<'
+     temporary_buffer = ''
+     tok_state = tok_state_rcdata_end_tag_open
+     return null
+
+ # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
+ # RCDATA end tag open state: a letter starts an end tag token (name stored
+ # lower-cased, raw character kept in the temporary buffer) and returns null;
+ # anything else returns '</' as text and reconsumes in the RCDATA state.
+ tok_state_rcdata_end_tag_open = ->
+     c = txt.charAt(cur++)
+     if is_uc_alpha(c)
+         first_name_char = c.toLowerCase()
+     else if is_lc_alpha(c)
+         first_name_char = c
+     else
+         # Anything else
+         tok_state = tok_state_rcdata
+         cur -= 1 # reconsume the input character
+         return new_character_token "</" # fixfull separate these
+     tok_cur_tag = new_end_tag first_name_char
+     temporary_buffer += c
+     tok_state = tok_state_rcdata_end_tag_name
+     return null
+
+ # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
+ # Returns true when t is an end tag whose name equals the name of the
+ # current node (open_els[0]).
+ is_appropriate_end_tag = (t) ->
+     # The spec compares against "the tag name of the last start tag to have
+     # been emitted from this tokenizer". Since this is only called from the
+     # various "raw" states, open_els[0].name is assumed to be equivalent.
+     # TODO: verify this after the script data states are implemented
+     debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
+     return false unless t.type is TYPE_END_TAG
+     return t.name is open_els[0].name
+
+ # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
+ # RCDATA end tag name state: collects the end tag name. Whitespace, '/' and
+ # '>' finish the tag only if it is an appropriate end tag; otherwise (and
+ # for any non-letter) the buffered '</name' is returned as text and the
+ # character is reconsumed in the RCDATA state.
+ tok_state_rcdata_end_tag_name = ->
+     c = txt.charAt(cur++)
+     switch c
+         when "\t", "\n", "\u000c", ' '
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_before_attribute_name
+                 return
+         when '/'
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
+                 return
+         when '>'
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_data
+                 return tok_cur_tag
+     # reached for letters, other characters, and inappropriate end tags
+     if is_uc_alpha(c)
+         tok_cur_tag.name += c.toLowerCase()
+         temporary_buffer += c
+         return null
+     if is_lc_alpha(c)
+         tok_cur_tag.name += c
+         temporary_buffer += c
+         return null
+     # Anything else
+     tok_state = tok_state_rcdata
+     cur -= 1 # reconsume the input character
+     return new_character_token '</' + temporary_buffer # fixfull separate these
+
+ # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
+ # RAWTEXT less-than-sign state: '/' starts a possible end tag (temporary
+ # buffer reset, returns null); anything else makes the '<' plain text and
+ # the character is reconsumed in the RAWTEXT state.
+ tok_state_rawtext_less_than_sign = ->
+     c = txt.charAt(cur++)
+     unless c is '/'
+         # Anything else
+         tok_state = tok_state_rawtext
+         cur -= 1 # reconsume the input character
+         return new_character_token '<'
+     temporary_buffer = ''
+     tok_state = tok_state_rawtext_end_tag_open
+     return null
+
+ # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
+ # RAWTEXT end tag open state: a letter starts an end tag token (name stored
+ # lower-cased, raw character kept in the temporary buffer) and returns null;
+ # anything else returns '</' as text and reconsumes in the RAWTEXT state.
+ tok_state_rawtext_end_tag_open = ->
+     c = txt.charAt(cur++)
+     if is_uc_alpha(c)
+         first_name_char = c.toLowerCase()
+     else if is_lc_alpha(c)
+         first_name_char = c
+     else
+         # Anything else
+         tok_state = tok_state_rawtext
+         cur -= 1 # reconsume the input character
+         return new_character_token "</" # fixfull separate these
+     tok_cur_tag = new_end_tag first_name_char
+     temporary_buffer += c
+     tok_state = tok_state_rawtext_end_tag_name
+     return null
+
+ # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
+ # RAWTEXT end tag name state: collects the end tag name. Whitespace, '/' and
+ # '>' finish the tag only if it is an appropriate end tag; otherwise (and
+ # for any non-letter) the buffered '</name' is returned as text and the
+ # character is reconsumed in the RAWTEXT state.
+ tok_state_rawtext_end_tag_name = ->
+     c = txt.charAt(cur++)
+     switch c
+         when "\t", "\n", "\u000c", ' '
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_before_attribute_name
+                 return
+         when '/'
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_self_closing_start_tag
+                 return
+         when '>'
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_data
+                 return tok_cur_tag
+     # reached for letters, other characters, and inappropriate end tags
+     if is_uc_alpha(c)
+         tok_cur_tag.name += c.toLowerCase()
+         temporary_buffer += c
+         return null
+     if is_lc_alpha(c)
+         tok_cur_tag.name += c
+         temporary_buffer += c
+         return null
+     # Anything else
+     tok_state = tok_state_rawtext
+     cur -= 1 # reconsume the input character
+     return new_character_token '</' + temporary_buffer # fixfull separate these
+
+ # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
+ # Script data less-than-sign state: '/' starts a possible end tag, '!'
+ # starts a possible escape sequence (returning '<!' as text), anything else
+ # returns '<' as text and is reconsumed in the script data state.
+ tok_state_script_data_less_than_sign = ->
+     switch c = txt.charAt(cur++)
+         when '/'
+             temporary_buffer = ''
+             tok_state = tok_state_script_data_end_tag_open
+             return
+         when '!'
+             tok_state = tok_state_script_data_escape_start
+             return new_character_token '<!' # fixfull split
+     # Anything else
+     tok_state = tok_state_script_data
+     cur -= 1 # Reconsume
+     return new_character_token '<'
+
+ # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
+ # Script data end tag open state: a letter starts an end tag token (name
+ # stored lower-cased, raw character kept in the temporary buffer); anything
+ # else returns '</' as text and reconsumes in the script data state.
+ tok_state_script_data_end_tag_open = ->
+     c = txt.charAt(cur++)
+     if is_uc_alpha(c)
+         first_name_char = c.toLowerCase()
+     else if is_lc_alpha(c)
+         first_name_char = c
+     else
+         # Anything else
+         tok_state = tok_state_script_data
+         cur -= 1 # Reconsume
+         return new_character_token '</'
+     tok_cur_tag = new_end_tag first_name_char
+     temporary_buffer += c
+     tok_state = tok_state_script_data_end_tag_name
+     return
+
+ # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
+ # Script data end tag name state: collects the end tag name. Whitespace, '/'
+ # and '>' finish the tag only if it is an appropriate end tag; otherwise
+ # (and for any non-letter) the buffered '</name' is returned as text and the
+ # character is reconsumed in the script data state.
+ tok_state_script_data_end_tag_name = ->
+     c = txt.charAt(cur++)
+     switch c
+         when "\t", "\n", "\u000c", ' '
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_before_attribute_name
+                 return
+         when '/'
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_self_closing_start_tag
+                 return
+         when '>'
+             if is_appropriate_end_tag tok_cur_tag
+                 tok_state = tok_state_data
+                 return tok_cur_tag
+     # reached for letters, other characters, and inappropriate end tags
+     if is_uc_alpha(c)
+         tok_cur_tag.name += c.toLowerCase()
+         temporary_buffer += c
+         return
+     if is_lc_alpha(c)
+         tok_cur_tag.name += c
+         temporary_buffer += c
+         return
+     # Anything else
+     tok_state = tok_state_script_data
+     cur -= 1 # Reconsume
+     return new_character_token "</#{temporary_buffer}" # fixfull split
+
+ # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
+ # Script data escape start state: a '-' is returned as text and moves on to
+ # the escape-start-dash state; anything else is reconsumed in the script
+ # data state.
+ tok_state_script_data_escape_start = ->
+     c = txt.charAt(cur++)
+     unless c is '-'
+         # Anything else
+         tok_state = tok_state_script_data
+         cur -= 1 # Reconsume
+         return
+     tok_state = tok_state_script_data_escape_start_dash
+     return new_character_token '-'
+
+ # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
+ # Script data escape start dash state: a second '-' is returned as text and
+ # enters the escaped-dash-dash state; anything else is reconsumed in the
+ # script data state.
+ tok_state_script_data_escape_start_dash = ->
+     c = txt.charAt(cur++)
+     unless c is '-'
+         # Anything else
+         tok_state = tok_state_script_data
+         cur -= 1 # Reconsume
+         return
+     tok_state = tok_state_script_data_escaped_dash_dash
+     return new_character_token '-'
+
+ # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
+ # Script data escaped state: consumes one character inside an escaped
+ # script section and returns it as a character token, except for '<' and
+ # EOF, which only change state.
+ tok_state_script_data_escaped = ->
+     switch c = txt.charAt(cur++)
+         when '-'
+             tok_state = tok_state_script_data_escaped_dash
+             return new_character_token '-'
+         when '<'
+             tok_state = tok_state_script_data_escaped_less_than_sign
+             return
+         when "\u0000"
+             parse_error()
+             return new_character_token "\ufffd"
+         when '' # EOF
+             tok_state = tok_state_data
+             parse_error()
+             cur -= 1 # Reconsume
+             return
+         else
+             return new_character_token c
+
+ # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
+ # Script data escaped dash state: one '-' has been seen. A second '-' moves
+ # to the dash-dash state; '<' and EOF only change state; anything else goes
+ # back to the escaped state and is returned as a character token.
+ tok_state_script_data_escaped_dash = ->
+     switch c = txt.charAt(cur++)
+         when '-'
+             tok_state = tok_state_script_data_escaped_dash_dash
+             return new_character_token '-'
+         when '<'
+             tok_state = tok_state_script_data_escaped_less_than_sign
+             return
+         when "\u0000"
+             parse_error()
+             tok_state = tok_state_script_data_escaped
+             return new_character_token "\ufffd"
+         when '' # EOF
+             tok_state = tok_state_data
+             parse_error()
+             cur -= 1 # Reconsume
+             return
+         else
+             tok_state = tok_state_script_data_escaped
+             return new_character_token c
+
+ # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
+ # Script data escaped dash dash state: '--' has been seen. '>' ends the
+ # escaped section (back to script data); further '-' characters stay here;
+ # anything else goes back to the escaped state.
+ tok_state_script_data_escaped_dash_dash = ->
+     switch c = txt.charAt(cur++)
+         when '-'
+             return new_character_token '-'
+         when '<'
+             tok_state = tok_state_script_data_escaped_less_than_sign
+             return
+         when '>'
+             tok_state = tok_state_script_data
+             return new_character_token '>'
+         when "\u0000"
+             parse_error()
+             tok_state = tok_state_script_data_escaped
+             return new_character_token "\ufffd"
+         when '' # EOF
+             parse_error()
+             tok_state = tok_state_data
+             cur -= 1 # Reconsume
+             return
+         else
+             tok_state = tok_state_script_data_escaped
+             return new_character_token c
+
+ # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
+ # Script data escaped less-than-sign state: decides what a '<' inside an
+ # escaped script section means. '/' starts a possible end tag, a letter
+ # starts a possible double escape (returning '<' plus the letter as text),
+ # and anything else makes the '<' plain text.
+ tok_state_script_data_escaped_less_than_sign = ->
+     c = txt.charAt(cur++)
+     if c is '/'
+         temporary_buffer = ''
+         tok_state = tok_state_script_data_escaped_end_tag_open
+         return
+     if is_uc_alpha(c)
+         temporary_buffer = c.toLowerCase() # yes, really
+         tok_state = tok_state_script_data_double_escape_start
+         return new_character_token "<#{c}" # fixfull split
+     if is_lc_alpha(c)
+         temporary_buffer = c
+         tok_state = tok_state_script_data_double_escape_start
+         return new_character_token "<#{c}" # fixfull split
+     # Anything else: the pending '<' is emitted and the current character is
+     # reconsumed. Returning `c` here (as before) lost the '<' and emitted
+     # the reconsumed character twice.
+     tok_state = tok_state_script_data_escaped
+     cur -= 1 # Reconsume
+     return new_character_token '<'
+
+ # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
+ # Script data escaped end tag open state: a letter starts an end tag token
+ # (name stored lower-cased, raw character kept in the temporary buffer);
+ # anything else returns '</' as text and reconsumes in the escaped state.
+ tok_state_script_data_escaped_end_tag_open = ->
+     c = txt.charAt(cur++)
+     if is_uc_alpha(c)
+         first_name_char = c.toLowerCase()
+     else if is_lc_alpha(c)
+         first_name_char = c
+     else
+         # Anything else
+         tok_state = tok_state_script_data_escaped
+         cur -= 1 # Reconsume
+         return new_character_token '</' # fixfull split
+     tok_cur_tag = new_end_tag first_name_char
+     temporary_buffer += c
+     tok_state = tok_state_script_data_escaped_end_tag_name
+     return
+
+ # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
+ # Script data escaped end tag name state: collects the end tag name.
+ # Whitespace, '/' and '>' finish the tag only if it is an appropriate end
+ # tag; otherwise (and for any non-letter) the buffered '</name' is returned
+ # as text and the character is reconsumed in the escaped state.
+ tok_state_script_data_escaped_end_tag_name = ->
+     c = txt.charAt(cur++)
+     if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
+         if is_appropriate_end_tag tok_cur_tag
+             tok_state = tok_state_before_attribute_name
+             return
+         # fall through
+     if c is '/'
+         if is_appropriate_end_tag tok_cur_tag
+             tok_state = tok_state_self_closing_start_tag
+             return
+         # fall through
+     if c is '>'
+         if is_appropriate_end_tag tok_cur_tag
+             tok_state = tok_state_data
+             return tok_cur_tag
+         # fall through
+     if is_uc_alpha(c)
+         tok_cur_tag.name += c.toLowerCase()
+         # The temporary buffer keeps the character exactly as written: it is
+         # what gets re-emitted as text below, so lower-casing it here would
+         # change the script's source text. Only the tag name is lower-cased.
+         temporary_buffer += c
+         return
+     if is_lc_alpha(c)
+         tok_cur_tag.name += c
+         temporary_buffer += c
+         return
+     # Anything else
+     tok_state = tok_state_script_data_escaped
+     cur -= 1 # Reconsume
+     return new_character_token "</#{temporary_buffer}" # fixfull split
+
+ # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
+ # Script data double escape start state: accumulates letters (lower-cased)
+ # in the temporary buffer. On whitespace, '/' or '>' the buffer decides the
+ # next state: 'script' enters the double-escaped state, anything else
+ # returns to the escaped state.
+ tok_state_script_data_double_escape_start = ->
+     c = txt.charAt(cur++)
+     switch c
+         when "\t", "\u000a", "\u000c", ' ', '/', '>'
+             if temporary_buffer is 'script'
+                 tok_state = tok_state_script_data_double_escaped
+             else
+                 tok_state = tok_state_script_data_escaped
+             return new_character_token c
+     if is_uc_alpha(c)
+         temporary_buffer += c.toLowerCase() # yes, really lowercase
+         return new_character_token c
+     if is_lc_alpha(c)
+         temporary_buffer += c
+         return new_character_token c
+     # Anything else
+     tok_state = tok_state_script_data_escaped
+     cur -= 1 # Reconsume
+     return
+
+ # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
+ # Consumes one character of double-escaped script data and returns it as a
+ # character token. '-' and '<' also switch state; NUL is reported and
+ # replaced by U+FFFD; EOF is reported, pushed back for the data state, and
+ # nothing is returned.
+ tok_state_script_data_double_escaped = ->
+   switch c = txt.charAt(cur++)
+     when '-'
+       tok_state = tok_state_script_data_double_escaped_dash
+     when '<'
+       tok_state = tok_state_script_data_double_escaped_less_than_sign
+     when "\u0000"
+       parse_error()
+       return new_character_token "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return
+   # '-', '<' and every ordinary character are emitted unchanged
+   return new_character_token c
+
+ # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
+ # Consumes the character following a single '-' in double-escaped script
+ # data. A second '-' moves to the dash-dash state, '<' to the less-than-sign
+ # state, and everything else returns to the plain double-escaped state. NUL
+ # is reported and replaced by U+FFFD; EOF is reported and pushed back for the
+ # data state.
+ tok_state_script_data_double_escaped_dash = ->
+   switch c = txt.charAt(cur++)
+     when '-'
+       tok_state = tok_state_script_data_double_escaped_dash_dash
+     when '<'
+       tok_state = tok_state_script_data_double_escaped_less_than_sign
+     when "\u0000"
+       parse_error()
+       tok_state = tok_state_script_data_double_escaped
+       return new_character_token "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return
+     else
+       tok_state = tok_state_script_data_double_escaped
+   # All remaining cases emit the consumed character as-is
+   return new_character_token c
+
+ # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
+ # Consumes the character following '--' in double-escaped script data.
+ # Further '-' characters keep this state, '<' moves to the less-than-sign
+ # state, '>' moves to the script data state, and other characters return to
+ # the double-escaped state. NUL is reported and replaced by U+FFFD; EOF is
+ # reported and pushed back for the data state.
+ tok_state_script_data_double_escaped_dash_dash = ->
+   switch c = txt.charAt(cur++)
+     when '-'
+       # State is deliberately left unchanged
+       return new_character_token '-'
+     when '<'
+       tok_state = tok_state_script_data_double_escaped_less_than_sign
+     when '>'
+       tok_state = tok_state_script_data
+     when "\u0000"
+       parse_error()
+       tok_state = tok_state_script_data_double_escaped
+       return new_character_token "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return
+     else
+       tok_state = tok_state_script_data_double_escaped
+   # '<', '>' and ordinary characters are emitted unchanged
+   return new_character_token c
+
+ # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
+ # Consumes the character following '<' in double-escaped script data. A '/'
+ # clears temporary_buffer, moves to the double-escape-end state and is
+ # emitted; any other character is pushed back for the double-escaped state
+ # and nothing is emitted.
+ tok_state_script_data_double_escaped_less_than_sign = ->
+   c = txt.charAt(cur++)
+   unless c is '/'
+     # Anything else
+     tok_state = tok_state_script_data_double_escaped
+     cur -= 1 # Reconsume
+     return
+   temporary_buffer = ''
+   tok_state = tok_state_script_data_double_escape_end
+   return new_character_token '/'
+
+ # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
+ # Consumes one character. Letters are collected (lowercased) in
+ # temporary_buffer and echoed as character tokens. A delimiter (whitespace,
+ # '/' or '>') drops back to the script-data-escaped state when the collected
+ # word is 'script', and otherwise stays double-escaped. Any other character
+ # is pushed back for the double-escaped state and nothing is emitted.
+ tok_state_script_data_double_escape_end = ->
+   switch c = txt.charAt(cur++)
+     when "\t", "\u000a", "\u000c", ' ', '/', '>'
+       tok_state = if temporary_buffer is 'script' then tok_state_script_data_escaped else tok_state_script_data_double_escaped
+       return new_character_token c
+   if is_uc_alpha(c)
+     temporary_buffer += c.toLowerCase() # yes, really lowercase
+   else if is_lc_alpha(c)
+     temporary_buffer += c
+   else
+     # Anything else
+     tok_state = tok_state_script_data_double_escaped
+     cur -= 1 # Reconsume
+     return
+   # Both letter cases emit the character as it appeared in the input
+   return new_character_token c
+
+ # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
+ # Consumes one character between a tag name (or previous attribute) and the
+ # next attribute name.
+ #   - whitespace is skipped; '/' moves to the self-closing state.
+ #   - '>' finishes the tag: the current tag token is returned and cleared.
+ #   - EOF is reported and the tokenizer goes back to the data state.
+ #   - everything else starts a new attribute whose name begins with that
+ #     character (lowercased if uppercase, U+FFFD for NUL); NUL, quotes, '<'
+ #     and '=' are additionally reported as parse errors.
+ # Returns the tag token on '>', otherwise null.
+ tok_state_before_attribute_name = ->
+   attr_name = null
+   c = txt.charAt(cur++)
+   if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
+     return null
+   if c is '/'
+     tok_state = tok_state_self_closing_start_tag
+     return null
+   if c is '>'
+     tok_state = tok_state_data
+     tmp = tok_cur_tag
+     tok_cur_tag = null
+     return tmp
+   if c is "\u0000"
+     parse_error()
+     attr_name = "\ufffd"
+   else if c is '"' or c is "'" or c is '<' or c is '='
+     parse_error()
+     attr_name = c
+   else if c is '' # EOF
+     # NOTE(review): unlike sibling states, cur is not decremented here;
+     # presumably harmless because charAt keeps yielding '' past the end of
+     # txt -- confirm against the data state.
+     parse_error()
+     tok_state = tok_state_data
+   else if is_uc_alpha(c)
+     attr_name = c.toLowerCase()
+   else
+     attr_name = c
+   # EOF leaves attr_name unset, so no attribute is started
+   return null unless attr_name?
+   # The new attribute goes to the front of attrs_a as a [name, value] pair
+   tok_cur_tag.attrs_a.unshift [attr_name, '']
+   tok_state = tok_state_attribute_name
+   return null
+
+ # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
+ tok_state_attribute_name = ->
+ switch c = txt.charAt(cur++)
+ when "\t", "\n", "\u000c", ' '
+ tok_state = tok_state_after_attribute_name
+ when '/'
+ tok_state = tok_state_self_closing_start_tag
+ when '='
+ tok_state = tok_state_before_attribute_value
+ when '>'
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ when "\u0000"
+ parse_error()
+ tok_cur_tag.attrs_a[0][0] += "\ufffd"
+ when '"', "'", '<'