+ return # fragment case
+ open_els.shift()
+ if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
+ ins_mode = ins_mode_after_frameset
+ return
+ if t.type is TYPE_START_TAG and t.name is 'frame'
+ insert_html_element t
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ if t.type is TYPE_START_TAG and t.name is 'noframes'
+ ins_mode_in_head t
+ return
+ if t.type is TYPE_EOF
+ if open_els.length isnt 1
+ parse_error()
+ stop_parsing()
+ return
+ # Anything else
+ parse_error()
+ return
+
+ # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
+ ins_mode_after_frameset = (t) ->
+ if is_space_tok t
+ insert_character t
+ return
+ if t.type is TYPE_COMMENT
+ insert_comment t
+ return
+ if t.type is TYPE_DOCTYPE
+ parse_error()
+ return
+ if t.type is TYPE_START_TAG and t.name is 'html'
+ ins_mode_in_body t
+ return
+ if t.type is TYPE_END_TAG and t.name is 'html'
+ ins_mode = ins_mode_after_after_frameset
+ return
+ if t.type is TYPE_START_TAG and t.name is 'noframes'
+ ins_mode_in_head t
+ return
+ if t.type is TYPE_EOF
+ stop_parsing()
+ return
+ # Anything else
+ parse_error()
+ return
+
+ # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
+ ins_mode_after_after_body = (t) ->
+ if t.type is TYPE_COMMENT
+ insert_comment t, [doc, doc.children.length]
+ return
+ if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+ ins_mode_in_body t
+ return
+ if t.type is TYPE_EOF
+ stop_parsing()
+ return
+ # Anything else
+ parse_error()
+ ins_mode = ins_mode_in_body
+ process_token t
+ return
+
+ # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
+ ins_mode_after_after_frameset = (t) ->
+ if t.type is TYPE_COMMENT
+ insert_comment t, [doc, doc.children.length]
+ return
+ if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+ ins_mode_in_body t
+ return
+ if t.type is TYPE_EOF
+ stop_parsing()
+ return
+ if t.type is TYPE_START_TAG and t.name is 'noframes'
+ ins_mode_in_head t
+ return
+ # Anything else
+ parse_error()
+ return
+
+ # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
+ has_color_face_or_size = (t) ->
+ for a in t.attrs_a
+ if a[0] is 'color' or a[0] is 'face' or a[0] is 'size'
+ return true
+ return false
+ in_foreign_content_end_script = ->
+ open_els.shift()
+ # fixfull
+ return
+ in_foreign_content_other_start = (t) ->
+ acn = adjusted_current_node()
+ if acn.namespace is NS_MATHML
+ adjust_mathml_attributes t
+ if acn.namespace is NS_SVG and svg_name_fixes[t.name]?
+ t.name = svg_name_fixes[t.name]
+ if acn.namespace is NS_SVG
+ adjust_svg_attributes t
+ adjust_foreign_attributes t
+ insert_foreign_element t, acn.namespace
+ if t.flag 'self-closing'
+ if t.name is 'script'
+ t.acknowledge_self_closing()
+ in_foreign_content_end_script()
+ # fixfull
+ else
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ in_foreign_content = (t) ->
+ if t.type is TYPE_TEXT and t.text is "\u0000"
+ parse_error()
+ insert_character new_character_token "\ufffd"
+ return
+ if is_space_tok t
+ insert_character t
+ return
+ if t.type is TYPE_TEXT
+ flag_frameset_ok = false
+ insert_character t
+ return
+ if t.type is TYPE_COMMENT
+ insert_comment t
+ return
+ if t.type is TYPE_DOCTYPE
+ parse_error()
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
+ parse_error()
+ if flag_fragment_parsing
+ in_foreign_content_other_start t
+ return
+ loop # is this safe?
+ open_els.shift()
+ if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
+ break
+ process_token t
+ return
+ if t.type is TYPE_START_TAG
+ in_foreign_content_other_start t
+ return
+ if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
+ in_foreign_content_end_script()
+ return
+ if t.type is TYPE_END_TAG
+ i = 0
+ node = open_els[i]
+ if node.name.toLowerCase() isnt t.name
+ parse_error()
+ loop
+ if node is open_els[open_els.length - 1]
+ return
+ if node.name.toLowerCase() is t.name
+ loop
+ el = open_els.shift()
+ if el is node
+ return
+ i += 1
+ node = open_els[i]
+ if node.namespace is NS_HTML
+ break
+ ins_mode t # explicitly call HTML insertion mode
+
+
+ # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
+ tok_state_data = ->
+ switch c = txt.charAt(cur++)
+ when '&'
+ return new_text_node parse_character_reference()
+ when '<'
+ tok_state = tok_state_tag_open
+ when "\u0000"
+ parse_error()
+ return new_text_node "\ufffd"
+ when '' # EOF
+ return new_eof_token()
+ else
+ return new_text_node c
+ return null
+
+ # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
+ # not needed: tok_state_character_reference_in_data = ->
+ # just call parse_character_reference()
+
+ # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
+ tok_state_rcdata = ->
+ switch c = txt.charAt(cur++)
+ when '&'
+ return new_text_node parse_character_reference()
+ when '<'
+ tok_state = tok_state_rcdata_less_than_sign
+ when "\u0000"
+ parse_error()
+ return new_character_token "\ufffd"
+ when '' # EOF
+ return new_eof_token()
+ else
+ return new_character_token c
+ return null
+
+ # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
+ # not needed: tok_state_character_reference_in_rcdata = ->
+ # just call parse_character_reference()
+
+ # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
+ tok_state_rawtext = ->
+ switch c = txt.charAt(cur++)
+ when '<'
+ tok_state = tok_state_rawtext_less_than_sign
+ when "\u0000"
+ parse_error()
+ return new_character_token "\ufffd"
+ when '' # EOF
+ return new_eof_token()
+ else
+ return new_character_token c
+ return null
+
+ # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
+ tok_state_script_data = ->
+ switch c = txt.charAt(cur++)
+ when '<'
+ tok_state = tok_state_script_data_less_than_sign
+ when "\u0000"
+ parse_error()
+ return new_character_token "\ufffd"
+ when '' # EOF
+ return new_eof_token()
+ else
+ return new_character_token c
+ return null
+
+ # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
+ tok_state_plaintext = ->
+ switch c = txt.charAt(cur++)
+ when "\u0000"
+ parse_error()
+ return new_character_token "\ufffd"
+ when '' # EOF
+ return new_eof_token()
+ else
+ return new_character_token c
+ return null
+
+
+ # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
+ tok_state_tag_open = ->
+ c = txt.charAt(cur++)
+ if c is '!'
+ tok_state = tok_state_markup_declaration_open
+ return
+ if c is '/'
+ tok_state = tok_state_end_tag_open
+ return
+ if is_uc_alpha(c)
+ tok_cur_tag = new_open_tag c.toLowerCase()
+ tok_state = tok_state_tag_name
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag = new_open_tag c
+ tok_state = tok_state_tag_name
+ return
+ if c is '?'
+ parse_error()
+ tok_cur_tag = new_comment_token '?' # FIXME right?
+ tok_state = tok_state_bogus_comment
+ return
+ # Anything else
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 # we didn't parse/handle the char after <
+ return new_text_node '<'
+
+ # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
+ tok_state_end_tag_open = ->
+ c = txt.charAt(cur++)
+ if is_uc_alpha(c)
+ tok_cur_tag = new_end_tag c.toLowerCase()
+ tok_state = tok_state_tag_name
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag = new_end_tag c
+ tok_state = tok_state_tag_name
+ return
+ if c is '>'
+ parse_error()
+ tok_state = tok_state_data
+ return
+ if c is '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ return new_text_node '</'
+ # Anything else
+ parse_error()
+ tok_cur_tag = new_comment_token c
+ tok_state = tok_state_bogus_comment
+ return null
+
+ # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
+ tok_state_tag_name = ->
+ switch c = txt.charAt(cur++)
+ when "\t", "\n", "\u000c", ' '
+ tok_state = tok_state_before_attribute_name
+ when '/'
+ tok_state = tok_state_self_closing_start_tag
+ when '>'
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ when "\u0000"
+ parse_error()
+ tok_cur_tag.name += "\ufffd"
+ when '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ else
+ if is_uc_alpha(c)
+ tok_cur_tag.name += c.toLowerCase()
+ else
+ tok_cur_tag.name += c
+ return null
+
+ # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
+ tok_state_rcdata_less_than_sign = ->
+ c = txt.charAt(cur++)
+ if c is '/'
+ temporary_buffer = ''
+ tok_state = tok_state_rcdata_end_tag_open
+ return null
+ # Anything else
+ tok_state = tok_state_rcdata
+ cur -= 1 # reconsume the input character
+ return new_character_token '<'
+
+ # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
+ tok_state_rcdata_end_tag_open = ->
+ c = txt.charAt(cur++)
+ if is_uc_alpha(c)
+ tok_cur_tag = new_end_tag c.toLowerCase()
+ temporary_buffer += c
+ tok_state = tok_state_rcdata_end_tag_name
+ return null
+ if is_lc_alpha(c)
+ tok_cur_tag = new_end_tag c
+ temporary_buffer += c
+ tok_state = tok_state_rcdata_end_tag_name
+ return null
+ # Anything else
+ tok_state = tok_state_rcdata
+ cur -= 1 # reconsume the input character
+ return new_character_token "</" # fixfull separate these
+
+ # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
+ is_appropriate_end_tag = (t) ->
+ # spec says to check against "the tag name of the last start tag to
+ # have been emitted from this tokenizer", but this is only called from
+ # the various "raw" states, so it's hopefully ok to assume that
+ # open_els[0].name will work instead TODO: verify this after the script
+ # data states are implemented
+ debug_log "#{t.type}, #{t.name} open_els: #{serialize_els open_els, true, true}"
+ return t.type is TYPE_END_TAG and t.name is open_els[0].name
+
+ # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
+ tok_state_rcdata_end_tag_name = ->
+ c = txt.charAt(cur++)
+ if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_before_attribute_name
+ return
+ # else fall through to "Anything else"
+ if c is '/'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
+ return
+ # else fall through to "Anything else"
+ if c is '>'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_data
+ return tok_cur_tag
+ # else fall through to "Anything else"
+ if is_uc_alpha(c)
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c
+ return null
+ if is_lc_alpha(c)
+ tok_cur_tag.name += c
+ temporary_buffer += c
+ return null
+ # Anything else
+ tok_state = tok_state_rcdata
+ cur -= 1 # reconsume the input character
+ return new_character_token '</' + temporary_buffer # fixfull separate these
+
+ # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
+ tok_state_rawtext_less_than_sign = ->
+ c = txt.charAt(cur++)
+ if c is '/'
+ temporary_buffer = ''
+ tok_state = tok_state_rawtext_end_tag_open
+ return null
+ # Anything else
+ tok_state = tok_state_rawtext
+ cur -= 1 # reconsume the input character
+ return new_character_token '<'
+
+ # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
+ tok_state_rawtext_end_tag_open = ->
+ c = txt.charAt(cur++)
+ if is_uc_alpha(c)
+ tok_cur_tag = new_end_tag c.toLowerCase()
+ temporary_buffer += c
+ tok_state = tok_state_rawtext_end_tag_name
+ return null
+ if is_lc_alpha(c)
+ tok_cur_tag = new_end_tag c
+ temporary_buffer += c
+ tok_state = tok_state_rawtext_end_tag_name
+ return null
+ # Anything else
+ tok_state = tok_state_rawtext
+ cur -= 1 # reconsume the input character
+ return new_character_token "</" # fixfull separate these
+
+ # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
+ tok_state_rawtext_end_tag_name = ->
+ c = txt.charAt(cur++)
+ if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_before_attribute_name
+ return
+ # else fall through to "Anything else"
+ if c is '/'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_self_closing_start_tag
+ return
+ # else fall through to "Anything else"
+ if c is '>'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_data
+ return tok_cur_tag
+ # else fall through to "Anything else"
+ if is_uc_alpha(c)
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c
+ return null
+ if is_lc_alpha(c)
+ tok_cur_tag.name += c
+ temporary_buffer += c
+ return null
+ # Anything else
+ tok_state = tok_state_rawtext
+ cur -= 1 # reconsume the input character
+ return new_character_token '</' + temporary_buffer # fixfull separate these
+
+ # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
+ tok_state_script_data_less_than_sign = ->
+ c = txt.charAt(cur++)
+ if c is '/'
+ temporary_buffer = ''
+ tok_state = tok_state_script_data_end_tag_open
+ return
+ if c is '!'
+ tok_state = tok_state_script_data_escape_start
+ return new_character_token '<!' # fixfull split
+ # Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 # Reconsume
+ return new_character_token '<'
+
+ # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
+ tok_state_script_data_end_tag_open = ->
+ c = txt.charAt(cur++)
+ if is_uc_alpha(c)
+ tok_cur_tag = new_end_tag c.toLowerCase()
+ temporary_buffer += c
+ tok_state = tok_state_script_data_end_tag_name
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag = new_end_tag c
+ temporary_buffer += c
+ tok_state = tok_state_script_data_end_tag_name
+ return
+ # Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 # Reconsume
+ return new_character_token '</'
+
+ # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
+ tok_state_script_data_end_tag_name = ->
+ c = txt.charAt(cur++)
+ if c is "\t" or c is "\n" or c is "\u000c" or c is ' '
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_before_attribute_name
+ return
+ # fall through
+ if c is '/'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_self_closing_start_tag
+ return
+ # fall through
+ if c is '>'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_data
+ return tok_cur_tag
+ # fall through
+ if is_uc_alpha(c)
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag.name += c
+ temporary_buffer += c
+ return
+ # Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 # Reconsume
+ return new_character_token "</#{temporary_buffer}" # fixfull split
+
+ # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
+ tok_state_script_data_escape_start = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ tok_state = tok_state_script_data_escape_start_dash
+ return new_character_token '-'
+ # Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 # Reconsume
+ return
+
+ # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
+ tok_state_script_data_escape_start_dash = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ tok_state = tok_state_script_data_escaped_dash_dash
+ return new_character_token '-'
+ # Anything else
+ tok_state = tok_state_script_data
+ cur -= 1 # Reconsume
+ return
+
+ # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
+ tok_state_script_data_escaped = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ tok_state = tok_state_script_data_escaped_dash
+ return new_character_token '-'
+ if c is '<'
+ tok_state = tok_state_script_data_escaped_less_than_sign
+ return
+ if c is "\u0000"
+ parse_error()
+ return new_character_token "\ufffd"
+ if c is '' # EOF
+ tok_state = tok_state_data
+ parse_error()
+ cur -= 1 # Reconsume
+ return
+ # Anything else
+ return new_character_token c
+
+ # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
+ tok_state_script_data_escaped_dash = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ tok_state = tok_state_script_data_escaped_dash_dash
+ return new_character_token '-'
+ if c is '<'
+ tok_state = tok_state_script_data_escaped_less_than_sign
+ return
+ if c is "\u0000"
+ parse_error()
+ tok_state = tok_state_script_data_escaped
+ return new_character_token "\ufffd"
+ if c is '' # EOF
+ tok_state = tok_state_data
+ parse_error()
+ cur -= 1 # Reconsume
+ return
+ # Anything else
+ tok_state = tok_state_script_data_escaped
+ return new_character_token c
+
+ # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
+ tok_state_script_data_escaped_dash_dash = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ return new_character_token '-'
+ if c is '<'
+ tok_state = tok_state_script_data_escaped_less_than_sign
+ return
+ if c is '>'
+ tok_state = tok_state_script_data
+ return new_character_token '>'
+ if c is "\u0000"
+ parse_error()
+ tok_state = tok_state_script_data_escaped
+ return new_character_token "\ufffd"
+ if c is '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 # Reconsume
+ return
+ # Anything else
+ tok_state = tok_state_script_data_escaped
+ return new_character_token c
+
+ # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
+ tok_state_script_data_escaped_less_than_sign = ->
+ c = txt.charAt(cur++)
+ if c is '/'
+ temporary_buffer = ''
+ tok_state = tok_state_script_data_escaped_end_tag_open
+ return
+ if is_uc_alpha(c)
+ temporary_buffer = c.toLowerCase() # yes, really
+ tok_state = tok_state_script_data_double_escape_start
+ return new_character_token "<#{c}" # fixfull split
+ if is_lc_alpha(c)
+ temporary_buffer = c
+ tok_state = tok_state_script_data_double_escape_start
+ return new_character_token "<#{c}" # fixfull split
+ # Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 # Reconsume
+ return new_character_token '<'
+
+ # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
+ tok_state_script_data_escaped_end_tag_open = ->
+ c = txt.charAt(cur++)
+ if is_uc_alpha(c)
+ tok_cur_tag = new_end_tag c.toLowerCase()
+ temporary_buffer += c
+ tok_state = tok_state_script_data_escaped_end_tag_name
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag = new_end_tag c
+ temporary_buffer += c
+ tok_state = tok_state_script_data_escaped_end_tag_name
+ return
+ # Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 # Reconsume
+ return new_character_token '</' # fixfull split
+
+ # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
+ tok_state_script_data_escaped_end_tag_name = ->
+ c = txt.charAt(cur++)
+ if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_before_attribute_name
+ return
+ # fall through
+ if c is '/'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_self_closing_start_tag
+ return
+ # fall through
+ if c is '>'
+ if is_appropriate_end_tag tok_cur_tag
+ tok_state = tok_state_data
+ return tok_cur_tag
+ # fall through
+ if is_uc_alpha(c)
+ tok_cur_tag.name += c.toLowerCase()
+ temporary_buffer += c.toLowerCase()
+ return
+ if is_lc_alpha(c)
+ tok_cur_tag.name += c
+ temporary_buffer += c.toLowerCase()
+ return
+ # Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 # Reconsume
+ return new_character_token "</#{temporary_buffer}" # fixfull split
+
+ # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
+ tok_state_script_data_double_escape_start = ->
+ c = txt.charAt(cur++)
+ if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
+ if temporary_buffer is 'script'
+ tok_state = tok_state_script_data_double_escaped
+ else
+ tok_state = tok_state_script_data_escaped
+ return new_character_token c
+ if is_uc_alpha(c)
+ temporary_buffer += c.toLowerCase() # yes, really lowercase
+ return new_character_token c
+ if is_lc_alpha(c)
+ temporary_buffer += c
+ return new_character_token c
+ # Anything else
+ tok_state = tok_state_script_data_escaped
+ cur -= 1 # Reconsume
+ return
+
+ # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
+ tok_state_script_data_double_escaped = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ tok_state = tok_state_script_data_double_escaped_dash
+ return new_character_token '-'
+ if c is '<'
+ tok_state = tok_state_script_data_double_escaped_less_than_sign
+ return new_character_token '<'
+ if c is "\u0000"
+ parse_error()
+ return new_character_token "\ufffd"
+ if c is '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 # Reconsume
+ return
+ # Anything else
+ return new_character_token c
+
+ # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
+ tok_state_script_data_double_escaped_dash = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ tok_state = tok_state_script_data_double_escaped_dash_dash
+ return new_character_token '-'
+ if c is '<'
+ tok_state = tok_state_script_data_double_escaped_less_than_sign
+ return new_character_token '<'
+ if c is "\u0000"
+ parse_error()
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token "\ufffd"
+ if c is '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 # Reconsume
+ return
+ # Anything else
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token c
+
+ # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
+ tok_state_script_data_double_escaped_dash_dash = ->
+ c = txt.charAt(cur++)
+ if c is '-'
+ return new_character_token '-'
+ if c is '<'
+ tok_state = tok_state_script_data_double_escaped_less_than_sign
+ return new_character_token '<'
+ if c is '>'
+ tok_state = tok_state_script_data
+ return new_character_token '>'
+ if c is "\u0000"
+ parse_error()
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token "\ufffd"
+ if c is '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ cur -= 1 # Reconsume
+ return
+ # Anything else
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token c
+
+ # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
+ tok_state_script_data_double_escaped_less_than_sign = ->
+ c = txt.charAt(cur++)
+ if c is '/'
+ temporary_buffer = ''
+ tok_state = tok_state_script_data_double_escape_end
+ return new_character_token '/'
+ # Anything else
+ tok_state = tok_state_script_data_double_escaped
+ cur -= 1 # Reconsume
+ return
+
+ # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
+ tok_state_script_data_double_escape_end = ->
+ c = txt.charAt(cur++)
+ if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' ' or c is '/' or c is '>'
+ if temporary_buffer is 'script'
+ tok_state = tok_state_script_data_escaped
+ else
+ tok_state = tok_state_script_data_double_escaped
+ return new_character_token c
+ if is_uc_alpha(c)
+ temporary_buffer += c.toLowerCase() # yes, really lowercase
+ return new_character_token c
+ if is_lc_alpha(c)
+ temporary_buffer += c
+ return new_character_token c
+ # Anything else
+ tok_state = tok_state_script_data_double_escaped
+ cur -= 1 # Reconsume
+ return
+
+ # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
+ tok_state_before_attribute_name = ->
+ attr_name = null
+ switch c = txt.charAt(cur++)
+ when "\t", "\n", "\u000c", ' '
+ return null
+ when '/'
+ tok_state = tok_state_self_closing_start_tag
+ return null
+ when '>'
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ when "\u0000"
+ parse_error()
+ attr_name = "\ufffd"
+ when '"', "'", '<', '='
+ parse_error()
+ attr_name = c
+ when '' # EOF
+ parse_error()
+ tok_state = tok_state_data
+ else
+ if is_uc_alpha(c)
+ attr_name = c.toLowerCase()
+ else
+ attr_name = c
+ if attr_name?
+ tok_cur_tag.attrs_a.unshift [attr_name, '']
+ tok_state = tok_state_attribute_name
+ return null
+
+ # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
+ tok_state_attribute_name = ->
+ switch c = txt.charAt(cur++)
+ when "\t", "\n", "\u000c", ' '
+ tok_state = tok_state_after_attribute_name
+ when '/'
+ tok_state = tok_state_self_closing_start_tag
+ when '='
+ tok_state = tok_state_before_attribute_value
+ when '>'
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ when "\u0000"
+ parse_error()
+ tok_cur_tag.attrs_a[0][0] += "\ufffd"
+ when '"', "'", '<'