+ unless is_in_scope 'form', NS_HTML
+ parse_error()
+ return
+ generate_implied_end_tags()
+ if open_els[0].name isnt 'form' or open_els[0].namespace isnt NS_HTML
+ parse_error()
+ loop
+ el = open_els.shift()
+ if el.name is 'form' and el.namespace is NS_HTML
+ break
+ return
+ if t.type is TYPE_END_TAG and t.name is 'p'
+ unless is_in_button_scope 'p', NS_HTML
+ parse_error()
+ insert_html_element new_open_tag 'p'
+ close_p_element()
+ return
+ if t.type is TYPE_END_TAG and t.name is 'li'
+ unless is_in_li_scope 'li', NS_HTML
+ parse_error()
+ return
+ generate_implied_end_tags 'li' # arg is exception
+ if open_els[0].name isnt 'li' or open_els[0].namespace isnt NS_HTML
+ parse_error()
+ loop
+ el = open_els.shift()
+ if el.name is 'li' and el.namespace is NS_HTML
+ break
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'dd' or t.name is 'dt')
+ unless is_in_scope t.name, NS_HTML
+ parse_error()
+ return
+ generate_implied_end_tags t.name # arg is exception
+ if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
+ parse_error()
+ loop
+ el = open_els.shift()
+ if el.name is t.name and el.namespace is NS_HTML
+ break
+ return
+ if t.type is TYPE_END_TAG and h_tags[t.name]?
+ h_in_scope = false
+ for el in open_els
+ if h_tags[el.name] is el.namespace
+ h_in_scope = true
+ break
+ if standard_scopers[el.name] is el.namespace
+ break
+ unless h_in_scope
+ parse_error()
+ return
+ generate_implied_end_tags()
+ if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
+ parse_error()
+ loop
+ el = open_els.shift()
+ if h_tags[el.name] is el.namespace
+ break
+ return
+ # deep breath!
+ if t.type is TYPE_START_TAG and t.name is 'a'
+ # If the list of active formatting elements contains an a element
+ # between the end of the list and the last marker on the list (or
+ # the start of the list if there is no marker on the list), then
+ # this is a parse error; run the adoption agency algorithm for the
+ # tag name "a", then remove that element from the list of active
+ # formatting elements and the stack of open elements if the
+ # adoption agency algorithm didn't already remove it (it might not
+ # have if the element is not in table scope).
+ found = false
+ for el in afe
+ if el.type is TYPE_AFE_MARKER
+ break
+ if el.name is 'a' and el.namespace is NS_HTML
+ found = el
+ if found?
+ parse_error()
+ adoption_agency 'a'
+ for el, i in afe
+ if el is found
+ afe.splice i, 1
+ for el, i in open_els
+ if el is found
+ open_els.splice i, 1
+ reconstruct_afe()
+ el = insert_html_element t
+ afe_push el
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
+ reconstruct_afe()
+ el = insert_html_element t
+ afe_push el
+ return
+ if t.type is TYPE_START_TAG and t.name is 'nobr'
+ reconstruct_afe()
+ if is_in_scope 'nobr', NS_HTML
+ parse_error()
+ adoption_agency 'nobr'
+ reconstruct_afe()
+ el = insert_html_element t
+ afe_push el
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'a' or t.name is 'b' or t.name is 'big' or t.name is 'code' or t.name is 'em' or t.name is 'font' or t.name is 'i' or t.name is 'nobr' or t.name is 's' or t.name is 'small' or t.name is 'strike' or t.name is 'strong' or t.name is 'tt' or t.name is 'u')
+ adoption_agency t.name
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
+ reconstruct_afe()
+ insert_html_element t
+ afe_push_marker()
+ flag_frameset_ok = false
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'applet' or t.name is 'marquee' or t.name is 'object')
+ unless is_in_scope t.name, NS_HTML
+ parse_error()
+ return
+ generate_implied_end_tags()
+ if open_els[0].name isnt t.name or open_els[0].namespace isnt NS_HTML
+ parse_error()
+ loop
+ el = open_els.shift()
+ if el.name is t.name and el.namespace is NS_HTML
+ break
+ clear_afe_to_marker()
+ return
+ if t.type is TYPE_START_TAG and t.name is 'table'
+ unless doc.flag('quirks mode') is QUIRKS_YES
+ close_p_if_in_button_scope() # test
+ insert_html_element t
+ flag_frameset_ok = false
+ ins_mode = ins_mode_in_table
+ return
+ if t.type is TYPE_END_TAG and t.name is 'br'
+ parse_error()
+ # W3C: t.type = TYPE_START_TAG
+ t = new_open_tag 'br' # WHATWG
+ # fall through
+ if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
+ reconstruct_afe()
+ insert_html_element t
+ open_els.shift()
+ t.acknowledge_self_closing()
+ flag_frameset_ok = false
+ return
+ if t.type is TYPE_START_TAG and t.name is 'input'
+ reconstruct_afe()
+ insert_html_element t
+ open_els.shift()
+ t.acknowledge_self_closing()
+ unless is_input_hidden_tok t
+ flag_frameset_ok = false
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
+ # WHATWG adds 'menuitem' for this block
+ insert_html_element t
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ if t.type is TYPE_START_TAG and t.name is 'hr'
+ close_p_if_in_button_scope()
+ insert_html_element t
+ open_els.shift()
+ t.acknowledge_self_closing()
+ flag_frameset_ok = false
+ return
+ if t.type is TYPE_START_TAG and t.name is 'image'
+ parse_error()
+ t.name = 'img'
+ process_token t
+ return
+ if t.type is TYPE_START_TAG and t.name is 'isindex'
+ parse_error()
+ if template_tag_is_open() is false and form_element_pointer isnt null
+ return
+ t.acknowledge_self_closing()
+ flag_frameset_ok = false
+ close_p_if_in_button_scope()
+ el = insert_html_element new_open_tag 'form'
+ unless template_tag_is_open()
+ form_element_pointer = el
+ for a in t.attrs_a
+ if a[0] is 'action'
+ el.attrs['action'] = a[1]
+ break
+ insert_html_element new_open_tag 'hr'
+ open_els.shift()
+ reconstruct_afe()
+ insert_html_element new_open_tag 'label'
+ # note: this is a little out-of-spec-order so we only have to scan t.attrs_a once
+ input_el = new_open_tag 'input'
+ prompt = null
+ for a in t.attrs_a
+ if a[0] is 'prompt'
+ prompt = a[1]
+ if a[0] isnt 'name' and a[0] isnt 'action' and a[0] isnt 'prompt'
+ input_el.attrs_a.push [a[0], a[1]]
+ input_el.attrs_a.push ['name', 'isindex']
+ # fixfull this next bit is in english... internationalize?
+ prompt ?= "This is a searchable index. Enter search keywords: "
+ insert_character new_character_token prompt # fixfull split
+ # TODO submit typo "balue" in spec
+ insert_html_element input_el
+ open_els.shift()
+ # insert_character '' # you can put chars here if prompt attr missing
+ open_els.shift()
+ insert_html_element new_open_tag 'hr'
+ open_els.shift()
+ open_els.shift()
+ unless template_tag_is_open()
+ form_element_pointer = null
+ return
+ if t.type is TYPE_START_TAG and t.name is 'textarea'
+ insert_html_element t
+ eat_next_token_if_newline()
+ tok_state = tok_state_rcdata
+ original_ins_mode = ins_mode
+ flag_frameset_ok = false
+ ins_mode = ins_mode_text
+ return
+ if t.type is TYPE_START_TAG and t.name is 'xmp'
+ close_p_if_in_button_scope()
+ reconstruct_afe()
+ flag_frameset_ok = false
+ parse_generic_raw_text t
+ return
+ if t.type is TYPE_START_TAG and t.name is 'iframe'
+ flag_frameset_ok = false
+ parse_generic_raw_text t
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'noembed' or (t.name is 'noscript' and flag_scripting))
+ parse_generic_raw_text t
+ return
+ if t.type is TYPE_START_TAG and t.name is 'select'
+ reconstruct_afe()
+ insert_html_element t
+ flag_frameset_ok = false
+ if ins_mode is ins_mode_in_table or ins_mode is ins_mode_in_caption or ins_mode is ins_mode_in_table_body or ins_mode is ins_mode_in_row or ins_mode is ins_mode_in_cell
+ ins_mode = ins_mode_in_select_in_table
+ else
+ ins_mode = ins_mode_in_select
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'optgroup' or t.name is 'option')
+ if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
+ open_els.shift()
+ reconstruct_afe()
+ insert_html_element t
+ return
+# this comment block implements the W3C spec
+# if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rp' or t.name is 'rtc')
+# if is_in_scope 'ruby', NS_HTML
+# generate_implied_end_tags()
+# unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
+# parse_error()
+# insert_html_element t
+# return
+# if t.type is TYPE_START_TAG and t.name is 'rt'
+# if is_in_scope 'ruby', NS_HTML
+# generate_implied_end_tags 'rtc' # arg is exception
+# unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
+# parse_error()
+# insert_html_element t
+# return
+# below implements the WHATWG spec https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inbody
+ if t.type is TYPE_START_TAG and (t.name is 'rb' or t.name is 'rtc')
+ if is_in_scope 'ruby', NS_HTML
+ generate_implied_end_tags()
+ unless open_els[0].name is 'ruby' and open_els[0].namespace is NS_HTML
+ parse_error()
+ insert_html_element t
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'rp' or t.name is 'rt')
+ if is_in_scope 'ruby', NS_HTML
+ generate_implied_end_tags 'rtc'
+ unless (open_els[0].name is 'ruby' or open_els[0].name is 'rtc') and open_els[0].namespace is NS_HTML
+ parse_error()
+ insert_html_element t
+ return
+# end WHATWG chunk
+ if t.type is TYPE_START_TAG and t.name is 'math'
+ reconstruct_afe()
+ adjust_mathml_attributes t
+ adjust_foreign_attributes t
+ insert_foreign_element t, NS_MATHML
+ if t.flag 'self-closing'
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ if t.type is TYPE_START_TAG and t.name is 'svg'
+ reconstruct_afe()
+ adjust_svg_attributes t
+ adjust_foreign_attributes t
+ insert_foreign_element t, NS_SVG
+ if t.flag 'self-closing'
+ open_els.shift()
+ t.acknowledge_self_closing()
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'frame' or t.name is 'head' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
+ parse_error()
+ return
+ if t.type is TYPE_START_TAG # any other start tag
+ reconstruct_afe()
+ insert_html_element t
+ return
+ if t.type is TYPE_END_TAG # any other end tag
+ in_body_any_other_end_tag t.name
+ return
+ return
+
+ # 8.2.5.4.8 http://www.w3.org/TR/html5/syntax.html#parsing-main-incdata
+ # "text" insertion mode handler.
+ # t: the token to process. Text is inserted as-is; EOF and end tags pop the
+ # current node and restore the insertion mode saved in original_ins_mode.
+ ins_mode_text = (t) ->
+   switch t.type
+     when TYPE_TEXT
+       insert_character t
+     when TYPE_EOF
+       parse_error()
+       if open_els[0].namespace is NS_HTML and open_els[0].name is 'script'
+         open_els[0].flag 'already started', true
+       open_els.shift()
+       ins_mode = original_ins_mode
+       # EOF is reprocessed under the restored insertion mode
+       process_token t
+     when TYPE_END_TAG
+       # </script> and every other end tag are handled the same way here
+       # fixfull the spec seems to assume that I'm going to run the script
+       # http://www.w3.org/TR/html5/syntax.html#scriptEndTag
+       open_els.shift()
+       ins_mode = original_ins_mode
+   return
+
+ # the functions below implement the tokenizer states described here:
+ # http://www.w3.org/TR/html5/syntax.html#tokenization
+
+ # 8.2.5.4.9 http://www.w3.org/TR/html5/syntax.html#parsing-main-intable
+ # "Anything else" fallback used by the table-related handlers in this file:
+ # reports a parse error, then hands the token to ins_mode_in_body with
+ # flag_foster_parenting switched on for the duration of that call.
+ ins_mode_in_table_else = (t) ->
+ parse_error()
+ flag_foster_parenting = true
+ ins_mode_in_body t
+ # unconditionally cleared afterwards, whatever its value was on entry
+ flag_foster_parenting = false
+ return
+ # "in table" insertion mode handler.
+ # t: the token to process. Dispatches first on the token type and then, for
+ # start/end tags, on the tag name. Anything not handled explicitly goes to
+ # ins_mode_in_table_else.
+ ins_mode_in_table = (t) ->
+ switch t.type
+ when TYPE_TEXT
+ if (open_els[0].name is 'table' or open_els[0].name is 'tbody' or open_els[0].name is 'tfoot' or open_els[0].name is 'thead' or open_els[0].name is 'tr') and open_els[0].namespace is NS_HTML
+ # start buffering character tokens; ins_mode_in_table_text decides later
+ # what to do with them and then restores original_ins_mode
+ pending_table_character_tokens = []
+ original_ins_mode = ins_mode
+ ins_mode = ins_mode_in_table_text
+ process_token t
+ else
+ ins_mode_in_table_else t
+ when TYPE_COMMENT
+ insert_comment t
+ when TYPE_DOCTYPE
+ parse_error()
+ when TYPE_START_TAG
+ switch t.name
+ when 'caption'
+ clear_stack_to_table_context()
+ afe_push_marker()
+ insert_html_element t
+ ins_mode = ins_mode_in_caption
+ when 'colgroup'
+ clear_stack_to_table_context()
+ insert_html_element t
+ ins_mode = ins_mode_in_column_group
+ when 'col'
+ # a <colgroup> is synthesized, then the <col> token is reprocessed
+ clear_stack_to_table_context()
+ insert_html_element new_open_tag 'colgroup'
+ ins_mode = ins_mode_in_column_group
+ process_token t
+ when 'tbody', 'tfoot', 'thead'
+ clear_stack_to_table_context()
+ insert_html_element t
+ ins_mode = ins_mode_in_table_body
+ when 'td', 'th', 'tr'
+ # a <tbody> is synthesized, then the token is reprocessed
+ clear_stack_to_table_context()
+ insert_html_element new_open_tag 'tbody'
+ ins_mode = ins_mode_in_table_body
+ process_token t
+ when 'table'
+ # nested <table>: pop through the open table, then reprocess the token
+ parse_error()
+ if is_in_table_scope 'table', NS_HTML
+ loop
+ el = open_els.shift()
+ if el.name is 'table' and el.namespace is NS_HTML
+ break
+ reset_ins_mode()
+ process_token t
+ when 'style', 'script', 'template'
+ ins_mode_in_head t
+ when 'input'
+ unless is_input_hidden_tok t
+ ins_mode_in_table_else t
+ else
+ # inserted and immediately popped again
+ parse_error()
+ el = insert_html_element t
+ open_els.shift()
+ t.acknowledge_self_closing()
+ when 'form'
+ parse_error()
+ # token is dropped when a form pointer is already set or a template is open
+ if form_element_pointer?
+ return
+ if template_tag_is_open()
+ return
+ form_element_pointer = insert_html_element t
+ open_els.shift()
+ else
+ ins_mode_in_table_else t
+ when TYPE_END_TAG
+ switch t.name
+ when 'table'
+ if is_in_table_scope 'table', NS_HTML
+ loop
+ el = open_els.shift()
+ if el.name is 'table' and el.namespace is NS_HTML
+ break
+ reset_ins_mode()
+ else
+ parse_error()
+ when 'body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
+ # these end tags are reported and otherwise ignored
+ parse_error()
+ when 'template'
+ ins_mode_in_head t
+ else
+ ins_mode_in_table_else t
+ when TYPE_EOF
+ ins_mode_in_body t
+ else
+ ins_mode_in_table_else t
+ return
+
+
+ # 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
+ # "in table text" insertion mode handler.
+ # t: the token to process. Text tokens are buffered in
+ # pending_table_character_tokens; the first non-text token flushes the
+ # buffer, restores original_ins_mode and is then reprocessed.
+ ins_mode_in_table_text = (t) ->
+ if t.type is TYPE_TEXT and t.text is "\u0000"
+ # from javascript?
+ parse_error()
+ return
+ if t.type is TYPE_TEXT
+ pending_table_character_tokens.push t
+ return
+ # Anything else
+ # buffered tokens are inserted directly only if every one passes is_space_tok;
+ # otherwise each one goes through ins_mode_in_table_else
+ all_space = true
+ for old in pending_table_character_tokens
+ unless is_space_tok old
+ all_space = false
+ break
+ if all_space
+ for old in pending_table_character_tokens
+ insert_character old
+ else
+ for old in pending_table_character_tokens
+ ins_mode_in_table_else old
+ pending_table_character_tokens = []
+ ins_mode = original_ins_mode
+ process_token t
+ return
+
+ # 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption
+ # "in caption" insertion mode handler.
+ # t: the token to process. </caption> closes the caption; table-structure
+ # start tags and </table> close it and are reprocessed; a fixed list of other
+ # end tags is reported and ignored; everything else goes to ins_mode_in_body.
+ ins_mode_in_caption = (t) ->
+ if t.type is TYPE_END_TAG and t.name is 'caption'
+ if is_in_table_scope 'caption', NS_HTML
+ generate_implied_end_tags()
+ # NOTE(review): unlike the pop loop below, this check compares the name only,
+ # not the namespace
+ if open_els[0].name isnt 'caption'
+ parse_error()
+ loop
+ el = open_els.shift()
+ if el.name is 'caption' and el.namespace is NS_HTML
+ break
+ clear_afe_to_marker()
+ ins_mode = ins_mode_in_table
+ else
+ parse_error()
+ # fragment case
+ return
+ # "and" binds tighter than "or", so the trailing clause reads as
+ # (t.type is TYPE_END_TAG and t.name is 'table')
+ if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
+ parse_error()
+ if is_in_table_scope 'caption', NS_HTML
+ loop
+ el = open_els.shift()
+ if el.name is 'caption' and el.namespace is NS_HTML
+ break
+ clear_afe_to_marker()
+ ins_mode = ins_mode_in_table
+ process_token t
+ # else fragment case
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
+ parse_error()
+ return
+ # Anything else
+ ins_mode_in_body t
+ return
+
+ # 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup
+ # "in column group" insertion mode handler.
+ # t: the token to process.
+ # Whitespace and comments are inserted, <col> is inserted and immediately
+ # popped, </colgroup> closes the current colgroup, and any unhandled token
+ # closes the colgroup (if it is the current node) and is reprocessed in
+ # ins_mode_in_table.
+ ins_mode_in_column_group = (t) ->
+   if is_space_tok t
+     insert_character t
+     return
+   if t.type is TYPE_COMMENT
+     insert_comment t
+     return
+   if t.type is TYPE_DOCTYPE
+     parse_error()
+     return
+   if t.type is TYPE_START_TAG and t.name is 'html'
+     ins_mode_in_body t
+     return
+   if t.type is TYPE_START_TAG and t.name is 'col'
+     el = insert_html_element t
+     open_els.shift()
+     t.acknowledge_self_closing()
+     return
+   if t.type is TYPE_END_TAG and t.name is 'colgroup'
+     # The namespace must be read from the current node (open_els[0]), not from
+     # the stack array itself: open_els.namespace is always undefined, which
+     # made this branch unreachable and turned every </colgroup> into a
+     # parse error that left the colgroup open.
+     if open_els[0].name is 'colgroup' and open_els[0].namespace is NS_HTML
+       open_els.shift()
+       ins_mode = ins_mode_in_table
+     else
+       parse_error()
+     return
+   if t.type is TYPE_END_TAG and t.name is 'col'
+     parse_error()
+     return
+   if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
+     ins_mode_in_head t
+     return
+   if t.type is TYPE_EOF
+     ins_mode_in_body t
+     return
+   # Anything else
+   if open_els[0].name isnt 'colgroup'
+     parse_error()
+     return
+   open_els.shift()
+   ins_mode = ins_mode_in_table
+   process_token t
+   return
+
+ # 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
+ # "in table body" insertion mode handler.
+ # t: the token to process. Handles row/cell start tags and the closing of the
+ # current tbody/tfoot/thead; anything not handled here is passed on to
+ # ins_mode_in_table.
+ ins_mode_in_table_body = (t) ->
+ if t.type is TYPE_START_TAG and t.name is 'tr'
+ clear_stack_to_table_body_context()
+ insert_html_element t
+ ins_mode = ins_mode_in_row
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
+ # a cell directly in a table body: a <tr> is synthesized and the token reprocessed
+ parse_error()
+ clear_stack_to_table_body_context()
+ insert_html_element new_open_tag 'tr'
+ ins_mode = ins_mode_in_row
+ process_token t
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
+ unless is_in_table_scope t.name, NS_HTML
+ parse_error()
+ return
+ clear_stack_to_table_body_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ return
+ if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')) or (t.type is TYPE_END_TAG and t.name is 'table')
+ # hand-rolled scope check: walk the stack from the current node looking for an
+ # HTML tbody/tfoot/thead, stopping at the first entry listed in table_scopers
+ has = false
+ for el in open_els
+ if el.namespace is NS_HTML and (el.name is 'tbody' or el.name is 'tfoot' or el.name is 'thead')
+ has = true
+ break
+ if table_scopers[el.name] is el.namespace
+ break
+ if !has
+ parse_error()
+ return
+ clear_stack_to_table_body_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table
+ process_token t
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th' or t.name is 'tr')
+ parse_error()
+ return
+ # Anything else
+ ins_mode_in_table t
+ return
+
+ # 8.2.5.4.14 http://www.w3.org/TR/html5/syntax.html#parsing-main-intr
+ # "in row" insertion mode handler.
+ # t: the token to process. Opens cells, closes the current row, and passes
+ # anything not handled here on to ins_mode_in_table.
+ ins_mode_in_row = (t) ->
+ if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
+ clear_stack_to_table_row_context()
+ insert_html_element t
+ ins_mode = ins_mode_in_cell
+ afe_push_marker()
+ return
+ if t.type is TYPE_END_TAG and t.name is 'tr'
+ if is_in_table_scope 'tr', NS_HTML
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ else
+ parse_error()
+ return
+ # "and" binds tighter than "or", so the trailing clause reads as
+ # (t.type is TYPE_END_TAG and t.name is 'table')
+ if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
+ # the row is closed first, then the token is reprocessed
+ if is_in_table_scope 'tr', NS_HTML
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ process_token t
+ else
+ parse_error()
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
+ # two scope checks: the named section, then the row itself
+ if is_in_table_scope t.name, NS_HTML
+ if is_in_table_scope 'tr', NS_HTML
+ clear_stack_to_table_row_context()
+ open_els.shift()
+ ins_mode = ins_mode_in_table_body
+ process_token t
+ else
+ parse_error()
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'td' or t.name is 'th')
+ parse_error()
+ return
+ # Anything else
+ ins_mode_in_table t
+ return
+
+ # http://www.w3.org/TR/html5/syntax.html#close-the-cell
+ # Closes the currently open table cell: generates implied end tags, reports
+ # a parse error if the current node is not an HTML td/th, pops the stack up
+ # to and including the nearest HTML td/th, clears the active formatting
+ # elements back to the last marker and switches to ins_mode_in_row.
+ close_the_cell = ->
+   generate_implied_end_tags()
+   # Compare the element's *name* with 'th'. Comparing the element object
+   # itself with the string was always false, so a current <th> node was
+   # wrongly reported as a parse error.
+   unless (open_els[0].name is 'td' or open_els[0].name is 'th') and open_els[0].namespace is NS_HTML
+     parse_error()
+   loop
+     el = open_els.shift()
+     if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
+       break
+   clear_afe_to_marker()
+   ins_mode = ins_mode_in_row
+   return
+
+ # 8.2.5.4.15 http://www.w3.org/TR/html5/syntax.html#parsing-main-intd
+ # "in cell" insertion mode handler.
+ # t: the token to process. Closes the cell on </td>/</th>, closes it and
+ # reprocesses on table-structure tags, and sends everything else to
+ # ins_mode_in_body.
+ ins_mode_in_cell = (t) ->
+ if t.type is TYPE_END_TAG and (t.name is 'td' or t.name is 'th')
+ if is_in_table_scope t.name, NS_HTML
+ generate_implied_end_tags()
+ unless (open_els[0].name is t.name) and open_els[0].namespace is NS_HTML
+ parse_error()
+ loop
+ el = open_els.shift()
+ if el.name is t.name and el.namespace is NS_HTML
+ break
+ clear_afe_to_marker()
+ ins_mode = ins_mode_in_row
+ else
+ parse_error()
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
+ # hand-rolled scope check: walk the stack from the current node looking for an
+ # HTML td/th, stopping at the first entry listed in table_scopers
+ has = false
+ for el in open_els
+ if el.namespace is NS_HTML and (el.name is 'td' or el.name is 'th')
+ has = true
+ break
+ if table_scopers[el.name] is el.namespace
+ break
+ if !has
+ parse_error()
+ return
+ close_the_cell()
+ process_token t
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html')
+ parse_error()
+ return
+ if t.type is TYPE_END_TAG and (t.name is 'table' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead' or t.name is 'tr')
+ # the cell is closed first, then the end tag is reprocessed
+ if is_in_table_scope t.name, NS_HTML
+ close_the_cell()
+ process_token t
+ else
+ parse_error()
+ return
+ # Anything Else
+ ins_mode_in_body t
+ return
+
+ # 8.2.5.4.16 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselect
+ # "in select" insertion mode handler.
+ # t: the token to process.
+ # Handles text, comments, option/optgroup nesting and the various ways a
+ # select gets closed; any token not handled here is a parse error and is
+ # dropped.
+ ins_mode_in_select = (t) ->
+   if t.type is TYPE_TEXT and t.text is "\u0000"
+     parse_error()
+     return
+   if t.type is TYPE_TEXT
+     insert_character t
+     return
+   if t.type is TYPE_COMMENT
+     insert_comment t
+     return
+   if t.type is TYPE_DOCTYPE
+     parse_error()
+     return
+   if t.type is TYPE_START_TAG and t.name is 'html'
+     ins_mode_in_body t
+     return
+   if t.type is TYPE_START_TAG and t.name is 'option'
+     # a new <option> implicitly closes an open one
+     if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
+       open_els.shift()
+     insert_html_element t
+     return
+   if t.type is TYPE_START_TAG and t.name is 'optgroup'
+     # a new <optgroup> implicitly closes an open option and/or optgroup
+     if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
+       open_els.shift()
+     if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
+       open_els.shift()
+     insert_html_element t
+     return
+   if t.type is TYPE_END_TAG and t.name is 'optgroup'
+     if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
+       # Both the name and the namespace test must look at the node *below*
+       # the current one (open_els[1]); the namespace was previously read
+       # from open_els[0], which the enclosing condition had already checked.
+       if open_els[1].name is 'optgroup' and open_els[1].namespace is NS_HTML
+         open_els.shift()
+     if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
+       open_els.shift()
+     else
+       parse_error()
+     return
+   if t.type is TYPE_END_TAG and t.name is 'option'
+     if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
+       open_els.shift()
+     else
+       parse_error()
+     return
+   if t.type is TYPE_END_TAG and t.name is 'select'
+     if is_in_select_scope 'select', NS_HTML
+       loop
+         el = open_els.shift()
+         if el.name is 'select' and el.namespace is NS_HTML
+           break
+       reset_ins_mode()
+     else
+       parse_error()
+     return
+   if t.type is TYPE_START_TAG and t.name is 'select'
+     parse_error()
+     loop
+       el = open_els.shift()
+       if el.name is 'select' and el.namespace is NS_HTML
+         break
+     reset_ins_mode()
+     # spec says that this is the same as </select> but it doesn't say
+     # to check scope first
+     return
+   if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
+     parse_error()
+     unless is_in_select_scope 'select', NS_HTML
+       return
+     # close the select, then reprocess the token under the new mode
+     loop
+       el = open_els.shift()
+       if el.name is 'select' and el.namespace is NS_HTML
+         break
+     reset_ins_mode()
+     process_token t
+     return
+   if t.type is TYPE_START_TAG and (t.name is 'script' or t.name is 'template')
+     ins_mode_in_head t
+     return
+   if t.type is TYPE_EOF
+     ins_mode_in_body t
+     return
+   # Anything else
+   parse_error()
+   return
+
+ # 8.2.5.4.17 http://www.w3.org/TR/html5/syntax.html#parsing-main-inselectintable
+ # "in select in table" insertion mode handler.
+ # t: the token to process. Table-structure start/end tags close the open
+ # select and are reprocessed; every other token is delegated to
+ # ins_mode_in_select.
+ ins_mode_in_select_in_table = (t) ->
+   if t.name in ['caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th']
+     if t.type is TYPE_START_TAG
+       parse_error()
+       loop
+         el = open_els.shift()
+         break if el.namespace is NS_HTML and el.name is 'select'
+       reset_ins_mode()
+       process_token t
+       return
+     if t.type is TYPE_END_TAG
+       parse_error()
+       # end tags with no matching element in table scope are dropped
+       return unless is_in_table_scope t.name, NS_HTML
+       loop
+         el = open_els.shift()
+         break if el.namespace is NS_HTML and el.name is 'select'
+       reset_ins_mode()
+       process_token t
+       return
+   # Anything else
+   ins_mode_in_select t
+   return
+
+ # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
+ # "in template" insertion mode handler.
+ # t: the token to process. Most start tags replace the top entry of
+ # template_ins_modes with a mode chosen from the tag name, switch to that
+ # mode and reprocess the token.
+ ins_mode_in_template = (t) ->
+   switch t.type
+     when TYPE_TEXT, TYPE_COMMENT, TYPE_DOCTYPE
+       ins_mode_in_body t
+     when TYPE_START_TAG
+       switch t.name
+         when 'base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'script', 'style', 'template', 'title'
+           ins_mode_in_head t
+           return
+         when 'caption', 'colgroup', 'tbody', 'tfoot', 'thead'
+           template_ins_modes.shift()
+           template_ins_modes.unshift ins_mode_in_table
+           ins_mode = ins_mode_in_table
+         when 'col'
+           template_ins_modes.shift()
+           template_ins_modes.unshift ins_mode_in_column_group
+           ins_mode = ins_mode_in_column_group
+         when 'tr'
+           template_ins_modes.shift()
+           template_ins_modes.unshift ins_mode_in_table_body
+           ins_mode = ins_mode_in_table_body
+         when 'td', 'th'
+           template_ins_modes.shift()
+           template_ins_modes.unshift ins_mode_in_row
+           ins_mode = ins_mode_in_row
+         else
+           template_ins_modes.shift()
+           template_ins_modes.unshift ins_mode_in_body
+           ins_mode = ins_mode_in_body
+       # every branch that did not return above reprocesses the token
+       process_token t
+     when TYPE_END_TAG
+       if t.name is 'template'
+         ins_mode_in_head t
+       else
+         parse_error()
+     when TYPE_EOF
+       unless template_tag_is_open()
+         stop_parsing()
+         return
+       parse_error()
+       loop
+         el = open_els.shift()
+         break if el.name is 'template' and el.namespace is NS_HTML
+       clear_afe_to_marker()
+       template_ins_modes.shift()
+       reset_ins_mode()
+       process_token t
+   return
+
+ # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
+ # "after body" insertion mode handler.
+ # t: the token to process. Unhandled tokens are a parse error and are
+ # reprocessed under ins_mode_in_body.
+ ins_mode_after_body = (t) ->
+   if is_space_tok t
+     ins_mode_in_body t
+   else if t.type is TYPE_COMMENT
+     # position passed to insert_comment: the last entry of open_els, at the
+     # index one past its current children
+     first = open_els[open_els.length - 1]
+     insert_comment t, [first, first.children.length]
+   else if t.type is TYPE_DOCTYPE
+     parse_error()
+   else if t.type is TYPE_START_TAG and t.name is 'html'
+     ins_mode_in_body t
+   else if t.type is TYPE_END_TAG and t.name is 'html'
+     if flag_fragment_parsing
+       parse_error()
+     else
+       ins_mode = ins_mode_after_after_body
+   else if t.type is TYPE_EOF
+     stop_parsing()
+   else
+     # Anything else
+     parse_error()
+     ins_mode = ins_mode_in_body
+     process_token t
+   return
+
+ # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
+ # "in frameset" insertion mode handler.
+ # t: the token to process. Tokens that match no case below are a parse
+ # error and are dropped.
+ ins_mode_in_frameset = (t) ->
+   if is_space_tok t
+     insert_character t
+     return
+   switch t.type
+     when TYPE_COMMENT
+       insert_comment t
+       return
+     when TYPE_DOCTYPE
+       parse_error()
+       return
+     when TYPE_START_TAG
+       switch t.name
+         when 'html'
+           ins_mode_in_body t
+           return
+         when 'frameset'
+           insert_html_element t
+           return
+         when 'frame'
+           # inserted and immediately popped again
+           insert_html_element t
+           open_els.shift()
+           t.acknowledge_self_closing()
+           return
+         when 'noframes'
+           ins_mode_in_head t
+           return
+     when TYPE_END_TAG
+       if t.name is 'frameset'
+         if open_els.length is 1
+           parse_error()
+           return # fragment case
+         open_els.shift()
+         if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
+           ins_mode = ins_mode_after_frameset
+         return
+     when TYPE_EOF
+       parse_error() unless open_els.length is 1
+       stop_parsing()
+       return
+   # Anything else
+   parse_error()
+   return
+
+ # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
+ # "after frameset" insertion mode handler.
+ # t: the token to process. Tokens that match no case below are a parse
+ # error and are dropped.
+ ins_mode_after_frameset = (t) ->
+   if is_space_tok t
+     insert_character t
+     return
+   switch t.type
+     when TYPE_COMMENT
+       insert_comment t
+       return
+     when TYPE_DOCTYPE
+       parse_error()
+       return
+     when TYPE_EOF
+       stop_parsing()
+       return
+     when TYPE_START_TAG
+       if t.name is 'html'
+         ins_mode_in_body t
+         return
+       if t.name is 'noframes'
+         ins_mode_in_head t
+         return
+     when TYPE_END_TAG
+       if t.name is 'html'
+         ins_mode = ins_mode_after_after_frameset
+         return
+   # Anything else
+   parse_error()
+   return
+
+ # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
+ # "after after body" insertion mode handler.
+ # t: the token to process. Unhandled tokens are a parse error and are
+ # reprocessed under ins_mode_in_body.
+ ins_mode_after_after_body = (t) ->
+   if t.type is TYPE_COMMENT
+     # position passed to insert_comment: doc, one past its current children
+     insert_comment t, [doc, doc.children.length]
+   else if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+     ins_mode_in_body t
+   else if t.type is TYPE_EOF
+     stop_parsing()
+   else
+     # Anything else
+     parse_error()
+     ins_mode = ins_mode_in_body
+     process_token t
+   return
+
+ # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
+ # "after after frameset" insertion mode handler.
+ # t: the token to process. Tokens that match no case below are a parse
+ # error and are dropped.
+ ins_mode_after_after_frameset = (t) ->
+   if t.type is TYPE_COMMENT
+     # position passed to insert_comment: doc, one past its current children
+     insert_comment t, [doc, doc.children.length]
+   else if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+     ins_mode_in_body t
+   else if t.type is TYPE_EOF
+     stop_parsing()
+   else if t.type is TYPE_START_TAG and t.name is 'noframes'
+     ins_mode_in_head t
+   else
+     # Anything else
+     parse_error()
+   return
+
+ # 8.2.5.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inforeign
+ # Reports whether token t carries a 'color', 'face' or 'size' attribute.
+ # t.attrs_a is read as a list of entries whose first item is the attribute
+ # name. Returns true on the first match, false if there is none.
+ has_color_face_or_size = (t) ->
+   for a in t.attrs_a
+     return true if a[0] in ['color', 'face', 'size']
+   false
+ # Handles the end of a script element in foreign content. Currently this only
+ # pops the current node off open_els; the "fixfull" marker below flags the
+ # rest as unimplemented.
+ in_foreign_content_end_script = ->
+ open_els.shift()
+ # fixfull
+ return
+ # Inserts start tag token t as a foreign element in the namespace of the
+ # adjusted current node, after applying the attribute (and, for SVG, tag
+ # name) adjustments for that namespace. Self-closing tokens are acknowledged
+ # and their element is popped again.
+ in_foreign_content_other_start = (t) ->
+   acn = adjusted_current_node()
+   switch acn.namespace
+     when NS_MATHML
+       adjust_mathml_attributes t
+     when NS_SVG
+       # replace the tag name with its svg_name_fixes entry when there is one
+       if svg_name_fixes[t.name]?
+         t.name = svg_name_fixes[t.name]
+       adjust_svg_attributes t
+   adjust_foreign_attributes t
+   insert_foreign_element t, acn.namespace
+   return unless t.flag 'self-closing'
+   if t.name is 'script'
+     t.acknowledge_self_closing()
+     in_foreign_content_end_script()
+     # fixfull
+   else
+     open_els.shift()
+     t.acknowledge_self_closing()
+   return
+ # Token handler for foreign (non-HTML namespace) content.
+ # t: the token to process. A fixed list of HTML start tags breaks out of the
+ # foreign subtree and is reprocessed; other start tags are inserted as
+ # foreign elements; end tags are matched against the stack case-insensitively.
+ in_foreign_content = (t) ->
+ if t.type is TYPE_TEXT and t.text is "\u0000"
+ parse_error()
+ # the NUL is replaced by U+FFFD instead of being dropped
+ insert_character new_character_token "\ufffd"
+ return
+ if is_space_tok t
+ insert_character t
+ return
+ if t.type is TYPE_TEXT
+ flag_frameset_ok = false
+ insert_character t
+ return
+ if t.type is TYPE_COMMENT
+ insert_comment t
+ return
+ if t.type is TYPE_DOCTYPE
+ parse_error()
+ return
+ if t.type is TYPE_START_TAG and (t.name is 'b' or t.name is 'big' or t.name is 'blockquote' or t.name is 'body' or t.name is 'br' or t.name is 'center' or t.name is 'code' or t.name is 'dd' or t.name is 'div' or t.name is 'dl' or t.name is 'dt' or t.name is 'em' or t.name is 'embed' or t.name is 'h1' or t.name is 'h2' or t.name is 'h3' or t.name is 'h4' or t.name is 'h5' or t.name is 'h6' or t.name is 'head' or t.name is 'hr' or t.name is 'i' or t.name is 'img' or t.name is 'li' or t.name is 'listing' or t.name is 'main' or t.name is 'meta' or t.name is 'nobr' or t.name is 'ol' or t.name is 'p' or t.name is 'pre' or t.name is 'ruby' or t.name is 's' or t.name is 'small' or t.name is 'span' or t.name is 'strong' or t.name is 'strike' or t.name is 'sub' or t.name is 'sup' or t.name is 'table' or t.name is 'tt' or t.name is 'u' or t.name is 'ul' or t.name is 'var' or (t.name is 'font' and has_color_face_or_size(t)))
+ parse_error()
+ # when parsing a fragment the tag is inserted as a foreign element instead
+ if flag_fragment_parsing
+ in_foreign_content_other_start t
+ return
+ # pop until the current node is an integration point or an HTML element,
+ # then reprocess the token
+ loop # is this safe?
+ open_els.shift()
+ if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
+ break
+ process_token t
+ return
+ if t.type is TYPE_START_TAG
+ in_foreign_content_other_start t
+ return
+ if t.type is TYPE_END_TAG and t.name is 'script' and open_els[0].name is 'script' and open_els[0].namespace is NS_SVG
+ in_foreign_content_end_script()
+ return
+ if t.type is TYPE_END_TAG
+ # walk the stack from the current node (index 0) towards the last entry
+ i = 0
+ node = open_els[i]
+ if node.name.toLowerCase() isnt t.name
+ parse_error()
+ loop
+ # reached the last stack entry without a match: drop the token
+ if node is open_els[open_els.length - 1]
+ return
+ if node.name.toLowerCase() is t.name
+ # match found: pop everything up to and including it
+ loop
+ el = open_els.shift()
+ if el is node
+ return
+ i += 1
+ node = open_els[i]
+ # an HTML element ends the walk; the token then goes to the current
+ # insertion mode handler
+ if node.namespace is NS_HTML
+ break
+ ins_mode t # explicitly call HTML insertion mode
+ return
+
+
+ # 8.2.4.1 http://www.w3.org/TR/html5/syntax.html#data-state
+ tok_state_data = ->
+   # Data state: consume one character and return a token for it, or null
+   # when the character only switches the tokenizer state.
+   c = txt.charAt(cur++)
+   if c is '<'
+     tok_state = tok_state_tag_open
+     return null
+   if c is '&'
+     return new_text_node parse_character_reference()
+   if c is '' # EOF
+     return new_eof_token()
+   # U+0000 is reported as a parse error but still passed through as text
+   parse_error() if c is "\u0000"
+   return new_text_node c
+
+ # 8.2.4.2 http://www.w3.org/TR/html5/syntax.html#character-reference-in-data-state
+ # not needed: tok_state_character_reference_in_data = ->
+ # just call parse_character_reference()
+
+ # 8.2.4.3 http://www.w3.org/TR/html5/syntax.html#rcdata-state
+ tok_state_rcdata = ->
+   # RCDATA state: character references are decoded, '<' may start an end
+   # tag, U+0000 is replaced by U+FFFD.
+   c = txt.charAt(cur++)
+   if c is '&'
+     return new_text_node parse_character_reference()
+   if c is '<'
+     tok_state = tok_state_rcdata_less_than_sign
+     return null
+   if c is "\u0000"
+     parse_error()
+     return new_character_token "\ufffd"
+   if c is '' # EOF
+     return new_eof_token()
+   return new_character_token c
+
+ # 8.2.4.4 http://www.w3.org/TR/html5/syntax.html#character-reference-in-rcdata-state
+ # not needed: tok_state_character_reference_in_rcdata = ->
+ # just call parse_character_reference()
+
+ # 8.2.4.5 http://www.w3.org/TR/html5/syntax.html#rawtext-state
+ tok_state_rawtext = ->
+   # RAWTEXT state: only '<', U+0000 and EOF get special treatment.
+   c = txt.charAt(cur++)
+   if c is '<'
+     tok_state = tok_state_rawtext_less_than_sign
+     return null
+   if c is '' # EOF
+     return new_eof_token()
+   if c is "\u0000"
+     parse_error()
+     return new_character_token "\ufffd"
+   return new_character_token c
+
+ # 8.2.4.6 http://www.w3.org/TR/html5/syntax.html#script-data-state
+ tok_state_script_data = ->
+   # Script data state: '<' may begin an end tag or an escape sequence;
+   # U+0000 is replaced by U+FFFD.
+   c = txt.charAt(cur++)
+   if c is '<'
+     tok_state = tok_state_script_data_less_than_sign
+     return null
+   if c is '' # EOF
+     return new_eof_token()
+   if c is "\u0000"
+     parse_error()
+     return new_character_token "\ufffd"
+   return new_character_token c
+
+ # 8.2.4.7 http://www.w3.org/TR/html5/syntax.html#plaintext-state
+ tok_state_plaintext = ->
+   # PLAINTEXT state: every character is text; U+0000 becomes U+FFFD.
+   c = txt.charAt(cur++)
+   if c is '' # EOF
+     return new_eof_token()
+   if c is "\u0000"
+     parse_error()
+     return new_character_token "\ufffd"
+   return new_character_token c
+
+
+ # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
+ tok_state_tag_open = ->
+   # Tag open state: decide what the character after '<' starts. Returns a
+   # '<' text token only when the '<' turned out not to start any markup.
+   c = txt.charAt(cur++)
+   if c is '!'
+     tok_state = tok_state_markup_declaration_open
+   else if c is '/'
+     tok_state = tok_state_end_tag_open
+   else if is_uc_alpha(c) or is_lc_alpha(c)
+     # tag names are stored lowercased
+     tok_cur_tag = new_open_tag c.toLowerCase()
+     tok_state = tok_state_tag_name
+   else if c is '?'
+     parse_error()
+     tok_cur_tag = new_comment_token '?' # FIXME right?
+     tok_state = tok_state_bogus_comment
+   else
+     # Anything else
+     parse_error()
+     tok_state = tok_state_data
+     cur -= 1 # we didn't parse/handle the char after <
+     return new_text_node '<'
+   return
+
+ # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
+ tok_state_end_tag_open = ->
+   # End tag open state: the character after '</'. A letter starts an end
+   # tag; everything else is a parse error.
+   c = txt.charAt(cur++)
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     tok_cur_tag = new_end_tag c.toLowerCase()
+     tok_state = tok_state_tag_name
+     return
+   parse_error()
+   switch c
+     when '>'
+       tok_state = tok_state_data
+       return
+     when '' # EOF
+       tok_state = tok_state_data
+       return new_text_node '</'
+   # Anything else starts a bogus comment
+   tok_cur_tag = new_comment_token c
+   tok_state = tok_state_bogus_comment
+   return null
+
+ # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
+ tok_state_tag_name = ->
+   # Tag name state: extend the current tag's name until whitespace, '/',
+   # '>' or EOF. Returns the finished tag token on '>', otherwise null.
+   c = txt.charAt(cur++)
+   if c is '>'
+     tok_state = tok_state_data
+     tmp = tok_cur_tag
+     tok_cur_tag = null
+     return tmp
+   if c in ["\t", "\n", "\u000c", ' ']
+     tok_state = tok_state_before_attribute_name
+   else if c is '/'
+     tok_state = tok_state_self_closing_start_tag
+   else if c is "\u0000"
+     parse_error()
+     tok_cur_tag.name += "\ufffd"
+   else if c is '' # EOF
+     parse_error()
+     tok_state = tok_state_data
+   else
+     # uppercase letters are folded to lowercase in the tag name
+     tok_cur_tag.name += if is_uc_alpha(c) then c.toLowerCase() else c
+   return null
+
+ # 8.2.4.11 http://www.w3.org/TR/html5/syntax.html#rcdata-less-than-sign-state
+ tok_state_rcdata_less_than_sign = ->
+   # After '<' in RCDATA: only '/' can begin an end tag; otherwise the '<'
+   # is plain text.
+   c = txt.charAt(cur++)
+   unless c is '/'
+     tok_state = tok_state_rcdata
+     cur -= 1 # reconsume the input character
+     return new_character_token '<'
+   temporary_buffer = ''
+   tok_state = tok_state_rcdata_end_tag_open
+   return null
+
+ # 8.2.4.12 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-open-state
+ tok_state_rcdata_end_tag_open = ->
+   # After '</' in RCDATA: a letter starts a candidate end tag, anything
+   # else means the '</' was text.
+   c = txt.charAt(cur++)
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     # tag name is lowercased; the buffer keeps the character as typed
+     tok_cur_tag = new_end_tag c.toLowerCase()
+     temporary_buffer += c
+     tok_state = tok_state_rcdata_end_tag_name
+     return null
+   # Anything else
+   tok_state = tok_state_rcdata
+   cur -= 1 # reconsume the input character
+   return new_character_token "</" # fixfull separate these
+
+ # http://www.w3.org/TR/html5/syntax.html#appropriate-end-tag-token
+ is_appropriate_end_tag = (t) ->
+   # True when t is an end tag whose name equals the name of the first entry
+   # of open_els.
+   # fixfull: this assumes that open_els[0].name is "the tag name of the last
+   # start tag to have been emitted from this tokenizer"
+   return false unless t.type is TYPE_END_TAG
+   return t.name is open_els[0].name
+
+ # 8.2.4.13 http://www.w3.org/TR/html5/syntax.html#rcdata-end-tag-name-state
+ tok_state_rcdata_end_tag_name = ->
+   # Collects the name of a candidate end tag in RCDATA. Whitespace, '/' and
+   # '>' finish the tag only if it is the appropriate end tag; otherwise the
+   # buffered characters are given back as text.
+   switch c = txt.charAt(cur++)
+     when "\t", "\n", "\u000c", ' '
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_before_attribute_name
+         return
+     when '/'
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_self_closing_start_tag # FIXME spec typo?
+         return
+     when '>'
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_data
+         return tok_cur_tag
+     else
+       if is_uc_alpha(c) or is_lc_alpha(c)
+         # name is lowercased, buffer keeps the original character
+         tok_cur_tag.name += c.toLowerCase()
+         temporary_buffer += c
+         return null
+   # Anything else (including an inappropriate end tag)
+   tok_state = tok_state_rcdata
+   cur -= 1 # reconsume the input character
+   return new_character_token '</' + temporary_buffer # fixfull separate these
+
+ # 8.2.4.14 http://www.w3.org/TR/html5/syntax.html#rawtext-less-than-sign-state
+ tok_state_rawtext_less_than_sign = ->
+   # After '<' in RAWTEXT: '/' may begin an end tag, anything else leaves
+   # the '<' as text.
+   c = txt.charAt(cur++)
+   unless c is '/'
+     tok_state = tok_state_rawtext
+     cur -= 1 # reconsume the input character
+     return new_character_token '<'
+   temporary_buffer = ''
+   tok_state = tok_state_rawtext_end_tag_open
+   return null
+
+ # 8.2.4.15 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-open-state
+ tok_state_rawtext_end_tag_open = ->
+   # After '</' in RAWTEXT: a letter starts a candidate end tag, anything
+   # else means the '</' was text.
+   c = txt.charAt(cur++)
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     tok_cur_tag = new_end_tag c.toLowerCase()
+     temporary_buffer += c
+     tok_state = tok_state_rawtext_end_tag_name
+     return null
+   # Anything else
+   tok_state = tok_state_rawtext
+   cur -= 1 # reconsume the input character
+   return new_character_token "</" # fixfull separate these
+
+ # 8.2.4.16 http://www.w3.org/TR/html5/syntax.html#rawtext-end-tag-name-state
+ tok_state_rawtext_end_tag_name = ->
+   # Collects the name of a candidate end tag in RAWTEXT. The tag is only
+   # accepted when it is the appropriate end tag; otherwise the buffered
+   # characters are returned as text.
+   switch c = txt.charAt(cur++)
+     when "\t", "\n", "\u000c", ' '
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_before_attribute_name
+         return
+     when '/'
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_self_closing_start_tag
+         return
+     when '>'
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_data
+         return tok_cur_tag
+     else
+       if is_uc_alpha(c) or is_lc_alpha(c)
+         tok_cur_tag.name += c.toLowerCase()
+         temporary_buffer += c
+         return null
+   # Anything else (including an inappropriate end tag)
+   tok_state = tok_state_rawtext
+   cur -= 1 # reconsume the input character
+   return new_character_token '</' + temporary_buffer # fixfull separate these
+
+ # 8.2.4.17 http://www.w3.org/TR/html5/syntax.html#script-data-less-than-sign-state
+ tok_state_script_data_less_than_sign = ->
+   # After '<' in script data: '/' may begin an end tag, '!' may begin an
+   # escape sequence, anything else leaves the '<' as text.
+   switch c = txt.charAt(cur++)
+     when '/'
+       temporary_buffer = ''
+       tok_state = tok_state_script_data_end_tag_open
+       return
+     when '!'
+       tok_state = tok_state_script_data_escape_start
+       return new_character_token '<!' # fixfull split
+   # Anything else
+   tok_state = tok_state_script_data
+   cur -= 1 # Reconsume
+   return new_character_token '<'
+
+ # 8.2.4.18 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-open-state
+ tok_state_script_data_end_tag_open = ->
+   # After '</' in script data: a letter starts a candidate end tag,
+   # anything else means the '</' was text.
+   c = txt.charAt(cur++)
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     tok_cur_tag = new_end_tag c.toLowerCase()
+     temporary_buffer += c
+     tok_state = tok_state_script_data_end_tag_name
+     return
+   # Anything else
+   tok_state = tok_state_script_data
+   cur -= 1 # Reconsume
+   return new_character_token '</'
+
+ # 8.2.4.19 http://www.w3.org/TR/html5/syntax.html#script-data-end-tag-name-state
+ tok_state_script_data_end_tag_name = ->
+   # Collects the name of a candidate end tag in script data. The tag is
+   # only accepted when it is the appropriate end tag; otherwise the buffered
+   # characters are returned as text.
+   switch c = txt.charAt(cur++)
+     when "\t", "\n", "\u000c", ' '
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_before_attribute_name
+         return
+     when '/'
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_self_closing_start_tag
+         return
+     when '>'
+       if is_appropriate_end_tag tok_cur_tag
+         tok_state = tok_state_data
+         return tok_cur_tag
+     else
+       if is_uc_alpha(c) or is_lc_alpha(c)
+         # name is lowercased, buffer keeps the original character
+         tok_cur_tag.name += c.toLowerCase()
+         temporary_buffer += c
+         return
+   # Anything else (including an inappropriate end tag)
+   tok_state = tok_state_script_data
+   cur -= 1 # Reconsume
+   return new_character_token "</#{temporary_buffer}" # fixfull split
+
+ # 8.2.4.20 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-state
+ tok_state_script_data_escape_start = ->
+   # After '<!' in script data: a '-' continues towards '<!--'; anything
+   # else goes back to plain script data.
+   c = txt.charAt(cur++)
+   unless c is '-'
+     tok_state = tok_state_script_data
+     cur -= 1 # Reconsume
+     return
+   tok_state = tok_state_script_data_escape_start_dash
+   return new_character_token '-'
+
+ # 8.2.4.21 http://www.w3.org/TR/html5/syntax.html#script-data-escape-start-dash-state
+ tok_state_script_data_escape_start_dash = ->
+   # After '<!-' in script data: a second '-' completes '<!--'; anything
+   # else goes back to plain script data.
+   c = txt.charAt(cur++)
+   unless c is '-'
+     tok_state = tok_state_script_data
+     cur -= 1 # Reconsume
+     return
+   tok_state = tok_state_script_data_escaped_dash_dash
+   return new_character_token '-'
+
+ # 8.2.4.22 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-state
+ tok_state_script_data_escaped = ->
+   # Script data escaped state (inside '<!--' within a script).
+   switch c = txt.charAt(cur++)
+     when '-'
+       tok_state = tok_state_script_data_escaped_dash
+       return new_character_token '-'
+     when '<'
+       tok_state = tok_state_script_data_escaped_less_than_sign
+       return
+     when "\u0000"
+       parse_error()
+       return new_character_token "\ufffd"
+     when '' # EOF
+       tok_state = tok_state_data
+       parse_error()
+       cur -= 1 # Reconsume
+       return
+   # Anything else
+   return new_character_token c
+
+ # 8.2.4.23 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-state
+ tok_state_script_data_escaped_dash = ->
+   # Script data escaped dash state: one '-' has been seen in escaped
+   # script data.
+   switch c = txt.charAt(cur++)
+     when '-'
+       tok_state = tok_state_script_data_escaped_dash_dash
+       return new_character_token '-'
+     when '<'
+       tok_state = tok_state_script_data_escaped_less_than_sign
+       return
+     when "\u0000"
+       parse_error()
+       tok_state = tok_state_script_data_escaped
+       return new_character_token "\ufffd"
+     when '' # EOF
+       tok_state = tok_state_data
+       parse_error()
+       cur -= 1 # Reconsume
+       return
+   # Anything else
+   tok_state = tok_state_script_data_escaped
+   return new_character_token c
+
+ # 8.2.4.24 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-dash-dash-state
+ tok_state_script_data_escaped_dash_dash = ->
+   # Script data escaped dash dash state: '--' has been seen, so a '>' ends
+   # the escaped section.
+   switch c = txt.charAt(cur++)
+     when '-'
+       return new_character_token '-'
+     when '<'
+       tok_state = tok_state_script_data_escaped_less_than_sign
+       return
+     when '>'
+       tok_state = tok_state_script_data
+       return new_character_token '>'
+     when "\u0000"
+       parse_error()
+       tok_state = tok_state_script_data_escaped
+       return new_character_token "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return
+   # Anything else
+   tok_state = tok_state_script_data_escaped
+   return new_character_token c
+
+ # 8.2.4.25 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-less-than-sign-state
+ tok_state_script_data_escaped_less_than_sign = ->
+   # After '<' in escaped script data: '/' may begin an end tag, a letter
+   # may begin a nested "<script" (double escape), anything else is text.
+   c = txt.charAt(cur++)
+   if c is '/'
+     temporary_buffer = ''
+     tok_state = tok_state_script_data_escaped_end_tag_open
+     return
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     # the buffer is lowercased (yes, really); the emitted text keeps the case
+     temporary_buffer = c.toLowerCase()
+     tok_state = tok_state_script_data_double_escape_start
+     return new_character_token "<#{c}" # fixfull split
+   # Anything else
+   tok_state = tok_state_script_data_escaped
+   cur -= 1 # Reconsume
+   return new_character_token '<'
+
+ # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
+ tok_state_script_data_escaped_end_tag_open = ->
+   # After '</' in escaped script data: a letter starts a candidate end
+   # tag, anything else means the '</' was text.
+   c = txt.charAt(cur++)
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     tok_cur_tag = new_end_tag c.toLowerCase()
+     temporary_buffer += c
+     tok_state = tok_state_script_data_escaped_end_tag_name
+     return
+   # Anything else
+   tok_state = tok_state_script_data_escaped
+   cur -= 1 # Reconsume
+   return new_character_token '</' # fixfull split
+
+ # 8.2.4.27 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-name-state
+ tok_state_script_data_escaped_end_tag_name = ->
+   # Collects the name of a candidate end tag inside escaped script data.
+   # Whitespace, '/' and '>' finish the tag only when it is the appropriate
+   # end tag; otherwise the buffered characters are returned as text.
+   # Returns the end tag token on an accepted '>', a character token when the
+   # candidate is rejected, and nothing while the name is still being read.
+   c = txt.charAt(cur++)
+   if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
+     if is_appropriate_end_tag tok_cur_tag
+       tok_state = tok_state_before_attribute_name
+       return
+     # fall through
+   if c is '/'
+     if is_appropriate_end_tag tok_cur_tag
+       tok_state = tok_state_self_closing_start_tag
+       return
+     # fall through
+   if c is '>'
+     if is_appropriate_end_tag tok_cur_tag
+       tok_state = tok_state_data
+       return tok_cur_tag
+     # fall through
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     # Only the tag name is lowercased. The temporary buffer must keep the
+     # character exactly as written, because it is replayed verbatim as text
+     # below when this turns out not to be the appropriate end tag.
+     tok_cur_tag.name += c.toLowerCase()
+     temporary_buffer += c
+     return
+   # Anything else
+   tok_state = tok_state_script_data_escaped
+   cur -= 1 # Reconsume
+   return new_character_token "</#{temporary_buffer}" # fixfull split
+
+ # 8.2.4.28 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-start-state
+ tok_state_script_data_double_escape_start = ->
+   # Reads the word after '<' in escaped script data; if it spells "script"
+   # the tokenizer becomes double escaped.
+   c = txt.charAt(cur++)
+   if c in ["\t", "\u000a", "\u000c", ' ', '/', '>']
+     tok_state = if temporary_buffer is 'script' then tok_state_script_data_double_escaped else tok_state_script_data_escaped
+     return new_character_token c
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     # buffer is lowercased (yes, really); the emitted character keeps its case
+     temporary_buffer += c.toLowerCase()
+     return new_character_token c
+   # Anything else
+   tok_state = tok_state_script_data_escaped
+   cur -= 1 # Reconsume
+   return
+
+ # 8.2.4.29 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-state
+ tok_state_script_data_double_escaped = ->
+   # Script data double escaped state: every character is emitted as text;
+   # '-' and '<' also change state.
+   switch c = txt.charAt(cur++)
+     when '-'
+       tok_state = tok_state_script_data_double_escaped_dash
+       return new_character_token '-'
+     when '<'
+       tok_state = tok_state_script_data_double_escaped_less_than_sign
+       return new_character_token '<'
+     when "\u0000"
+       parse_error()
+       return new_character_token "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return
+   # Anything else
+   return new_character_token c
+
+ # 8.2.4.30 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-state
+ tok_state_script_data_double_escaped_dash = ->
+   # Script data double escaped dash state: one '-' has been seen.
+   switch c = txt.charAt(cur++)
+     when '-'
+       tok_state = tok_state_script_data_double_escaped_dash_dash
+       return new_character_token '-'
+     when '<'
+       tok_state = tok_state_script_data_double_escaped_less_than_sign
+       return new_character_token '<'
+     when "\u0000"
+       parse_error()
+       tok_state = tok_state_script_data_double_escaped
+       return new_character_token "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return
+   # Anything else
+   tok_state = tok_state_script_data_double_escaped
+   return new_character_token c
+
+ # 8.2.4.31 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-dash-dash-state
+ tok_state_script_data_double_escaped_dash_dash = ->
+   # Script data double escaped dash dash state: '--' has been seen, so a
+   # '>' returns to plain script data.
+   switch c = txt.charAt(cur++)
+     when '-'
+       return new_character_token '-'
+     when '<'
+       tok_state = tok_state_script_data_double_escaped_less_than_sign
+       return new_character_token '<'
+     when '>'
+       tok_state = tok_state_script_data
+       return new_character_token '>'
+     when "\u0000"
+       parse_error()
+       tok_state = tok_state_script_data_double_escaped
+       return new_character_token "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return
+   # Anything else
+   tok_state = tok_state_script_data_double_escaped
+   return new_character_token c
+
+ # 8.2.4.32 http://www.w3.org/TR/html5/syntax.html#script-data-double-escaped-less-than-sign-state
+ tok_state_script_data_double_escaped_less_than_sign = ->
+   # After '<' in double escaped script data: '/' may begin the "</script"
+   # that ends the double escape.
+   c = txt.charAt(cur++)
+   unless c is '/'
+     tok_state = tok_state_script_data_double_escaped
+     cur -= 1 # Reconsume
+     return
+   temporary_buffer = ''
+   tok_state = tok_state_script_data_double_escape_end
+   return new_character_token '/'
+
+ # 8.2.4.33 http://www.w3.org/TR/html5/syntax.html#script-data-double-escape-end-state
+ tok_state_script_data_double_escape_end = ->
+   # Reads the word after '</' in double escaped script data; if it spells
+   # "script" the tokenizer drops back to the (single) escaped state.
+   c = txt.charAt(cur++)
+   if c in ["\t", "\u000a", "\u000c", ' ', '/', '>']
+     tok_state = if temporary_buffer is 'script' then tok_state_script_data_escaped else tok_state_script_data_double_escaped
+     return new_character_token c
+   if is_uc_alpha(c) or is_lc_alpha(c)
+     # buffer is lowercased (yes, really); the emitted character keeps its case
+     temporary_buffer += c.toLowerCase()
+     return new_character_token c
+   # Anything else
+   tok_state = tok_state_script_data_double_escaped
+   cur -= 1 # Reconsume
+   return
+
+ # 8.2.4.34 http://www.w3.org/TR/html5/syntax.html#before-attribute-name-state
+ tok_state_before_attribute_name = ->
+   # Before attribute name state: skip whitespace, finish the tag on '>',
+   # otherwise start a new attribute (prepended to attrs_a) with an empty
+   # value. Returns the finished tag token on '>', otherwise null.
+   attr_name = null
+   c = txt.charAt(cur++)
+   if c in ["\t", "\n", "\u000c", ' ']
+     return null
+   if c is '/'
+     tok_state = tok_state_self_closing_start_tag
+     return null
+   if c is '>'
+     tok_state = tok_state_data
+     tmp = tok_cur_tag
+     tok_cur_tag = null
+     return tmp
+   if c is '' # EOF
+     parse_error()
+     tok_state = tok_state_data
+     return null
+   # every remaining character begins an attribute name
+   if c is "\u0000"
+     parse_error()
+     attr_name = "\ufffd"
+   else if c in ['"', "'", '<', '=']
+     parse_error()
+     attr_name = c
+   else if is_uc_alpha(c)
+     attr_name = c.toLowerCase()
+   else
+     attr_name = c
+   tok_cur_tag.attrs_a.unshift [attr_name, '']
+   tok_state = tok_state_attribute_name
+   return null
+
+ # 8.2.4.35 http://www.w3.org/TR/html5/syntax.html#attribute-name-state
+ tok_state_attribute_name = ->
+   # Attribute name state: extend the name of the newest attribute
+   # (attrs_a[0]) until whitespace, '/', '=', '>' or EOF. Returns the
+   # finished tag token on '>', otherwise null.
+   c = txt.charAt(cur++)
+   if c is '>'
+     tok_state = tok_state_data
+     tmp = tok_cur_tag
+     tok_cur_tag = null
+     return tmp
+   if c in ["\t", "\n", "\u000c", ' ']
+     tok_state = tok_state_after_attribute_name
+   else if c is '/'
+     tok_state = tok_state_self_closing_start_tag
+   else if c is '='
+     tok_state = tok_state_before_attribute_value
+   else if c is '' # EOF
+     parse_error()
+     tok_state = tok_state_data
+   else if c is "\u0000"
+     parse_error()
+     tok_cur_tag.attrs_a[0][0] += "\ufffd"
+   else
+     # quotes and '<' are parse errors but still become part of the name
+     parse_error() if c in ['"', "'", '<']
+     tok_cur_tag.attrs_a[0][0] += if is_uc_alpha(c) then c.toLowerCase() else c
+   return null
+
+ # 8.2.4.36 http://www.w3.org/TR/html5/syntax.html#after-attribute-name-state
+ tok_state_after_attribute_name = ->
+   # After attribute name state: whitespace has followed an attribute name.
+   # '=' introduces its value, '>' finishes the tag (returned as a token),
+   # and any other character starts a new attribute.
+   switch c = txt.charAt(cur++)
+     when "\t", "\n", "\u000c", ' '
+       return
+     when '/'
+       tok_state = tok_state_self_closing_start_tag
+       return
+     when '='
+       tok_state = tok_state_before_attribute_value
+       return
+     when '>'
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when "\u0000"
+       parse_error()
+       tok_cur_tag.attrs_a.unshift ["\ufffd", '']
+       tok_state = tok_state_attribute_name
+       return
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       cur -= 1 # reconsume
+       return
+     when '"', "'", '<'
+       # parse error, then handled like any other character
+       parse_error()
+   # Anything else: start a new attribute, lowercasing an uppercase letter
+   tok_cur_tag.attrs_a.unshift [(if is_uc_alpha(c) then c.toLowerCase() else c), '']
+   tok_state = tok_state_attribute_name
+   return
+
+ # 8.2.4.37 http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
+ tok_state_before_attribute_value = ->
+   # Before attribute value state: after '=', decide how the value of the
+   # newest attribute (attrs_a[0]) is quoted. Returns the finished tag token
+   # on '>', otherwise null.
+   # Every case the spec marks as a parse error is reported via parse_error().
+   switch c = txt.charAt(cur++)
+     when "\t", "\n", "\u000c", ' '
+       return null
+     when '"'
+       tok_state = tok_state_attribute_value_double_quoted
+     when '&'
+       # let the unquoted state handle the character reference
+       tok_state = tok_state_attribute_value_unquoted
+       cur -= 1
+     when "'"
+       tok_state = tok_state_attribute_value_single_quoted
+     when "\u0000"
+       parse_error()
+       tok_cur_tag.attrs_a[0][1] += "\ufffd"
+       tok_state = tok_state_attribute_value_unquoted
+     when '>'
+       # a missing value is a parse error; the tag is still emitted
+       parse_error()
+       tok_state = tok_state_data
+       tmp = tok_cur_tag
+       tok_cur_tag = null
+       return tmp
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+     else
+       # '<', '=' and '`' are parse errors but still start an unquoted value
+       if c is '<' or c is '=' or c is '`'
+         parse_error()
+       tok_cur_tag.attrs_a[0][1] += c
+       tok_state = tok_state_attribute_value_unquoted
+   return null
+
+ # 8.2.4.38 http://www.w3.org/TR/html5/syntax.html#attribute-value-(double-quoted)-state
+ tok_state_attribute_value_double_quoted = ->
+   # Attribute value (double-quoted) state: append characters to the value
+   # of the newest attribute (attrs_a[0]) until the closing '"'.
+   # Always returns null; no token is completed in this state.
+   switch c = txt.charAt(cur++)
+     when '"'
+       tok_state = tok_state_after_attribute_value_quoted
+     when '&'
+       tok_cur_tag.attrs_a[0][1] += parse_character_reference '"', true
+     when "\u0000"
+       # report the parse error the spec requires, then substitute U+FFFD
+       parse_error()
+       tok_cur_tag.attrs_a[0][1] += "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+     else
+       tok_cur_tag.attrs_a[0][1] += c
+   return null
+
+ # 8.2.4.39 http://www.w3.org/TR/html5/syntax.html#attribute-value-(single-quoted)-state
+ tok_state_attribute_value_single_quoted = ->
+   # Attribute value (single-quoted) state: append characters to the value
+   # of the newest attribute (attrs_a[0]) until the closing "'".
+   # Always returns null; no token is completed in this state.
+   switch c = txt.charAt(cur++)
+     when "'"
+       tok_state = tok_state_after_attribute_value_quoted
+     when '&'
+       tok_cur_tag.attrs_a[0][1] += parse_character_reference "'", true
+     when "\u0000"
+       # report the parse error the spec requires, then substitute U+FFFD
+       parse_error()
+       tok_cur_tag.attrs_a[0][1] += "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+     else
+       tok_cur_tag.attrs_a[0][1] += c
+   return null
+
+ # 8.2.4.40 http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
+ tok_state_attribute_value_unquoted = ->
+   # Attribute value (unquoted) state: append characters to the value of the
+   # newest attribute (attrs_a[0]) until whitespace or '>'. Returns the
+   # finished tag token on '>', otherwise null.
+   # Every case the spec marks as a parse error is reported via parse_error().
+   switch c = txt.charAt(cur++)
+     when "\t", "\n", "\u000c", ' '
+       tok_state = tok_state_before_attribute_name
+     when '&'
+       tok_cur_tag.attrs_a[0][1] += parse_character_reference '>', true
+     when '>'
+       tok_state = tok_state_data
+       tmp = tok_cur_tag
+       tok_cur_tag = null
+       return tmp
+     when "\u0000"
+       parse_error()
+       tok_cur_tag.attrs_a[0][1] += "\ufffd"
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+     else
+       # quotes, '<', '=' and '`' are parse errors but still part of the value
+       if c in ['"', "'", '<', '=', '`']
+         parse_error()
+       tok_cur_tag.attrs_a[0][1] += c
+   return null
+
+ # 8.2.4.42 http://www.w3.org/TR/html5/syntax.html#after-attribute-value-(quoted)-state
+ tok_state_after_attribute_value_quoted = ->
+ switch c = txt.charAt(cur++)
+ when "\t", "\n", "\u000c", ' '
+ tok_state = tok_state_before_attribute_name
+ when '/'
+ tok_state = tok_state_self_closing_start_tag
+ when '>'
+ tok_state = tok_state_data
+ tmp = tok_cur_tag
+ tok_cur_tag = null
+ return tmp
+ when '' # EOF