+ # 8.2.4.52 http://www.w3.org/TR/html5/syntax.html#doctype-state
+ tok_state_doctype = ->
+   # DOCTYPE state. Reads one character from txt at cur (advancing cur) and
+   # selects the next tokenizer state.
+   # Returns a new DOCTYPE token flagged force-quirks at EOF, otherwise null.
+   c = txt.charAt(cur++)
+   if c in ["\t", "\u000a", "\u000c", ' ']
+     tok_state = tok_state_before_doctype_name
+     return null
+   if c is '' # EOF: charAt() yields '' past the end of txt
+     parse_error()
+     tok_state = tok_state_data
+     el = new_doctype_token ''
+     el.flag 'force-quirks', true
+     cur -= 1 # Reconsume
+     return el
+   # Anything else: report the error and let the next state see this character
+   parse_error()
+   tok_state = tok_state_before_doctype_name
+   cur -= 1 # Reconsume
+   return null
+
+ # 8.2.4.53 http://www.w3.org/TR/html5/syntax.html#before-doctype-name-state
+ tok_state_before_doctype_name = ->
+   # Before DOCTYPE name state. Skips whitespace, then creates the DOCTYPE
+   # token (stored in tok_cur_tag) from the first significant character and
+   # switches to tok_state_doctype_name.
+   # Returns a force-quirks DOCTYPE token on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   c = txt.charAt(cur++)
+   if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
+     return
+   # EOF is tested before the uppercase lookup: charAt() yields '' past the
+   # end of txt, and String::indexOf('') is 0, so if uc_alpha is a string
+   # (its definition is not visible here) the lookup below would otherwise
+   # treat EOF as an uppercase letter and the EOF handling would never run.
+   if c is '' # EOF
+     parse_error()
+     tok_state = tok_state_data
+     el = new_doctype_token ''
+     el.flag 'force-quirks', true
+     cur -= 1 # Reconsume
+     return el
+   if uc_alpha.indexOf(c) > -1
+     # DOCTYPE names are stored lowercased
+     tok_cur_tag = new_doctype_token c.toLowerCase()
+     tok_state = tok_state_doctype_name
+     return
+   if c is "\u0000"
+     parse_error()
+     # NUL is replaced with U+FFFD REPLACEMENT CHARACTER
+     tok_cur_tag = new_doctype_token "\ufffd"
+     tok_state = tok_state_doctype_name
+     return
+   if c is '>'
+     parse_error()
+     el = new_doctype_token ''
+     el.flag 'force-quirks', true
+     tok_state = tok_state_data
+     return el
+   # Anything else
+   tok_cur_tag = new_doctype_token c
+   tok_state = tok_state_doctype_name
+   return null
+
+ # 8.2.4.54 http://www.w3.org/TR/html5/syntax.html#doctype-name-state
+ tok_state_doctype_name = ->
+   # DOCTYPE name state. Appends characters to tok_cur_tag.name until
+   # whitespace, '>' or EOF.
+   # Returns tok_cur_tag on '>' and (flagged force-quirks) on EOF; otherwise
+   # returns nothing (undefined or null).
+   c = txt.charAt(cur++)
+   if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
+     tok_state = tok_state_after_doctype_name
+     return
+   if c is '>'
+     tok_state = tok_state_data
+     return tok_cur_tag
+   # EOF is tested before the uppercase lookup: charAt() yields '' past the
+   # end of txt, and String::indexOf('') is 0, so if uc_alpha is a string
+   # (its definition is not visible here) the lookup below would otherwise
+   # keep appending '' at EOF and never reach this branch.
+   if c is '' # EOF
+     parse_error()
+     tok_state = tok_state_data
+     tok_cur_tag.flag 'force-quirks', true
+     cur -= 1 # Reconsume
+     return tok_cur_tag
+   if uc_alpha.indexOf(c) > -1
+     # DOCTYPE names are stored lowercased
+     tok_cur_tag.name += c.toLowerCase()
+     return
+   if c is "\u0000"
+     parse_error()
+     # NUL is replaced with U+FFFD REPLACEMENT CHARACTER
+     tok_cur_tag.name += "\ufffd"
+     return
+   # Anything else
+   tok_cur_tag.name += c
+   return null
+
+ # 8.2.4.55 http://www.w3.org/TR/html5/syntax.html#after-doctype-name-state
+ tok_state_after_doctype_name = ->
+   # After DOCTYPE name state. Skips whitespace, finishes the token on '>' or
+   # EOF, and otherwise looks for a case-insensitive PUBLIC / SYSTEM keyword.
+   # Returns tok_cur_tag on '>' and (flagged force-quirks) on EOF; otherwise
+   # returns nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "\t", "\u000a", "\u000c", ' '
+       return
+     when '>'
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else: the character just read sits at cur - 1, so the six
+   # characters starting there are compared against the keywords.
+   switch txt.substr(cur - 1, 6).toLowerCase()
+     when 'public'
+       cur += 5 # the first of the six characters was already consumed
+       tok_state = tok_state_after_doctype_public_keyword
+       return
+     when 'system'
+       cur += 5 # the first of the six characters was already consumed
+       tok_state = tok_state_after_doctype_system_keyword
+       return
+   parse_error()
+   tok_cur_tag.flag 'force-quirks', true
+   tok_state = tok_state_bogus_doctype
+   return null
+
+ # 8.2.4.56 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-keyword-state
+ tok_state_after_doctype_public_keyword = ->
+   # After DOCTYPE public keyword state. A quote directly after the keyword
+   # (no whitespace) is a parse error but still starts the public identifier.
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "\t", "\u000a", "\u000c", ' '
+       tok_state = tok_state_before_doctype_public_identifier
+       return
+     when '"'
+       parse_error()
+       tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
+       tok_state = tok_state_doctype_public_identifier_double_quoted
+       return
+     when "'"
+       parse_error()
+       tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
+       tok_state = tok_state_doctype_public_identifier_single_quoted
+       return
+     when '>'
+       parse_error()
+       tok_cur_tag.flag 'force-quirks', true
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   parse_error()
+   tok_cur_tag.flag 'force-quirks', true
+   tok_state = tok_state_bogus_doctype
+   return null
+
+ # 8.2.4.57 http://www.w3.org/TR/html5/syntax.html#before-doctype-public-identifier-state
+ tok_state_before_doctype_public_identifier = ->
+   # Before DOCTYPE public identifier state. Skips whitespace, then expects
+   # the opening quote of the public identifier.
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   c = txt.charAt(cur++)
+   if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
+     return
+   # An opening quote here is the well-formed case (whitespace separated it
+   # from the PUBLIC keyword), so no parse error is reported for it.
+   if c is '"'
+     tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
+     tok_state = tok_state_doctype_public_identifier_double_quoted
+     return
+   if c is "'"
+     tok_cur_tag.public_identifier = '' # FIXME should this go in @attrs or @text?
+     tok_state = tok_state_doctype_public_identifier_single_quoted
+     return
+   if c is '>'
+     parse_error()
+     tok_cur_tag.flag 'force-quirks', true
+     tok_state = tok_state_data
+     return tok_cur_tag
+   if c is '' # EOF
+     parse_error()
+     tok_state = tok_state_data
+     tok_cur_tag.flag 'force-quirks', true
+     cur -= 1 # Reconsume
+     return tok_cur_tag
+   # Anything else
+   parse_error()
+   tok_cur_tag.flag 'force-quirks', true
+   tok_state = tok_state_bogus_doctype
+   return null
+
+
+ # 8.2.4.58 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(double-quoted)-state
+ tok_state_doctype_public_identifier_double_quoted = ->
+   # DOCTYPE public identifier (double-quoted) state. Accumulates characters
+   # into tok_cur_tag.public_identifier until the closing '"'.
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when '"'
+       tok_state = tok_state_after_doctype_public_identifier
+       return
+     when "\u0000"
+       parse_error()
+       tok_cur_tag.public_identifier += "\ufffd" # NUL becomes U+FFFD
+       return
+     when '>'
+       parse_error()
+       tok_cur_tag.flag 'force-quirks', true
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   tok_cur_tag.public_identifier += c
+   return null
+
+ # 8.2.4.59 http://www.w3.org/TR/html5/syntax.html#doctype-public-identifier-(single-quoted)-state
+ tok_state_doctype_public_identifier_single_quoted = ->
+   # DOCTYPE public identifier (single-quoted) state. Accumulates characters
+   # into tok_cur_tag.public_identifier until the closing "'".
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "'"
+       tok_state = tok_state_after_doctype_public_identifier
+       return
+     when "\u0000"
+       parse_error()
+       tok_cur_tag.public_identifier += "\ufffd" # NUL becomes U+FFFD
+       return
+     when '>'
+       parse_error()
+       tok_cur_tag.flag 'force-quirks', true
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   tok_cur_tag.public_identifier += c
+   return null
+
+ # 8.2.4.60 http://www.w3.org/TR/html5/syntax.html#after-doctype-public-identifier-state
+ tok_state_after_doctype_public_identifier = ->
+   # After DOCTYPE public identifier state. A quote directly after the public
+   # identifier (no whitespace) is a parse error but still starts the system
+   # identifier.
+   # Returns tok_cur_tag on '>' and (flagged force-quirks) on EOF; otherwise
+   # returns nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "\t", "\u000a", "\u000c", ' '
+       tok_state = tok_state_between_doctype_public_and_system_identifiers
+       return
+     when '>'
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '"'
+       parse_error()
+       tok_cur_tag.system_identifier = ''
+       tok_state = tok_state_doctype_system_identifier_double_quoted
+       return
+     when "'"
+       parse_error()
+       tok_cur_tag.system_identifier = ''
+       tok_state = tok_state_doctype_system_identifier_single_quoted
+       return
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   parse_error()
+   tok_cur_tag.flag 'force-quirks', true
+   tok_state = tok_state_bogus_doctype
+   return null
+
+ # 8.2.4.61 http://www.w3.org/TR/html5/syntax.html#between-doctype-public-and-system-identifiers-state
+ tok_state_between_doctype_public_and_system_identifiers = ->
+   # Between DOCTYPE public and system identifiers state. Skips whitespace,
+   # then expects either '>' or the opening quote of the system identifier.
+   # Returns tok_cur_tag on '>' and (flagged force-quirks) on EOF; otherwise
+   # returns nothing (undefined or null).
+   c = txt.charAt(cur++)
+   if c is "\t" or c is "\u000a" or c is "\u000c" or c is ' '
+     return
+   if c is '>'
+     tok_state = tok_state_data
+     return tok_cur_tag
+   # An opening quote here is the well-formed case (whitespace separated it
+   # from the public identifier), so no parse error is reported for it.
+   if c is '"'
+     tok_cur_tag.system_identifier = ''
+     tok_state = tok_state_doctype_system_identifier_double_quoted
+     return
+   if c is "'"
+     tok_cur_tag.system_identifier = ''
+     tok_state = tok_state_doctype_system_identifier_single_quoted
+     return
+   if c is '' # EOF
+     parse_error()
+     tok_state = tok_state_data
+     tok_cur_tag.flag 'force-quirks', true
+     cur -= 1 # Reconsume
+     return tok_cur_tag
+   # Anything else
+   parse_error()
+   tok_cur_tag.flag 'force-quirks', true
+   tok_state = tok_state_bogus_doctype
+   return null
+
+ # 8.2.4.62 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-keyword-state
+ tok_state_after_doctype_system_keyword = ->
+   # After DOCTYPE system keyword state. A quote directly after the keyword
+   # (no whitespace) is a parse error but still starts the system identifier.
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "\t", "\u000a", "\u000c", ' '
+       tok_state = tok_state_before_doctype_system_identifier
+       return
+     when '"'
+       parse_error()
+       tok_cur_tag.system_identifier = ''
+       tok_state = tok_state_doctype_system_identifier_double_quoted
+       return
+     when "'"
+       parse_error()
+       tok_cur_tag.system_identifier = ''
+       tok_state = tok_state_doctype_system_identifier_single_quoted
+       return
+     when '>'
+       parse_error()
+       tok_cur_tag.flag 'force-quirks', true
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   parse_error()
+   tok_cur_tag.flag 'force-quirks', true
+   tok_state = tok_state_bogus_doctype
+   return null
+
+ # 8.2.4.63 http://www.w3.org/TR/html5/syntax.html#before-doctype-system-identifier-state
+ tok_state_before_doctype_system_identifier = ->
+   # Before DOCTYPE system identifier state. Skips whitespace, then expects
+   # the opening quote of the system identifier (no parse error for a quote).
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "\t", "\u000a", "\u000c", ' '
+       return
+     when '"'
+       tok_cur_tag.system_identifier = ''
+       tok_state = tok_state_doctype_system_identifier_double_quoted
+       return
+     when "'"
+       tok_cur_tag.system_identifier = ''
+       tok_state = tok_state_doctype_system_identifier_single_quoted
+       return
+     when '>'
+       parse_error()
+       tok_cur_tag.flag 'force-quirks', true
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   parse_error()
+   tok_cur_tag.flag 'force-quirks', true
+   tok_state = tok_state_bogus_doctype
+   return null
+
+ # 8.2.4.64 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(double-quoted)-state
+ tok_state_doctype_system_identifier_double_quoted = ->
+   # DOCTYPE system identifier (double-quoted) state. Accumulates characters
+   # into tok_cur_tag.system_identifier until the closing '"'.
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when '"'
+       tok_state = tok_state_after_doctype_system_identifier
+       return
+     when "\u0000"
+       parse_error()
+       tok_cur_tag.system_identifier += "\ufffd" # NUL becomes U+FFFD
+       return
+     when '>'
+       parse_error()
+       tok_cur_tag.flag 'force-quirks', true
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   tok_cur_tag.system_identifier += c
+   return null
+
+ # 8.2.4.65 http://www.w3.org/TR/html5/syntax.html#doctype-system-identifier-(single-quoted)-state
+ tok_state_doctype_system_identifier_single_quoted = ->
+   # DOCTYPE system identifier (single-quoted) state. Accumulates characters
+   # into tok_cur_tag.system_identifier until the closing "'".
+   # Returns tok_cur_tag flagged force-quirks on '>' or EOF; otherwise returns
+   # nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "'"
+       tok_state = tok_state_after_doctype_system_identifier
+       return
+     when "\u0000"
+       parse_error()
+       tok_cur_tag.system_identifier += "\ufffd" # NUL becomes U+FFFD
+       return
+     when '>'
+       parse_error()
+       tok_cur_tag.flag 'force-quirks', true
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else
+   tok_cur_tag.system_identifier += c
+   return null
+
+ # 8.2.4.66 http://www.w3.org/TR/html5/syntax.html#after-doctype-system-identifier-state
+ tok_state_after_doctype_system_identifier = ->
+   # After DOCTYPE system identifier state. Skips whitespace and finishes the
+   # token on '>' or EOF; any other character switches to the bogus state.
+   # Returns tok_cur_tag on '>' and (flagged force-quirks) on EOF; otherwise
+   # returns nothing (undefined or null).
+   switch c = txt.charAt(cur++)
+     when "\t", "\u000a", "\u000c", ' '
+       return
+     when '>'
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       parse_error()
+       tok_state = tok_state_data
+       tok_cur_tag.flag 'force-quirks', true
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else: a parse error, but force-quirks is deliberately _not_ set
+   parse_error()
+   tok_state = tok_state_bogus_doctype
+   return null
+
+ # 8.2.4.67 http://www.w3.org/TR/html5/syntax.html#bogus-doctype-state
+ tok_state_bogus_doctype = ->
+   # Bogus DOCTYPE state. Discards characters until '>' or EOF, then hands
+   # back tok_cur_tag and returns to the data state. No parse error is
+   # reported here.
+   # Returns tok_cur_tag on '>' or EOF, otherwise null.
+   switch c = txt.charAt(cur++)
+     when '>'
+       tok_state = tok_state_data
+       return tok_cur_tag
+     when '' # EOF
+       tok_state = tok_state_data
+       cur -= 1 # Reconsume
+       return tok_cur_tag
+   # Anything else is ignored
+   return null
+