new_text_node = (txt) ->
return new Node TYPE_TEXT, text: txt
new_character_token = new_text_node
-new_comment_node = (txt) ->
+new_comment_token = (txt) ->
return new Node TYPE_COMMENT, text: txt
new_eof_token = ->
return new Node TYPE_EOF
pending_table_character_tokens = null
head_element_pointer = null
flag_fragment_parsing = null
+ context_element = null
stop_parsing = ->
flag_parsing = false
node = open_els[node_i]
# 19. Return to the step labeled loop.
+ # 8.2.3.2
+
+ # http://www.w3.org/TR/html5/syntax.html#adjusted-current-node
+ # Yields context_element when flag_fragment_parsing is set and open_els holds
+ # exactly one element; in every other case yields open_els[0].
+ # NOTE(review): assumes open_els[0] is the current (innermost) node -- confirm.
+ adjusted_current_node = ->
+     if flag_fragment_parsing and open_els.length is 1
+         context_element
+     else
+         open_els[0]
+
# http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements
# this implementation is structured (mostly) as described at the link above.
# capitalized comments are the "labels" described at the link above.
parse_error()
return
ins_mode_in_head_else t
-
+
# 8.2.5.4.5 http://www.w3.org/TR/html5/syntax.html#parsing-main-inheadnoscript
ins_mode_in_head_noscript = (t) ->
# FIXME ?fixfull
console.log "ins_mode_in_head_noscript unimplemented"
-
+
# 8.2.5.4.6 http://www.w3.org/TR/html5/syntax.html#the-after-head-insertion-mode
ins_mode_after_head_else = (t) ->
body_tok = new_open_tag 'body'
tok_state = tok_state_end_tag_open
when '?'
parse_error()
+ tok_cur_tag = new_comment_token '?'
tok_state = tok_state_bogus_comment
else
if lc_alpha.indexOf(c) > -1
tok_state = tok_state_tag_name
else
parse_error()
+ tok_cur_tag = new_comment_token '/'
tok_state = tok_state_bogus_comment
return null
cur -= 1 # we didn't handle that char
return null
+ # 8.2.4.44 http://www.w3.org/TR/html5/syntax.html#bogus-comment-state
+ # WARNING: put a comment token in tok_cur_tag before setting this state
+ #
+ # Consumes everything from cur up to and including the next ">" (or up to
+ # the end of txt when there is none), appends it to tok_cur_tag.text with
+ # NUL characters replaced by U+FFFD, switches back to the data state and
+ # returns the finished comment token.
+ tok_state_bogus_comment = ->
+     next_gt = txt.indexOf '>', cur
+     if next_gt is -1
+         # no closing ">": the comment runs to the end of the input
+         val = txt.substr cur
+         cur = txt.length
+     else
+         val = txt.substr cur, (next_gt - cur)
+         cur = next_gt + 1 # skip past the ">" itself
+     # A string pattern would only replace the first match; the global regex
+     # makes sure every NUL in the comment becomes U+FFFD.
+     val = val.replace(/\u0000/g, "\ufffd")
+     tok_cur_tag.text += val
+     tok_state = tok_state_data
+     return tok_cur_tag
+
+ # 8.2.4.45 http://www.w3.org/TR/html5/syntax.html#markup-declaration-open-state
+ # Looks at the characters following "<!" and picks the next tokenizer state:
+ #   "--"                              -> comment start (fresh comment token)
+ #   "doctype" (any letter case)       -> doctype
+ #   "[CDATA[" in a non-HTML namespace -> CDATA section
+ #   anything else                     -> parse error, bogus comment
+ # Never emits a token itself (returns nothing).
+ tok_state_markup_declaration_open = ->
+     if txt.substr(cur, 2) is '--'
+         cur += 2
+         tok_cur_tag = new_comment_token ''
+         tok_state = tok_state_comment_start
+         return
+     if txt.substr(cur, 7).toLowerCase() is 'doctype'
+         cur += 7
+         tok_state = tok_state_doctype
+         return
+     acn = adjusted_current_node()
+     # "[CDATA[" is matched case-sensitively and only outside the HTML namespace
+     if acn and acn.namespace isnt NS_HTML and txt.substr(cur, 7) is '[CDATA['
+         cur += 7
+         tok_state = tok_state_cdata_section
+         return
+     # Otherwise
+     parse_error()
+     # The spec makes the next consumed character the first character of the
+     # comment, so the token starts out empty (the "!" is not part of it).
+     tok_cur_tag = new_comment_token ''
+     tok_state = tok_state_bogus_comment
+     return
+
+ # 8.2.4.46 http://www.w3.org/TR/html5/syntax.html#comment-start-state
+ # Handles the first character after "<!--". Returns the comment token in
+ # tok_cur_tag when the comment ends here (">" or EOF), otherwise null.
+ tok_state_comment_start = ->
+     switch c = txt.charAt(cur++)
+         when '-'
+             tok_state = tok_state_comment_start_dash
+         when "\u0000"
+             # NUL becomes U+FFFD inside the comment; no token is emitted here
+             parse_error()
+             tok_cur_tag.text += "\ufffd"
+             tok_state = tok_state_comment
+         when '>'
+             parse_error()
+             tok_state = tok_state_data
+             return tok_cur_tag
+         when '' # EOF
+             parse_error()
+             tok_state = tok_state_data
+             cur -= 1 # Reconsume
+             return tok_cur_tag
+         else
+             tok_cur_tag.text += c
+             # leave the "start" state, otherwise a later "->" or ">" would be
+             # mistaken for an abruptly closed empty comment
+             tok_state = tok_state_comment
+     return null
+
+ # 8.2.4.47 http://www.w3.org/TR/html5/syntax.html#comment-start-dash-state
+ # Handles the character after "<!---". Returns tok_cur_tag when the comment
+ # ends here (">" or EOF, both parse errors), otherwise null.
+ tok_state_comment_start_dash = ->
+     c = txt.charAt(cur++)
+     if c is '-'
+         tok_state = tok_state_comment_end
+         return null
+     if c is '>' or c is ''
+         # abrupt end of the comment: report it and emit what we have
+         parse_error()
+         tok_state = tok_state_data
+         cur -= 1 if c is '' # Reconsume EOF
+         return tok_cur_tag
+     if c is "\u0000"
+         parse_error()
+         tok_cur_tag.text += "-\ufffd"
+     else
+         # the pending dash was ordinary comment text after all
+         tok_cur_tag.text += "-" + c
+     tok_state = tok_state_comment
+     return null
+
+ # 8.2.4.48 http://www.w3.org/TR/html5/syntax.html#comment-state
+ # Accumulates ordinary comment text into tok_cur_tag.text. Returns
+ # tok_cur_tag only when the input ends inside the comment, otherwise null.
+ tok_state_comment = ->
+     c = txt.charAt(cur++)
+     if c is '' # EOF
+         parse_error()
+         tok_state = tok_state_data
+         cur -= 1 # Reconsume
+         return tok_cur_tag
+     if c is '-'
+         # might be the start of "-->"
+         tok_state = tok_state_comment_end_dash
+     else if c is "\u0000"
+         parse_error()
+         tok_cur_tag.text += "\ufffd"
+     else
+         tok_cur_tag.text += c
+     return null
+
+ # 8.2.4.49 http://www.w3.org/TR/html5/syntax.html#comment-end-dash-state
+ # Handles the character after a single "-" inside a comment. Returns
+ # tok_cur_tag only when the input ends here, otherwise null.
+ tok_state_comment_end_dash = ->
+     c = txt.charAt(cur++)
+     if c is '-'
+         tok_state = tok_state_comment_end
+         return null
+     if c is '' # EOF
+         parse_error()
+         tok_state = tok_state_data
+         cur -= 1 # Reconsume
+         return tok_cur_tag
+     if c is "\u0000"
+         parse_error()
+         tok_cur_tag.text += "-\ufffd"
+     else
+         # the dash did not start "-->", so it is plain comment text
+         tok_cur_tag.text += "-" + c
+     tok_state = tok_state_comment
+     return null
+
+ # 8.2.4.50 http://www.w3.org/TR/html5/syntax.html#comment-end-state
+ # Handles the character after "--" inside a comment. Returns tok_cur_tag
+ # when the comment is finished (">" or EOF), otherwise null.
+ tok_state_comment_end = ->
+     c = txt.charAt(cur++)
+     if c is '>'
+         # the only clean way out: "-->" closes the comment
+         tok_state = tok_state_data
+         return tok_cur_tag
+     # every other character after "--" is a parse error
+     parse_error()
+     if c is '' # EOF
+         tok_state = tok_state_data
+         cur -= 1 # Reconsume
+         return tok_cur_tag
+     if c is '!'
+         tok_state = tok_state_comment_end_bang
+     else if c is '-'
+         # keep the last two dashes pending, the extra one is comment text
+         tok_cur_tag.text += '-'
+     else
+         tok_cur_tag.text += if c is "\u0000" then "--\ufffd" else "--" + c
+         tok_state = tok_state_comment
+     return null
+
+ # 8.2.4.51 http://www.w3.org/TR/html5/syntax.html#comment-end-bang-state
+ # Handles the character after "--!" inside a comment. Returns tok_cur_tag
+ # when the comment is finished (">" or EOF), otherwise null.
+ tok_state_comment_end_bang = ->
+     switch c = txt.charAt(cur++)
+         when '-'
+             # Only "--!" becomes comment text here; the dash just consumed
+             # stays pending and is accounted for by the comment end dash state.
+             tok_cur_tag.text += "--!"
+             tok_state = tok_state_comment_end_dash
+         when '>'
+             tok_state = tok_state_data
+             return tok_cur_tag
+         when "\u0000"
+             parse_error()
+             tok_cur_tag.text += "--!\ufffd"
+             tok_state = tok_state_comment
+         when '' # EOF
+             parse_error()
+             tok_state = tok_state_data
+             cur -= 1 # Reconsume
+             return tok_cur_tag
+         else
+             tok_cur_tag.text += "--!#{c}"
+             tok_state = tok_state_comment
+     return null
+
+
# 8.2.4.69 http://www.w3.org/TR/html5/syntax.html#consume-a-character-reference
# Don't set this as a state, just call it
# returns a string (NOT a text node)
pending_table_character_tokens = []
head_element_pointer = null
flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
+ context_element = null # FIXME initialize from args.fragment
# tokenizer initialization
tok_state = tok_state_data