@children = args.children ? []
@namespace = args.namespace ? NS_HTML
@parent = args.parent ? null
+ @token = args.token ? null
if args.id?
@id = "#{args.id}+"
else
# WARNING this doesn't work right on open tags that are still being parsed
attrs = {}
attrs[k] = v for k, v of @attrs
- return new Node @type, name: @name, text: @text, attrs: attrs, namespace: @namespace, id: @id
+ return new Node @type, name: @name, text: @text, attrs: attrs, namespace: @namespace, id: @id, token: @token
acknowledge_self_closing: ->
- @flag 'did_self_close', true
+ if @token?
+ @token.flag 'did_self_close'
+ else
+ @flag 'did_self_close', true
flag: ->
# fixfull
serialize: (shallow = false, show_ids = false) -> # for unit tests
temporary_buffer = null
pending_table_character_tokens = null
head_element_pointer = null
+ flag_fragment_parsing = null
+
+ # Clears flag_parsing to signal that parsing should stop.
+ # NOTE(review): presumably flag_parsing is declared in the enclosing parser
+ # scope and polled by the main parse loop -- confirm against the full file.
+ stop_parsing = ->
+   return flag_parsing = false
parse_error = ->
if parse_error_cb?
while t.attrs_a.length
a = t.attrs_a.pop()
attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
- el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs
+ el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t
# TODO 2. If the newly created element has an xmlns attribute in the
# XMLNS namespace whose value is not exactly the same as the element's
if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link')
el = insert_html_element t
open_els.shift()
- el.acknowledge_self_closing()
+ t.acknowledge_self_closing()
return
if t.type is TYPE_START_TAG and t.name is 'meta'
el = insert_html_element t
open_els.shift()
- el.acknowledge_self_closing()
+ t.acknowledge_self_closing()
# fixfull encoding stuff
return
if t.type is TYPE_START_TAG and t.name is 'title'
parse_error()
break
# TODO stack of template insertion modes thing
- flag_parsing = false # stop parsing
+ stop_parsing()
when TYPE_END_TAG
switch t.name
when 'body'
parse_error()
el = insert_html_element t
open_els.shift()
- el.acknowledge_self_closing()
+ t.acknowledge_self_closing()
when 'form'
parse_error()
if form_element_pointer?
if t.type is TYPE_START_TAG and t.name is 'col'
el = insert_html_element t
open_els.shift()
- el.acknowledge_self_closing()
+ t.acknowledge_self_closing()
return
if t.type is TYPE_END_TAG and t.name is 'colgroup'
if open_els[0].name is 'colgroup'
ins_mode_in_select t
return
- # CONTINUE more insertion modes!
-
-
+ # 8.2.5.4.18 http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
+ # Processes token `t` under the "in template" insertion mode. Depending on the
+ # token it either delegates to the "in body" / "in head" rules, or replaces the
+ # current template insertion mode with a more specific mode and reprocesses
+ # `t` under that mode. Returns nothing.
+ ins_mode_in_template = (t) ->
+   if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
+     ins_mode_in_body t
+     return
+   if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
+     ins_mode_in_head t
+     return
+   if t.type is TYPE_START_TAG
+     # Every remaining start tag does the same thing with a different target
+     # mode, so pick the target first and run the shared steps once.
+     template_next_mode = switch t.name
+       when 'caption', 'colgroup', 'tbody', 'tfoot', 'thead' then ins_mode_in_table
+       when 'col' then ins_mode_in_column_group
+       when 'tr' then ins_mode_in_table_body
+       when 'td', 'th' then ins_mode_in_row
+       else ins_mode_in_body
+     # Replace the entry at the front of template_insertion_modes, switch to
+     # the new mode, then reprocess the token under it.
+     template_insertion_modes.shift()
+     template_insertion_modes.unshift template_next_mode
+     insertion_mode = template_next_mode
+     insertion_mode t
+     return
+   if t.type is TYPE_END_TAG
+     parse_error()
+     return
+   # Was `t.type is EOF`; every other mode in this file tests TYPE_EOF.
+   if t.type is TYPE_EOF
+     unless template_tag_is_open()
+       stop_parsing()
+       return
+     parse_error()
+     # Pop open elements until a template element has been popped.
+     loop
+       el = open_els.shift()
+       if el.name is 'template' # fixfull check namespace
+         break
+     clear_afe_to_marker()
+     template_insertion_modes.shift()
+     reset_insertion_mode()
+     # Reprocess the EOF token under whatever mode the reset selected.
+     insertion_mode t
+   return
+ # 8.2.5.4.19 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
+ # Processes token `t` under the "after body" insertion mode. Returns nothing.
+ ins_mode_after_body = (t) ->
+   if is_space_tok t
+     ins_mode_in_body t
+     return
+   if t.type is TYPE_COMMENT
+     # The spec inserts the comment as the last child of the FIRST element
+     # pushed onto the stack of open elements (the html element). This file
+     # pops the current node with open_els.shift(), so index 0 is the current
+     # node and the first-pushed element is at the end of the array.
+     html_el = open_els[open_els.length - 1]
+     insert_comment t, [html_el, html_el.children.length]
+     return
+   if t.type is TYPE_DOCTYPE
+     parse_error()
+     return
+   if t.type is TYPE_START_TAG and t.name is 'html'
+     ins_mode_in_body t
+     return
+   if t.type is TYPE_END_TAG and t.name is 'html'
+     # fixfull fragment case
+     insertion_mode = ins_mode_after_after_body
+     return
+   if t.type is TYPE_EOF
+     stop_parsing()
+     return
+   # Anything else: parse error, switch back to "in body" and reprocess
+   parse_error()
+   insertion_mode = ins_mode_in_body
+   insertion_mode t
+   return
+ # 8.2.5.4.20 http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
+ # Processes token `t` under the "in frameset" insertion mode. Returns nothing.
+ ins_mode_in_frameset = (t) ->
+   if is_space_tok t
+     insert_character t
+     return
+   if t.type is TYPE_COMMENT
+     insert_comment t
+     return
+   if t.type is TYPE_DOCTYPE
+     parse_error()
+     return
+   if t.type is TYPE_START_TAG and t.name is 'html'
+     ins_mode_in_body t
+     return
+   if t.type is TYPE_START_TAG and t.name is 'frameset'
+     insert_html_element t
+     return
+   if t.type is TYPE_END_TAG and t.name is 'frameset'
+     # TODO ?correct for: "if the current node is the root html element"
+     if open_els.length is 1
+       parse_error()
+       return # fragment case
+     # Pop the current node (index 0 is the top of the open-element stack).
+     open_els.shift()
+     if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
+       insertion_mode = ins_mode_after_frameset
+     return
+   if t.type is TYPE_START_TAG and t.name is 'frame'
+     # Insert the element, pop it again right away, and acknowledge the
+     # token's self-closing flag.
+     insert_html_element t
+     open_els.shift()
+     t.acknowledge_self_closing()
+     return
+   # Was `TYPE_START TAG` (space instead of underscore).
+   if t.type is TYPE_START_TAG and t.name is 'noframes'
+     ins_mode_in_head t
+     return
+   if t.type is TYPE_EOF
+     # TODO ?correct for: "if the current node is not the root html element"
+     if open_els.length isnt 1
+       parse_error()
+     stop_parsing()
+     return
+   # Anything else
+   parse_error()
+   return
+ # 8.2.5.4.21 http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
+ # Processes token `t` under the "after frameset" insertion mode. Returns nothing.
+ ins_mode_after_frameset = (t) ->
+   if is_space_tok t
+     insert_character t
+     return
+   if t.type is TYPE_COMMENT
+     insert_comment t
+     return
+   if t.type is TYPE_DOCTYPE
+     parse_error()
+     return
+   if t.type is TYPE_START_TAG and t.name is 'html'
+     ins_mode_in_body t
+     return
+   if t.type is TYPE_END_TAG and t.name is 'html'
+     # Was `insert_mode = ...`, which only set an unrelated variable and left
+     # the parser in this mode; every other mode switch assigns insertion_mode.
+     insertion_mode = ins_mode_after_after_frameset
+     return
+   if t.type is TYPE_START_TAG and t.name is 'noframes'
+     ins_mode_in_head t
+     return
+   if t.type is TYPE_EOF
+     stop_parsing()
+     return
+   # Anything else
+   parse_error()
+   return
+ # 8.2.5.4.22 http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
+ # Processes token `t` under the "after after body" insertion mode. Returns nothing.
+ ins_mode_after_after_body = (t) ->
+   if t.type is TYPE_COMMENT
+     # Append the comment as the last child of the document node.
+     insert_comment t, [doc, doc.children.length]
+     return
+   if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+     ins_mode_in_body t
+     return
+   if t.type is TYPE_EOF
+     stop_parsing()
+     return
+   # Anything else: parse error, switch to "in body" and reprocess the token.
+   # The reprocess call was missing, so the token was silently dropped
+   # (ins_mode_after_body does reprocess in its equivalent branch).
+   parse_error()
+   insertion_mode = ins_mode_in_body
+   insertion_mode t
+   return
+ # 8.2.5.4.23 http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
+ # Processes token `t` under the "after after frameset" insertion mode.
+ # Exactly one branch runs per token; the function returns nothing.
+ ins_mode_after_after_frameset = (t) ->
+   if t.type is TYPE_COMMENT
+     # Append the comment as the last child of the document node.
+     insert_comment t, [doc, doc.children.length]
+   else if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+     ins_mode_in_body t
+   else if t.type is TYPE_EOF
+     stop_parsing()
+   else if t.type is TYPE_START_TAG and t.name is 'noframes'
+     ins_mode_in_head t
+   else
+     # Anything else is a parse error; the token is ignored.
+     parse_error()
+   return
temporary_buffer = null
pending_table_character_tokens = []
head_element_pointer = null
+ flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case)
# tokenizer initialization
tok_state = tok_state_data
t = tok_state()
if t?
insertion_mode t
+ # fixfull parse error if has self-closing flag, but it wasn't acknowledged
return doc.children
+test_results = passed: 0, failed: 0
# everything below is tests on the above
test_equals = (description, output, expected_output) ->
if output is expected_output
console.log " parse errs: #{JSON.stringify parse_errors}"
else
console.log " No parse errors"
+ test_results.failed += 1
else
- console.log "passed \"#{args.name}\""
+ #console.log "passed \"#{args.name}\""
+ test_results.passed += 1
+# Prints the running pass/fail totals accumulated in test_results.
+test_summary = ->
+  {passed, failed} = test_results
+  console.log "Tests passed: #{passed}"
+  console.log "Tests Failed: #{failed}"
test_parser name: "empty", \
html: "",
#test_parser name: "html5lib tables 17", \
# html: '<table><tr><td><svg><desc><td>',
# expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[svg:"svg",{},[svg:"desc",{},[]]],tag:"td",{},[]]]]'
+test_summary()