From: Jason Woofenden Date: Mon, 21 Dec 2015 04:13:18 +0000 (-0500) Subject: rest of insertion modes (untested) X-Git-Url: https://jasonwoof.com/gitweb/?a=commitdiff_plain;h=ffc91832d8b2c91ddd4407cf4036b6fc0eeca928;p=peach-html5-editor.git rest of insertion modes (untested) --- diff --git a/parse-html.coffee b/parse-html.coffee index d35dd88..c6ed9a5 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -85,6 +85,7 @@ class Node @children = args.children ? [] @namespace = args.namespace ? NS_HTML @parent = args.parent ? null + @token = args.token ? null if args.id? @id = "#{args.id}+" else @@ -93,9 +94,12 @@ class Node # WARNING this doesn't work right on open tags that are still being parsed attrs = {} attrs[k] = v for k, v of @attrs - return new Node @type, name: @name, text: @text, attrs: attrs, namespace: @namespace, id: @id + return new Node @type, name: @name, text: @text, attrs: attrs, namespace: @namespace, id: @id, token: @token acknowledge_self_closing: -> - @flag 'did_self_close', true + if @token? + @token.flag 'did_self_close' + else + @flag 'did_self_close', true flag: -> # fixfull serialize: (shallow = false, show_ids = false) -> # for unit tests @@ -362,6 +366,10 @@ parse_html = (txt, parse_error_cb = null) -> temporary_buffer = null pending_table_character_tokens = null head_element_pointer = null + flag_fragment_parsing = null + + stop_parsing = -> + flag_parsing = false parse_error = -> if parse_error_cb? @@ -1051,7 +1059,7 @@ parse_html = (txt, parse_error_cb = null) -> while t.attrs_a.length a = t.attrs_a.pop() attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs - el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs + el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs, token: t # TODO 2. 
If the newly created element has an xmlns attribute in the # XMLNS namespace whose value is not exactly the same as the element's @@ -1230,12 +1238,12 @@ parse_html = (txt, parse_error_cb = null) -> if t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link') el = insert_html_element t open_els.shift() - el.acknowledge_self_closing() + t.acknowledge_self_closing() return if t.type is TYPE_START_TAG and t.name is 'meta' el = insert_html_element t open_els.shift() - el.acknowledge_self_closing() + t.acknowledge_self_closing() # fixfull encoding stuff return if t.type is TYPE_START_TAG and t.name is 'title' @@ -1458,7 +1466,7 @@ parse_html = (txt, parse_error_cb = null) -> parse_error() break # TODO stack of template insertion modes thing - flag_parsing = false # stop parsing + stop_parsing() when TYPE_END_TAG switch t.name when 'body' @@ -1594,7 +1602,7 @@ parse_html = (txt, parse_error_cb = null) -> parse_error() el = insert_html_element t open_els.shift() - el.acknowledge_self_closing() + t.acknowledge_self_closing() when 'form' parse_error() if form_element_pointer? @@ -1705,7 +1713,7 @@ parse_html = (txt, parse_error_cb = null) -> if t.type is TYPE_START_TAG and t.name is 'col' el = insert_html_element t open_els.shift() - el.acknowledge_self_closing() + t.acknowledge_self_closing() return if t.type is TYPE_END_TAG and t.name is 'colgroup' if open_els[0].name is 'colgroup' @@ -1982,14 +1990,191 @@ parse_html = (txt, parse_error_cb = null) -> ins_mode_in_select t return - # CONTINUE more insertion modes! 
-
-
+	# 8.2.5.4.18 "in template" mode: forwards the token, or swaps in the mode it implies and reprocesses it. http://www.w3.org/TR/html5/syntax.html#parsing-main-intemplate
+	ins_mode_in_template = (t) -> # t: the token to process
+		if t.type is TYPE_TEXT or t.type is TYPE_COMMENT or t.type is TYPE_DOCTYPE
+			ins_mode_in_body t # handled by the "in body" rules
+			return
+		if (t.type is TYPE_START_TAG and (t.name is 'base' or t.name is 'basefont' or t.name is 'bgsound' or t.name is 'link' or t.name is 'meta' or t.name is 'noframes' or t.name is 'script' or t.name is 'style' or t.name is 'template' or t.name is 'title')) or (t.type is TYPE_END_TAG and t.name is 'template')
+			ins_mode_in_head t # handled by the "in head" rules
+			return
+		if t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'tfoot' or t.name is 'thead')
+			template_insertion_modes.shift() # replace the current template insertion mode...
+			template_insertion_modes.unshift ins_mode_in_table # ...with "in table"
+			insertion_mode = ins_mode_in_table
+			insertion_mode t # reprocess the token in the new mode
+			return
+		if t.type is TYPE_START_TAG and t.name is 'col'
+			template_insertion_modes.shift()
+			template_insertion_modes.unshift ins_mode_in_column_group
+			insertion_mode = ins_mode_in_column_group
+			insertion_mode t # reprocess the token in the new mode
+			return
+		if t.type is TYPE_START_TAG and t.name is 'tr'
+			template_insertion_modes.shift()
+			template_insertion_modes.unshift ins_mode_in_table_body
+			insertion_mode = ins_mode_in_table_body
+			insertion_mode t # reprocess the token in the new mode
+			return
+		if t.type is TYPE_START_TAG and (t.name is 'td' or t.name is 'th')
+			template_insertion_modes.shift()
+			template_insertion_modes.unshift ins_mode_in_row
+			insertion_mode = ins_mode_in_row
+			insertion_mode t # reprocess the token in the new mode
+			return
+		if t.type is TYPE_START_TAG # any other start tag
+			template_insertion_modes.shift()
+			template_insertion_modes.unshift ins_mode_in_body
+			insertion_mode = ins_mode_in_body
+			insertion_mode t # reprocess the token in the new mode
+			return
+		if t.type is TYPE_END_TAG # any other end tag: parse error, token ignored
+			parse_error()
+			return
+		if t.type is TYPE_EOF # the same end-of-file constant the other insertion modes test
+			unless template_tag_is_open()
+				stop_parsing()
+				return
+			parse_error()
+			loop # pop open elements up to and including the template element
+				el = open_els.shift()
+				if el.name is 'template' # fixfull check namespace
+					break
+			clear_afe_to_marker()
+			template_insertion_modes.shift()
+			reset_insertion_mode()
+			insertion_mode t # reprocess the EOF token in the mode chosen by reset_insertion_mode
+	# 8.2.5.4.19 "after body" mode: only whitespace, comments and <html>/</html> are expected here. http://www.w3.org/TR/html5/syntax.html#parsing-main-afterbody
+	ins_mode_after_body = (t) -> # t: the token to process
+		if is_space_tok t
+			ins_mode_in_body t
+			return
+		if t.type is TYPE_COMMENT
+			insert_comment t, [open_els[open_els.length - 1], open_els[open_els.length - 1].children.length] # last child of the first element pushed (the html element); index 0 is the current node
+			return
+		if t.type is TYPE_DOCTYPE
+			parse_error()
+			return
+		if t.type is TYPE_START_TAG and t.name is 'html'
+			ins_mode_in_body t
+			return
+		if t.type is TYPE_END_TAG and t.name is 'html'
+			# fixfull fragment case
+			insertion_mode = ins_mode_after_after_body
+			return
+		if t.type is TYPE_EOF
+			stop_parsing()
+			return
+		# Anything else
+		parse_error()
+		insertion_mode = ins_mode_in_body
+		insertion_mode t # reprocess the token under the "in body" rules
+	# 8.2.5.4.20 "in frameset" mode: handles <frameset>/<frame>/<noframes>, anything unexpected is a parse error. http://www.w3.org/TR/html5/syntax.html#parsing-main-inframeset
+	ins_mode_in_frameset = (t) -> # t: the token to process
+		if is_space_tok t
+			insert_character t
+			return
+		if t.type is TYPE_COMMENT
+			insert_comment t
+			return
+		if t.type is TYPE_DOCTYPE
+			parse_error()
+			return
+		if t.type is TYPE_START_TAG and t.name is 'html'
+			ins_mode_in_body t
+			return
+		if t.type is TYPE_START_TAG and t.name is 'frameset'
+			insert_html_element t
+			return
+		if t.type is TYPE_END_TAG and t.name is 'frameset'
+			# TODO ?correct for: "if the current node is the root html element"
+			if open_els.length is 1
+				parse_error()
+				return # fragment case
+			open_els.shift() # pop the current node
+			if flag_fragment_parsing is false and open_els[0].name isnt 'frameset'
+				insertion_mode = ins_mode_after_frameset
+			return
+		if t.type is TYPE_START_TAG and t.name is 'frame'
+			insert_html_element t
+			open_els.shift() # frame has no content: pop it immediately
+			t.acknowledge_self_closing()
+			return
+		if t.type is TYPE_START_TAG and t.name is 'noframes'
+			ins_mode_in_head t
+			return
+		if t.type is TYPE_EOF
+			# TODO ?correct for: "if the current node is not the root html element"
+			if open_els.length isnt 1
+				parse_error()
+			stop_parsing()
+			return
+		# Anything else
+		parse_error()
+		return
+	# 8.2.5.4.21 "after frameset" mode: whitespace, comments, <html>/</html> and <noframes> only. http://www.w3.org/TR/html5/syntax.html#parsing-main-afterframeset
+	ins_mode_after_frameset = (t) -> # t: the token to process
+		if is_space_tok t
+			insert_character t
+			return
+		if t.type is TYPE_COMMENT
+			insert_comment t
+			return
+		if t.type is TYPE_DOCTYPE
+			parse_error()
+			return
+		if t.type is TYPE_START_TAG and t.name is 'html'
+			ins_mode_in_body t
+			return
+		if t.type is TYPE_END_TAG and t.name is 'html'
+			insertion_mode = ins_mode_after_after_frameset # must assign the shared insertion_mode variable for the switch to take effect
+			return
+		if t.type is TYPE_START_TAG and t.name is 'noframes'
+			ins_mode_in_head t
+			return
+		if t.type is TYPE_EOF
+			stop_parsing()
+			return
+		# Anything else
+		parse_error()
+		return
+	# 8.2.5.4.22 "after after body" mode: comments are appended to doc itself. http://www.w3.org/TR/html5/syntax.html#the-after-after-body-insertion-mode
+	ins_mode_after_after_body = (t) -> # t: the token to process
+		if t.type is TYPE_COMMENT
+			insert_comment t, [doc, doc.children.length] # last child of doc
+			return
+		if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+			ins_mode_in_body t
+			return
+		if t.type is TYPE_EOF
+			stop_parsing()
+			return
+		# Anything else
+		parse_error()
+		insertion_mode = ins_mode_in_body
+		insertion_mode t # reprocess the token under the "in body" rules, as ins_mode_after_body does
+	# 8.2.5.4.23 "after after frameset" mode: comments are appended to doc itself. http://www.w3.org/TR/html5/syntax.html#the-after-after-frameset-insertion-mode
+	ins_mode_after_after_frameset = (t) -> # t: the token to process
+		if t.type is TYPE_COMMENT
+			insert_comment t, [doc, doc.children.length] # last child of doc
+			return
+		if t.type is TYPE_DOCTYPE or is_space_tok(t) or (t.type is TYPE_START_TAG and t.name is 'html')
+			ins_mode_in_body t
+			return
+		if t.type is TYPE_EOF
+			stop_parsing()
+			return
+		if t.type is TYPE_START_TAG and t.name is 'noframes'
+			ins_mode_in_head t
+			return
+		# Anything else
+		parse_error()
+		return
@@ -2578,6 +2763,7 @@ parse_html = (txt, parse_error_cb = null) -> temporary_buffer = null pending_table_character_tokens = [] head_element_pointer = null + flag_fragment_parsing = false # parser originally created as part of the html fragment parsing algorithm (fragment case) # tokenizer initialization tok_state = tok_state_data @@ -2587,8 +2773,10 @@ parse_html = (txt, parse_error_cb = null) -> t = tok_state() if t?
insertion_mode t + # fixfull parse error if has self-closing flag, but it wasn't acknowledged return doc.children +test_results = passed: 0, failed: 0 # everything below is tests on the above test_equals = (description, output, expected_output) -> if output is expected_output @@ -2625,8 +2813,13 @@ test_parser = (args) -> console.log " parse errs: #{JSON.stringify parse_errors}" else console.log " No parse errors" + test_results.failed += 1 else - console.log "passed \"#{args.name}\"" + #console.log "passed \"#{args.name}\"" + test_results.passed += 1 +test_summary = -> + console.log "Tests passed: #{test_results.passed}" + console.log "Tests Failed: #{test_results.failed}" test_parser name: "empty", \ html: "", @@ -2801,3 +2994,4 @@ test_parser name: "html5lib tables 16", \ #test_parser name: "html5lib tables 17", \ # html: '
', # expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[svg:"svg",{},[svg:"desc",{},[]]],tag:"td",{},[]]]]' +test_summary()