From 9ea62d134e20c523cd312144fb5700634aeacb44 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Sun, 20 Dec 2015 11:20:47 -0500 Subject: [PATCH] finish parsing tables, pass html5lib's table tests --- parse-html.coffee | 176 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 163 insertions(+), 13 deletions(-) diff --git a/parse-html.coffee b/parse-html.coffee index c63a42d..c16f690 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -95,6 +95,8 @@ class Node attrs[k] = v for k, v of @attrs return new Node @type, name: @name, text: @text, attrs: attrs, namespace: @namespace, id: @id acknowledge_self_closing: -> + @flag 'did_self_close', true + flag: -> # fixfull serialize: (shallow = false, show_ids = false) -> # for unit tests ret = '' @@ -356,6 +358,7 @@ parse_html = (txt, parse_error_cb = null) -> flag_foster_parenting = null form_element_pointer = null temporary_buffer = null + pending_table_character_tokens = null parse_error = -> if parse_error_cb? @@ -1053,7 +1056,7 @@ parse_html = (txt, parse_error_cb = null) -> # http://www.w3.org/TR/html5/syntax.html#insert-a-comment # position should be [node, index_within_children] - tree_insert_comment = (t, position = null) -> + insert_comment = (t, position = null) -> position ?= adjusted_insertion_location() position[0].children.splice position[1], 0, t @@ -1086,7 +1089,7 @@ parse_html = (txt, parse_error_cb = null) -> insert_character t return if t.type is TYPE_COMMENT - tree_insert_comment t + insert_comment t return if t.type is TYPE_DOCTYPE parse_error() @@ -1187,7 +1190,7 @@ parse_html = (txt, parse_error_cb = null) -> insert_character t flag_frameset_ok = false when TYPE_COMMENT - tree_insert_comment t + insert_comment t when TYPE_DOCTYPE parse_error() when TYPE_START_TAG @@ -1399,7 +1402,7 @@ parse_html = (txt, parse_error_cb = null) -> else ins_mode_in_table_else t when TYPE_COMMENT - tree_insert_comment t + insert_comment t when TYPE_DOCTYPE parse_error() when TYPE_START_TAG @@ 
-1479,22 +1482,116 @@ parse_html = (txt, parse_error_cb = null) ->
 			ins_mode_in_table_else t
 
 
-	# 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext
+	# 8.2.5.4.10 http://www.w3.org/TR/html5/syntax.html#parsing-main-intabletext (queue text tokens; flush the queue when any other token arrives)
 	ins_mode_in_table_text = (t) ->
-		switch t.type
-			when TYPE_TEXT
-				switch t.text
-					when "\u0000"
-						parse_error()
-						return
-		console.log "unimplemented ins_mode_in_table_text"
-		# FIXME CONTINUE
+		if t.type is TYPE_TEXT and t.text is "\u0000"
+			# huh? I thought the tokenizer didn't emit these
+			parse_error()
+			return
+		if t.type is TYPE_TEXT
+			pending_table_character_tokens.push t # hold text until a non-text token shows up
+			return
+		# Anything else: flush the queued text, then reprocess t in the original mode
+		all_space = true
+		for old in pending_table_character_tokens
+			unless space_chars.indexOf(old.text) > -1
+				all_space = false
+				break
+		if all_space
+			for old in pending_table_character_tokens
+				insert_character old
+		else
+			for old in pending_table_character_tokens
+				ins_mode_in_table_else old # queue holds non-space text: hand every queued token to ins_mode_in_table_else
+		pending_table_character_tokens = [] # FIXME test (spec doesn't say this)
+		insertion_mode = original_insertion_mode
+		insertion_mode t
+
+	# 8.2.5.4.11 http://www.w3.org/TR/html5/syntax.html#parsing-main-incaption (close the caption on </caption> or table-structure tags; otherwise defer to ins_mode_in_body)
+	ins_mode_in_caption = (t) ->
+		if t.type is TYPE_END_TAG and t.name is 'caption'
+			if is_in_table_scope 'caption'
+				generate_implied_end_tags()
+				if open_els[0].name isnt 'caption'
+					parse_error()
+				loop # remove elements from the front of open_els until the caption itself has been removed
+					el = open_els.shift()
+					if el.name is 'caption'
+						break
+				clear_afe_to_marker()
+				insertion_mode = ins_mode_in_table # same handler name ins_mode_in_column_group switches to
+			else
+				parse_error()
+				# fragment case
+			return
+		if (t.type is TYPE_START_TAG and (t.name is 'caption' or t.name is 'col' or t.name is 'colgroup' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')) or t.type is TYPE_END_TAG and t.name is 'table'
+			parse_error()
+			if is_in_table_scope 'caption'
+				loop # same removal as above, but without generating implied end tags first
+					el = open_els.shift()
+					if el.name is 'caption'
+						break
+				clear_afe_to_marker()
+				insertion_mode = ins_mode_in_table
+				insertion_mode t # reprocess the same token under the new mode
+			# else fragment case
+			return
+		if t.type is TYPE_END_TAG and (t.name is 'body' or t.name is 'col' or t.name is 'colgroup' or t.name is 'html' or t.name is 'tbody' or t.name is 'td' or t.name is 'tfoot' or t.name is 'th' or t.name is 'thead' or t.name is 'tr')
+			parse_error()
+			return
+		# Anything else
+		ins_mode_in_body t
+
+	# 8.2.5.4.12 http://www.w3.org/TR/html5/syntax.html#parsing-main-incolgroup (handle <col>, </colgroup>, whitespace, comments; other tokens close the colgroup and are reprocessed)
+	ins_mode_in_column_group = (t) ->
+		if t.type is TYPE_TEXT and space_chars.indexOf(t.text) > -1
+			insert_character t
+			return
+		if t.type is TYPE_COMMENT
+			insert_comment t
+			return
+		if t.type is TYPE_DOCTYPE
+			parse_error()
+			return
+		if t.type is TYPE_START_TAG and t.name is 'html'
+			ins_mode_in_body t
+			return
+		if t.type is TYPE_START_TAG and t.name is 'col'
+			el = insert_html_element t
+			open_els.shift() # the col element is removed from the stack right after insertion
+			el.acknowledge_self_closing()
+			return
+		if t.type is TYPE_END_TAG and t.name is 'colgroup'
+			if open_els[0].name is 'colgroup'
+				open_els.shift() # shift the stack itself, as the "Anything else" branch below does
+				insertion_mode = ins_mode_in_table
+			else
+				parse_error()
+			return
+		if t.type is TYPE_END_TAG and t.name is 'col'
+			parse_error()
+			return
+		if (t.type is TYPE_START_TAG or t.type is TYPE_END_TAG) and t.name is 'template'
+			ins_mode_in_head t
+			return
+		if t.type is TYPE_EOF
+			ins_mode_in_body t
+			return
+		# Anything else
+		if open_els[0].name isnt 'colgroup'
+			parse_error()
+			return
+		open_els.shift()
+		insertion_mode = ins_mode_in_table
+		insertion_mode t # reprocess the same token under the new mode
+		return
 
 	# 8.2.5.4.13 http://www.w3.org/TR/html5/syntax.html#parsing-main-intbody
 	ins_mode_in_table_body = (t) ->
 		if t.type is TYPE_START_TAG and t.name is 'tr'
 			clear_stack_to_table_body_context()
 			insert_html_element t
+			insertion_mode = ins_mode_in_row
 			return
 		if t.type is TYPE_START_TAG and (t.name is 'th' or t.name is 'td')
 			parse_error()
@@ -2210,6 +2307,7 @@ parse_html = (txt, parse_error_cb = null) ->
 	flag_foster_parenting = false
 	form_element_pointer = null
 	temporary_buffer = null
+	pending_table_character_tokens = []
 
 	# tokenizer initialization
 	tok_state = 
tok_state_data @@ -2380,3 +2478,55 @@ test_parser name: "html5lib aaa02 1", \ test_parser name: "html5lib aaa02 2", \ html: '
', expected: 'tag:"a",{},[],tag:"div",{},[tag:"a",{},[tag:"style",{},[]],tag:"address",{},[tag:"a",{},[],tag:"a",{},[]]]' +test_parser name: "html5lib tables 1", \ + html: '
', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"th",{},[]]]]' +test_parser name: "html5lib tables 2", \ + html: '
', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]]]]' +test_parser name: "html5lib tables 3", \ + html: "", + expected: 'tag:"table",{},[tag:"colgroup",{},[tag:"col",{"foo":"bar"},[]]]' +test_parser name: "html5lib tables 4", \ + html: '
foo', + expected: 'text:"foo",tag:"table",{},[tag:"colgroup",{},[]]' +test_parser name: "html5lib tables 5", \ + html: '

foo', + expected: 'tag:"table",{},[],tag:"p",{},[text:"foo"]' +test_parser name: "html5lib tables 6", \ + html: '
', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]]]]' +test_parser name: "html5lib tables 7", \ + html: '
', + expected: 'tag:"select",{},[tag:"option",{},[text:"3"]],tag:"table",{},[]' +test_parser name: "html5lib tables 8", \ + html: '
', + expected: 'tag:"select",{},[],tag:"table",{},[],tag:"table",{},[]' +test_parser name: "html5lib tables 9", \ + html: '
', + expected: 'tag:"select",{},[],tag:"table",{},[]' +test_parser name: "html5lib tables 10", \ + html: '
B
', + expected: 'tag:"select",{},[tag:"option",{},[text:"A"]],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"B"]]]]' +test_parser name: "html5lib tables 11", \ + html: '
foo', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"foo"]]]]' +test_parser name: "html5lib tables 12", \ + html: '
A
B', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"A"]]]],text:"B"' +test_parser name: "html5lib tables 13", \ + html: '
', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[]],tag:"caption",{},[]]' +test_parser name: "html5lib tables 14", \ + html: '
foo', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"foo"]]]]' +test_parser name: "html5lib tables 15", \ + html: '', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]],tag:"tr",{},[]]]' +test_parser name: "html5lib tables 16", \ + html: '
', + expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[tag:"button",{},[]],tag:"td",{},[]]]]' +# TODO implement svg parsing +#test_parser name: "html5lib tables 17", \ +# html: '
', +# expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[svg:"svg",{},[svg:"desc",{},[]]],tag:"td",{},[]]]]' -- 1.7.10.4