From 88b7a1d811b9abbd26f28f3f9c6090b3c82f9b92 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Sat, 19 Dec 2015 09:39:46 -0500 Subject: [PATCH] fix implied_end_tags and

--- parse-html.coffee | 119 ++++++++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 65 deletions(-) diff --git a/parse-html.coffee b/parse-html.coffee index c71567d..25fa20d 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -553,7 +553,7 @@ parse_html = (txt, parse_error_cb = null) -> tree_insert_element el afe[i] = el break if i is 0 - i -= 1 + i -= 1 # Advance # http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm # adoption agency algorithm @@ -562,6 +562,10 @@ parse_html = (txt, parse_error_cb = null) -> # http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p # http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements adoption_agency = (subject) -> + debug_log "adoption_agency()" + debug_log "tree: #{serialize_els tree.children, false, true}" + debug_log "open_els: #{serialize_els open_els, true, true}" + debug_log "afe: #{serialize_els afe, true, true}" if open_els[0].name is subject el = open_els[0] open_els.shift() @@ -570,6 +574,7 @@ parse_html = (txt, parse_error_cb = null) -> if t is el afe.splice i, 1 break + debug_log "aaa: starting off with subject on top of stack, exiting" return outer = 0 loop @@ -590,6 +595,7 @@ parse_html = (txt, parse_error_cb = null) -> # If there is no such element, then abort these steps and instead # act as described in the "any other end tag" entry above. if fe is null + debug_log "aaa: fe not found in afe" in_body_any_other_end_tag subject return # 6. If formatting element is not in the stack of open elements, @@ -601,6 +607,7 @@ parse_html = (txt, parse_error_cb = null) -> in_open_els = true break unless in_open_els + debug_log "aaa: fe not found in open_els" parse_error() # "remove it from the list" must mean afe, since it's not in open_els afe.splice fe_of_afe, 1 @@ -609,6 +616,7 @@ parse_html = (txt, parse_error_cb = null) -> # the element is not in scope, then this is a parse error; abort # these steps. unless el_is_in_scope fe + debug_log "aaa: fe not in scope" parse_error() return # 8. If formatting element is not the current node, this is a parse @@ -634,6 +642,7 @@ parse_html = (txt, parse_error_cb = null) -> # formatting element from the list of active formatting elements, # and finally abort these steps. if fb is null + debug_log "aaa: no fb" loop t = open_els.shift() if t is fe @@ -666,8 +675,8 @@ parse_html = (txt, parse_error_cb = null) -> break node = node_next ? node_above debug_log "inner loop #{inner}" - debug_log "open_els: #{serialize_els open_els, true, true}" debug_log "tree: #{serialize_els tree.children, false, true}" + debug_log "open_els: #{serialize_els open_els, true, true}" debug_log "afe: #{serialize_els afe, true, true}" debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}" debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}" @@ -845,8 +854,8 @@ parse_html = (txt, parse_error_cb = null) -> if open_els[0].name isnt 'p' parse_error() while open_els.length > 1 # just in case - t = open_els.shift() - if t.name is 'p' + el = open_els.shift() + if el.name is 'p' return close_p_if_in_button_scope = -> if is_in_button_scope 'p' @@ -855,6 +864,7 @@ parse_html = (txt, parse_error_cb = null) -> # http://www.w3.org/TR/html5/syntax.html#insert-a-character tree_insert_text = (t) -> dest = adjusted_insertion_location() + # fixfull check for Document node if dest[1] > 0 prev = dest[0].children[dest[1] - 1] if prev.type is TYPE_TEXT @@ -1019,7 +1029,7 @@ parse_html = (txt, parse_error_cb = null) -> # 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags # http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags generate_implied_end_tags = (except = null) -> - while end_tag_implied[open_els[0]] and open_els[0].name isnt except + while end_tag_implied[open_els[0].name] and open_els[0].name isnt except open_els.shift() # 8.2.5.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody @@ -1106,11 +1116,11 @@ parse_html = (txt, parse_error_cb = null) -> if el is found open_els.splice i, 1 reconstruct_active_formatting_elements() - el = tree_insert_element t + el = insert_html_element t afe.unshift el when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u' reconstruct_active_formatting_elements() - el = tree_insert_element t + el = insert_html_element t afe.unshift el when 'table' # fixfull quirksmode thing @@ -1120,7 +1130,7 @@ parse_html = (txt, parse_error_cb = null) -> # TODO lots more to implement here else # any other start tag reconstruct_active_formatting_elements() - tree_insert_element t + insert_html_element t when TYPE_EOF ok_tags = { dd: true, dt: true, li: true, p: true, tbody: true, td: true, @@ -1160,7 +1170,7 @@ parse_html = (txt, parse_error_cb = null) -> unless is_in_button_scope 'p' parse_error() insert_html_element new_open_tag 'p' - close_p_element() + close_p_element() # TODO lots more close tags to implement here when 'a', 'b', 'big', 'code', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u' adoption_agency t.name @@ -1844,119 +1854,98 @@ test_parser = (args) -> prev_node_id = 0 # reset counter parsed = parse_html args.html, errors_cb serialized = serialize_els parsed, false, false - if serialized isnt args.expected # or parse_errors.length isnt args.errors + if serialized isnt args.expected debug_log_each (str) -> console.log str console.log "FAILED: \"#{args.name}\"" - else - console.log "passed \"#{args.name}\"" - if serialized isnt args.expected console.log " Input: #{args.html}" console.log " Correct: #{args.expected}" console.log " Output: #{serialized}" - if parse_errors.length isnt args.errors - console.log " Expected #{args.errors} parse errors, but got these: #{JSON.stringify parse_errors}" + if parse_errors.length > 0 + console.log " parse errs: #{JSON.stringify parse_errors}" + else + console.log " No parse errors" + else + console.log "passed \"#{args.name}\"" test_parser name: "empty", \ html: "", - expected: '', - errors: 0 + expected: '' test_parser name: "just text", \ html: "abc", - expected: 'text:"abc"', - errors: 0 + expected: 'text:"abc"' test_parser name: "named entity", \ html: "a&1234", - expected: 'text:"a&1234"', - errors: 0 + expected: 'text:"a&1234"' test_parser name: "broken named character references", \ html: "1&2&&3&aabbcc;", - expected: 'text:"1&2&&3&aabbcc;"', - errors: 2 + expected: 'text:"1&2&&3&aabbcc;"' test_parser name: "numbered entity overrides", \ html: "1€€ ƒ", - expected: 'text:"1€€ ƒ"', - errors: 0 + expected: 'text:"1€€ ƒ"' test_parser name: "open tag", \ html: "foobar", - expected: 'text:"foo",tag:"span",{},[text:"bar"]', - errors: 1 # no close tag + expected: 'text:"foo",tag:"span",{},[text:"bar"]' test_parser name: "open tag with attributes", \ html: "foobar", - expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]', - errors: 1 # no close tag + expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]' test_parser name: "open tag with attributes of various quotings", \ html: "foobar", - expected: 'text:"foo",tag:"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\"","autofocus":""},[text:"bar"]', - errors: 1 # no close tag + expected: 'text:"foo",tag:"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\"","autofocus":""},[text:"bar"]' test_parser name: "attribute entity exceptions dq", \ html: "foobar", - expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]', - errors: 2 # no close tag, &= in attr + expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]' test_parser name: "attribute entity exceptions sq", \ html: "foobar", - expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]', - errors: 2 # no close tag, &= in attr + expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]' test_parser name: "attribute entity exceptions uq", \ html: "foobar", - expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]', - errors: 2 # no close tag, &= in attr + expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]' test_parser name: "matching closing tags", \ html: "foohi
1
foo
2
bar", - expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"', - errors: 0 + expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"' test_parser name: "missing closing tag inside", \ html: "foo
barbaz
qux", - expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"', - errors: 1 # close tag mismatch + expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"' test_parser name: "mis-matched closing tags", \ html: "12
3456
78", - expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]', - errors: 2 # misplaced
, no
at the end + expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]' test_parser name: "mis-matched formatting elements", \ html: "1234567890", - expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"', - errors: 1 # no idea how many their should be + expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"' test_parser name: "8.2.8.1 Misnested tags: ", \ html: '

12345

', - expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]', - errors: 1 + expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]' test_parser name: "8.2.8.2 Misnested tags:

", \ html: '1

23

', - expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]', - errors: 1 + expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]' test_parser name: "crazy formatting elements test", \ html: "second
first
", # chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]' # firefox does this: - expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"', - errors: 6 # no idea how many there should be + expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"' # tests from https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/adoption01.dat test_parser name: "html5lib aaa 1", \ html: '

', - expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]', - errors: 2 + expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]' test_parser name: "html5lib aaa 2", \ html: '1

23

', - expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]', - errors: 2 + expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]' test_parser name: "html5lib aaa 3", \ html: '1', - expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]', - errors: 2 + expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]' test_parser name: "html5lib aaa 4", \ html: '123', - expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]', - errors: 2 + expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]' test_parser name: "html5lib aaa 5 (two divs deep)", \ html: '1
2
34
5
', - expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]', - errors: 3 + expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]' test_parser name: "html5lib aaa 6 (foster parenting)", \ html: '1

23

', - expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]', - errors: 10 + expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]' +test_parser name: "html5lib aaa 10 (formatting, nesting, attrs, aaa)", \ + html: '

123

45', + expected: 'tag:"p",{},[text:"1",tag:"s",{"id":"A"},[text:"2",tag:"b",{"id":"B"},[text:"3"]]],tag:"s",{"id":"A"},[tag:"b",{"id":"B"},[text:"4"]],tag:"b",{"id":"B"},[text:"5"]' test_parser name: "html5lib aaa 11 (table with foster parenting, formatting el and td)", \ html: '
13
2
', - expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]', - errors: 10 + expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]' -- 1.7.10.4