From: Jason Woofenden
Date: Sat, 19 Dec 2015 14:39:46 +0000 (-0500)
Subject: fix implied_end_tags and
X-Git-Url: https://jasonwoof.com/gitweb/?a=commitdiff_plain;ds=inline;h=88b7a1d811b9abbd26f28f3f9c6090b3c82f9b92;p=peach-html5-editor.git
fix implied_end_tags and
---
diff --git a/parse-html.coffee b/parse-html.coffee
index c71567d..25fa20d 100644
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -553,7 +553,7 @@ parse_html = (txt, parse_error_cb = null) ->
tree_insert_element el
afe[i] = el
break if i is 0
- i -= 1
+ i -= 1 # Advance
# http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
# adoption agency algorithm
@@ -562,6 +562,10 @@ parse_html = (txt, parse_error_cb = null) ->
# http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
# http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
adoption_agency = (subject) ->
+ debug_log "adoption_agency()"
+ debug_log "tree: #{serialize_els tree.children, false, true}"
+ debug_log "open_els: #{serialize_els open_els, true, true}"
+ debug_log "afe: #{serialize_els afe, true, true}"
if open_els[0].name is subject
el = open_els[0]
open_els.shift()
@@ -570,6 +574,7 @@ parse_html = (txt, parse_error_cb = null) ->
if t is el
afe.splice i, 1
break
+ debug_log "aaa: starting off with subject on top of stack, exiting"
return
outer = 0
loop
@@ -590,6 +595,7 @@ parse_html = (txt, parse_error_cb = null) ->
# If there is no such element, then abort these steps and instead
# act as described in the "any other end tag" entry above.
if fe is null
+ debug_log "aaa: fe not found in afe"
in_body_any_other_end_tag subject
return
# 6. If formatting element is not in the stack of open elements,
@@ -601,6 +607,7 @@ parse_html = (txt, parse_error_cb = null) ->
in_open_els = true
break
unless in_open_els
+ debug_log "aaa: fe not found in open_els"
parse_error()
# "remove it from the list" must mean afe, since it's not in open_els
afe.splice fe_of_afe, 1
@@ -609,6 +616,7 @@ parse_html = (txt, parse_error_cb = null) ->
# the element is not in scope, then this is a parse error; abort
# these steps.
unless el_is_in_scope fe
+ debug_log "aaa: fe not in scope"
parse_error()
return
# 8. If formatting element is not the current node, this is a parse
@@ -634,6 +642,7 @@ parse_html = (txt, parse_error_cb = null) ->
# formatting element from the list of active formatting elements,
# and finally abort these steps.
if fb is null
+ debug_log "aaa: no fb"
loop
t = open_els.shift()
if t is fe
@@ -666,8 +675,8 @@ parse_html = (txt, parse_error_cb = null) ->
break
node = node_next ? node_above
debug_log "inner loop #{inner}"
- debug_log "open_els: #{serialize_els open_els, true, true}"
debug_log "tree: #{serialize_els tree.children, false, true}"
+ debug_log "open_els: #{serialize_els open_els, true, true}"
debug_log "afe: #{serialize_els afe, true, true}"
debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
@@ -845,8 +854,8 @@ parse_html = (txt, parse_error_cb = null) ->
if open_els[0].name isnt 'p'
parse_error()
while open_els.length > 1 # just in case
- t = open_els.shift()
- if t.name is 'p'
+ el = open_els.shift()
+ if el.name is 'p'
return
close_p_if_in_button_scope = ->
if is_in_button_scope 'p'
@@ -855,6 +864,7 @@ parse_html = (txt, parse_error_cb = null) ->
# http://www.w3.org/TR/html5/syntax.html#insert-a-character
tree_insert_text = (t) ->
dest = adjusted_insertion_location()
+ # fixfull check for Document node
if dest[1] > 0
prev = dest[0].children[dest[1] - 1]
if prev.type is TYPE_TEXT
@@ -1019,7 +1029,7 @@ parse_html = (txt, parse_error_cb = null) ->
# 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
# http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
generate_implied_end_tags = (except = null) ->
- while end_tag_implied[open_els[0]] and open_els[0].name isnt except
+ while end_tag_implied[open_els[0].name] and open_els[0].name isnt except
open_els.shift()
# 8.2.5.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
@@ -1106,11 +1116,11 @@ parse_html = (txt, parse_error_cb = null) ->
if el is found
open_els.splice i, 1
reconstruct_active_formatting_elements()
- el = tree_insert_element t
+ el = insert_html_element t
afe.unshift el
when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u'
reconstruct_active_formatting_elements()
- el = tree_insert_element t
+ el = insert_html_element t
afe.unshift el
when 'table'
# fixfull quirksmode thing
@@ -1120,7 +1130,7 @@ parse_html = (txt, parse_error_cb = null) ->
# TODO lots more to implement here
else # any other start tag
reconstruct_active_formatting_elements()
- tree_insert_element t
+ insert_html_element t
when TYPE_EOF
ok_tags = {
dd: true, dt: true, li: true, p: true, tbody: true, td: true,
@@ -1160,7 +1170,7 @@ parse_html = (txt, parse_error_cb = null) ->
unless is_in_button_scope 'p'
parse_error()
insert_html_element new_open_tag 'p'
- close_p_element()
+ close_p_element()
# TODO lots more close tags to implement here
when 'a', 'b', 'big', 'code', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u'
adoption_agency t.name
@@ -1844,119 +1854,98 @@ test_parser = (args) ->
prev_node_id = 0 # reset counter
parsed = parse_html args.html, errors_cb
serialized = serialize_els parsed, false, false
- if serialized isnt args.expected # or parse_errors.length isnt args.errors
+ if serialized isnt args.expected
debug_log_each (str) ->
console.log str
console.log "FAILED: \"#{args.name}\""
- else
- console.log "passed \"#{args.name}\""
- if serialized isnt args.expected
console.log " Input: #{args.html}"
console.log " Correct: #{args.expected}"
console.log " Output: #{serialized}"
- if parse_errors.length isnt args.errors
- console.log " Expected #{args.errors} parse errors, but got these: #{JSON.stringify parse_errors}"
+ if parse_errors.length > 0
+ console.log " parse errs: #{JSON.stringify parse_errors}"
+ else
+ console.log " No parse errors"
+ else
+ console.log "passed \"#{args.name}\""
test_parser name: "empty", \
html: "",
- expected: '',
- errors: 0
+ expected: ''
test_parser name: "just text", \
html: "abc",
- expected: 'text:"abc"',
- errors: 0
+ expected: 'text:"abc"'
test_parser name: "named entity", \
html: "a&1234",
- expected: 'text:"a&1234"',
- errors: 0
+ expected: 'text:"a&1234"'
test_parser name: "broken named character references", \
html: "1&2&&3&aabbcc;",
- expected: 'text:"1&2&&3&aabbcc;"',
- errors: 2
+ expected: 'text:"1&2&&3&aabbcc;"'
test_parser name: "numbered entity overrides", \
html: "1 ",
- expected: 'text:"1€€ ƒ"',
- errors: 0
+ expected: 'text:"1€€ ƒ"'
test_parser name: "open tag", \
html: "foobar",
- expected: 'text:"foo",tag:"span",{},[text:"bar"]',
- errors: 1 # no close tag
+ expected: 'text:"foo",tag:"span",{},[text:"bar"]'
test_parser name: "open tag with attributes", \
html: "foobar",
- expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]',
- errors: 1 # no close tag
+ expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]'
test_parser name: "open tag with attributes of various quotings", \
html: "foobar",
- expected: 'text:"foo",tag:"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\"","autofocus":""},[text:"bar"]',
- errors: 1 # no close tag
+ expected: 'text:"foo",tag:"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\"","autofocus":""},[text:"bar"]'
test_parser name: "attribute entity exceptions dq", \
html: "foobar",
- expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]',
- errors: 2 # no close tag, &= in attr
+ expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]'
test_parser name: "attribute entity exceptions sq", \
html: "foo bar",
- expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]',
- errors: 2 # no close tag, &= in attr
+ expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]'
test_parser name: "attribute entity exceptions uq", \
html: "foo bar",
- expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]',
- errors: 2 # no close tag, &= in attr
+ expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]'
test_parser name: "matching closing tags", \
html: "foo hi bar",
- expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"',
- errors: 0
+ expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"'
test_parser name: "missing closing tag inside", \
html: "foobarbaz
qux",
- expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"',
- errors: 1 # close tag mismatch
+ expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"'
test_parser name: "mis-matched closing tags", \
html: "123456
78",
- expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]',
- errors: 2 # misplaced , no at the end
+ expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]'
test_parser name: "mis-matched formatting elements", \
html: "123456 7890",
- expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"',
- errors: 1 # no idea how many their should be
+ expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"'
test_parser name: "8.2.8.1 Misnested tags: ", \
html: '123 45
',
- expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]',
- errors: 1
+ expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]'
test_parser name: "8.2.8.2 Misnested tags:
", \
html: '1 23
',
- expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]',
- errors: 1
+ expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]'
test_parser name: "crazy formatting elements test", \
html: "first
second ",
# chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]'
# firefox does this:
- expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"',
- errors: 6 # no idea how many there should be
+ expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"'
# tests from https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/adoption01.dat
test_parser name: "html5lib aaa 1", \
html: '
',
- expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]',
- errors: 2
+ expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]'
test_parser name: "html5lib aaa 2", \
html: '12
3',
- expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]',
- errors: 2
+ expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]'
test_parser name: "html5lib aaa 3", \
html: '12 3',
- expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]',
- errors: 2
+ expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]'
test_parser name: "html5lib aaa 4", \
html: '12 3 ',
- expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]',
- errors: 2
+ expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]'
test_parser name: "html5lib aaa 5 (two divs deep)", \
html: '1',
- expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]',
- errors: 3
+ expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]'
test_parser name: "html5lib aaa 6 (foster parenting)", \
html: ' 12
3',
- expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]',
- errors: 10
+ expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]'
+test_parser name: "html5lib aaa 10 (formatting, nesting, attrs, aaa)", \
+ html: '123
45',
+ expected: 'tag:"p",{},[text:"1",tag:"s",{"id":"A"},[text:"2",tag:"b",{"id":"B"},[text:"3"]]],tag:"s",{"id":"A"},[tag:"b",{"id":"B"},[text:"4"]],tag:"b",{"id":"B"},[text:"5"]'
test_parser name: "html5lib aaa 11 (table with foster parenting, formatting el and td)", \
html: '',
- expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]',
- errors: 10
+ expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]'