tree_insert_element el
afe[i] = el
break if i is 0
- i -= 1
+ i -= 1 # Advance
# http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm
# adoption agency algorithm
# http://www.w3.org/TR/html5/syntax.html#misnested-tags:-b-p-/b-/p
# http://www.w3.org/TR/html5/syntax.html#unclosed-formatting-elements
adoption_agency = (subject) ->
+ debug_log "adoption_agency()"
+ debug_log "tree: #{serialize_els tree.children, false, true}"
+ debug_log "open_els: #{serialize_els open_els, true, true}"
+ debug_log "afe: #{serialize_els afe, true, true}"
if open_els[0].name is subject
el = open_els[0]
open_els.shift()
if t is el
afe.splice i, 1
break
+ debug_log "aaa: starting off with subject on top of stack, exiting"
return
outer = 0
loop
# If there is no such element, then abort these steps and instead
# act as described in the "any other end tag" entry above.
if fe is null
+ debug_log "aaa: fe not found in afe"
in_body_any_other_end_tag subject
return
# 6. If formatting element is not in the stack of open elements,
in_open_els = true
break
unless in_open_els
+ debug_log "aaa: fe not found in open_els"
parse_error()
# "remove it from the list" must mean afe, since it's not in open_els
afe.splice fe_of_afe, 1
# the element is not in scope, then this is a parse error; abort
# these steps.
unless el_is_in_scope fe
+ debug_log "aaa: fe not in scope"
parse_error()
return
# 8. If formatting element is not the current node, this is a parse
# formatting element from the list of active formatting elements,
# and finally abort these steps.
if fb is null
+ debug_log "aaa: no fb"
loop
t = open_els.shift()
if t is fe
break
node = node_next ? node_above
debug_log "inner loop #{inner}"
- debug_log "open_els: #{serialize_els open_els, true, true}"
debug_log "tree: #{serialize_els tree.children, false, true}"
+ debug_log "open_els: #{serialize_els open_els, true, true}"
debug_log "afe: #{serialize_els afe, true, true}"
debug_log "ca: #{ca.name}##{ca.id} children: #{serialize_els ca.children, true, true}"
debug_log "fe: #{fe.name}##{fe.id} children: #{serialize_els fe.children, true, true}"
if open_els[0].name isnt 'p'
parse_error()
while open_els.length > 1 # just in case
- t = open_els.shift()
- if t.name is 'p'
+ el = open_els.shift()
+ if el.name is 'p'
return
close_p_if_in_button_scope = ->
if is_in_button_scope 'p'
# http://www.w3.org/TR/html5/syntax.html#insert-a-character
tree_insert_text = (t) ->
dest = adjusted_insertion_location()
+ # fixfull check for Document node
if dest[1] > 0
prev = dest[0].children[dest[1] - 1]
if prev.type is TYPE_TEXT
# 8.2.5.3 http://www.w3.org/TR/html5/syntax.html#closing-elements-that-have-implied-end-tags
# http://www.w3.org/TR/html5/syntax.html#generate-implied-end-tags
# Generate implied end tags: repeatedly removes the element at index 0 of
# open_els (via shift) for as long as the loop condition holds.
# `except` (default null) is a name that stops the loop when the element at
# index 0 has that name.
# The "-" line is the pre-patch condition, which indexed end_tag_implied by the
# element object itself; the "+" line indexes it by the element's .name.
# NOTE(review): indentation was lost in this excerpt; open_els.shift() is
# presumably the body of the while loop -- confirm against the real file.
generate_implied_end_tags = (except = null) ->
- while end_tag_implied[open_els[0]] and open_els[0].name isnt except
+ while end_tag_implied[open_els[0].name] and open_els[0].name isnt except
open_els.shift()
# 8.2.5.4 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
if el is found
open_els.splice i, 1
reconstruct_active_formatting_elements()
- el = tree_insert_element t
+ el = insert_html_element t
afe.unshift el
when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u'
reconstruct_active_formatting_elements()
- el = tree_insert_element t
+ el = insert_html_element t
afe.unshift el
when 'table'
# fixfull quirksmode thing
# TODO lots more to implement here
else # any other start tag
reconstruct_active_formatting_elements()
- tree_insert_element t
+ insert_html_element t
when TYPE_EOF
ok_tags = {
dd: true, dt: true, li: true, p: true, tbody: true, td: true,
unless is_in_button_scope 'p'
parse_error()
insert_html_element new_open_tag 'p'
- close_p_element()
+ close_p_element()
# TODO lots more close tags to implement here
when 'a', 'b', 'big', 'code', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u'
adoption_agency t.name
prev_node_id = 0 # reset counter
parsed = parse_html args.html, errors_cb
serialized = serialize_els parsed, false, false
- if serialized isnt args.expected # or parse_errors.length isnt args.errors
+ if serialized isnt args.expected
debug_log_each (str) ->
console.log str
console.log "FAILED: \"#{args.name}\""
- else
- console.log "passed \"#{args.name}\""
- if serialized isnt args.expected
console.log " Input: #{args.html}"
console.log " Correct: #{args.expected}"
console.log " Output: #{serialized}"
- if parse_errors.length isnt args.errors
- console.log " Expected #{args.errors} parse errors, but got these: #{JSON.stringify parse_errors}"
+ if parse_errors.length > 0
+ console.log " parse errs: #{JSON.stringify parse_errors}"
+ else
+ console.log " No parse errors"
+ else
+ console.log "passed \"#{args.name}\""
test_parser name: "empty", \
html: "",
- expected: '',
- errors: 0
+ expected: ''
test_parser name: "just text", \
html: "abc",
- expected: 'text:"abc"',
- errors: 0
+ expected: 'text:"abc"'
test_parser name: "named entity", \
html: "a&1234",
- expected: 'text:"a&1234"',
- errors: 0
+ expected: 'text:"a&1234"'
test_parser name: "broken named character references", \
html: "1&2&&3&aabbcc;",
- expected: 'text:"1&2&&3&aabbcc;"',
- errors: 2
+ expected: 'text:"1&2&&3&aabbcc;"'
test_parser name: "numbered entity overrides", \
html: "1€€ ƒ",
- expected: 'text:"1€€ ƒ"',
- errors: 0
+ expected: 'text:"1€€ ƒ"'
test_parser name: "open tag", \
html: "foo<span>bar",
- expected: 'text:"foo",tag:"span",{},[text:"bar"]',
- errors: 1 # no close tag
+ expected: 'text:"foo",tag:"span",{},[text:"bar"]'
test_parser name: "open tag with attributes", \
html: "foo<span style=\"foo: bar\" title=\"hi\">bar",
- expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]',
- errors: 1 # no close tag
+ expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]'
test_parser name: "open tag with attributes of various quotings", \
html: "foo<span abc=\"def\" g=hij klm='nopqrstuv\"' autofocus>bar",
- expected: 'text:"foo",tag:"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\"","autofocus":""},[text:"bar"]',
- errors: 1 # no close tag
+ expected: 'text:"foo",tag:"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\"","autofocus":""},[text:"bar"]'
test_parser name: "attribute entity exceptions dq", \
html: "foo<a href=\"foo?t=1&=2&o=3&lt=foo\">bar",
- expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]',
- errors: 2 # no close tag, &= in attr
+ expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]'
test_parser name: "attribute entity exceptions sq", \
html: "foo<a href='foo?t=1&=2&o=3&lt=foo'>bar",
- expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]',
- errors: 2 # no close tag, &= in attr
+ expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]'
test_parser name: "attribute entity exceptions uq", \
html: "foo<a href=foo?t=1&=2&o=3&lt=foo>bar",
- expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]',
- errors: 2 # no close tag, &= in attr
+ expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]'
test_parser name: "matching closing tags", \
html: "foo<a href=\"hi\">hi</a><div>1<div>foo</div>2</div>bar",
- expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"',
- errors: 0
+ expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"'
test_parser name: "missing closing tag inside", \
html: "foo<div>bar<span>baz</div>qux",
- expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"',
- errors: 1 # close tag mismatch
+ expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"'
test_parser name: "mis-matched closing tags", \
html: "<span>12<div>34</span>56</div>78",
- expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]',
- errors: 2 # misplaced </span>, no </span> at the end
+ expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]'
test_parser name: "mis-matched formatting elements", \
html: "12<b>34<i>56</b>78</i>90",
- expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"',
- errors: 1 # no idea how many their should be
+ expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"'
test_parser name: "8.2.8.1 Misnested tags: <b><i></b></i>", \
html: '<p>1<b>2<i>3</b>4</i>5</p>',
- expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]',
- errors: 1
+ expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]'
test_parser name: "8.2.8.2 Misnested tags: <b><p></b></p>", \
html: '<b>1<p>2</b>3</p>',
- expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]',
- errors: 1
+ expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]'
test_parser name: "crazy formatting elements test", \
html: "<b><i><a><s><tt><div></b>first</b></div></tt></s></a>second</i>",
# chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]'
# firefox does this (this is the output the test expects):
- expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"',
- errors: 6 # no idea how many there should be
+ expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"'
# tests from https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/adoption01.dat
test_parser name: "html5lib aaa 1", \
html: '<a><p></a></p>',
- expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]',
- errors: 2
+ expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]'
test_parser name: "html5lib aaa 2", \
html: '<a>1<p>2</a>3</p>',
- expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]',
- errors: 2
+ expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]'
test_parser name: "html5lib aaa 3", \
html: '<a>1<button>2</a>3</button>',
- expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]',
- errors: 2
+ expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]'
test_parser name: "html5lib aaa 4", \
html: '<a>1<b>2</a>3</b>',
- expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]',
- errors: 2
+ expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]'
test_parser name: "html5lib aaa 5 (two divs deep)", \
html: '<a>1<div>2<div>3</a>4</div>5</div>',
- expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]',
- errors: 3
+ expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]'
test_parser name: "html5lib aaa 6 (foster parenting)", \
html: '<table><a>1<p>2</a>3</p>',
- expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]',
- errors: 10
+ expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]'
+test_parser name: "html5lib aaa 10 (formatting, nesting, attrs, aaa)", \
+ html: '<p>1<s id="A">2<b id="B">3</p>4</s>5</b>',
+ expected: 'tag:"p",{},[text:"1",tag:"s",{"id":"A"},[text:"2",tag:"b",{"id":"B"},[text:"3"]]],tag:"s",{"id":"A"},[tag:"b",{"id":"B"},[text:"4"]],tag:"b",{"id":"B"},[text:"5"]'
test_parser name: "html5lib aaa 11 (table with foster parenting, formatting el and td)", \
html: '<table><a>1<td>2</td>3</table>',
- expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]',
- errors: 10
+ expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]'