JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
switch to tests from html5lib-tests/tree-construction
[peach-html5-editor.git] / parse-html.coffee
index c6ed9a5..385a2a0 100644 (file)
 #   0: a "end of the list", "current node", "bottommost", "last"
 
 
+# browser
+# note: to get this to run outside a browser, you'll have to write a native
+# implementation of decode_named_char_ref()
+unless module?.exports?
+       window.wheic = {}
+       module = exports: window.wheic
 
 # Each node is an object of the Node class. Here are the Node types:
 TYPE_TAG = 0 # name, {attributes}, [children]
@@ -2776,15 +2782,6 @@ parse_html = (txt, parse_error_cb = null) ->
                        # fixfull parse error if has self-closing flag, but it wasn't acknowledged
        return doc.children
 
-test_results = passed: 0, failed: 0
-# everything below is tests on the above
-test_equals = (description, output, expected_output) ->
-       if output is expected_output
-               console.log "passed." # don't say name, so smart consoles can merge all of these
-       else
-               console.log "FAILED: \"#{description}\""
-               console.log "   Expected: #{expected_output}"
-               console.log "     Actual: #{output}"
 serialize_els = (els, shallow, show_ids) ->
        serialized = ''
        sep = ''
@@ -2793,205 +2790,12 @@ serialize_els = (els, shallow, show_ids) ->
                sep = ','
                serialized += t.serialize shallow, show_ids
        return serialized
-test_parser = (args) ->
-       debug_log_reset()
-       parse_errors = []
-       errors_cb = (i) ->
-               parse_errors.push i
-       prev_node_id = 0 # reset counter
-       parsed = parse_html args.html, errors_cb
-       serialized = serialize_els parsed, false, false
-       expected = 'tag:"html",{},[tag:"head",{},[],tag:"body",{},[' + args.expected + ']]'
-       if serialized isnt expected
-               debug_log_each (str) ->
-                       console.log str
-               console.log "FAILED: \"#{args.name}\""
-               console.log "      Input: #{args.html}"
-               console.log "    Correct: #{expected}"
-               console.log "     Output: #{serialized}"
-               if parse_errors.length > 0
-                       console.log " parse errs: #{JSON.stringify parse_errors}"
-               else
-                       console.log "   No parse errors"
-               test_results.failed += 1
-       else
-               #console.log "passed \"#{args.name}\""
-               test_results.passed += 1
-test_summary = ->
-       console.log "Tests passed: #{test_results.passed}"
-       console.log "Tests Failed: #{test_results.failed}"
-
-test_parser name: "empty", \
-       html: "",
-       expected: ''
-test_parser name: "just text", \
-       html: "abc",
-       expected: 'text:"abc"'
-test_parser name: "named entity", \
-       html: "a&1234",
-       expected: 'text:"a&1234"'
-test_parser name: "broken named character references", \
-       html: "1&amp2&&3&aabbcc;",
-       expected: 'text:"1&2&&3&aabbcc;"'
-test_parser name: "numbered entity overrides", \
-       html: "1&#X80€ &#x83",
-       expected: 'text:"1€€ ƒ"'
-test_parser name: "open tag", \
-       html: "foo<span>bar",
-       expected: 'text:"foo",tag:"span",{},[text:"bar"]'
-test_parser name: "open tag with attributes", \
-       html: "foo<span style=\"foo: bar\" title=\"hi\">bar",
-       expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]'
-test_parser name: "open tag with attributes of various quotings", \
-       html: "foo<span abc=\"def\" g=hij klm='nopqrstuv\"' autofocus>bar",
-       expected: 'text:"foo",tag:"span",{"abc":"def","autofocus":"","g":"hij","klm":"nopqrstuv\\""},[text:"bar"]'
-test_parser name: "attribute entity exceptions dq", \
-       html: "foo<a href=\"foo?t=1&amp=2&ampo=3&amp;lt=foo\">bar",
-       expected: 'text:"foo",tag:"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[text:"bar"]'
-test_parser name: "attribute entity exceptions sq", \
-       html: "foo<a href='foo?t=1&amp=2&ampo=3&amp;lt=foo'>bar",
-       expected: 'text:"foo",tag:"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[text:"bar"]'
-test_parser name: "attribute entity exceptions uq", \
-       html: "foo<a href=foo?t=1&amp=2&ampo=3&amp;lt=foo>bar",
-       expected: 'text:"foo",tag:"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[text:"bar"]'
-test_parser name: "matching closing tags", \
-       html: "foo<a href=\"hi\">hi</a><div>1<div>foo</div>2</div>bar",
-       expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"'
-test_parser name: "missing closing tag inside", \
-       html: "foo<div>bar<span>baz</div>qux",
-       expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"'
-test_parser name: "mis-matched closing tags", \
-       html: "<span>12<div>34</span>56</div>78",
-       expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]'
-test_parser name: "mis-matched formatting elements", \
-       html: "12<b>34<i>56</b>78</i>90",
-       expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"'
-test_parser name: "8.2.8.1 Misnested tags: <b><i></b></i>", \
-       html: '<p>1<b>2<i>3</b>4</i>5</p>',
-       expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]'
-test_parser name: "8.2.8.2 Misnested tags: <b><p></b></p>", \
-       html: '<b>1<p>2</b>3</p>',
-       expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]'
-test_parser name: "crazy formatting elements test", \
-       html: "<b><i><a><s><tt><div></b>first</b></div></tt></s></a>second</i>",
-       # chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]'
-       # firefox does this:
-       expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"'
-# tests from https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/adoption01.dat
-test_parser name: "html5lib aaa 1", \
-       html: '<a><p></a></p>',
-       expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]'
-test_parser name: "html5lib aaa 2", \
-       html: '<a>1<p>2</a>3</p>',
-       expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]'
-test_parser name: "html5lib aaa 3", \
-       html: '<a>1<button>2</a>3</button>',
-       expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]'
-test_parser name: "html5lib aaa 4", \
-       html: '<a>1<b>2</a>3</b>',
-       expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]'
-test_parser name: "html5lib aaa 5 (two divs deep)", \
-       html: '<a>1<div>2<div>3</a>4</div>5</div>',
-       expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]'
-test_parser name: "html5lib aaa 6 (foster parenting)", \
-       html: '<table><a>1<p>2</a>3</p>',
-       expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]'
-test_parser name: "html5lib aaa 7 (aaa, eof) 1", \
-       html: '<b><b><a><p></a>',
-       expected: 'tag:"b",{},[tag:"b",{},[tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]]]'
-test_parser name: "html5lib aaa 8 (aaa, eof) 2", \
-       html: '<b><a><b><p></a>',
-       expected: 'tag:"b",{},[tag:"a",{},[tag:"b",{},[]],tag:"b",{},[tag:"p",{},[tag:"a",{},[]]]]'
-test_parser name: "html5lib aaa 9 (aaa, eof) 3", \
-       html: '<a><b><b><p></a>',
-       expected: 'tag:"a",{},[tag:"b",{},[tag:"b",{},[]]],tag:"b",{},[tag:"b",{},[tag:"p",{},[tag:"a",{},[]]]]'
-test_parser name: "html5lib aaa 10 (formatting, nesting, attrs, aaa)", \
-       html: '<p>1<s id="A">2<b id="B">3</p>4</s>5</b>',
-       expected: 'tag:"p",{},[text:"1",tag:"s",{"id":"A"},[text:"2",tag:"b",{"id":"B"},[text:"3"]]],tag:"s",{"id":"A"},[tag:"b",{"id":"B"},[text:"4"]],tag:"b",{"id":"B"},[text:"5"]'
-test_parser name: "html5lib aaa 11 (table with foster parenting, formatting el and td)", \
-       html: '<table><a>1<td>2</td>3</table>',
-       expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]'
-test_parser name: "html5lib aaa 12 (table with foster parenting, split text)", \
-       html: '<table>A<td>B</td>C</table>',
-       expected: 'text:"AC",tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"B"]]]]'
-# TODO implement svg and namespacing
-#test_parser name: "html5lib aaa 13 (svg tr input)", \
-#      html: '<a><svg><tr><input></a>',
-#      expected: 'tag:"a",{},[svg:"svg",{},[svg:"tr",{},[svg:"input"]]]'
-test_parser name: "html5lib aaa 14 (deep ?outer aaa)", \
-       html: '<div><a><b><div><div><div><div><div><div><div><div><div><div></a>',
-       expected: 'tag:"div",{},[tag:"a",{},[tag:"b",{},[]],tag:"b",{},[tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[tag:"div",{},[tag:"div",{},[]]]]]]]]]]]]]'
-test_parser name: "html5lib aaa 15 (deep ?inner aaa)", \
-       html: '<div><a><b><u><i><code><div></a>',
-       expected: 'tag:"div",{},[tag:"a",{},[tag:"b",{},[tag:"u",{},[tag:"i",{},[tag:"code",{},[]]]]],tag:"u",{},[tag:"i",{},[tag:"code",{},[tag:"div",{},[tag:"a",{},[]]]]]]'
-test_parser name: "html5lib aaa 16 (correctly nested 4b)", \
-       html: '<b><b><b><b>x</b></b></b></b>y',
-       expected: 'tag:"b",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[text:"x"]]]],text:"y"'
-test_parser name: "html5lib aaa 17 (formatting, implied /p, noah's ark)", \
-       html: '<p><b><b><b><b><p>x',
-       expected: 'tag:"p",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[]]]]],tag:"p",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[text:"x"]]]]'
-test_parser name: "variation on html5lib aaa 17 (with attributes in various orders)", \
-       html: '<p><b c="d" e="f"><b e="f" c="d"><b e="f" c="d"><b c="d" e="f"><p>x',
-       expected: 'tag:"p",{},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[]]]]],tag:"p",{},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[text:"x"]]]]'
-test_parser name: "junk after attribute close-quote", \
-       html: '<p><b c="d", e="f">foo<p>x',
-       expected: 'tag:"p",{},[tag:"b",{",":"","c":"d","e":"f"},[text:"foo"]],tag:"p",{},[tag:"b",{",":"","c":"d","e":"f"},[text:"x"]]'
-test_parser name: "html5lib aaa02 1", \
-       html: '<b>1<i>2<p>3</b>4',
-       expected: 'tag:"b",{},[text:"1",tag:"i",{},[text:"2"]],tag:"i",{},[tag:"p",{},[tag:"b",{},[text:"3"],text:"4"]]'
-test_parser name: "html5lib aaa02 2", \
-       html: '<a><div><style></style><address><a>',
-       expected: 'tag:"a",{},[],tag:"div",{},[tag:"a",{},[tag:"style",{},[]],tag:"address",{},[tag:"a",{},[],tag:"a",{},[]]]'
-test_parser name: "html5lib tables 1", \
-       html: '<table><th>',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"th",{},[]]]]'
-test_parser name: "html5lib tables 2", \
-       html: '<table><td>',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]]]]'
-test_parser name: "html5lib tables 3", \
-       html: "<table><col foo='bar'>",
-       expected: 'tag:"table",{},[tag:"colgroup",{},[tag:"col",{"foo":"bar"},[]]]'
-test_parser name: "html5lib tables 4", \
-       html: '<table><colgroup></html>foo',
-       expected: 'text:"foo",tag:"table",{},[tag:"colgroup",{},[]]'
-test_parser name: "html5lib tables 5", \
-       html: '<table></table><p>foo',
-       expected: 'tag:"table",{},[],tag:"p",{},[text:"foo"]'
-test_parser name: "html5lib tables 6", \
-       html: '<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]]]]'
-test_parser name: "html5lib tables 7", \
-       html: '<table><select><option>3</select></table>',
-       expected: 'tag:"select",{},[tag:"option",{},[text:"3"]],tag:"table",{},[]'
-test_parser name: "html5lib tables 8", \
-       html: '<table><select><table></table></select></table>',
-       expected: 'tag:"select",{},[],tag:"table",{},[],tag:"table",{},[]'
-test_parser name: "html5lib tables 9", \
-       html: '<table><select></table>',
-       expected: 'tag:"select",{},[],tag:"table",{},[]'
-test_parser name: "html5lib tables 10", \
-       html: '<table><select><option>A<tr><td>B</td></tr></table>',
-       expected: 'tag:"select",{},[tag:"option",{},[text:"A"]],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"B"]]]]'
-test_parser name: "html5lib tables 11", \
-       html: '<table><td></body></caption></col></colgroup></html>foo',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"foo"]]]]'
-test_parser name: "html5lib tables 12", \
-       html: '<table><td>A</table>B',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"A"]]]],text:"B"'
-test_parser name: "html5lib tables 13", \
-       html: '<table><tr><caption>',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[]],tag:"caption",{},[]]'
-test_parser name: "html5lib tables 14", \
-       html: '<table><tr></body></caption></col></colgroup></html></td></th><td>foo',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"foo"]]]]'
-test_parser name: "html5lib tables 15", \
-       html: '<table><td><tr>',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]],tag:"tr",{},[]]]'
-test_parser name: "html5lib tables 16", \
-       html: '<table><td><button><td>',
-       expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[tag:"button",{},[]],tag:"td",{},[]]]]'
-# TODO implement svg parsing
-#test_parser name: "html5lib tables 17", \
-#      html: '<table><tr><td><svg><desc><td>',
-#      expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[svg:"svg",{},[svg:"desc",{},[]]],tag:"td",{},[]]]]'
-test_summary()
+
+# TODO export TYPE_*
+module.exports.parse_html = parse_html
+module.exports.debug_log_reset = debug_log_reset
+module.exports.debug_log_each = debug_log_each
+module.exports.TYPE_TAG = TYPE_TAG
+module.exports.TYPE_TEXT = TYPE_TEXT
+module.exports.TYPE_COMMENT = TYPE_COMMENT
+module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE