From 7f7d1a5bb2b882566b4c8f398d4bf02d6fcf0c6d Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Mon, 21 Dec 2015 22:48:43 -0500 Subject: [PATCH] switch to tests from html5lib-tests/tree-construction --- Makefile | 4 +- index.html | 1 + parse-html.coffee | 226 +- test.coffee | 7858 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 7876 insertions(+), 213 deletions(-) create mode 100644 test.coffee diff --git a/Makefile b/Makefile index 24c826c..2e57917 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: parse-html.js +all: parse-html.js test.js -parse-html.js: parse-html.coffee +%.js: %.coffee coffee -c $< diff --git a/index.html b/index.html index f27ccb2..5750538 100644 --- a/index.html +++ b/index.html @@ -3,6 +3,7 @@ html parser tester +

check the console for test results

diff --git a/parse-html.coffee b/parse-html.coffee index c6ed9a5..385a2a0 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -47,6 +47,12 @@ # 0: a "end of the list", "current node", "bottommost", "last" +# browser +# note: to get this to run outside a browser, you'll have to write a native +# implementation of decode_named_char_ref() +unless module?.exports? + window.wheic = {} + module = exports: window.wheic # Each node is an obect of the Node class. Here are the Node types: TYPE_TAG = 0 # name, {attributes}, [children] @@ -2776,15 +2782,6 @@ parse_html = (txt, parse_error_cb = null) -> # fixfull parse error if has self-closing flag, but it wasn't acknolwedged return doc.children -test_results = passed: 0, failed: 0 -# everything below is tests on the above -test_equals = (description, output, expected_output) -> - if output is expected_output - console.log "passed." # don't say name, so smart consoles can merge all of these - else - console.log "FAILED: \"#{description}\"" - console.log " Expected: #{expected_output}" - console.log " Actual: #{output}" serialize_els = (els, shallow, show_ids) -> serialized = '' sep = '' @@ -2793,205 +2790,12 @@ serialize_els = (els, shallow, show_ids) -> sep = ',' serialized += t.serialize shallow, show_ids return serialized -test_parser = (args) -> - debug_log_reset() - parse_errors = [] - errors_cb = (i) -> - parse_errors.push i - prev_node_id = 0 # reset counter - parsed = parse_html args.html, errors_cb - serialized = serialize_els parsed, false, false - expected = 'tag:"html",{},[tag:"head",{},[],tag:"body",{},[' + args.expected + ']]' - if serialized isnt expected - debug_log_each (str) -> - console.log str - console.log "FAILED: \"#{args.name}\"" - console.log " Input: #{args.html}" - console.log " Correct: #{expected}" - console.log " Output: #{serialized}" - if parse_errors.length > 0 - console.log " parse errs: #{JSON.stringify parse_errors}" - else - console.log " No parse errors" - test_results.failed += 1 - else - #console.log "passed \"#{args.name}\"" - test_results.passed += 1 -test_summary = -> - console.log "Tests passed: #{test_results.passed}" - console.log "Tests Failed: #{test_results.failed}" - -test_parser name: "empty", \ - html: "", - expected: '' -test_parser name: "just text", \ - html: "abc", - expected: 'text:"abc"' -test_parser name: "named entity", \ - html: "a&1234", - expected: 'text:"a&1234"' -test_parser name: "broken named character references", \ - html: "1&2&&3&aabbcc;", - expected: 'text:"1&2&&3&aabbcc;"' -test_parser name: "numbered entity overrides", \ - html: "1€€ ƒ", - expected: 'text:"1€€ ƒ"' -test_parser name: "open tag", \ - html: "foobar", - expected: 'text:"foo",tag:"span",{},[text:"bar"]' -test_parser name: "open tag with attributes", \ - html: "foobar", - expected: 'text:"foo",tag:"span",{"style":"foo: bar","title":"hi"},[text:"bar"]' -test_parser name: "open tag with attributes of various quotings", \ - html: "foobar", - expected: 'text:"foo",tag:"span",{"abc":"def","autofocus":"","g":"hij","klm":"nopqrstuv\\""},[text:"bar"]' -test_parser name: "attribute entity exceptions dq", \ - html: "foobar", - expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]' -test_parser name: "attribute entity exceptions sq", \ - html: "foobar", - expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]' -test_parser name: "attribute entity exceptions uq", \ - html: "foobar", - expected: 'text:"foo",tag:"a",{"href":"foo?t=1&=2&o=3<=foo"},[text:"bar"]' -test_parser name: "matching closing tags", \ - html: "foohi
1
foo
2
bar", - expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"' -test_parser name: "missing closing tag inside", \ - html: "foo
barbaz
qux", - expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"' -test_parser name: "mis-matched closing tags", \ - html: "12
3456
78", - expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]' -test_parser name: "mis-matched formatting elements", \ - html: "1234567890", - expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"' -test_parser name: "8.2.8.1 Misnested tags: ", \ - html: '

12345

', - expected: 'tag:"p",{},[text:"1",tag:"b",{},[text:"2",tag:"i",{},[text:"3"]],tag:"i",{},[text:"4"],text:"5"]' -test_parser name: "8.2.8.2 Misnested tags:

", \ - html: '1

23

', - expected: 'tag:"b",{},[text:"1"],tag:"p",{},[tag:"b",{},[text:"2"],text:"3"]' -test_parser name: "crazy formatting elements test", \ - html: "second
first
", - # chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]' - # firefox does this: - expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"' -# tests from https://github.com/html5lib/html5lib-tests/blob/master/tree-construction/adoption01.dat -test_parser name: "html5lib aaa 1", \ - html: '

', - expected: 'tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]' -test_parser name: "html5lib aaa 2", \ - html: '1

23

', - expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"]' -test_parser name: "html5lib aaa 3", \ - html: '1', - expected: 'tag:"a",{},[text:"1"],tag:"button",{},[tag:"a",{},[text:"2"],text:"3"]' -test_parser name: "html5lib aaa 4", \ - html: '123', - expected: 'tag:"a",{},[text:"1",tag:"b",{},[text:"2"]],tag:"b",{},[text:"3"]' -test_parser name: "html5lib aaa 5 (two divs deep)", \ - html: '1
2
34
5
', - expected: 'tag:"a",{},[text:"1"],tag:"div",{},[tag:"a",{},[text:"2"],tag:"div",{},[tag:"a",{},[text:"3"],text:"4"],text:"5"]' -test_parser name: "html5lib aaa 6 (foster parenting)", \ - html: '1

23

', - expected: 'tag:"a",{},[text:"1"],tag:"p",{},[tag:"a",{},[text:"2"],text:"3"],tag:"table",{},[]' -test_parser name: "html5lib aaa 7 (aaa, eof) 1", \ - html: '

', - expected: 'tag:"b",{},[tag:"b",{},[tag:"a",{},[],tag:"p",{},[tag:"a",{},[]]]]' -test_parser name: "html5lib aaa 8 (aaa, eof) 2", \ - html: '

', - expected: 'tag:"b",{},[tag:"a",{},[tag:"b",{},[]],tag:"b",{},[tag:"p",{},[tag:"a",{},[]]]]' -test_parser name: "html5lib aaa 9 (aaa, eof) 3", \ - html: '

', - expected: 'tag:"a",{},[tag:"b",{},[tag:"b",{},[]]],tag:"b",{},[tag:"b",{},[tag:"p",{},[tag:"a",{},[]]]]' -test_parser name: "html5lib aaa 10 (formatting, nesting, attrs, aaa)", \ - html: '

123

45', - expected: 'tag:"p",{},[text:"1",tag:"s",{"id":"A"},[text:"2",tag:"b",{"id":"B"},[text:"3"]]],tag:"s",{"id":"A"},[tag:"b",{"id":"B"},[text:"4"]],tag:"b",{"id":"B"},[text:"5"]' -test_parser name: "html5lib aaa 11 (table with foster parenting, formatting el and td)", \ - html: '
13
2
', - expected: 'tag:"a",{},[text:"1"],tag:"a",{},[text:"3"],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"2"]]]]' -test_parser name: "html5lib aaa 12 (table with foster parenting, split text)", \ - html: 'AC
B
', - expected: 'text:"AC",tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"B"]]]]' -# TODO implement svg and namespacing -#test_parser name: "html5lib aaa 13 (svg tr input)", \ -# html: '
', -# expected: 'tag:"a",{},[svg:"svg",{},[svg:"tr",{},[svg:"input"]]]' -test_parser name: "html5lib aaa 14 (deep ?outer aaa)", \ - html: '
', - expected: 'tag:"div",{},[tag:"a",{},[tag:"b",{},[]],tag:"b",{},[tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[],tag:"div",{},[tag:"a",{},[tag:"div",{},[tag:"div",{},[]]]]]]]]]]]]]' -test_parser name: "html5lib aaa 15 (deep ?inner aaa)", \ - html: '
', - expected: 'tag:"div",{},[tag:"a",{},[tag:"b",{},[tag:"u",{},[tag:"i",{},[tag:"code",{},[]]]]],tag:"u",{},[tag:"i",{},[tag:"code",{},[tag:"div",{},[tag:"a",{},[]]]]]]' -test_parser name: "html5lib aaa 16 (correctly nested 4b)", \ - html: 'xy', - expected: 'tag:"b",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[text:"x"]]]],text:"y"' -test_parser name: "html5lib aaa 17 (formatting, implied /p, noah's ark)", \ - html: '

x', - expected: 'tag:"p",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[]]]]],tag:"p",{},[tag:"b",{},[tag:"b",{},[tag:"b",{},[text:"x"]]]]' -test_parser name: "variation on html5lib aaa 17 (with attributes in various orders)", \ - html: '

x', - expected: 'tag:"p",{},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[]]]]],tag:"p",{},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[tag:"b",{"c":"d","e":"f"},[text:"x"]]]]' -test_parser name: "junk after attribute close-quote", \ - html: '

foo

x', - expected: 'tag:"p",{},[tag:"b",{",":"","c":"d","e":"f"},[text:"foo"]],tag:"p",{},[tag:"b",{",":"","c":"d","e":"f"},[text:"x"]]' -test_parser name: "html5lib aaa02 1", \ - html: '12

34', - expected: 'tag:"b",{},[text:"1",tag:"i",{},[text:"2"]],tag:"i",{},[tag:"p",{},[tag:"b",{},[text:"3"],text:"4"]]' -test_parser name: "html5lib aaa02 2", \ - html: '

', - expected: 'tag:"a",{},[],tag:"div",{},[tag:"a",{},[tag:"style",{},[]],tag:"address",{},[tag:"a",{},[],tag:"a",{},[]]]' -test_parser name: "html5lib tables 1", \ - html: '
', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"th",{},[]]]]' -test_parser name: "html5lib tables 2", \ - html: '
', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]]]]' -test_parser name: "html5lib tables 3", \ - html: "", - expected: 'tag:"table",{},[tag:"colgroup",{},[tag:"col",{"foo":"bar"},[]]]' -test_parser name: "html5lib tables 4", \ - html: '
foo', - expected: 'text:"foo",tag:"table",{},[tag:"colgroup",{},[]]' -test_parser name: "html5lib tables 5", \ - html: '

foo', - expected: 'tag:"table",{},[],tag:"p",{},[text:"foo"]' -test_parser name: "html5lib tables 6", \ - html: '
', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]]]]' -test_parser name: "html5lib tables 7", \ - html: '
', - expected: 'tag:"select",{},[tag:"option",{},[text:"3"]],tag:"table",{},[]' -test_parser name: "html5lib tables 8", \ - html: '
', - expected: 'tag:"select",{},[],tag:"table",{},[],tag:"table",{},[]' -test_parser name: "html5lib tables 9", \ - html: '
', - expected: 'tag:"select",{},[],tag:"table",{},[]' -test_parser name: "html5lib tables 10", \ - html: '
B
', - expected: 'tag:"select",{},[tag:"option",{},[text:"A"]],tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"B"]]]]' -test_parser name: "html5lib tables 11", \ - html: '
foo', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"foo"]]]]' -test_parser name: "html5lib tables 12", \ - html: '
A
B', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"A"]]]],text:"B"' -test_parser name: "html5lib tables 13", \ - html: '
', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[]],tag:"caption",{},[]]' -test_parser name: "html5lib tables 14", \ - html: '
foo', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[text:"foo"]]]]' -test_parser name: "html5lib tables 15", \ - html: '', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[]],tag:"tr",{},[]]]' -test_parser name: "html5lib tables 16", \ - html: '
', - expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[tag:"button",{},[]],tag:"td",{},[]]]]' -# TODO implement svg parsing -#test_parser name: "html5lib tables 17", \ -# html: '
', -# expected: 'tag:"table",{},[tag:"tbody",{},[tag:"tr",{},[tag:"td",{},[svg:"svg",{},[svg:"desc",{},[]]],tag:"td",{},[]]]]' -test_summary() + +# TODO export TYPE_* +module.exports.parse_html = parse_html +module.exports.debug_log_reset = debug_log_reset +module.exports.debug_log_each = debug_log_each +module.exports.TYPE_TAG = TYPE_TAG +module.exports.TYPE_TEXT = TYPE_TEXT +module.exports.TYPE_COMMENT = TYPE_COMMENT +module.exports.TYPE_DOCTYPE = TYPE_DOCTYPE diff --git a/test.coffee b/test.coffee new file mode 100644 index 0000000..1c45b68 --- /dev/null +++ b/test.coffee @@ -0,0 +1,7858 @@ +# Copyright (c) 2006-2015 Jason Woofenden, James Graham, Geoffrey Sneddon, and +# other contributors +# +# The values in the following data structure were extracted from the project at +# https://github.com/html5lib/html5lib-tests which has the following notice: +# +# Copyright (c) 2006-2013 James Graham, Geoffrey Sneddon, and +# other contributors +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# The rest of this file was written by Jason Woofenden in 2015, and is released +# under the terms of the CC0 license: +# http://creativecommons.org/publicdomain/zero/1.0/ and into the public domain + +tests = [ + { + name: "adoption01.dat #1" + html: "

" + errors: 2 + expected: "| \n| \n| \n| \n|

\n| \n" + }, { + name: "adoption01.dat #2" + html: "1

23

" + errors: 2 + expected: "| \n| \n| \n| \n| \"1\"\n|

\n| \n| \"2\"\n| \"3\"\n" + }, { + name: "adoption01.dat #3" + html: "1" + errors: 2 + expected: "| \n| \n| \n| \n| \"1\"\n|