JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
test for correct number of parse errors
author Jason Woofenden <jason@jasonwoof.com>
Tue, 15 Dec 2015 06:11:36 +0000 (01:11 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Tue, 15 Dec 2015 06:11:36 +0000 (01:11 -0500)
parse-html.coffee

index 6344839..927cb95 100644 (file)
@@ -189,7 +189,7 @@ decode_named_char_ref = (txt) ->
        return null if decoded is txt
        return g_dncr.cache[txt] = decoded
 
-parse_html = (txt) ->
+parse_html = (txt, parse_error_cb = null) ->
        cur = 0 # index of next char in txt to be parsed
        # declare tree and tokenizer variables so they're in scope below
        tree = null
@@ -201,7 +201,10 @@ parse_html = (txt) ->
        flag_parsing = null
 
        parse_error = ->
-               console.log "Parse error at character #{cur} of #{txt.length}"
+               if parse_error_cb?
+                       parse_error_cb cur
+               else
+                       console.log "Parse error at character #{cur} of #{txt.length}"
 
 
        # the functions below impliment the Tree Contstruction algorithm
@@ -709,26 +712,79 @@ parse_html = (txt) ->
        return tree[3]
 
 # everything below is tests on the above
-test_equals = (description, fn, args..., expected_output) ->
-       output = fn.apply this, args
+test_equals = (description, output, expected_output) ->
        if output is expected_output
-               console.log "passed: #{description}."
+               console.log "passed." # don't say name, so smart consoles can merge all of these
        else
-               console.log "FAILED: #{description}..."
+               console.log "FAILED: \"#{description}\""
                console.log "   Expected: #{expected_output}"
                console.log "     Actual: #{output}"
-html_to_json = (html) ->
-       return JSON.stringify parse_html html
-test_equals "empty", html_to_json, "", '[]'
-test_equals "just text", html_to_json, "abc", '[[1,"abc"]]'
-test_equals "named entity", html_to_json, "a&amp;1234", '[[1,"a&1234"]]'
-test_equals "broken named character references", html_to_json, "1&amp2&&amp;3&aabbcc;", '[[1,"1&2&&3&aabbcc;"]]'
-test_equals "numbered entity overrides", html_to_json, "1&#X80&#x80; &#x83", '[[1,"1€€ ƒ"]]'
-test_equals "open tag", html_to_json, "foo<span>bar", '[[1,"foo"],[0,"span",{},[[1,"bar"]]]]'
-test_equals "open tag with attributes", html_to_json, "foo<span style=\"foo: bar\" title=\"hi\">bar", '[[1,"foo"],[0,"span",{"style":"foo: bar","title":"hi"},[[1,"bar"]]]]'
-test_equals "open tag with attributes of various quotings", html_to_json, "foo<span abc=\"def\" g=hij klm='nopqrstuv\"' autofocus>bar", '[[1,"foo"],[0,"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\\"","autofocus":""},[[1,"bar"]]]]'
-test_equals "attribute entity exceptions dq", html_to_json, "foo<a href=\"foo?t=1&amp=2&ampo=3&amp;lt=foo\">bar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[[1,"bar"]]]]'
-test_equals "attribute entity exceptions sq", html_to_json, "foo<a href='foo?t=1&amp=2&ampo=3&amp;lt=foo'>bar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[[1,"bar"]]]]'
-test_equals "attribute entity exceptions uq", html_to_json, "foo<a href=foo?t=1&amp=2&ampo=3&amp;lt=foo>bar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[[1,"bar"]]]]'
-test_equals "matching closing tags", html_to_json, "foo<a href=\"hi\">hi</a><div>1<div>foo</div>2</div>bar", '[[1,"foo"],[0,"a",{"href":"hi"},[[1,"hi"]]],[0,"div",{},[[1,"1"],[0,"div",{},[[1,"foo"]]],[1,"2"]]],[1,"bar"]]'
-test_equals "mis-matched closing tags", html_to_json, "foo<div>bar<span>baz</div>qux", '[[1,"foo"],[0,"div",{},[[1,"bar"],[0,"span",{},[[1,"baz"]]]]],[1,"qux"]]'
+test_parser = (args) -> # run one parser test case; args = {name, html, expected, errors}
+       parse_errors = [] # every value parse_html reports through the callback
+       errors_cb = (i) -> # handed to parse_html as its parse_error_cb
+               parse_errors.push i
+       parsed = parse_html args.html, errors_cb
+       parsed = JSON.stringify parsed # args.expected is JSON text, so compare the serialized output
+       if parsed isnt args.expected or parse_errors.length isnt args.errors # passes only when both the output and the error count match
+               console.log "test FAILED: \"#{args.name}\""
+       else
+               console.log 'test passed'
+       if parsed isnt args.expected # on an output mismatch, show the input, the expected JSON and the actual JSON
+               console.log "      Input: #{args.html}"
+               console.log "    Correct: #{args.expected}"
+               console.log "     Output: #{parsed}"
+       if parse_errors.length isnt args.errors # on a count mismatch, list what the callback received
+               console.log "   Expected #{args.errors} parse errors, but got these: #{JSON.stringify parse_errors}"
+
+test_parser name: "empty", \
+       html: "",
+       expected: '[]',
+       errors: 0
+test_parser name: "just text", \
+       html: "abc",
+       expected: '[[1,"abc"]]',
+       errors: 0
+test_parser name: "named entity", \
+       html: "a&amp;1234",
+       expected: '[[1,"a&1234"]]',
+       errors: 0
+test_parser name: "broken named character references", \
+       html: "1&amp2&&amp;3&aabbcc;",
+       expected: '[[1,"1&2&&3&aabbcc;"]]',
+       errors: 2
+test_parser name: "numbered entity overrides", \
+       html: "1&#X80&#x80; &#x83",
+       expected: '[[1,"1€€ ƒ"]]',
+       errors: 0
+test_parser name: "open tag", \
+       html: "foo<span>bar",
+       expected: '[[1,"foo"],[0,"span",{},[[1,"bar"]]]]',
+       errors: 1 # no close tag
+test_parser name: "open tag with attributes", \
+       html: "foo<span style=\"foo: bar\" title=\"hi\">bar",
+       expected: '[[1,"foo"],[0,"span",{"style":"foo: bar","title":"hi"},[[1,"bar"]]]]',
+       errors: 1 # no close tag
+test_parser name: "open tag with attributes of various quotings", \
+       html: "foo<span abc=\"def\" g=hij klm='nopqrstuv\"' autofocus>bar",
+       expected: '[[1,"foo"],[0,"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\\"","autofocus":""},[[1,"bar"]]]]',
+       errors: 1 # no close tag
+test_parser name: "attribute entity exceptions dq", \
+       html: "foo<a href=\"foo?t=1&amp=2&ampo=3&amp;lt=foo\">bar",
+       expected: '[[1,"foo"],[0,"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[[1,"bar"]]]]',
+       errors: 2 # no close tag, &amp= in attr
+test_parser name: "attribute entity exceptions sq", \
+       html: "foo<a href='foo?t=1&amp=2&ampo=3&amp;lt=foo'>bar",
+       expected: '[[1,"foo"],[0,"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[[1,"bar"]]]]',
+       errors: 2 # no close tag, &amp= in attr
+test_parser name: "attribute entity exceptions uq", \
+       html: "foo<a href=foo?t=1&amp=2&ampo=3&amp;lt=foo>bar",
+       expected: '[[1,"foo"],[0,"a",{"href":"foo?t=1&amp=2&ampo=3&lt=foo"},[[1,"bar"]]]]',
+       errors: 2 # no close tag, &amp= in attr
+test_parser name: "matching closing tags", \
+       html: "foo<a href=\"hi\">hi</a><div>1<div>foo</div>2</div>bar",
+       expected: '[[1,"foo"],[0,"a",{"href":"hi"},[[1,"hi"]]],[0,"div",{},[[1,"1"],[0,"div",{},[[1,"foo"]]],[1,"2"]]],[1,"bar"]]',
+       errors: 0
+test_parser name: "mis-matched closing tags", \
+       html: "foo<div>bar<span>baz</div>qux",
+       expected: '[[1,"foo"],[0,"div",{},[[1,"bar"],[0,"span",{},[[1,"baz"]]]]],[1,"qux"]]',
+       errors: 1 # close tag mismatch