return null if decoded is txt
return g_dncr.cache[txt] = decoded
-parse_html = (txt) ->
+parse_html = (txt, parse_error_cb = null) ->
cur = 0 # index of next char in txt to be parsed
# declare tree and tokenizer variables so they're in scope below
tree = null
flag_parsing = null
parse_error = ->
- console.log "Parse error at character #{cur} of #{txt.length}"
+ # Report a parse error at index `cur`: hand it to parse_error_cb when the
+ # caller supplied one, otherwise fall back to logging the position.
+ if parse_error_cb?
+ parse_error_cb cur
+ else
+ console.log "Parse error at character #{cur} of #{txt.length}"
# the functions below implement the Tree Construction algorithm
return tree[3]
# everything below is tests on the above
-test_equals = (description, fn, args..., expected_output) ->
- output = fn.apply this, args
+# Compare an already-computed `output` with `expected_output` using `is` and
+# log the outcome. On a mismatch the description and both values are printed;
+# on success only a fixed message is logged.
+test_equals = (description, output, expected_output) ->
if output is expected_output
- console.log "passed: #{description}."
+ console.log "passed." # don't say name, so smart consoles can merge all of these
else
- console.log "FAILED: #{description}..."
+ console.log "FAILED: \"#{description}\""
console.log " Expected: #{expected_output}"
console.log " Actual: #{output}"
-html_to_json = (html) ->
- return JSON.stringify parse_html html
-test_equals "empty", html_to_json, "", '[]'
-test_equals "just text", html_to_json, "abc", '[[1,"abc"]]'
-test_equals "named entity", html_to_json, "a&1234", '[[1,"a&1234"]]'
-test_equals "broken named character references", html_to_json, "1&2&&3&aabbcc;", '[[1,"1&2&&3&aabbcc;"]]'
-test_equals "numbered entity overrides", html_to_json, "1€€ ƒ", '[[1,"1€€ ƒ"]]'
-test_equals "open tag", html_to_json, "foo<span>bar", '[[1,"foo"],[0,"span",{},[[1,"bar"]]]]'
-test_equals "open tag with attributes", html_to_json, "foo<span style=\"foo: bar\" title=\"hi\">bar", '[[1,"foo"],[0,"span",{"style":"foo: bar","title":"hi"},[[1,"bar"]]]]'
-test_equals "open tag with attributes of various quotings", html_to_json, "foo<span abc=\"def\" g=hij klm='nopqrstuv\"' autofocus>bar", '[[1,"foo"],[0,"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\\"","autofocus":""},[[1,"bar"]]]]'
-test_equals "attribute entity exceptions dq", html_to_json, "foo<a href=\"foo?t=1&=2&o=3&lt=foo\">bar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]'
-test_equals "attribute entity exceptions sq", html_to_json, "foo<a href='foo?t=1&=2&o=3&lt=foo'>bar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]'
-test_equals "attribute entity exceptions uq", html_to_json, "foo<a href=foo?t=1&=2&o=3&lt=foo>bar", '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]'
-test_equals "matching closing tags", html_to_json, "foo<a href=\"hi\">hi</a><div>1<div>foo</div>2</div>bar", '[[1,"foo"],[0,"a",{"href":"hi"},[[1,"hi"]]],[0,"div",{},[[1,"1"],[0,"div",{},[[1,"foo"]]],[1,"2"]]],[1,"bar"]]'
-test_equals "mis-matched closing tags", html_to_json, "foo<div>bar<span>baz</div>qux", '[[1,"foo"],[0,"div",{},[[1,"bar"],[0,"span",{},[[1,"baz"]]]]],[1,"qux"]]'
+# Run one parser test case described by `args`:
+#   args.name     - label printed when the case fails
+#   args.html     - input string passed to parse_html
+#   args.expected - expected JSON.stringify of the value parse_html returns
+#   args.errors   - expected number of parse-error callback invocations
+# Parse-error positions are collected through the callback passed to
+# parse_html. Details are logged only for whichever of the two checks
+# (output, error count) did not match.
+test_parser = (args) ->
+ parse_errors = []
+ errors_cb = (i) ->
+ parse_errors.push i
+ parsed = parse_html args.html, errors_cb
+ parsed = JSON.stringify parsed
+ if parsed isnt args.expected or parse_errors.length isnt args.errors
+ console.log "test FAILED: \"#{args.name}\""
+ else
+ console.log 'test passed'
+ if parsed isnt args.expected
+ console.log " Input: #{args.html}"
+ console.log " Correct: #{args.expected}"
+ console.log " Output: #{parsed}"
+ if parse_errors.length isnt args.errors
+ console.log " Expected #{args.errors} parse errors, but got these: #{JSON.stringify parse_errors}"
+
+# Test cases. Each call gives the input HTML, the expected JSON form of the
+# parser output, and the number of parse errors the input should trigger
+# (test_parser compares it with the count of error callbacks it collected).
+test_parser name: "empty", \
+ html: "",
+ expected: '[]',
+ errors: 0
+test_parser name: "just text", \
+ html: "abc",
+ expected: '[[1,"abc"]]',
+ errors: 0
+# NOTE(review): as shown, this input contains no named character reference
+# even though the case is called "named entity" -- confirm the literal was
+# not entity-decoded somewhere along the way.
+test_parser name: "named entity", \
+ html: "a&1234",
+ expected: '[[1,"a&1234"]]',
+ errors: 0
+test_parser name: "broken named character references", \
+ html: "1&2&&3&aabbcc;",
+ expected: '[[1,"1&2&&3&aabbcc;"]]',
+ errors: 2
+# NOTE(review): input and expected text are identical here, so no numeric
+# reference override is visibly exercised -- confirm the input literal.
+test_parser name: "numbered entity overrides", \
+ html: "1€€ ƒ",
+ expected: '[[1,"1€€ ƒ"]]',
+ errors: 0
+test_parser name: "open tag", \
+ html: "foo<span>bar",
+ expected: '[[1,"foo"],[0,"span",{},[[1,"bar"]]]]',
+ errors: 1 # no close tag
+test_parser name: "open tag with attributes", \
+ html: "foo<span style=\"foo: bar\" title=\"hi\">bar",
+ expected: '[[1,"foo"],[0,"span",{"style":"foo: bar","title":"hi"},[[1,"bar"]]]]',
+ errors: 1 # no close tag
+test_parser name: "open tag with attributes of various quotings", \
+ html: "foo<span abc=\"def\" g=hij klm='nopqrstuv\"' autofocus>bar",
+ expected: '[[1,"foo"],[0,"span",{"abc":"def","g":"hij","klm":"nopqrstuv\\\"","autofocus":""},[[1,"bar"]]]]',
+ errors: 1 # no close tag
+test_parser name: "attribute entity exceptions dq", \
+ html: "foo<a href=\"foo?t=1&=2&o=3&lt=foo\">bar",
+ expected: '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]',
+ errors: 2 # no close tag, &= in attr
+test_parser name: "attribute entity exceptions sq", \
+ html: "foo<a href='foo?t=1&=2&o=3&lt=foo'>bar",
+ expected: '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]',
+ errors: 2 # no close tag, &= in attr
+test_parser name: "attribute entity exceptions uq", \
+ html: "foo<a href=foo?t=1&=2&o=3&lt=foo>bar",
+ expected: '[[1,"foo"],[0,"a",{"href":"foo?t=1&=2&o=3<=foo"},[[1,"bar"]]]]',
+ errors: 2 # no close tag, &= in attr
+test_parser name: "matching closing tags", \
+ html: "foo<a href=\"hi\">hi</a><div>1<div>foo</div>2</div>bar",
+ expected: '[[1,"foo"],[0,"a",{"href":"hi"},[[1,"hi"]]],[0,"div",{},[[1,"1"],[0,"div",{},[[1,"foo"]]],[1,"2"]]],[1,"bar"]]',
+ errors: 0
+test_parser name: "mis-matched closing tags", \
+ html: "foo<div>bar<span>baz</div>qux",
+ expected: '[[1,"foo"],[0,"div",{},[[1,"bar"],[0,"span",{},[[1,"baz"]]]]],[1,"qux"]]',
+ errors: 1 # close tag mismatch