From 55d1353f6b2bf2f9b056c1083125369dfcdf3102 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Thu, 24 Dec 2015 12:31:45 -0500 Subject: [PATCH] handle nulls properly --- parse-html.coffee | 4 ++-- test.coffee | 14 ++++---------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/parse-html.coffee b/parse-html.coffee index ed0070a..20bc99c 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -3093,7 +3093,7 @@ parse_html = (args) -> tok_state = tok_state_tag_open when "\u0000" parse_error() - return new_text_node "\ufffd" + return new_text_node c when '' # EOF return new_eof_token() else @@ -4543,6 +4543,7 @@ parse_html = (args) -> else val = txt.substr cur, (next_gt - cur) cur = next_gt + 3 + val = val.replace(new RegExp("\u0000", 'g'), "\ufffd") if val.length > 0 return new_character_token val # fixfull split return null @@ -4682,7 +4683,6 @@ parse_html = (args) -> # text pre-processing # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream - txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this diff --git a/test.coffee b/test.coffee index 55470ce..327f800 100644 --- a/test.coffee +++ b/test.coffee @@ -1681,14 +1681,12 @@ tests = [ name: "plain-text-unsafe.dat #2" html: "\u0000" errors: 4 - #orig: expected: "| \n| \n| \n" - expected: "| \n| \n| \n| \"\ufffd\"\n" + expected: "| \n| \n| \n" }, { name: "plain-text-unsafe.dat #3" html: " \u0000 " errors: 4 - # orig: expected: "| \n| \n| \n" - expected: "| \n| \n| \n| \"\ufffd \"\n" + expected: "| \n| \n| \n" }, { name: "plain-text-unsafe.dat #4" html: "a\u0000a" @@ -7974,7 +7972,7 @@ serialize_els = (els, prefix = '| ') -> ret += "#{prefix}UNKNOWN TAG TYPE #{el.type}" return ret -test_results = passed: 0, failed: 0, fragment: 0, pending: 0, broken: 0 +test_results = passed: 0, failed: 0, fragment: 0, pending: 0 test_parser = (args) -> if args.fragment? # hide fragment tests for now test_results.fragment += 1 @@ -7982,10 +7980,6 @@ test_parser = (args) -> if args.name.substr(0, 20) is "pending-spec-changes" # hide for now test_results.pending += 1 return - if args.html.indexOf("\u0000") > -1 and args.expected.indexOf("\ufffd") is -1 - # these tests seem to think that \u0000 doesn't become \uffff in_body - test_results.broken += 1 - return wheic.debug_log_reset() parse_errors = [] args.error_cb = (i) -> @@ -8012,7 +8006,7 @@ test_parser = (args) -> test_results.passed += 1 # console.log "passed \"#{args.name}\"" test_summary = -> - console.log "Tests passed: #{test_results.passed}, Failed: #{test_results.failed}, fragment: #{test_results.fragment}, pending: #{test_results.pending}, broken: #{test_results.broken}" + console.log "Tests passed: #{test_results.passed}, Failed: #{test_results.failed}, fragment: #{test_results.fragment}, pending: #{test_results.pending}" next_test = 0 -- 1.7.10.4