JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
handle nulls properly
author Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)
parse-html.coffee
test.coffee

index ed0070a..20bc99c 100644 (file)
@@ -3093,7 +3093,7 @@ parse_html = (args) ->
                                tok_state = tok_state_tag_open
                        when "\u0000"
                                parse_error()
-                               return new_text_node "\ufffd"
+                               return new_text_node c
                        when '' # EOF
                                return new_eof_token()
                        else
@@ -4543,6 +4543,7 @@ parse_html = (args) ->
                else
                        val = txt.substr cur, (next_gt - cur)
                        cur = next_gt + 3
+               val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
                if val.length > 0
                        return new_character_token val # fixfull split
                return null
@@ -4682,7 +4683,6 @@ parse_html = (args) ->
 
        # text pre-processing
        # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
-       txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
        txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
        txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
index 55470ce..327f800 100644 (file)
@@ -1681,14 +1681,12 @@ tests = [
                name: "plain-text-unsafe.dat #2"
                html: "<html>\u0000<frameset></frameset>"
                errors: 4
-               #orig: expected: "| <html>\n|   <head>\n|   <frameset>\n"
-               expected: "| <html>\n|   <head>\n|   <body>\n|     \"\ufffd\"\n"
+               expected: "| <html>\n|   <head>\n|   <frameset>\n"
        }, {
                name: "plain-text-unsafe.dat #3"
                html: "<html> \u0000 <frameset></frameset>"
                errors: 4
-               # orig: expected: "| <html>\n|   <head>\n|   <frameset>\n"
-               expected: "| <html>\n|   <head>\n|   <body>\n|     \"\ufffd \"\n"
+               expected: "| <html>\n|   <head>\n|   <frameset>\n"
        }, {
                name: "plain-text-unsafe.dat #4"
                html: "<html>a\u0000a<frameset></frameset>"
@@ -7974,7 +7972,7 @@ serialize_els = (els, prefix = '| ') ->
                                ret += "#{prefix}UNKNOWN TAG TYPE #{el.type}"
        return ret
 
-test_results = passed: 0, failed: 0, fragment: 0, pending: 0, broken: 0
+test_results = passed: 0, failed: 0, fragment: 0, pending: 0
 test_parser = (args) ->
        if args.fragment? # hide fragment tests for now
                test_results.fragment += 1
@@ -7982,10 +7980,6 @@ test_parser = (args) ->
        if args.name.substr(0, 20) is "pending-spec-changes" # hide for now
                test_results.pending += 1
                return
-       if args.html.indexOf("\u0000") > -1 and args.expected.indexOf("\ufffd") is -1
-               # these tests seem to think that \u0000 doesn't become \uffff in_body
-               test_results.broken += 1
-               return
        wheic.debug_log_reset()
        parse_errors = []
        args.error_cb = (i) ->
@@ -8012,7 +8006,7 @@ test_parser = (args) ->
                test_results.passed += 1
                # console.log "passed \"#{args.name}\""
 test_summary = ->
-       console.log "Tests passed: #{test_results.passed}, Failed: #{test_results.failed}, fragment: #{test_results.fragment}, pending: #{test_results.pending}, broken: #{test_results.broken}"
+       console.log "Tests passed: #{test_results.passed}, Failed: #{test_results.failed}, fragment: #{test_results.fragment}, pending: #{test_results.pending}"
 
 
 next_test = 0