handle NUL characters (U+0000) properly

author Jason Woofenden <jason@jasonwoof.com>

Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)

committer Jason Woofenden <jason@jasonwoof.com>

Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)
author Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)
diff --git a/parse-html.coffee b/parse-html.coffee

index ed0070a..20bc99c 100644 (file)
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -3093,7 +3093,7 @@ parse_html = (args) ->
                                 tok_state = tok_state_tag_open
                         when "\u0000"
                                 parse_error()
-                               return new_text_node "\ufffd"
+                               return new_text_node c
                         when '' # EOF
                                 return new_eof_token()
                         else
@@ -4543,6 +4543,7 @@ parse_html = (args) ->
                 else
                         val = txt.substr cur, (next_gt - cur)
                         cur = next_gt + 3
+               val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
                 if val.length > 0
                         return new_character_token val # fixfull split
                 return null
@@ -4682,7 +4683,6 @@ parse_html = (args) ->
  
         # text pre-processing
         # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
-       txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
  
diff --git a/test.coffee b/test.coffee

index 55470ce..327f800 100644 (file)
--- a/test.coffee
+++ b/test.coffee
@@ -1681,14 +1681,12 @@ tests = [
                 name: "plain-text-unsafe.dat #2"
                 html: "<html>\u0000<frameset></frameset>"
                 errors: 4
-               #orig: expected: "| <html>\n|   <head>\n|   <frameset>\n"
-               expected: "| <html>\n|   <head>\n|   <body>\n|     \"\ufffd\"\n"
+               expected: "| <html>\n|   <head>\n|   <frameset>\n"
         }, {
                 name: "plain-text-unsafe.dat #3"
                 html: "<html> \u0000 <frameset></frameset>"
                 errors: 4
-               # orig: expected: "| <html>\n|   <head>\n|   <frameset>\n"
-               expected: "| <html>\n|   <head>\n|   <body>\n|     \"\ufffd \"\n"
+               expected: "| <html>\n|   <head>\n|   <frameset>\n"
         }, {
                 name: "plain-text-unsafe.dat #4"
                 html: "<html>a\u0000a<frameset></frameset>"
@@ -7974,7 +7972,7 @@ serialize_els = (els, prefix = '| ') ->
                                 ret += "#{prefix}UNKNOWN TAG TYPE #{el.type}"
         return ret
  
-test_results = passed: 0, failed: 0, fragment: 0, pending: 0, broken: 0
+test_results = passed: 0, failed: 0, fragment: 0, pending: 0
  test_parser = (args) ->
         if args.fragment? # hide fragment tests for now
                 test_results.fragment += 1
@@ -7982,10 +7980,6 @@ test_parser = (args) ->
         if args.name.substr(0, 20) is "pending-spec-changes" # hide for now
                 test_results.pending += 1
                 return
-       if args.html.indexOf("\u0000") > -1 and args.expected.indexOf("\ufffd") is -1
-               # these tests seem to think that \u0000 doesn't become \uffff in_body
-               test_results.broken += 1
-               return
         wheic.debug_log_reset()
         parse_errors = []
         args.error_cb = (i) ->
@@ -8012,7 +8006,7 @@ test_parser = (args) ->
                 test_results.passed += 1
                 # console.log "passed \"#{args.name}\""
  test_summary = ->
-       console.log "Tests passed: #{test_results.passed}, Failed: #{test_results.failed}, fragment: #{test_results.fragment}, pending: #{test_results.pending}, broken: #{test_results.broken}"
+       console.log "Tests passed: #{test_results.passed}, Failed: #{test_results.failed}, fragment: #{test_results.fragment}, pending: #{test_results.pending}"
  
  
  next_test = 0
author	Jason Woofenden <jason@jasonwoof.com>
	Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)
committer	Jason Woofenden <jason@jasonwoof.com>
	Thu, 24 Dec 2015 17:31:45 +0000 (12:31 -0500)
parse-html.coffee		patch \| blob \| history
test.coffee		patch \| blob \| history