JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
run "pending spec changes" tests too
[peach-html5-editor.git] / parse-html.coffee
index b3b3f7f..20bc99c 100644 (file)
@@ -1780,17 +1780,23 @@ parse_html = (args) ->
 
        # 8.2.5.4.7 http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody
        in_body_any_other_end_tag = (name) -> # factored out because adoption agency calls it
-               for el, i in open_els
-                       if el.name is name and el.namespace is NS_HTML
+               node = open_els[0] # begin with the entry at index 0 of the open-element stack
+               loop # examine one stack entry per pass; only the returns below leave this loop
+                       if node.name is name and node.namespace is NS_HTML # HTML element matching the end tag's name
                                generate_implied_end_tags name # arg is exception
-                               parse_error() unless i is 0
-                               while i >= 0
-                                       open_els.shift()
-                                       i -= 1
-                               return
-                       if special_elements[el.name] is el.namespace
+                               unless node is open_els[0] # after implied end tags, the match is still not the first entry
+                                       parse_error()
+                               loop # shift entries off the stack up to and including the matched node
+                                       el = open_els.shift()
+                                       if el is node # compared by identity, not by stack position
+                                               return
+                       if special_elements[node.name] is node.namespace # hit a special element before any match
                                parse_error()
                                return
+                       for el, i in open_els # locate the current node's position again by identity
+                               if node is el
+                                       node = open_els[i + 1] # NOTE(review): undefined if node was the last entry, so node.name would throw on the next pass - presumably a special element is always reached first; confirm
+                                       break
                return
        ins_mode_in_body = (t) ->
                if t.type is TYPE_TEXT and t.text is "\u0000"
@@ -1918,11 +1924,7 @@ parse_html = (args) ->
                if t.type is TYPE_START_TAG and (t.name is 'pre' or t.name is 'listing')
                        close_p_if_in_button_scope()
                        insert_html_element t
-                       # spec: If the next token is a "LF" (U+000A) character token, then
-                       # ignore that token and move on to the next one. (Newlines at the
-                       # start of pre blocks are ignored as an authoring convenience.)
-                       if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
-                               cur += 1
+                       eat_next_token_if_newline()
                        flag_frameset_ok = false
                        return
                if t.type is TYPE_START_TAG and t.name is 'form'
@@ -2117,6 +2119,10 @@ parse_html = (args) ->
                        return
                if t.type is TYPE_START_TAG and t.name is 'nobr'
                        reconstruct_afe()
+                       if is_in_scope 'nobr', NS_HTML
+                               parse_error()
+                               adoption_agency 'nobr'
+                               reconstruct_afe()
                        el = insert_html_element t
                        afe_push el
                        return
@@ -2151,7 +2157,8 @@ parse_html = (args) ->
                        return
                if t.type is TYPE_END_TAG and t.name is 'br'
                        parse_error()
-                       t.type = TYPE_START_TAG
+                       # W3C: t.type = TYPE_START_TAG
+                       t = new_open_tag 'br' # WHATWG
                        # fall through
                if t.type is TYPE_START_TAG and (t.name is 'area' or t.name is 'br' or t.name is 'embed' or t.name is 'img' or t.name is 'keygen' or t.name is 'wbr')
                        reconstruct_afe()
@@ -2168,7 +2175,8 @@ parse_html = (args) ->
                        unless is_input_hidden_tok t
                                flag_frameset_ok = false
                        return
-               if t.type is TYPE_START_TAG and (t.name is 'param' or t.name is 'source' or t.name is 'track')
+               if t.type is TYPE_START_TAG and (t.name is 'menuitem' or t.name is 'param' or t.name is 'source' or t.name is 'track')
+                       # WHATWG adds 'menuitem' for this block
                        insert_html_element t
                        open_els.shift()
                        t.acknowledge_self_closing()
@@ -2228,8 +2236,7 @@ parse_html = (args) ->
                        return
                if t.type is TYPE_START_TAG and t.name is 'textarea'
                        insert_html_element t
-                       if txt.charAt(cur) is "\u000a" # FIXME check for crlf?
-                               cur += 1
+                       eat_next_token_if_newline()
                        tok_state = tok_state_rcdata
                        original_ins_mode = ins_mode
                        flag_frameset_ok = false
@@ -2724,7 +2731,7 @@ parse_html = (args) ->
                        insert_html_element t
                        return
                if t.type is TYPE_END_TAG and t.name is 'optgroup'
-                       if open_els[0].name is 'option' and open_els[0].namespace in NS_HTML
+                       if open_els[0].name is 'option' and open_els[0].namespace is NS_HTML
                                if open_els[1].name is 'optgroup' and open_els[0].namespace is NS_HTML
                                        open_els.shift()
                        if open_els[0].name is 'optgroup' and open_els[0].namespace is NS_HTML
@@ -2760,7 +2767,7 @@ parse_html = (args) ->
                        return
                if t.type is TYPE_START_TAG and (t.name is 'input' or t.name is 'keygen' or t.name is 'textarea')
                        parse_error()
-                       if is_in_select_scope 'select', NS_HTML
+                       unless is_in_select_scope 'select', NS_HTML
                                return
                        loop
                                el = open_els.shift()
@@ -3086,7 +3093,7 @@ parse_html = (args) ->
                                tok_state = tok_state_tag_open
                        when "\u0000"
                                parse_error()
-                               return new_text_node "\ufffd"
+                               return new_text_node c
                        when '' # EOF
                                return new_eof_token()
                        else
@@ -3783,7 +3790,7 @@ parse_html = (args) ->
                        return
                if c is '>'
                        tok_state = tok_state_data
-                       return
+                       return tok_cur_tag
                if is_uc_alpha(c)
                        tok_cur_tag.attrs_a.unshift [c.toLowerCase(), '']
                        tok_state = tok_state_attribute_name
@@ -4536,6 +4543,7 @@ parse_html = (args) ->
                else
                        val = txt.substr cur, (next_gt - cur)
                        cur = next_gt + 3
+               val = val.replace(new RegExp("\u0000", 'g'), "\ufffd")
                if val.length > 0
                        return new_character_token val # fixfull split
                return null
@@ -4629,6 +4637,24 @@ parse_html = (args) ->
                                        return '&'
                return # never reached
 
+       eat_next_token_if_newline = -> # look ahead one token; keep it consumed only if it is a newline text token
+               old_cur = cur # remember the input position so the look-ahead can be undone
+               t = null
+               until t? # keep invoking the current tokenizer state until it yields a token
+                       t = tok_state()
+               if t.type is TYPE_TEXT
+                       # what counts as a newline depends on whether the text came from a character reference
+                       if cur - old_cur is 1 # exactly one input character was consumed
+                               # literal character (not a character reference): CR or LF counts
+                               if t.text is "\u000d" or t.text is "\u000a"
+                                       return # cur stays advanced, so the newline token is dropped
+                       else # any other amount consumed is treated as a character reference: only LF counts
+                               if t.text is "\u000a"
+                                       return # cur stays advanced, so the newline token is dropped
+               # not a "newline": rewind the input position to where the look-ahead started
+               cur = old_cur # NOTE(review): only cur is rewound; tok_state (reassigned by state functions) is not restored here - confirm that is safe
+               return
+
        # tree constructor initialization
        # see comments on TYPE_TAG/etc for the structure of this data
        txt = args.html
@@ -4657,19 +4683,20 @@ parse_html = (args) ->
 
        # text pre-processing
        # FIXME http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
-       txt = txt.replace(new RegExp("\u0000", 'g'), "\ufffd") # fixfull spec doesn't say this
        txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
        txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
-       if args.name is "tests23.dat #1"
+       if args.name is "webkit01.dat #12"
                console.log "hi"
        # proccess input
        # http://www.w3.org/TR/html5/syntax.html#tree-construction
-       while flag_parsing
-               t = tok_state()
-               if t?
-                       process_token t
-                       # fixfull parse error if has self-closing flag, but it wasn't acknolwedged
+       parse_main_loop = -> # pull tokens from the tokenizer and pass each one to process_token
+               while flag_parsing # runs until flag_parsing becomes falsy
+                       t = tok_state() # tok_state holds the current tokenizer state function
+                       if t? # a state call may return no token; only real tokens are processed
+                               process_token t
+                               # fixfull parse error if has self-closing flag, but it wasn't acknowledged
+       parse_main_loop() # run the loop right away; doc.children is returned afterwards
        return doc.children
 
 serialize_els = (els, shallow, show_ids) ->