JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
fix minor parsing bugs
[peach-html5-editor.git] / parse-html.coffee
index 0e79a1d..b25359d 100644 (file)
@@ -112,7 +112,7 @@ class Node
                        @id = "#{++prev_node_id}"
        acknowledge_self_closing: ->
                if @token?
-                       @token.flag 'did_self_close'
+                       @token.flag 'did_self_close', true
                else
                        @flag 'did_self_close', true
        flag: (key, value = null) ->
@@ -468,7 +468,7 @@ svg_attribute_fixes = {
        diffuseconstant: 'diffuseConstant'
        edgemode: 'edgeMode'
        externalresourcesrequired: 'externalResourcesRequired'
-       filterres: 'filterRes'
+       # WTAG removes this: filterres: 'filterRes'
        filterunits: 'filterUnits'
        glyphref: 'glyphRef'
        gradienttransform: 'gradientTransform'
@@ -649,10 +649,10 @@ parse_html = (args) ->
        standard_scopers = {
                applet: NS_HTML, caption: NS_HTML, html: NS_HTML, table: NS_HTML,
                td: NS_HTML, th: NS_HTML, marquee: NS_HTML, object: NS_HTML,
-               template: NS_HTML, mi: NS_MATHML,
+               template: NS_HTML,
 
-               mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML, mtext: NS_MATHML,
-               'annotation-xml': NS_MATHML,
+               mi: NS_MATHML, mo: NS_MATHML, mn: NS_MATHML, ms: NS_MATHML,
+               mtext: NS_MATHML, 'annotation-xml': NS_MATHML,
 
                foreignObject: NS_SVG, desc: NS_SVG, title: NS_SVG
        }
@@ -1213,7 +1213,7 @@ parse_html = (args) ->
                        ins_mode t
                        return
                if is_mathml_text_integration_point(acn)
-                       if t.type is TYPE_START_TAG and (t.name is 'mglyph' or t.name is 'malignmark')
+                       if t.type is TYPE_START_TAG and not (t.name is 'mglyph' or t.name is 'malignmark')
                                ins_mode t
                                return
                        if t.type is TYPE_TEXT
@@ -1676,7 +1676,7 @@ parse_html = (args) ->
                        parse_error()
                        return if template_tag_is_open()
                        root_attrs = open_els[open_els.length - 1].attrs
-                       for a of t.attrs_a
+                       for a in t.attrs_a
                                root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
                        return
 
@@ -2726,7 +2726,8 @@ parse_html = (args) ->
                        ins_mode_in_body t
                        return
                if t.type is TYPE_COMMENT
-                       insert_comment t, [open_els[0], open_els[0].children.length]
+                       first = open_els[open_els.length - 1]
+                       insert_comment t, [first, first.children.length]
                        return
                if t.type is TYPE_DOCTYPE
                        parse_error()
@@ -2805,7 +2806,7 @@ parse_html = (args) ->
                        ins_mode_in_body t
                        return
                if t.type is TYPE_END_TAG and t.name is 'html'
-                       insert_mode = ins_mode_after_after_frameset
+                       ins_mode = ins_mode_after_after_frameset
                        return
                if t.type is TYPE_START_TAG and t.name is 'noframes'
                        ins_mode_in_head t
@@ -2872,7 +2873,7 @@ parse_html = (args) ->
                        adjust_svg_attributes t
                adjust_foreign_attributes t
                insert_foreign_element t, acn.namespace
-               if t.flag 'self-closing' # FIXME CONTINUE this isn't getting set
+               if t.flag 'self-closing'
                        if t.name is 'script'
                                t.acknowledge_self_closing()
                                in_foreign_content_end_script()
@@ -2906,8 +2907,7 @@ parse_html = (args) ->
                                return
                        loop # is this safe?
                                open_els.shift()
-                               cn = open_els[0]
-                               if is_mathml_text_integration_point(cn) or is_html_integration(cn) or cn.namespace is NS_HTML
+                               if is_mathml_text_integration_point(open_els[0]) or is_html_integration(open_els[0]) or open_els[0].namespace is NS_HTML
                                        break
                        process_token t
                        return
@@ -2918,9 +2918,11 @@ parse_html = (args) ->
                        in_foreign_content_end_script()
                        return
                if t.type is TYPE_END_TAG
-                       if open_els[0].name.toLowerCase() isnt t.name
+                       i = 0
+                       node = open_els[i]
+                       if node.name.toLowerCase() isnt t.name
                                parse_error()
-                       for node in open_els
+                       loop
                                if node is open_els[open_els.length - 1]
                                        return
                                if node.name.toLowerCase() is t.name
@@ -2928,6 +2930,8 @@ parse_html = (args) ->
                                                el = open_els.shift()
                                                if el is node
                                                        return
+                               i += 1
+                               node = open_els[i]
                                if node.namespace is NS_HTML
                                        break
                        ins_mode t # explicitly call HTML insertion mode
@@ -3016,28 +3020,31 @@ parse_html = (args) ->
 
        # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
        tok_state_tag_open = ->
-               switch c = txt.charAt(cur++)
-                       when '!'
-                               tok_state = tok_state_markup_declaration_open
-                       when '/'
-                               tok_state = tok_state_end_tag_open
-                       when '?'
-                               parse_error()
-                               tok_cur_tag = new_comment_token '?'
-                               tok_state = tok_state_bogus_comment
-                       else
-                               if is_lc_alpha(c)
-                                       tok_cur_tag = new_open_tag c
-                                       tok_state = tok_state_tag_name
-                               else if is_uc_alpha(c)
-                                       tok_cur_tag = new_open_tag c.toLowerCase()
-                                       tok_state = tok_state_tag_name
-                               else
-                                       parse_error()
-                                       tok_state = tok_state_data
-                                       cur -= 1 # we didn't parse/handle the char after <
-                                       return new_text_node '<'
-               return null
+               c = txt.charAt(cur++)
+               if c is '!'
+                       tok_state = tok_state_markup_declaration_open
+                       return
+               if c is '/'
+                       tok_state = tok_state_end_tag_open
+                       return
+               if is_uc_alpha(c)
+                       tok_cur_tag = new_open_tag c.toLowerCase()
+                       tok_state = tok_state_tag_name
+                       return
+               if is_lc_alpha(c)
+                       tok_cur_tag = new_open_tag c
+                       tok_state = tok_state_tag_name
+                       return
+               if c is '?'
+                       parse_error()
+                       tok_cur_tag = new_comment_token '?' # FIXME right?
+                       tok_state = tok_state_bogus_comment
+                       return
+               # Anything else
+               parse_error()
+               tok_state = tok_state_data
+               cur -= 1 # we didn't parse/handle the char after <
+               return new_text_node '<'
 
        # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
        tok_state_end_tag_open = ->
@@ -3389,7 +3396,7 @@ parse_html = (args) ->
                # Anything else
                tok_state = tok_state_script_data_escaped
                cur -= 1 # Reconsume
-               return new_character_token c
+               return new_character_token '<'
 
        # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
        tok_state_script_data_escaped_end_tag_open = ->
@@ -3767,7 +3774,7 @@ parse_html = (args) ->
        tok_state_self_closing_start_tag = ->
                c = txt.charAt(cur++)
                if c is '>'
-                       tok_cur_tag.flag 'self-closing'
+                       tok_cur_tag.flag 'self-closing', true
                        tok_state = tok_state_data
                        return tok_cur_tag
                if c is ''
@@ -4508,7 +4515,7 @@ parse_html = (args) ->
        txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
        txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
 
-       if args.name is "plain-text-unsafe.dat #4"
+       if args.name is "tests18.dat #17"
                console.log "hi"
        # proccess input
        # http://www.w3.org/TR/html5/syntax.html#tree-construction