fix garbage after </

[peach-html5-editor.git] / parse-html.coffee
diff --git a/parse-html.coffee b/parse-html.coffee

index 2b8fda7..581002b 100644 (file)
--- a/parse-html.coffee
+++ b/parse-html.coffee
@@ -468,7 +468,7 @@ svg_attribute_fixes = {
         diffuseconstant: 'diffuseConstant'
         edgemode: 'edgeMode'
         externalresourcesrequired: 'externalResourcesRequired'
-       filterres: 'filterRes'
+       # WHATWG removes this: filterres: 'filterRes'
         filterunits: 'filterUnits'
         glyphref: 'glyphRef'
         gradienttransform: 'gradientTransform'
@@ -896,9 +896,9 @@ parse_html = (args) ->
                 debug_log "tree: #{serialize_els doc.children, false, true}"
                 debug_log "open_els: #{serialize_els open_els, true, true}"
                 debug_log "afe: #{serialize_els afe, true, true}"
+               # FIXME CONTINUE do WHATWG thing here
                 if open_els[0].name is subject and open_els[0].namespace is NS_HTML
-                       el = open_els[0]
-                       open_els.shift()
+                       el = open_els.shift()
                         # remove it from the list of active formatting elements (if found)
                         for t, i in afe
                                 if t is el
@@ -1676,7 +1676,7 @@ parse_html = (args) ->
                         parse_error()
                         return if template_tag_is_open()
                         root_attrs = open_els[open_els.length - 1].attrs
-                       for a of t.attrs_a
+                       for a in t.attrs_a
                                 root_attrs[a[0]] = a[1] unless root_attrs[a[0]]?
                         return
  
@@ -2726,7 +2726,8 @@ parse_html = (args) ->
                         ins_mode_in_body t
                         return
                 if t.type is TYPE_COMMENT
-                       insert_comment t, [open_els[0], open_els[0].children.length]
+                       first = open_els[open_els.length - 1]
+                       insert_comment t, [first, first.children.length]
                         return
                 if t.type is TYPE_DOCTYPE
                         parse_error()
@@ -2805,7 +2806,7 @@ parse_html = (args) ->
                         ins_mode_in_body t
                         return
                 if t.type is TYPE_END_TAG and t.name is 'html'
-                       insert_mode = ins_mode_after_after_frameset
+                       ins_mode = ins_mode_after_after_frameset
                         return
                 if t.type is TYPE_START_TAG and t.name is 'noframes'
                         ins_mode_in_head t
@@ -3019,50 +3020,55 @@ parse_html = (args) ->
  
         # 8.2.4.8 http://www.w3.org/TR/html5/syntax.html#tag-open-state
         tok_state_tag_open = ->
-               switch c = txt.charAt(cur++)
-                       when '!'
-                               tok_state = tok_state_markup_declaration_open
-                       when '/'
-                               tok_state = tok_state_end_tag_open
-                       when '?'
-                               parse_error()
-                               tok_cur_tag = new_comment_token '?'
-                               tok_state = tok_state_bogus_comment
-                       else
-                               if is_lc_alpha(c)
-                                       tok_cur_tag = new_open_tag c
-                                       tok_state = tok_state_tag_name
-                               else if is_uc_alpha(c)
-                                       tok_cur_tag = new_open_tag c.toLowerCase()
-                                       tok_state = tok_state_tag_name
-                               else
-                                       parse_error()
-                                       tok_state = tok_state_data
-                                       cur -= 1 # we didn't parse/handle the char after <
-                                       return new_text_node '<'
-               return null
+               c = txt.charAt(cur++)
+               if c is '!'
+                       tok_state = tok_state_markup_declaration_open
+                       return
+               if c is '/'
+                       tok_state = tok_state_end_tag_open
+                       return
+               if is_uc_alpha(c)
+                       tok_cur_tag = new_open_tag c.toLowerCase()
+                       tok_state = tok_state_tag_name
+                       return
+               if is_lc_alpha(c)
+                       tok_cur_tag = new_open_tag c
+                       tok_state = tok_state_tag_name
+                       return
+               if c is '?'
+                       parse_error()
+                       tok_cur_tag = new_comment_token '?' # FIXME right?
+                       tok_state = tok_state_bogus_comment
+                       return
+               # Anything else
+               parse_error()
+               tok_state = tok_state_data
+               cur -= 1 # we didn't parse/handle the char after <
+               return new_text_node '<'
  
         # 8.2.4.9 http://www.w3.org/TR/html5/syntax.html#end-tag-open-state
         tok_state_end_tag_open = ->
-               switch c = txt.charAt(cur++)
-                       when '>'
-                               parse_error()
-                               tok_state = tok_state_data
-                       when '' # EOF
-                               parse_error()
-                               tok_state = tok_state_data
-                               return new_text_node '</'
-                       else
-                               if is_uc_alpha(c)
-                                       tok_cur_tag = new_end_tag c.toLowerCase()
-                                       tok_state = tok_state_tag_name
-                               else if is_lc_alpha(c)
-                                       tok_cur_tag = new_end_tag c
-                                       tok_state = tok_state_tag_name
-                               else
-                                       parse_error()
-                                       tok_cur_tag = new_comment_token '/'
-                                       tok_state = tok_state_bogus_comment
+               c = txt.charAt(cur++)
+               if is_uc_alpha(c)
+                       tok_cur_tag = new_end_tag c.toLowerCase()
+                       tok_state = tok_state_tag_name
+                       return
+               if is_lc_alpha(c)
+                       tok_cur_tag = new_end_tag c
+                       tok_state = tok_state_tag_name
+                       return
+               if c is '>'
+                       parse_error()
+                       tok_state = tok_state_data
+                       return
+               if c is '' # EOF
+                       parse_error()
+                       tok_state = tok_state_data
+                       return new_text_node '</'
+               # Anything else
+               parse_error()
+               tok_cur_tag = new_comment_token c
+               tok_state = tok_state_bogus_comment
                 return null
  
         # 8.2.4.10 http://www.w3.org/TR/html5/syntax.html#tag-name-state
@@ -3392,7 +3398,7 @@ parse_html = (args) ->
                 # Anything else
                 tok_state = tok_state_script_data_escaped
                 cur -= 1 # Reconsume
-               return new_character_token c
+               return new_character_token '<'
  
         # 8.2.4.26 http://www.w3.org/TR/html5/syntax.html#script-data-escaped-end-tag-open-state
         tok_state_script_data_escaped_end_tag_open = ->
@@ -4511,7 +4517,7 @@ parse_html = (args) ->
         txt = txt.replace(new RegExp("\r\n", 'g'), "\n") # fixfull spec doesn't say this
         txt = txt.replace(new RegExp("\r", 'g'), "\n") # fixfull spec doesn't say this
  
-       if args.name is "plain-text-unsafe.dat #4"
+       if args.name is "tests18.dat #17"
                 console.log "hi"
         # proccess input
         # http://www.w3.org/TR/html5/syntax.html#tree-construction