JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
implement scope checkers, test for formatting els
author: Jason Woofenden <jason@jasonwoof.com>
Tue, 15 Dec 2015 20:18:25 +0000 (15:18 -0500)
committer: Jason Woofenden <jason@jasonwoof.com>
Tue, 15 Dec 2015 20:18:25 +0000 (15:18 -0500)
parse-html.coffee

index ef5545f..b7421e5 100644 (file)
@@ -86,12 +86,6 @@ uc_alpha = "ABCDEFGHIJKLMNOPQRSTUVWXQZ"
 digits = "0123456789"
 alnum = lc_alpha + uc_alpha + digits
 hex_chars = digits + "abcdefABCDEF"
-scopers = { # FIXME these are supposed to be namespace specific
-       'applet': true, 'caption': true, 'html': true, 'table': true, 'td': true,
-       'th': true, 'marquee': true, 'object': true, 'template': true, 'mi': true,
-       'mo': true, 'mn': true, 'ms': true, 'mtext': true, 'annotation-xml': true,
-       'foreignObject': true, 'desc': true, 'title'
-}
 
 # some SVG elements have dashes in them
 tag_name_chars = alnum + "-"
@@ -259,15 +253,46 @@ parse_html = (txt, parse_error_cb = null) ->
 
        # But first... the helpers
        template_tag_is_open = ->
-               for t of open_tags
+               for t in open_tags
                        if t.type is TYPE_TAG and t.name is 'template'
                                return true
                return false
+       is_in_scope_x = (tag_name, scope) ->
+               for t in open_tags
+                       if t.name is tag_name
+                               return true
+                       if t.name of scope
+                               return false
+               return false
+       is_in_scope_x_y = (tag_name, scope, scope2) ->
+               for t in open_tags
+                       if t.name is tag_name
+                               return true
+                       if t.name of scope
+                               return false
+                       if t.name of scope2
+                               return false
+               return false
+       standard_scopers = { # FIXME these are supposed to be namespace specific
+               'applet': true, 'caption': true, 'html': true, 'table': true, 'td': true,
+               'th': true, 'marquee': true, 'object': true, 'template': true, 'mi': true,
+               'mo': true, 'mn': true, 'ms': true, 'mtext': true, 'annotation-xml': true,
+               'foreignObject': true, 'desc': true, 'title'
+       }
+       button_scopers = button: true
+       li_scopers = ol: true, ul: true
+       table_scopers = html: true, table: true, template: true
        is_in_scope = (tag_name) ->
-               for t of open_tags
+               return is_in_scope_x tag_name, standard_scopers
+       is_in_button_scope = (tag_name) ->
+               return is_in_scope_x_y tag_name, standard_scopers, button_scopers
+       is_in_table_scope = (tag_name) ->
+               return is_in_scope_x tag_name, table_scopers
+       is_in_select_scope = (tag_name) ->
+               for t in open_tags
                        if t.name is tag_name
                                return true
-                       if t.name of scopers
+                       if t.name isnt 'optgroup' and t.name isnt 'option'
                                return false
                return false
 
@@ -778,9 +803,9 @@ test_parser = (args) ->
                sep = ','
                serialized += t.serialize()
        if serialized isnt args.expected or parse_errors.length isnt args.errors
-               console.log "test FAILED: \"#{args.name}\""
+               console.log "FAILED: \"#{args.name}\""
        else
-               console.log 'test passed'
+               console.log "passed \"#{args.name}\""
        if serialized isnt args.expected
                console.log "      Input: #{args.html}"
                console.log "    Correct: #{args.expected}"
@@ -836,7 +861,15 @@ test_parser name: "matching closing tags", \
        html: "foo<a href=\"hi\">hi</a><div>1<div>foo</div>2</div>bar",
        expected: 'text:"foo",tag:"a",{"href":"hi"},[text:"hi"],tag:"div",{},[text:"1",tag:"div",{},[text:"foo"],text:"2"],text:"bar"',
        errors: 0
-test_parser name: "mis-matched closing tags", \
+test_parser name: "missing closing tag inside", \
        html: "foo<div>bar<span>baz</div>qux",
        expected: 'text:"foo",tag:"div",{},[text:"bar",tag:"span",{},[text:"baz"]],text:"qux"',
        errors: 1 # close tag mismatch
+test_parser name: "mis-matched closing tags", \
+       html: "<span>12<div>34</span>56</div>78",
+       expected: 'tag:"span",{},[text:"12",tag:"div",{},[text:"3456"],text:"78"]',
+       errors: 2 # misplaced </span>, no </span> at the end
+test_parser name: "mis-matched formatting elements", \
+       html: "12<b>34<i>56</b>78</i>90",
+       expected: 'text:"12",tag:"b",{},[text:"34",tag:"i",{},[text:"56"]],tag:"i",{},[text:"78"],text:"90"',
+       errors: 2 # FIXME dunno how many there should be