JasonWoof Got questions, comments, patches, etc.? Contact Jason Woofenden
implement most details about where to insert nodes
author Jason Woofenden <jason@jasonwoof.com>
Thu, 17 Dec 2015 04:56:13 +0000 (23:56 -0500)
committer Jason Woofenden <jason@jasonwoof.com>
Thu, 17 Dec 2015 04:56:13 +0000 (23:56 -0500)
parse-html.coffee

index 5b1b175..08dd98f 100644 (file)
@@ -18,7 +18,9 @@
 # This file implements a parser for html snippets, meant to be used by a
 # WYSIWYG editor. Hence it does not attempt to parse doctypes, <html>, <head>
 # or <body> tags, nor does it produce the top level "document" node in the dom
-# tree, nor nodes for html, head or body.
+# tree, nor nodes for html, head or body. Comments containing "fixfull"
+# indicate places where additional code is needed for full HTML document
+# parsing.
 #
 # Instead, the data structure produced by this parser is an array of nodes.
 #
@@ -62,8 +64,8 @@ class Node
                                ret += JSON.stringify @name
                                ret += ','
                                ret += JSON.stringify @attrs
-                               ret += ','
-                               sep = '['
+                               ret += ',['
+                               sep = ''
                                for c in @children
                                        ret += sep
                                        sep = ','
@@ -261,6 +263,7 @@ parse_html = (txt, parse_error_cb = null) ->
        tok_cur_tag = null # partially parsed tag
        flag_frameset_ok = null
        flag_parsing = null
+       flag_foster_parenting = null
        afe = [] # active formatting elements
 
        parse_error = ->
@@ -345,7 +348,7 @@ parse_html = (txt, parse_error_cb = null) ->
                # Create
                loop
                        el = afe[i].shallow_clone()
-                       tree_insert_tag el
+                       tree_insert_element el
                        afe[i] = el
                        break if i is 0
                        i -= 1
@@ -484,7 +487,7 @@ parse_html = (txt, parse_error_cb = null) ->
                        # 14. Insert whatever last node ended up being in the previous step
                        # at the appropriate place for inserting a node, but using common
                        # ancestor as the override target.
-                       tree_insert_tag last_node, ca
+                       tree_insert_element last_node, ca
                        # 15. Create an element for the token for which formatting element
                        # was created, in the HTML namespace, with furthest block as the
                        # intended parent.
@@ -536,32 +539,110 @@ parse_html = (txt, parse_error_cb = null) ->
                        # TODO pop stack until 'p' popped
 
        # http://www.w3.org/TR/html5/syntax.html#insert-a-character
-       tree_insert_a_character = (t) ->
-               # FIXME read spec for "adjusted insertion location, etc, this might be wrong
-               dest = open_els[0].children
-               if dest.length > 0 and dest[dest.length - 1].type is TYPE_TEXT
-                       dest[dest.length - 1].text += t.text
+       tree_insert_text = (t) ->
+               dest = adjusted_insertion_location()
+               if dest[1] > 0
+                       prev = dest[0].children[dest[1] - 1]
+                       if prev.type is TYPE_TEXT
+                               prev.text += t.text
+                               return
+               dest[0].children.splice dest[1], 0, t
+
+       # 8.2.5.1
+       # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes
+       # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node
+       adjusted_insertion_location = (override_target = null) ->
+               # 1. If there was an override target specified, then let target be the
+               # override target.
+               if override_target?
+                       target = override_target
+               else # Otherwise, let target be the current node.
+                       target = open_els[0]
+               # 2. Determine the adjusted insertion location using the first matching
+               # steps from the following list:
+               #
+               # If foster parenting is enabled and target is a table, tbody, tfoot,
+               # thead, or tr element (note: foster parenting happens when content
+               # is misnested in tables):
+               if flag_foster_parenting and target.name in foster_parenting_targets
+                       console.log "foster parenting isn't implemented yet" # TODO
+                       # 1. Let last template be the last template element in the stack of
+                       # open elements, if any.
+                       # 2. Let last table be the last table element in the stack of open
+                       # elements, if any.
+
+                       # 3. If there is a last template and either there is no last table,
+                       # or there is one, but last template is lower (more recently added)
+                       # than last table in the stack of open elements, then: let adjusted
+                       # insertion location be inside last template's template contents,
+                       # after its last child (if any), and abort these substeps.
+
+                       # 4. If there is no last table, then let adjusted insertion
+                       # location be inside the first element in the stack of open
+                       # elements (the html element), after its last child (if any), and
+                       # abort these substeps. (fragment case)
+
+                       # 5. If last table has a parent element, then let adjusted
+                       # insertion location be inside last table's parent element,
+                       # immediately before last table, and abort these substeps.
+
+                       # 6. Let previous element be the element immediately above last
+                       # table in the stack of open elements.
+
+                       # 7. Let adjusted insertion location be inside previous element,
+                       # after its last child (if any).
+
+                       # Note: These steps are involved in part because it's possible for
+                       # elements, the table element in this case in particular, to have
+                       # been moved by a script around in the DOM, or indeed removed from
+                       # the DOM entirely, after the element was inserted by the parser.
                else
-                       dest.push t
+                       # Otherwise: let adjusted insertion location be inside target,
+                       # after its last child (if any).
+                       target_i = target.children.length
 
-       # FIXME read spec, do this right
-       # FIXME implement the override target thing
-       # note: this assumes it's an open tag
-       tree_insert_tag = (t, override_target = null) ->
+               # 3. If the adjusted insertion location is inside a template element,
+               # let it instead be inside the template element's template contents,
+               # after its last child (if any). TODO
+
+               # 4. Return the adjusted insertion location.
+               return [target, target_i]
+
+       # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token
+       # aka create_an_element_for_token
+       token_to_element = (t, namespace, intended_parent) ->
                t.type = TYPE_TAG # not TYPE_OPEN_TAG
                # convert attributes into a hash
+               attrs = {}
                while t.attrs_a.length
                        a = t.attrs_a.pop()
-                       t.attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
-               if t.parent?
-                       for c, i of t.parent.children
-                               if c is t
-                                       t.parent.children.splice i, 1
-               # FIXME spec says to do something to figure out what parent should be
-               parent = open_els[0]
-               open_els.unshift t
-               parent.children.push t
-               t.parent = parent
+                       attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs
+               el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs
+
+               # TODO 2. If the newly created element has an xmlns attribute in the
+               # XMLNS namespace whose value is not exactly the same as the element's
+               # namespace, that is a parse error. Similarly, if the newly created
+               # element has an xmlns:xlink attribute in the XMLNS namespace whose
+               # value is not the XLink Namespace, that is a parse error.
+
+               # fixfull: the spec says stuff about form pointers and ownerDocument
+
+               return el
+
+       # FIXME implement "foster parenting" part
+       # FIXME read spec, do this right
+       # FIXME implement the override target thing
+       # note: this assumes it's an open tag
+       # TODO tree_insert_html_element = (t, ...
+       tree_insert_element = (el, override_target = null, namespace = null) ->
+               dest = adjusted_insertion_location override_target
+               if el.type is TYPE_OPEN_TAG # means it's a "token"
+                       el = token_to_element el, namespace, dest[0]
+               # fixfull: Document nodes sometimes can't accept more children
+               dest[0].children.splice dest[1], 0, el
+               el.parent = dest[0]
+               open_els.unshift el
+               return el
 
        # http://www.w3.org/TR/html5/syntax.html#insert-a-comment
        tree_insert_a_comment = (t) ->
@@ -590,10 +671,10 @@ parse_html = (txt, parse_error_cb = null) ->
                                                parse_error()
                                        when "\t", "\u000a", "\u000c", "\u000d", ' '
                                                reconstruct_active_formatting_elements()
-                                               tree_insert_a_character t
+                                               tree_insert_text t
                                        else
                                                reconstruct_active_formatting_elements()
-                                               tree_insert_a_character t
+                                               tree_insert_text t
                                                flag_frameset_ok = false
                        when TYPE_COMMENT
                                tree_insert_a_comment t
@@ -618,22 +699,22 @@ parse_html = (txt, parse_error_cb = null) ->
                                                # TODO
                                        when 'address', 'article', 'aside', 'blockquote', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'main', 'nav', 'ol', 'p', 'section', 'summary', 'ul'
                                                close_p_if_in_button_scope()
-                                               tree_insert_tag t
+                                               tree_insert_element t
                                        when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'
                                                close_p_if_in_button_scope()
                                                if open_els[0].name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
                                                        parse_error()
                                                        open_els.shift()
-                                               tree_insert_tag t
+                                               tree_insert_element t
                                        # TODO lots more to implement here
                                        when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u'
                                                reconstruct_active_formatting_elements()
-                                               tree_insert_tag t
-                                               afe.push t
+                                               el = tree_insert_element t
+                                               afe.push el
                                        # TODO lots more to implement here
                                        else # any other start tag
                                                reconstruct_active_formatting_elements()
-                                               tree_insert_tag t
+                                               tree_insert_element t
                        when TYPE_EOF
                                ok_tags = {
                                        dd: true, dt: true, li: true, p: true, tbody: true, td: true,
@@ -1013,6 +1094,7 @@ parse_html = (txt, parse_error_cb = null) ->
        tree_state = tree_in_body
        flag_frameset_ok = true
        flag_parsing = true
+       flag_foster_parenting = false
        afe = [] # active formatting elements
 
        # tokenizer initialization
@@ -1119,5 +1201,5 @@ test_parser name: "crazy formatting elements test", \
        html: "<b><i><a><s><tt><div></b>first</b></div></tt></s></a>second</i>",
        # chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]'
        # firefox does this:
-       expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"'
+       expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"',
        errors: 6 # no idea how many there should be