From 30f16076d228a36c70d42c6ab6aa71c94bdf7a57 Mon Sep 17 00:00:00 2001 From: Jason Woofenden Date: Wed, 16 Dec 2015 23:56:13 -0500 Subject: [PATCH] implement most details about where to insert nodes --- parse-html.coffee | 148 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 115 insertions(+), 33 deletions(-) diff --git a/parse-html.coffee b/parse-html.coffee index 5b1b175..08dd98f 100644 --- a/parse-html.coffee +++ b/parse-html.coffee @@ -18,7 +18,9 @@ # This file implements a parser for html snippets, meant to be used by a # WYSIWYG editor. Hence it does not attempt to parse doctypes, , # or tags, nor does it produce the top level "document" node in the dom -# tree, nor nodes for html, head or body. +# tree, nor nodes for html, head or body. Comments containing "fixfull" +# indicate places where additional code is needed for full HTML document +# parsing. # # Instead, the data structure produced by this parser is an array of nodes. # @@ -62,8 +64,8 @@ class Node ret += JSON.stringify @name ret += ',' ret += JSON.stringify @attrs - ret += ',' - sep = '[' + ret += ',[' + sep = '' for c in @children ret += sep sep = ',' @@ -261,6 +263,7 @@ parse_html = (txt, parse_error_cb = null) -> tok_cur_tag = null # partially parsed tag flag_frameset_ok = null flag_parsing = null + flag_foster_parenting = null afe = [] # active formatting elements parse_error = -> @@ -345,7 +348,7 @@ parse_html = (txt, parse_error_cb = null) -> # Create loop el = afe[i].shallow_clone() - tree_insert_tag el + tree_insert_element el afe[i] = el break if i is 0 i -= 1 @@ -484,7 +487,7 @@ parse_html = (txt, parse_error_cb = null) -> # 14. Insert whatever last node ended up being in the previous step # at the appropriate place for inserting a node, but using common # ancestor as the override target. - tree_insert_tag last_node, ca + tree_insert_element last_node, ca # 15. 
Create an element for the token for which formatting element # was created, in the HTML namespace, with furthest block as the # intended parent. @@ -536,32 +539,110 @@ parse_html = (txt, parse_error_cb = null) -> # TODO pop stack until 'p' popped # http://www.w3.org/TR/html5/syntax.html#insert-a-character - tree_insert_a_character = (t) -> - # FIXME read spec for "adjusted insertion location, etc, this might be wrong - dest = open_els[0].children - if dest.length > 0 and dest[dest.length - 1].type is TYPE_TEXT - dest[dest.length - 1].text += t.text + tree_insert_text = (t) -> + dest = adjusted_insertion_location() + if dest[1] > 0 + prev = dest[0].children[dest[1] - 1] + if prev.type is TYPE_TEXT + prev.text += t.text + return + dest[0].children.splice dest[1], 0, t + + # 8.2.5.1 + # http://www.w3.org/TR/html5/syntax.html#creating-and-inserting-nodes + # http://www.w3.org/TR/html5/syntax.html#appropriate-place-for-inserting-a-node + adjusted_insertion_location = (override_target = null) -> + # 1. If there was an override target specified, then let target be the + # override target. + if override_target? + target = override_target + else # Otherwise, let target be the current node. + target = open_els[0] + # 2. Determine the adjusted insertion location using the first matching + # steps from the following list: + # + # If foster parenting is enabled and target is a table, tbody, tfoot, + # thead, or tr element Foster parenting happens when content is + # misnested in tables. + if flag_foster_parenting and target.name in foster_parenting_targets + console.log "foster parenting isn't implemented yet" # TODO + # 1. Let last template be the last template element in the stack of + # open elements, if any. + # 2. Let last table be the last table element in the stack of open + # elements, if any. + + # 3. 
If there is a last template and either there is no last table, + # or there is one, but last template is lower (more recently added) + # than last table in the stack of open elements, then: let adjusted + # insertion location be inside last template's template contents, + # after its last child (if any), and abort these substeps. + + # 4. If there is no last table, then let adjusted insertion + # location be inside the first element in the stack of open + # elements (the html element), after its last child (if any), and + # abort these substeps. (fragment case) + + # 5. If last table has a parent element, then let adjusted + # insertion location be inside last table's parent element, + # immediately before last table, and abort these substeps. + + # 6. Let previous element be the element immediately above last + # table in the stack of open elements. + + # 7. Let adjusted insertion location be inside previous element, + # after its last child (if any). + + # Note: These steps are involved in part because it's possible for + # elements, the table element in this case in particular, to have + # been moved by a script around in the DOM, or indeed removed from + # the DOM entirely, after the element was inserted by the parser. else - dest.push t + # Otherwise Let adjusted insertion location be inside target, after + # its last child (if any). + target_i = target.children.length - # FIXME read spec, do this right - # FIXME implement the override target thing - # note: this assumes it's an open tag - tree_insert_tag = (t, override_target = null) -> + # 3. If the adjusted insertion location is inside a template element, + # let it instead be inside the template element's template contents, + # after its last child (if any). TODO + + # 4. Return the adjusted insertion location. 
+ return [target, target_i] + + # http://www.w3.org/TR/html5/syntax.html#create-an-element-for-the-token + # aka create_an_element_for_token + token_to_element = (t, namespace, intended_parent) -> t.type = TYPE_TAG # not TYPE_OPEN_TAG # convert attributes into a hash + attrs = {} while t.attrs_a.length a = t.attrs_a.pop() - t.attrs[a[0]] = a[1] # TODO check what to do with dupilcate attrs - if t.parent? - for c, i of t.parent.children - if c is t - t.parent.children.splice i, 1 - # FIXME spec says to do something to figure out what parent should be - parent = open_els[0] - open_els.unshift t - parent.children.push t - t.parent = parent + attrs[a[0]] = a[1] # TODO check what to do with duplicate attrs + el = new Node TYPE_TAG, name: t.name, namespace: namespace, attrs: attrs + + # TODO 2. If the newly created element has an xmlns attribute in the + # XMLNS namespace whose value is not exactly the same as the element's + # namespace, that is a parse error. Similarly, if the newly created + # element has an xmlns:xlink attribute in the XMLNS namespace whose + # value is not the XLink Namespace, that is a parse error. + + # fixfull: the spec says stuff about form pointers and ownerDocument + + return el + + # FIXME implement "foster parenting" part + # FIXME read spec, do this right + # FIXME implement the override target thing + # note: this assumes it's an open tag + # TODO tree_insert_html_element = (t, ... 
+ tree_insert_element = (el, override_target = null, namespace = null) -> + dest = adjusted_insertion_location override_target + if el.type is TYPE_OPEN_TAG # means it's a "token" + el = token_to_element el, namespace, dest[0] + # fixfull: Document nodes sometimes can't accept more children + dest[0].children.splice dest[1], 0, el + el.parent = dest[0] + open_els.unshift el + return el # http://www.w3.org/TR/html5/syntax.html#insert-a-comment tree_insert_a_comment = (t) -> @@ -590,10 +671,10 @@ parse_html = (txt, parse_error_cb = null) -> parse_error() when "\t", "\u000a", "\u000c", "\u000d", ' ' reconstruct_active_formatting_elements() - tree_insert_a_character t + tree_insert_text t else reconstruct_active_formatting_elements() - tree_insert_a_character t + tree_insert_text t flag_frameset_ok = false when TYPE_COMMENT tree_insert_a_comment t @@ -618,22 +699,22 @@ parse_html = (txt, parse_error_cb = null) -> # TODO when 'address', 'article', 'aside', 'blockquote', 'center', 'details', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'figcaption', 'figure', 'footer', 'header', 'hgroup', 'main', 'nav', 'ol', 'p', 'section', 'summary', 'ul' close_p_if_in_button_scope() - tree_insert_tag t + tree_insert_element t when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6' close_p_if_in_button_scope() if open_els[0].name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'] parse_error() open_els.shift() - tree_insert_tag t + tree_insert_element t # TODO lots more to implement here when 'b', 'big', 'code', 'em', 'font', 'i', 's', 'small', 'strike', 'strong', 'tt', 'u' reconstruct_active_formatting_elements() - tree_insert_tag t - afe.push t + el = tree_insert_element t + afe.push el # TODO lots more to implement here else # any other start tag reconstruct_active_formatting_elements() - tree_insert_tag t + tree_insert_element t when TYPE_EOF ok_tags = { dd: true, dt: true, li: true, p: true, tbody: true, td: true, @@ -1013,6 +1094,7 @@ parse_html = (txt, parse_error_cb = null) -> tree_state = tree_in_body 
flag_frameset_ok = true flag_parsing = true + flag_foster_parenting = false afe = [] # active formatting elements # tokenizer initialization @@ -1119,5 +1201,5 @@ test_parser name: "crazy formatting elements test", \ html: "second
first
", # chrome does this: expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]],text:"second"]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]]' # firefox does this: - expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"' + expected: 'tag:"b",{},[tag:"i",{},[tag:"a",{},[tag:"s",{},[tag:"tt",{},[]]]]],tag:"a",{},[tag:"s",{},[tag:"tt",{},[tag:"div",{},[tag:"b",{},[],text:"first"]]]],text:"second"', errors: 6 # no idea how many there should be -- 1.7.10.4